Commit 31c0779e authored by MK

Fix fname

1 merge request: !19 Dev/topic based text splitter
Showing 37 additions and 8 deletions
<p align="center">
<picture>
<img src="docs/img/MyLogo.png" alt="Library scheme" height="200"/>
</picture>
</p>
<h2 align="center">
......
@@ -11,12 +11,43 @@ def mutation_one_param(
    high_spm: float,
    low_n: int,
    high_n: int,
-   low_back: float,
-   high_back: float,
+   low_back: int,
+   high_back: int,
    low_decor: float,
    high_decor: float,
    elem_mutation_prob: float = 0.1,
):
"""
One-point mutation
Checking the probability of mutation for each of the elements
Parameters
----------
individ: List[float]
Individual to be processed
low_spb: float
The lower possible bound for sparsity regularizer of back topics
high_spb: float
The higher possible bound for sparsity regularizer of back topics
low_spm: float
The lower possible bound for sparsity regularizer of specific topics
high_spm: float
The higher possible bound for sparsity regularizer of specific topics
low_n: int
The lower possible bound for amount of iterations between stages
high_n: int
The higher possible bound for amount of iterations between stages
low_back:
The lower possible bound for amount of back topics
high_back:
The higher possible bound for amount of back topics
Returns
----------
Updated individuals with exchanged chromosome parts
"""
    for i in range(len(individ)):
        if random.random() <= elem_mutation_prob:
            if i in [2, 3]:
......
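For readers unfamiliar with the pattern above, here is a minimal standalone sketch of element-wise mutation (not part of this commit; the mutate_elementwise name and the per-position bound lists are illustrative assumptions):

import random
from typing import List


def mutate_elementwise(
    individ: List[float],
    low: List[float],
    high: List[float],
    elem_mutation_prob: float = 0.1,
) -> List[float]:
    # Each gene mutates independently with probability elem_mutation_prob,
    # drawing a fresh value from its per-position [low[i], high[i]] range.
    for i in range(len(individ)):
        if random.random() <= elem_mutation_prob:
            individ[i] = random.uniform(low[i], high[i])
    return individ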
@@ -59,7 +59,7 @@ class Dataset:
    _ppmi_dict_df_path: str = "ppmi_df.txt"
    _ppmi_dict_tf_path: str = "ppmi_tf.txt"
    _mutual_info_dict_path: str = "mutual_info_dict.pkl"
-   _texts_path: str = "ppp.csv"
+   _texts_path: str = "prep_df.csv"
    _labels_path = "labels.pkl"

    def __init__(self, base_path: str, topic_count: int):
......
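The attributes above store artifact file names relative to the dataset directory; a minimal sketch of how such names are typically resolved against base_path (the texts_path property below is a hypothetical illustration, not the class's actual API):

import os


class Dataset:
    # Relative artifact file names, resolved against base_path at runtime.
    _texts_path: str = "prep_df.csv"
    _labels_path = "labels.pkl"

    def __init__(self, base_path: str, topic_count: int):
        self.base_path = base_path
        self.topic_count = topic_count

    @property
    def texts_path(self) -> str:
        # Hypothetical helper showing the base_path + relative-name pattern.
        return os.path.join(self.base_path, self._texts_path)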
@@ -322,7 +322,7 @@ def mutual_info_dict_preparation(fname):
def prepare_all_artifacts(save_path: str):
-   DATASET_PATH = os.path.join(save_path, "ppp.csv")
+   DATASET_PATH = os.path.join(save_path, "prep_df.csv")
    BATCHES_DIR = os.path.join(save_path, "batches")
    WV_PATH = os.path.join(save_path, "test_set_data_voc.txt")
    COOC_DICTIONARY_PATH = os.path.join(save_path, "cooc_dictionary.txt")
@@ -333,7 +333,7 @@ def prepare_all_artifacts(save_path: str):
    ppmi_dict_df = os.path.join(save_path, "ppmi_df.txt")
    ppmi_dict_tf = os.path.join(save_path, "ppmi_tf.txt")
    MUTUAL_INFO_DICT_PATH = os.path.join(save_path, "mutual_info_dict.pkl")
-   DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "ppp.csv")
+   DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "prep_df.csv")

    # TODO: check why batch vectorizer is returned (unused further)
    prepare_batch_vectorizer(
......
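The same preprocessed-texts file name is hard-coded in several of the modules touched above. One way to keep such call sites in sync is a single shared constant; a minimal sketch under that assumption (PREPROCESSED_TEXTS_FILENAME and dataset_path are hypothetical names, not part of this repository):

import os

# Hypothetical shared constant; the repository currently repeats the string literal.
PREPROCESSED_TEXTS_FILENAME = "prep_df.csv"


def dataset_path(save_path: str) -> str:
    # Every caller builds the path the same way, so a future rename
    # touches one line instead of several files.
    return os.path.join(save_path, PREPROCESSED_TEXTS_FILENAME)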
@@ -164,7 +164,7 @@ def process_dataset(
    :return:
    """
    os.makedirs(save_path, exist_ok=True)
-   save_path = os.path.join(save_path, "ppp.csv")
+   save_path = os.path.join(save_path, "prep_df.csv")
    data = pd.read_csv(fname) if isinstance(fname, str) else cast(pd.DataFrame, fname)
    data = parallelize_dataframe(
        data, lemmatize_text, n_cores, lang=lang, col_to_process=col_to_process
......
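parallelize_dataframe above splits the frame across worker processes and applies lemmatize_text to each chunk; a minimal sketch of that pattern, assuming a multiprocessing.Pool plus numpy.array_split implementation (the real helper's signature and internals may differ):

from functools import partial
from multiprocessing import Pool

import numpy as np
import pandas as pd


def parallelize_dataframe(df: pd.DataFrame, func, n_cores: int, **kwargs) -> pd.DataFrame:
    # Split the frame into n_cores chunks, process each chunk in its own
    # worker process, then stitch the results back together in order.
    chunks = np.array_split(df, n_cores)
    with Pool(n_cores) as pool:
        results = pool.map(partial(func, **kwargs), chunks)
    return pd.concat(results)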