Fix fname

31c0779e · MK · 05e92610 · 31c0779e · 31c0779e · 31c0779e
Commit 31c0779e authored 1 year ago by MK
Hide whitespace changes
Inline Side-by-side

Showing 5 changed files with 37 additions and 8 deletions
+37 -8
--- a/README.md
+++ b/README.md

 <p align="center">
-<picture>
 <img src="docs/img/MyLogo.png" alt="Library scheme" height="200"/>
-</picture>
 </p>

 <h2 align="center">

--- a/autotm/algorithms_for_tuning/genetic_algorithm/mutation.py
+++ b/autotm/algorithms_for_tuning/genetic_algorithm/mutation.py
@@ -11,12 +11,43 @@ def mutation_one_param(
        high_spm: float,
        low_n: int,
        high_n: int,
-        low_back: float,
-        high_back: float,
+        low_back: int,
+        high_back: int,
        low_decor: float,
        high_decor: float,
        elem_mutation_prob: float = 0.1,
 ):
+    """
+    One-point mutation
+
+    Each element is considered for mutation independently, with probability elem_mutation_prob
+
+    Parameters
+    ----------
+    individ: List[float]
+        Individual to be processed
+    low_spb: float
+        The lower possible bound for sparsity regularizer of back topics
+    high_spb: float
+        The higher possible bound for sparsity regularizer of back topics
+    low_spm: float
+        The lower possible bound for sparsity regularizer of specific topics
+    high_spm: float
+        The higher possible bound for sparsity regularizer of specific topics
+    low_n: int
+        The lower possible bound for amount of iterations between stages
+    high_n: int
+        The higher possible bound for amount of iterations between stages
+    low_back: int
+        The lower possible bound for amount of back topics
+    high_back: int
+        The higher possible bound for amount of back topics
+
+
+    Returns
+    ----------
+    Updated individuals with exchanged chromosome parts
+    """
    for i in range(len(individ)):
        if random.random() <= elem_mutation_prob:
            if i in [2, 3]:

--- a/autotm/fitness/tm.py
+++ b/autotm/fitness/tm.py
@@ -59,7 +59,7 @@ class Dataset:
    _ppmi_dict_df_path: str = "ppmi_df.txt"
    _ppmi_dict_tf_path: str = "ppmi_tf.txt"
    _mutual_info_dict_path: str = "mutual_info_dict.pkl"
-    _texts_path: str = "ppp.csv"
+    _texts_path: str = "prep_df.csv"
    _labels_path = "labels.pkl"

    def __init__(self, base_path: str, topic_count: int):

--- a/autotm/preprocessing/dictionaries_preparation.py
+++ b/autotm/preprocessing/dictionaries_preparation.py
@@ -322,7 +322,7 @@ def mutual_info_dict_preparation(fname):


 def prepare_all_artifacts(save_path: str):
-    DATASET_PATH = os.path.join(save_path, "ppp.csv")
+    DATASET_PATH = os.path.join(save_path, "prep_df.csv")
    BATCHES_DIR = os.path.join(save_path, "batches")
    WV_PATH = os.path.join(save_path, "test_set_data_voc.txt")
    COOC_DICTIONARY_PATH = os.path.join(save_path, "cooc_dictionary.txt")
@@ -333,7 +333,7 @@ def prepare_all_artifacts(save_path: str):
    ppmi_dict_df = os.path.join(save_path, "ppmi_df.txt")
    ppmi_dict_tf = os.path.join(save_path, "ppmi_tf.txt")
    MUTUAL_INFO_DICT_PATH = os.path.join(save_path, "mutual_info_dict.pkl")
-    DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "ppp.csv")
+    DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "prep_df.csv")

    # TODO: check why batch vectorizer is returned (unused further)
    prepare_batch_vectorizer(

--- a/autotm/preprocessing/text_preprocessing.py
+++ b/autotm/preprocessing/text_preprocessing.py
@@ -164,7 +164,7 @@ def process_dataset(
    :return:
    """
    os.makedirs(save_path, exist_ok=True)
-    save_path = os.path.join(save_path, "ppp.csv")
+    save_path = os.path.join(save_path, "prep_df.csv")
    data = pd.read_csv(fname) if isinstance(fname, str) else cast(pd.DataFrame, fname)
    data = parallelize_dataframe(
        data, lemmatize_text, n_cores, lang=lang, col_to_process=col_to_process