Commit 60b58ab3 authored by Alexey Lapin

Added generation of column descriptions, updated project structure, slightly updated architecture

Showing with 741 additions and 347 deletions
File moved
from pprint import pprint

from fedotllm.actions import ModelAction
from fedotllm.data import Dataset
from fedotllm.web_api import WebAssistant


def main():
    # Endpoint of the internally hosted 8b chat model
    LLAMA8B = "http://10.32.2.2:8672/v1/chat/completions"

    dataset = Dataset.load_from_path('datasets/titanic')
    train = dataset.splits[0]

    model = WebAssistant(LLAMA8B, model_type='8b')
    action = ModelAction(model=model)

    # Generate a one-sentence description for every column of the train split
    # and store it in the split's column metadata.
    column_descriptions = action.generate_all_column_description(split=train, dataset=dataset)
    train.set_column_descriptions(column_descriptions)
    pprint(train.get_column_descriptions())


if __name__ == '__main__':
    main()
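
# Note: Dataset.load_from_path expects a metadata.json file in the dataset
# directory. A minimal, illustrative layout (field values are examples;
# per-split "description" and "columns" with "hint"/"description" entries
# are optional):
#
# {
#     "name": "titanic passengers survival",
#     "description": "passengers survived the Titanic shipwreck",
#     "goal": "predict which passengers survived",
#     "splits": [
#         {"name": "train", "path": "train.csv",
#          "columns": {"Sex": {"hint": "passenger sex, male/female"}}},
#         {"name": "test_X", "path": "test_X.csv"}
#     ]
# }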
File moved
import ast
import json
import os
import random
import re
from typing import Dict

import pandas as pd
from requests.exceptions import RequestException
from tenacity import (retry, retry_if_exception_type, stop_after_attempt,
                      wait_random_exponential)

from fedotllm.data import Dataset, Split
_MAX_RETRIES = 6
class ModelAction:
def __init__(self, model) -> None:
self.model = model
    def run_model_multicall(self, tokenizer, generation_config, prompts):
        """Run all prompts on the local model."""
responses = {}
for task in prompts:
messages = [
{"role": "system", "content": prompts[task]["system"]},
{"role": "context", "content": prompts[task]["context"]},
{"role": "user", "content": prompts[task]["task"]},
]
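            # NB: "context" is not a standard chat role; this assumes the
            # tokenizer's chat template explicitly supports it, otherwise
            # apply_chat_template will reject the message.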
input_ids = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt"
).to(self.model.device)
outputs = self.model.generate(
input_ids,
**generation_config
)
response = outputs[0][input_ids.shape[-1]:]
responses[task] = tokenizer.decode(
response, skip_special_tokens=True)
return responses
@retry(
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(_MAX_RETRIES),
retry=(retry_if_exception_type(RequestException)
| retry_if_exception_type(RuntimeError)),
reraise=True
)
    def run_web_model_multicall(self, prompts):
        """Run all prompts on the web model."""
responses = {}
for task in prompts:
self.model.set_sys_prompt(prompts[task]["system"])
self.model.add_context(prompts[task]["context"])
response = self.model(prompts[task]["task"], as_json=True)
responses[task] = response
return responses
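
    # Illustrative shape of the `prompts` argument accepted by both
    # multicall methods (the notebook below shows a real instance):
    # prompts = {
    #     "task_type": {"system": "<dataset description>",
    #                   "context": None,
    #                   "task": "<instruction>"},
    #     ...
    # }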
@retry(
stop=stop_after_attempt(_MAX_RETRIES),
reraise=True
)
    def __generate_column_description(self, column: pd.Series, split: Split, dataset: Dataset):
        """
        Generate a description for a column.

        Args:
            column (pd.Series): The column for which the description is generated.
            split (Split): The split containing the column; used to look up the column hint.
            dataset (Dataset): The dataset containing the column.

        Returns:
            dict: A dictionary containing the generated description for the column.

        Raises:
            RuntimeError: If the answer is not found in the response or if the 'data' node is not found in the response.
        """
FIND_ANSWER = re.compile(
r"\{['\"]data['\"]\s*:\s*\{['\"]type['\"]\s*:\s*['\"]string['\"]\s*\,\s*['\"]description['\"]\s*:\s*['\"].*['\"]\}\}")
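        # FIND_ANSWER matches, after the normalization below, answers of the
        # expected form:
        #   {'data': {'type': 'string', 'description': '<one-line text>'}}
        # i.e. the JSON schema requested in generate_all_column_description.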
user_template = """Dataset Title: {title}
Dataset description: {ds_descr}
Column name: {col_name}
Column hint: {hint}
Column values:
```
{values}
```
"""
self.model.set_context(column.head(30).to_markdown(index=False))
column_uniq_vals = column.unique().tolist()
        column_vals = pd.Series(
            column_uniq_vals if len(column_uniq_vals) < 30
            else random.sample(column_uniq_vals, k=30),
            name=column.name
        )
user_prompt = user_template.format(
title=dataset.name,
ds_descr=dataset.description,
col_name=column.name,
hint=split.get_column_hint(column.name),
values=column_vals.to_markdown(index=False)
)
        response = self.model(user_prompt, as_json=True)
        # Normalize before matching; note that capitalize() also lowercases
        # everything after the first character of the response.
        response = response.strip().replace('\n', '').capitalize()
        answer = re.findall(FIND_ANSWER, response)
        if not answer:
            raise RuntimeError(f"Answer not found in: {response}")
        dict_resp = ast.literal_eval(answer[0])
        if "data" not in dict_resp:
            raise RuntimeError(f"'data' node not found in: {response}")
        return dict_resp
    def generate_all_column_description(self, split: Split, dataset: Dataset) -> Dict[str, str]:
        """Generate descriptions for all columns in the provided split.

        Args:
            split (Split): The split containing the columns to describe.
            dataset (Dataset): The dataset the split belongs to.

        Returns:
            A dictionary where keys are column names and values are the generated descriptions.
        """
schema = {
"data": {
"type": "string",
"description": "one line plain text"
}
}
        sys_prompt = """You are a helpful AI assistant.
The user will enter one column from a dataset, and the assistant will produce a one-sentence description of the data in this column.
Don't make assumptions about what values to plug into functions. Use the column hint.
Output format: only JSON using the schema defined here: {schema}""".format(schema=json.dumps(schema))
self.model.set_sys_prompt(sys_prompt)
result = {}
for col_name in split.data.columns:
result[col_name] = self.__generate_column_description(column=split.data[col_name],
split=split,
dataset=dataset)['data']['description']
return result
def process_model_responses(responses):
    responses["categorical_columns"] = responses["categorical_columns"].split("\n")
    responses["task_type"] = responses["task_type"].lower()
    return responses
def save_model_responses(responses, path):
with open(os.sep.join([path, 'model_responses.json']), 'w') as json_file:
json.dump(responses, json_file)
import json
import os
import requests
import arff
import pandas as pd
-from typing import Any
+from typing import Any, Dict
class Split:
"""
Split within dataset object
"""
-    def __init__(self, name, data, path, description) -> None:
+    def __init__(self, name: str, data: pd.DataFrame, path: str, description: str, columns: dict | None) -> None:
"""
        Initialize an instance of a Split.
"""
@@ -19,44 +20,77 @@ class Split:
self.data = data
self.path = path
self.description = description
+        # init columns metadata info
+        self.columns_meta = {col: {} for col in data.columns}
+        if columns is not None:
+            for col, metainfo in columns.items():
+                if col not in self.columns_meta:
+                    raise RuntimeError(f"Failed to find the column {col} defined in the metadata.json file")
+                if 'hint' in metainfo:
+                    self.columns_meta[col]['hint'] = metainfo['hint']
+                if 'description' in metainfo:
+                    self.columns_meta[col]['description'] = metainfo['description']
def get_description(self):
return f"The {self.name} split contains following columns: {self.data.columns}. It is described as {self.description}"
def get_text_columns(self):
return list(self.data.select_dtypes(include=['object']).columns)
def get_numeric_columns(self):
return list(self.data.select_dtypes(include=['number']).columns)
def get_unique_counts(self):
return self.data.apply(lambda col: col.nunique())
def get_unique_ratios(self):
-        return self.data.apply(lambda col: col.nunique() / len(col))
+        return self.data.apply(lambda col: round(col.nunique() / len(col.dropna()), 2))
def get_column_types(self):
return self.data.apply(lambda col: "string" if col.name in self.get_text_columns() else "numeric")
-    def get_head_by_column(self, column_name, count = 10):
+    def get_head_by_column(self, column_name, count=10):
return list(self.data[column_name].head(count))
def get_column_descriptions(self):
-        unique_counts = self.get_unique_counts()
-        unique_ratios = self.get_unique_ratios()
-        column_types = self.get_column_types()
-        column_descriptions = [f"{column_name}: {column_types[column_name]}"
-                               f"{100 * unique_ratios[column_name]:.2f}% unique values, examples: {self.get_head_by_column(column_name)}"
-                               for column_name in self.data.columns]
-        return column_descriptions
+        return {key: value['description'] for key, value in self.columns_meta.items() if 'description' in value}
+    def get_column_hint(self, column_name: str) -> None | str:
+        """
+        Get the hint associated with a specific column.
+
+        Args:
+            column_name (str): The name of the column.
+
+        Returns:
+            str or None: The hint associated with the column, or None if not found.
+        """
+        return self.columns_meta.get(column_name, {}).get('hint')
+
+    def set_column_descriptions(self, column_description: Dict[str, str]) -> None:
+        """
+        Set descriptions for columns in the metadata.
+
+        Args:
+            column_description (Dict[str, str]): A dictionary where keys are column names and values are descriptions.
+        """
+        for key, value in column_description.items():
+            if key in self.columns_meta:
+                self.columns_meta[key]['description'] = value
class Dataset:
"""
Dataset object that represents an ML task and may contain multiple splits
"""
-    def __init__(self, name, description, goal, splits, train_split_name) -> None:
+    def __init__(self, name, description, goal, splits) -> None:
"""
Initialize an instance of a Dataset.
"""
@@ -64,7 +98,7 @@ class Dataset:
self.description = description
self.goal = goal
self.splits = splits
-        self.train_split_name = train_split_name
@classmethod
def load_from_path(cls, path):
@@ -75,42 +109,39 @@ class Dataset:
"""
with open(os.sep.join([path, 'metadata.json']), 'r') as json_file:
dataset_metadata = json.load(json_file)
-        #load each split file
+        # load each split file
splits = []
-        for split_name in dataset_metadata['split_names']:
-            split_path = os.sep.join([path, dataset_metadata["split_paths"][split_name]])
-            split_description = dataset_metadata["split_descriptions"][split_name]
+        for split in dataset_metadata['splits']:
+            split_name = split['name']
+            split_path = os.sep.join([path, split["path"]])
+            split_description = split.get("description", '')
+            split_columns = split.get('columns', None)
if split_path.split(".")[-1] == "csv":
data = pd.read_csv(split_path)
-                split = Split(data = data,
-                              name = split_name,
-                              path = split_path,
-                              description = split_description)
+                split = Split(data=data,
+                              name=split_name,
+                              path=split_path,
+                              description=split_description,
+                              columns=split_columns)
splits.append(split)
elif split_path.split(".")[-1] == "arff":
-                data = arff.loadarff(split_path)
-                split = Split(data = pd.DataFrame(data[0]),
-                              name = split_name,
-                              path = split_path,
-                              description = split_description)
+                data = pd.DataFrame(arff.loadarff(split_path)[0])
+                split = Split(data=data,
+                              name=split_name,
+                              path=split_path,
+                              description=split_description,
+                              columns=split_columns)
splits.append(split)
else:
print(f"split {split_path}: unsupported format")
        # if we have model responses saved already - obsolete for now
        # if os.path.exists(os.sep.join([path, 'model_responses.json'])):
        #     with open(os.sep.join([path, 'model_responses.json']), 'r') as json_file:
        #         model_responses = json.load(json_file)
        #         dataset_metadata.update(model_responses)
return cls(
-            name = dataset_metadata["name"],
-            description = dataset_metadata["description"],
-            goal = dataset_metadata["goal"],
-            splits = splits,
-            train_split_name = dataset_metadata["train_split_name"],
-        )
+            name=dataset_metadata["name"],
+            description=dataset_metadata["description"],
+            goal=dataset_metadata["goal"],
+            splits=splits,
+        )
def get_description(self):
train_split = next(split for split in self.splits if split.name == self.train_split_name)
@@ -118,18 +149,19 @@ class Dataset:
column_descriptions = train_split.get_column_descriptions()
introduction_lines = [
-            f"Assume we have a dataset called '{self.name}', which describes {self.description}, and the task is to {self.goal}.",
-            f""
-        ] + split_description_lines + [
-            f"Below is the type (numeric or string), unique value count and ratio for each column, and few examples of values:",
-            f""
-        ] + column_descriptions + [
-            f"",
-        ]
+            f"Assume we have a dataset called '{self.name}', which describes {self.description}, and the task is to {self.goal}.",
+            "",
+        ] + split_description_lines + [
+            "Below is the type (numeric or string), unique value count and ratio for each "
+            "column, and a few examples of values:",
+            "",
+        ] + column_descriptions + [
+            "",
+        ]
return "\n".join(introduction_lines)
def get_metadata_description(self):
-        splits_names = [split.name for split in self.splits ]
+        splits_names = [split.name for split in self.splits]
description = f"name: {self.name} \ndescription: {self.description} \ntrain_split_name: {self.train_split_name} \nsplits: {splits_names}"
-        return description
\ No newline at end of file
+        return description
File moved
from typing import Literal, Mapping, Tuple, Union
import pandas as pd
def format_dataframes(data: Union[pd.DataFrame, Mapping[str, pd.DataFrame]]) -> str:
"""
Formats the input dataframe or dataframes into markdown format.
Args:
data (pd.DataFrame, Mapping[str, pd.DataFrame]): Input data to be formatted.
Returns:
str: Formatted data in markdown format.
"""
outer = "\n\n```\n{}\n```\n\n"
if isinstance(data, pd.DataFrame):
inner = f"{data.to_markdown(index=False)}"
else:
inner = "\n\n".join(
f"### {key}\n{df.to_markdown(index=False)}"
for key, df in data.items()
)
return outer.format(inner)
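
# Usage sketch (illustrative):
#   format_dataframes(df)
#       -> the single table rendered via to_markdown, wrapped in a ``` fence
#   format_dataframes({"train": train_df, "test": test_df})
#       -> one "### <name>" heading plus table per dataframe, in one fence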
def filter_entities(panel_df: pd.DataFrame, basket: list):
entity_col = panel_df.columns[0]
# Filter rows where the value in the entity column is in the basket
df = panel_df[panel_df[entity_col].isin(basket)]
if df.empty:
raise ValueError(f"No matching entities found in panel given basket: {basket}")
return df
File moved
@@ -29,8 +29,8 @@ class WebAssistant:
"""
self._system_prompt = new_prompt
-    def add_context(self, context: str) -> None:
-        """Add a context to model's prompt
+    def set_context(self, context: str) -> None:
+        """Set the context for the model's prompt
Args:
context (str): context related to question.
@@ -97,10 +97,11 @@ class WebAssistant:
else:
raise NotImplementedError("Model type not supported")
-        response = requests.post(url=self._url, json=formatted_prompt, timeout=timeout)
-        if response.status_code != requests.codes.ok:
-            raise RuntimeError("Error while communicating with the model")
+        try:
+            response = requests.post(url=self._url, json=formatted_prompt, timeout=timeout)
+            response.raise_for_status()
+        except requests.exceptions.HTTPError as err:
+            raise RuntimeError(err)
if kwargs.get('as_json'):
try:
......
File moved
%% Cell type:markdown id:5e96ab50-b396-4e48-81cc-160058738c4e tags:
# Setup
%% Cell type:code id:5ac5964c-acfa-4912-92af-7946c6fa7398 tags:
```
import os
import numpy as np
import pandas as pd
from pprint import pprint
from zip import unzip_archive
from fedot_util import run_example
from llm_util import run_web_model_multicall
from web_api import WebAssistant
from data import Dataset
import prompts
```
%% Cell type:markdown id:1f2ac346 tags:
# Loading the data
%% Cell type:code id:2a1cabc3 tags:
```
dataset_path = [
'titanic',
'credit-g'
][0]
dataset_path = os.sep.join(['datasets', dataset_path])
# zip_filename = f"{dataset_path}.zip"
# os.makedirs(dataset_path, exist_ok=True)
# unzip_archive(zip_filename, dataset_path)
```
%% Cell type:code id:O3IlOI4PeVXT tags:
```
dataset = Dataset.load_from_path(dataset_path)
dataset_description = dataset.get_description()
dataset_metadata_description = dataset.get_metadata_description()
print(dataset_metadata_description)
```
%%%% Output: stream
name: titanic passengers survival
description: passengers survived the Titanic shipwreck
train_split_name: train
splits: dict_keys(['train', 'test_X', 'test_y'])
%% Cell type:markdown id:b667d022 tags:
# Querying the web model
%% Cell type:code id:06ec2375 tags:
```
task_prompts = {
"categorical_columns": {
"system": dataset_description,
"task": prompts.categorical_definition_prompt,
"context": prompts.categorical_definition_context,
},
"target_column": {
"system": dataset_description,
"task": prompts.target_definition_prompt,
"context": None,
},
"task_type": {
"system": dataset_description,
"task": prompts.task_definition_prompt,
"context": None,
}
}
# Model selection
model_type = ["8b", "70b"][0]
url = "http://10.32.2.2:8672/v1/chat/completions"
if model_type == "70b":
url = "http://10.32.15.21:6672/generate"
model = WebAssistant(url)
responses = run_web_model_multicall(
model, task_prompts
)
pprint(responses)
```
%%%% Output: error
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[6], line 1
----> 1 from prompts import categorical_definition_prompt, target_definition_prompt, task_definition_prompts
2 task_prompts = {
3 "categorical_columns": categorical_definition_prompt,
4 "target_column": target_definition_prompt,
5 "task_type": task_definition_prompts,
6 }
8 url = "http://10.32.15.21:6672/generate"
ImportError: cannot import name 'task_definition_prompts' from 'prompts' (c:\Users\Stas\Documents\python\AutoML-LLM\prompts.py)
%% Cell type:markdown id:8382401d tags:
# Running the framework
%% Cell type:code id:aaJnOwCvr_wR tags:
```
if dataset_path.endswith('titanic'):  # dataset_path was joined with 'datasets' above
test_df = dataset_metadata["splits"]["test_X"].merge(dataset_metadata["splits"]["test_y"],
on='PassengerId', how='inner')
else:
test_df = dataset_metadata["splits"]["test"]
train_df = dataset_metadata["splits"]["train"]
prediction = run_example(train_df=train_df, test_df=test_df,
                         dataset_metadata=dataset_metadata)
```
%%%% Output: stream
Generations: 0%| | 0/10000 [02:16<?, ?gen/s]
%%%% Output: stream
{'roc_auc': 0.941, 'accuracy': 0.828}
%% Cell type:code id:2f61d093 tags:
```
prediction[:5]
```
%%%% Output: execute_result
array([[0],
[0],
[0],
[0],
[1]], dtype=int64)
%% Cell type:code id:2e45d4f3 tags:
```
result_df = pd.DataFrame(prediction, columns=[dataset_metadata["target_column"]])
result_df.to_csv(f"{dataset_path}/predictions.csv")
```
import os
import json
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
from requests.exceptions import RequestException
_MAX_RETRIES = 6
def run_model_multicall(model, tokenizer, generation_config, prompts):
    """Run all prompts on the local model."""
responses = {}
for task in prompts:
messages = [
{"role": "system", "content": prompts[task]["system"]},
{"role": "context", "content": prompts[task]["context"]},
{"role": "user", "content": prompts[task]["task"]},
]
input_ids = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt"
).to(model.device)
outputs = model.generate(
input_ids,
**generation_config
)
response = outputs[0][input_ids.shape[-1]:]
responses[task] = tokenizer.decode(response, skip_special_tokens=True)
return responses
@retry(
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(_MAX_RETRIES),
retry=(retry_if_exception_type(RequestException) | retry_if_exception_type(RuntimeError)),
reraise=True
)
def run_web_model_multicall(model, prompts):
    """Run all prompts on the web model."""
responses = {}
for task in prompts:
model.set_sys_prompt(prompts[task]["system"])
model.add_context(prompts[task]["context"])
response = model(prompts[task]["task"], as_json=True)
responses[task] = response
return responses
def process_model_responses(responses):
responses["categorical_columns"] = responses["categorical_columns"].split("\n")
responses["task_type"] = responses["task_type"].lower()
return responses
def save_model_responses(responses, path):
with open(os.sep.join([path, 'model_responses.json']), 'w') as json_file:
json.dump(responses, json_file)
\ No newline at end of file
@@ -22,6 +22,7 @@
cloudpickle==3.0.0
colorlog==6.8.2
comm==0.2.2
contourpy==1.2.1
+cramjam==2.8.3
cycler==0.12.1
debugpy==1.8.2
decorator==5.1.1
@@ -34,18 +35,21 @@
ete3==3.1.3
exceptiongroup==1.2.2
executing==2.0.1
fastjsonschema==2.20.0
+fastparquet==2024.5.0
fedot==0.7.3.2
filelock==3.15.4
FLAML==2.1.2
fonttools==4.53.1
fqdn==1.5.1
fsspec==2024.6.1
-func_timeout==4.3.5
+func-timeout==4.3.5
+functime==0.9.5
future==1.0.0
graphviz==0.20.3
greenlet==3.0.3
h11==0.14.0
+holidays==0.53
httpcore==1.0.5
httpx==0.27.0
-huggingface-hub==0.23.5
hyperopt==0.2.7
idna==3.7
imageio==2.34.2
@@ -72,7 +76,7 @@
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
-jupyterlab==4.2.3
+jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.11
@@ -85,7 +89,6 @@
MarkupSafe==2.1.5
matplotlib==3.9.1
matplotlib-inline==0.1.7
mistune==3.0.2
-mpmath==1.3.0
multiprocess==0.70.16
nbclient==0.10.0
nbconvert==7.16.4
@@ -108,13 +111,15 @@
pillow==10.4.0
platformdirs==4.2.2
plotly==5.22.0
pluggy==1.5.0
+polars==1.2.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure-eval==0.2.2
py4j==0.10.9.7
-pyaml==24.4.0
+pyaml==24.7.0
+pyarrow==17.0.0
pycparser==2.22
Pygments==2.18.0
pyparsing==3.1.2
@@ -129,12 +134,10 @@
qtconsole==5.5.2
QtPy==2.4.1
readthedocs-sphinx-search==0.3.2
referencing==0.35.1
-regex==2024.5.15
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.19.0
-safetensors==0.4.3
SALib==1.4.8
scikit-learn==1.2.2
scikit-optimize==0.10.2
@@ -146,7 +149,7 @@
sktime==0.16.1
sniffio==1.3.1
snowballstemmer==2.2.0
soupsieve==2.5
-Sphinx==7.4.5
+Sphinx==7.4.6
sphinx-rtd-theme==2.0.0
sphinxcontrib-applehelp==1.0.8
sphinxcontrib-details-directive==0.1.0
@@ -159,23 +162,21 @@
sphinxcontrib-serializinghtml==1.1.10
SQLAlchemy==2.0.31
stack-data==0.6.3
statsmodels==0.14.2
-sympy==1.13.0
tabulate==0.9.0
tenacity==8.5.0
terminado==0.18.1
testfixtures==8.3.0
thegolem==0.4.0
threadpoolctl==3.5.0
tinycss2==1.3.0
-tokenizers==0.19.1
tomli==2.0.1
-torch==2.3.1
tornado==6.4.1
tqdm==4.65.2
traitlets==5.14.3
-transformers==4.42.4
types-python-dateutil==2.9.0.20240316
typing==3.7.4.3
typing_extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
wcwidth==0.2.13
......