Unverified Commit 6bae5465 authored by Dmitry Gilemkhanov's avatar Dmitry Gilemkhanov Committed by GitHub
Browse files

List of tabular models update (#1379)

* work: added deprecated tag

* work: set 'boosting' tag for gbm

* docs: available tabular models

* small fix

* work: tests adaptation, closed old todo

* fix: test update

* work: update specific operators

* fix: test models repository update

* chore: operation repository update

* Revert "chore: operation repository update"

This reverts commit e81c8232.

* chore: test_result_reproducing params update

* chore: test_presets update

* docs: available models update

* fix: test_composer update

* docs: description update

* docs: available models update

* fix: dask-ml for python3.8 version update

* fix: 2023.3.24 dask-ml version for py3.8

* Update requirements.txt

* Update requirements.txt

* Update requirements.txt

* fix: fsspec==2023.1.0

* fix: update dask, dask-ml reqs

* small fix

* Update requirements.txt
Showing with 56 additions and 25 deletions
+56 -25
......@@ -258,7 +258,7 @@ class FedotBuilder:
Args:
available_operations: list of model names to use. Pick the names according to operations repository.
.. details:: Possible options:
.. details:: All options:
- ``adareg`` -> AdaBoost Regressor
- ``ar`` -> AutoRegression
......@@ -331,6 +331,25 @@ class FedotBuilder:
- ``exog_ts`` -> Exogenous Transformation
- ``topological_features`` -> Topological features
.. details:: Tabular models available for composing:
- ``adareg`` -> AdaBoost Regressor
- ``catboost`` -> Catboost Classifier
- ``catboostreg`` -> Catboost Regressor
- ``knn`` -> K-nearest neighbors Classifier
- ``knnreg`` -> K-nearest neighbors Regressor
- ``lasso`` -> Lasso Linear Regressor
- ``lgbm`` -> Light Gradient Boosting Machine Classifier
- ``lgbmreg`` -> Light Gradient Boosting Machine Regressor
- ``linear`` -> Linear Regression Regressor
- ``logit`` -> Logistic Regression Classifier
- ``rf`` -> Random Forest Classifier
- ``rfr`` -> Random Forest Regressor
- ``ridge`` -> Ridge Linear Regressor
- ``treg`` -> Extra Trees Regressor
- ``xgboost`` -> Extreme Gradient Boosting Classifier
- ``xgbreg`` -> Extreme Gradient Boosting Regressor
max_depth: max depth of a pipeline. Defaults to ``6``.
max_arity: max arity of a pipeline nodes. Defaults to ``3``.
......
......@@ -111,13 +111,13 @@ def add_resample_mutation(pipeline: Pipeline, **kwargs):
def choose_new_model(boosting_model_candidates: List[str]) -> str:
""" Since 'linear' and 'dtreg' operations are suitable for solving the problem
""" Since 'ridge' and 'rfr' operations are suitable for solving the problem
and they are simpler than others, they are preferred """
if 'linear' in boosting_model_candidates:
new_model = 'linear'
elif 'dtreg' in boosting_model_candidates:
new_model = 'dtreg'
if 'ridge' in boosting_model_candidates:
new_model = 'ridge'
elif 'rfr' in boosting_model_candidates:
new_model = 'rfr'
else:
new_model = choice(boosting_model_candidates)
return new_model
......@@ -211,6 +211,7 @@
"meta": "sklearn_class",
"presets": ["fast_train"],
"tags": [
"deprecated",
"simple",
"bayesian",
"non_multi",
......@@ -235,6 +236,7 @@
"meta": "sklearn_class",
"presets": ["fast_train", "*tree"],
"tags": [
"deprecated",
"simple",
"tree",
"interpretable",
......@@ -245,6 +247,7 @@
"meta": "sklearn_regr",
"presets": ["fast_train", "ts", "*tree"],
"tags": [
"deprecated",
"tree",
"interpretable",
"non_linear"
......@@ -254,6 +257,7 @@
"meta": "sklearn_regr",
"presets": ["*tree"],
"tags": [
"deprecated",
"boosting",
"non_multi",
"non_linear",
......@@ -296,20 +300,20 @@
"meta": "custom_class",
"presets": ["fast_train"],
"tags": [
"discriminant", "linear", "correct_params", "non-default"
"deprecated", "discriminant", "linear", "correct_params", "non-default"
]
},
"lgbm": {
"meta": "boosting_class",
"tags": [
"tree", "non_linear"
"tree", "non_linear", "boosting"
]
},
"lgbmreg": {
"meta": "boosting_regr",
"presets": ["*tree"],
"tags": [
"tree", "non_multi", "non_linear"
"tree", "non_multi", "non_linear", "boosting"
]
},
"linear": {
......@@ -332,6 +336,7 @@
"mlp": {
"meta": "sklearn_class",
"tags": [
"deprecated",
"neural",
"non_linear"
]
......@@ -340,6 +345,7 @@
"meta": "sklearn_class",
"presets": ["fast_train"],
"tags": [
"deprecated",
"non-default",
"bayesian",
"non_multi",
......@@ -350,6 +356,7 @@
"meta": "custom_class",
"presets": ["fast_train"],
"tags": [
"deprecated",
"discriminant",
"quadratic",
"non_linear"
......@@ -390,7 +397,7 @@
"meta": "sklearn_regr",
"presets": ["fast_train", "ts"],
"tags": [
"non_multi", "non_linear"
"deprecated", "non_multi", "non_linear"
]
},
"stl_arima": {
......@@ -455,6 +462,7 @@
"svc": {
"meta": "custom_class",
"tags": [
"deprecated",
"no_prob",
"expensive",
"non_linear"
......@@ -463,6 +471,7 @@
"svr": {
"meta": "sklearn_regr",
"tags": [
"deprecated",
"non_multi",
"non_linear"
]
......@@ -480,14 +489,14 @@
"meta": "boosting_class",
"presets": ["*tree"],
"tags": [
"tree", "non_linear"
"tree", "non_linear", "boosting"
]
},
"xgboostreg": {
"meta": "boosting_regr",
"presets": ["*tree"],
"tags": [
"tree", "non_multi", "non_linear"
"tree", "non_multi", "non_linear", "boosting"
]
},
"cnn": {
......
......@@ -89,7 +89,7 @@ class OperationTypesRepository:
def __init__(self, operation_type: str = 'model'):
self.log = default_log(self)
self._tags_excluded_by_default = ['non-default', 'expensive']
self._tags_excluded_by_default = ['non-default', 'expensive', 'deprecated']
OperationTypesRepository.init_default_repositories()
self.operation_type = operation_type
......
......@@ -7,8 +7,9 @@ thegolem==0.4.1
# Data
numpy>=1.16.0, !=1.24.0
pandas>=1.3.0; python_version >='3.8'
dask-ml>=2024.4.4; python_version >= '3.10'
dask-ml>=2023.5.0; python_version < '3.10'
dask-ml==2024.4.4; python_version <= '3.10'
dask-ml>2024.4.4; python_version > '3.10'
dask>=2023.5.0; python_version < '3.10'
# Models and frameworks
anytree>=2.8.0
......@@ -38,6 +39,8 @@ joblib>=0.17.0
requests>=2.0
typing>=3.7.0
psutil>=5.9.2
fsspec>=2024; python_version > '3.8'
fsspec>=2024,<=2025.3.0; python_version <= '3.8'
# Tests
pytest>=6.2.0
......
......@@ -84,13 +84,13 @@ def test_the_formation_of_initial_assumption():
train_input, _, _ = get_dataset(task_type='classification')
train_input = DataPreprocessor().obligatory_prepare_for_fit(train_input)
available_operations = ['dt']
available_operations = ['rf']
initial_assumptions = AssumptionsBuilder \
.get(train_input) \
.from_operations(available_operations) \
.build()
res_init_assumption = Pipeline(PipelineNode('dt'))
res_init_assumption = Pipeline(PipelineNode('rf'))
assert initial_assumptions[0].root_node.descriptive_id == res_init_assumption.root_node.descriptive_id
......
......@@ -89,8 +89,8 @@ def test_api_tune_correct(task_type, metric_name, pred_model):
@pytest.mark.parametrize(
"task_type, metric_name, pred_model",
[
("classification", "f1", "dt"),
("regression", "rmse", "dtreg"),
("classification", "f1", "rf"),
("regression", "rmse", "rfr"),
],
)
def test_api_fit_atomized_model(task_type, metric_name, pred_model):
......
......@@ -30,8 +30,8 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs):
'task_params': TsForecastingParams(forecast_length=forecast_length),
'seed': 1,
'timeout': None,
'pop_size': 5,
'num_of_generations': 5,
'pop_size': 4,
'num_of_generations': 4,
'with_tuning': False}
params.update(kwargs)
fedot = Fedot(**params)
......
......@@ -100,7 +100,7 @@ def test_api_fit_predict_with_pseudo_large_dataset_with_label_correct():
model.predict(features=data)
# there should be only tree like models + data operations
assert len(model.params.get('available_operations')) == 5
assert len(model.params.get('available_operations')) == 4
assert 'logit' not in model.params.get('available_operations')
......
......@@ -12,8 +12,8 @@ from test.data.datasets import data_with_binary_features_and_categorical_target
minimal_sets_for_fast_train_by_task = {
'classification': {'dt', 'logit', 'knn'},
'regression': {'dtreg', 'lasso', 'ridge', 'linear'},
'classification': {'rf', 'logit', 'knn'},
'regression': {'rfr', 'lasso', 'ridge', 'linear'},
'ts_forecasting': {'ar', 'adareg', 'scaling', 'lasso'},
'clustering': {'kmeans'}
}
......
......@@ -74,7 +74,7 @@ def get_simple_linear_boosting_pipeline() -> Pipeline:
node_pf = PipelineNode('poly_features', nodes_from=[node_scaling])
node_rf = PipelineNode('rf', nodes_from=[node_pf])
node_decompose = PipelineNode('class_decompose', nodes_from=[node_pf, node_rf])
node_linear = PipelineNode('linear', nodes_from=[node_decompose])
node_linear = PipelineNode('ridge', nodes_from=[node_decompose])
final_node = PipelineNode('logit', nodes_from=[node_linear, node_rf])
pipeline = Pipeline(final_node)
return pipeline
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment