Unverified Commit 6bae5465 authored by Dmitry Gilemkhanov's avatar Dmitry Gilemkhanov Committed by GitHub
Browse files

List of tabular models update (#1379)

* work: added deprecated tag

* work: set 'boosting' tag for gbm

* docs: available tabular models

* small fix

* work: tests adaptation, closed old todo

* fix: test update

* work: update specific operators

* fix: test models repository update

* chore: operation repository update

* Revert "chore: operation repository update"

This reverts commit e81c8232.

* chore: test_result_reproducing params update

* chore: test_presets update

* docs: available models update

* fix: test_composer update

* docs: description update

* docs: available models update

* fix: dask-ml for python3.8 version update

* fix: 2023.3.24 dask-ml version for py3.8

* Update requirements.txt

* Update requirements.txt

* Update requirements.txt

* fix: fsspec==2023.1.0

* fix: update dask, dask-ml reqs

* small fix

* Update requirements.txt
Showing with 56 additions and 25 deletions
+56 -25
......@@ -258,7 +258,7 @@ class FedotBuilder:
Args:
available_operations: list of model names to use. Pick the names according to operations repository.
.. details:: Possible options:
.. details:: All options:
- ``adareg`` -> AdaBoost Regressor
- ``ar`` -> AutoRegression
......@@ -331,6 +331,25 @@ class FedotBuilder:
- ``exog_ts`` -> Exogenous Transformation
- ``topological_features`` -> Topological features
.. details:: Tabular models available for composing:
- ``adareg`` -> AdaBoost Regressor
- ``catboost`` -> Catboost Classifier
- ``catboostreg`` -> Catboost Regressor
- ``knn`` -> K-nearest neighbors Classifier
- ``knnreg`` -> K-nearest neighbors Regressor
- ``lasso`` -> Lasso Linear Regressor
- ``lgbm`` -> Light Gradient Boosting Machine Classifier
- ``lgbmreg`` -> Light Gradient Boosting Machine Regressor
- ``linear`` -> Linear Regression Regressor
- ``logit`` -> Logistic Regression Classifier
- ``rf`` -> Random Forest Classifier
- ``rfr`` -> Random Forest Regressor
- ``ridge`` -> Ridge Linear Regressor
- ``treg`` -> Extra Trees Regressor
- ``xgboost`` -> Extreme Gradient Boosting Classifier
- ``xgbreg`` -> Extreme Gradient Boosting Regressor
max_depth: max depth of a pipeline. Defaults to ``6``.
max_arity: max arity of a pipeline nodes. Defaults to ``3``.
......
......@@ -111,13 +111,13 @@ def add_resample_mutation(pipeline: Pipeline, **kwargs):
def choose_new_model(boosting_model_candidates: List[str]) -> str:
""" Since 'linear' and 'dtreg' operations are suitable for solving the problem
""" Since 'ridge' and 'rfr' operations are suitable for solving the problem
and they are simpler than others, they are preferred """
if 'linear' in boosting_model_candidates:
new_model = 'linear'
elif 'dtreg' in boosting_model_candidates:
new_model = 'dtreg'
if 'ridge' in boosting_model_candidates:
new_model = 'ridge'
elif 'rfr' in boosting_model_candidates:
new_model = 'rfr'
else:
new_model = choice(boosting_model_candidates)
return new_model
......@@ -211,6 +211,7 @@
"meta": "sklearn_class",
"presets": ["fast_train"],
"tags": [
"deprecated",
"simple",
"bayesian",
"non_multi",
......@@ -235,6 +236,7 @@
"meta": "sklearn_class",
"presets": ["fast_train", "*tree"],
"tags": [
"deprecated",
"simple",
"tree",
"interpretable",
......@@ -245,6 +247,7 @@
"meta": "sklearn_regr",
"presets": ["fast_train", "ts", "*tree"],
"tags": [
"deprecated",
"tree",
"interpretable",
"non_linear"
......@@ -254,6 +257,7 @@
"meta": "sklearn_regr",
"presets": ["*tree"],
"tags": [
"deprecated",
"boosting",
"non_multi",
"non_linear",
......@@ -296,20 +300,20 @@
"meta": "custom_class",
"presets": ["fast_train"],
"tags": [
"discriminant", "linear", "correct_params", "non-default"
"deprecated", "discriminant", "linear", "correct_params", "non-default"
]
},
"lgbm": {
"meta": "boosting_class",
"tags": [
"tree", "non_linear"
"tree", "non_linear", "boosting"
]
},
"lgbmreg": {
"meta": "boosting_regr",
"presets": ["*tree"],
"tags": [
"tree", "non_multi", "non_linear"
"tree", "non_multi", "non_linear", "boosting"
]
},
"linear": {
......@@ -332,6 +336,7 @@
"mlp": {
"meta": "sklearn_class",
"tags": [
"deprecated",
"neural",
"non_linear"
]
......@@ -340,6 +345,7 @@
"meta": "sklearn_class",
"presets": ["fast_train"],
"tags": [
"deprecated",
"non-default",
"bayesian",
"non_multi",
......@@ -350,6 +356,7 @@
"meta": "custom_class",
"presets": ["fast_train"],
"tags": [
"deprecated",
"discriminant",
"quadratic",
"non_linear"
......@@ -390,7 +397,7 @@
"meta": "sklearn_regr",
"presets": ["fast_train", "ts"],
"tags": [
"non_multi", "non_linear"
"deprecated", "non_multi", "non_linear"
]
},
"stl_arima": {
......@@ -455,6 +462,7 @@
"svc": {
"meta": "custom_class",
"tags": [
"deprecated",
"no_prob",
"expensive",
"non_linear"
......@@ -463,6 +471,7 @@
"svr": {
"meta": "sklearn_regr",
"tags": [
"deprecated",
"non_multi",
"non_linear"
]
......@@ -480,14 +489,14 @@
"meta": "boosting_class",
"presets": ["*tree"],
"tags": [
"tree", "non_linear"
"tree", "non_linear", "boosting"
]
},
"xgboostreg": {
"meta": "boosting_regr",
"presets": ["*tree"],
"tags": [
"tree", "non_multi", "non_linear"
"tree", "non_multi", "non_linear", "boosting"
]
},
"cnn": {
......
......@@ -89,7 +89,7 @@ class OperationTypesRepository:
def __init__(self, operation_type: str = 'model'):
self.log = default_log(self)
self._tags_excluded_by_default = ['non-default', 'expensive']
self._tags_excluded_by_default = ['non-default', 'expensive', 'deprecated']
OperationTypesRepository.init_default_repositories()
self.operation_type = operation_type
......
......@@ -7,8 +7,9 @@ thegolem==0.4.1
# Data
numpy>=1.16.0, !=1.24.0
pandas>=1.3.0; python_version >='3.8'
dask-ml>=2024.4.4; python_version >= '3.10'
dask-ml>=2023.5.0; python_version < '3.10'
dask-ml==2024.4.4; python_version <= '3.10'
dask-ml>2024.4.4; python_version > '3.10'
dask>=2023.5.0; python_version < '3.10'
# Models and frameworks
anytree>=2.8.0
......@@ -38,6 +39,8 @@ joblib>=0.17.0
requests>=2.0
typing>=3.7.0
psutil>=5.9.2
fsspec>=2024; python_version > '3.8'
fsspec>=2024,<=2025.3.0; python_version <= '3.8'
# Tests
pytest>=6.2.0
......
......@@ -84,13 +84,13 @@ def test_the_formation_of_initial_assumption():
train_input, _, _ = get_dataset(task_type='classification')
train_input = DataPreprocessor().obligatory_prepare_for_fit(train_input)
available_operations = ['dt']
available_operations = ['rf']
initial_assumptions = AssumptionsBuilder \
.get(train_input) \
.from_operations(available_operations) \
.build()
res_init_assumption = Pipeline(PipelineNode('dt'))
res_init_assumption = Pipeline(PipelineNode('rf'))
assert initial_assumptions[0].root_node.descriptive_id == res_init_assumption.root_node.descriptive_id
......
......@@ -89,8 +89,8 @@ def test_api_tune_correct(task_type, metric_name, pred_model):
@pytest.mark.parametrize(
"task_type, metric_name, pred_model",
[
("classification", "f1", "dt"),
("regression", "rmse", "dtreg"),
("classification", "f1", "rf"),
("regression", "rmse", "rfr"),
],
)
def test_api_fit_atomized_model(task_type, metric_name, pred_model):
......
......@@ -30,8 +30,8 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs):
'task_params': TsForecastingParams(forecast_length=forecast_length),
'seed': 1,
'timeout': None,
'pop_size': 5,
'num_of_generations': 5,
'pop_size': 4,
'num_of_generations': 4,
'with_tuning': False}
params.update(kwargs)
fedot = Fedot(**params)
......
......@@ -100,7 +100,7 @@ def test_api_fit_predict_with_pseudo_large_dataset_with_label_correct():
model.predict(features=data)
# there should be only tree like models + data operations
assert len(model.params.get('available_operations')) == 5
assert len(model.params.get('available_operations')) == 4
assert 'logit' not in model.params.get('available_operations')
......
......@@ -12,8 +12,8 @@ from test.data.datasets import data_with_binary_features_and_categorical_target
minimal_sets_for_fast_train_by_task = {
'classification': {'dt', 'logit', 'knn'},
'regression': {'dtreg', 'lasso', 'ridge', 'linear'},
'classification': {'rf', 'logit', 'knn'},
'regression': {'rfr', 'lasso', 'ridge', 'linear'},
'ts_forecasting': {'ar', 'adareg', 'scaling', 'lasso'},
'clustering': {'kmeans'}
}
......
......@@ -74,7 +74,7 @@ def get_simple_linear_boosting_pipeline() -> Pipeline:
node_pf = PipelineNode('poly_features', nodes_from=[node_scaling])
node_rf = PipelineNode('rf', nodes_from=[node_pf])
node_decompose = PipelineNode('class_decompose', nodes_from=[node_pf, node_rf])
node_linear = PipelineNode('linear', nodes_from=[node_decompose])
node_linear = PipelineNode('ridge', nodes_from=[node_decompose])
final_node = PipelineNode('logit', nodes_from=[node_linear, node_rf])
pipeline = Pipeline(final_node)
return pipeline
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment