├── .editorconfig ├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── feature_request.md │ └── question.md └── workflows │ ├── CI.yml │ ├── docs.yml │ ├── mirror.yml │ ├── publish_pypi.yml │ ├── tests_macos.yml │ ├── tests_ubuntu.yml │ └── tests_windows.yml ├── .gitignore ├── .gitlab ├── .gitlab-ci.yml └── release.yml ├── .jupyter └── jupyter_notebook_config.py ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── LICENSE ├── README.md ├── _config.yml ├── check_docs.py ├── docs ├── Makefile ├── _static │ └── style.css ├── _templates │ ├── autosummary │ │ ├── class.rst │ │ └── module.rst │ ├── classtemplate.rst │ └── functiontemplate.rst ├── conf.py ├── index.rst ├── mock_docs.py ├── pages │ ├── Installation.rst │ ├── Kaggle_Kernels.rst │ ├── Others.rst │ ├── Python-API.rst │ ├── Tutorials.rst │ ├── modules │ │ ├── addons.rst │ │ ├── automl.rst │ │ ├── dataset.rst │ │ ├── image.rst │ │ ├── ml_algo.rst │ │ ├── ml_algo.torch_based.rst │ │ ├── ml_algo.tuning.rst │ │ ├── pipelines.features.rst │ │ ├── pipelines.ml.rst │ │ ├── pipelines.rst │ │ ├── pipelines.selection.rst │ │ ├── reader.rst │ │ ├── report.rst │ │ ├── tasks.losses.rst │ │ ├── tasks.rst │ │ ├── text.rst │ │ ├── transformers.rst │ │ ├── utils.rst │ │ └── validation.rst │ └── tutorials │ │ ├── Tutorial_10_relational_data_with_star_scheme.nblink │ │ ├── Tutorial_11_time_series.nblink │ │ ├── Tutorial_12_AA_Test.nblink │ │ ├── Tutorial_13_AA_Test_multigroup_split.nblink │ │ ├── Tutorial_14_AB_Test.nblink │ │ ├── Tutorial_15_Matching.nblink │ │ ├── Tutorial_16_Matching_without_replacement.nblink │ │ ├── Tutorial_17_Modeling_Limit_Distribution.nblink │ │ ├── Tutorial_18_Test_Limit_Distribution.nblink │ │ ├── Tutorial_1_basics.nblink │ │ ├── Tutorial_2_WhiteBox_AutoWoE.nblink │ │ ├── Tutorial_3_sql_data_source.nblink │ │ ├── Tutorial_4_NLP_Interpretation.nblink │ │ ├── Tutorial_5_uplift.nblink │ │ ├── Tutorial_6_custom_pipeline.nblink │ │ ├── 
Tutorial_7_ICE_and_PDP_interpretation.nblink │ │ ├── Tutorial_8_CV_preset.nblink │ │ └── Tutorial_9_neural_networks.nblink └── requirements.txt ├── examples ├── README.md ├── data │ ├── ai92_value_77.csv │ ├── avito1k_train.csv │ ├── jobs_train.csv │ ├── meal_delivery_company │ │ ├── fulfilment_center_info.csv │ │ ├── meal_info.csv │ │ └── relational_main.csv.zip │ ├── sampled_app_train.csv │ └── ts_data.csv ├── demo0.py ├── demo1.py ├── demo10.py ├── demo11.py ├── demo12.py ├── demo13.py ├── demo14.py ├── demo15.py ├── demo2.py ├── demo3.py ├── demo4.py ├── demo5.py ├── demo6.py ├── demo7.py ├── demo8.py ├── demo9.py ├── optimization │ ├── conditional_parameters.py │ ├── custom_search_space.py │ └── sequential_parameter_search.py ├── simple_tabular_classification.py └── tutorials │ ├── Tutorial_10_relational_data_with_star_scheme.ipynb │ ├── Tutorial_11_time_series.ipynb │ ├── Tutorial_12_AA_Test.ipynb │ ├── Tutorial_13_AA_Test_multigroup_split.ipynb │ ├── Tutorial_14_AB_Test.ipynb │ ├── Tutorial_15_Matching.ipynb │ ├── Tutorial_16_Matching_without_replacement.ipynb │ ├── Tutorial_17_Modeling_Limit_Distribution.ipynb │ ├── Tutorial_18_Test_Limit_Distribution.ipynb │ ├── Tutorial_1_basics.ipynb │ ├── Tutorial_2_WhiteBox_AutoWoE.ipynb │ ├── Tutorial_3_sql_data_source.ipynb │ ├── Tutorial_4_NLP_Interpretation.ipynb │ ├── Tutorial_5_uplift.ipynb │ ├── Tutorial_6_custom_pipeline.ipynb │ ├── Tutorial_7_ICE_and_PDP_interpretation.ipynb │ ├── Tutorial_8_CV_preset.ipynb │ └── Tutorial_9_neural_networks.ipynb ├── imgs ├── GENERALL2X2.jpg ├── LightAutoML_logo_big.png ├── LightAutoML_logo_small.png ├── Star_scheme_tables.png ├── TabularAutoML_model_descr.png ├── TabularUtilizedAutoML_model_descr.png ├── autoint.png ├── denselight.png ├── densenet.png ├── fttransformer.png ├── lightautoml_icon_color.png ├── lightautoml_logo_color.png ├── lime.jpg ├── node.png ├── resnet.png ├── swa.png ├── tutorial_11_case_problem_statement.png ├── tutorial_11_general_problem_statement.png ├── 
tutorial_11_history_step_params.png ├── tutorial_11_transformers_params.png ├── tutorial_1_initial_report.png ├── tutorial_1_laml_big.png ├── tutorial_1_ml_pipeline.png ├── tutorial_1_pipeline.png ├── tutorial_1_unfolded_report.png ├── tutorial_2_initial_report.png ├── tutorial_2_pipeline.png ├── tutorial_2_unfolded_report.png ├── tutorial_3_initial_report.png ├── tutorial_3_unfolded_report.png ├── tutorial_blackbox_pipeline.png ├── tutorial_whitebox_report_1.png ├── tutorial_whitebox_report_2.png ├── tutorial_whitebox_report_3.png └── tutorial_whitebox_report_4.png ├── lightautoml ├── __init__.py ├── addons │ ├── __init__.py │ ├── autots │ │ └── base.py │ ├── hypex │ │ └── __init__.py │ ├── interpretation │ │ ├── __init__.py │ │ ├── data_process.py │ │ ├── l2x.py │ │ ├── l2x_model.py │ │ ├── lime.py │ │ └── utils.py │ ├── tabular_interpretation │ │ ├── __init__.py │ │ └── sswarm.py │ ├── uplift │ │ ├── __init__.py │ │ ├── base.py │ │ ├── metalearners.py │ │ ├── metrics.py │ │ └── utils.py │ └── utilization │ │ ├── __init__.py │ │ └── utilization.py ├── automl │ ├── __init__.py │ ├── base.py │ ├── blend.py │ └── presets │ │ ├── __init__.py │ │ ├── base.py │ │ ├── image_config.yml │ │ ├── image_presets.py │ │ ├── tabular_config.yml │ │ ├── tabular_configs │ │ ├── conf_0_sel_type_0.yml │ │ ├── conf_1_sel_type_1.yml │ │ ├── conf_2_select_mode_1_no_typ.yml │ │ ├── conf_3_sel_type_1_no_inter_lgbm.yml │ │ ├── conf_4_sel_type_0_no_int.yml │ │ ├── conf_5_sel_type_1_tuning_full.yml │ │ └── conf_6_sel_type_1_tuning_full_no_int_lgbm.yml │ │ ├── tabular_presets.py │ │ ├── text_config.yml │ │ ├── text_presets.py │ │ ├── time_series_config.yml │ │ ├── utils.py │ │ ├── whitebox_config.yml │ │ └── whitebox_presets.py ├── dataset │ ├── __init__.py │ ├── base.py │ ├── np_pd_dataset.py │ ├── roles.py │ ├── seq_np_pd_dataset.py │ └── utils.py ├── image │ ├── __init__.py │ ├── image.py │ └── utils.py ├── ml_algo │ ├── __init__.py │ ├── base.py │ ├── boost_cb.py │ ├── boost_lgbm.py │ 
├── boost_xgb.py │ ├── dl_model.py │ ├── linear_sklearn.py │ ├── random_forest.py │ ├── tabnet │ │ └── utils.py │ ├── torch_based │ │ ├── __init__.py │ │ ├── autoint │ │ │ ├── autoint_utils.py │ │ │ └── ghost_norm.py │ │ ├── fttransformer │ │ │ └── fttransformer_utils.py │ │ ├── linear_model.py │ │ ├── nn_models.py │ │ └── node_nn_model.py │ ├── tuning │ │ ├── __init__.py │ │ ├── base.py │ │ ├── hyperopt.py │ │ └── optuna.py │ ├── utils.py │ └── whitebox.py ├── pipelines │ ├── __init__.py │ ├── features │ │ ├── __init__.py │ │ ├── base.py │ │ ├── generator_pipeline.py │ │ ├── image_pipeline.py │ │ ├── lgb_pipeline.py │ │ ├── linear_pipeline.py │ │ ├── text_pipeline.py │ │ ├── torch_pipeline.py │ │ └── wb_pipeline.py │ ├── ml │ │ ├── __init__.py │ │ ├── base.py │ │ ├── nested_ml_pipe.py │ │ └── whitebox_ml_pipe.py │ ├── selection │ │ ├── __init__.py │ │ ├── base.py │ │ ├── importance_based.py │ │ ├── linear_selector.py │ │ └── permutation_importance_based.py │ └── utils.py ├── reader │ ├── __init__.py │ ├── base.py │ ├── guess_roles.py │ ├── seq.py │ ├── tabular_batch_generator.py │ └── utils.py ├── report │ ├── __init__.py │ ├── lama_report_templates │ │ ├── binary_inference_section.html │ │ ├── feature_importance_section.html │ │ ├── feature_importance_utillized_section.html │ │ ├── interpretation_section.html │ │ ├── interpretation_subsection.html │ │ ├── lama_base_template.html │ │ ├── model_section.html │ │ ├── model_section_utilized.html │ │ ├── multiclass_inference_section.html │ │ ├── nlp_section.html │ │ ├── nlp_subsection.html │ │ ├── preset_section.html │ │ ├── reg_inference_section.html │ │ ├── results_section.html │ │ ├── train_set_section.html │ │ ├── train_set_section_utilized.html │ │ ├── uplift_section.html │ │ ├── uplift_subsection.html │ │ ├── utilized_data_subsections.html │ │ └── whitebox_section.html │ └── report_deco.py ├── tasks │ ├── __init__.py │ ├── base.py │ ├── common_metric.py │ ├── losses │ │ ├── __init__.py │ │ ├── base.py │ │ ├── 
cb.py │ │ ├── cb_custom.py │ │ ├── lgb.py │ │ ├── lgb_custom.py │ │ ├── sklearn.py │ │ ├── torch.py │ │ └── xgb.py │ └── utils.py ├── text │ ├── __init__.py │ ├── dl_transformers.py │ ├── dp_utils.py │ ├── embed.py │ ├── embed_dataset.py │ ├── nn_model.py │ ├── tokenizer.py │ ├── trainer.py │ ├── utils.py │ └── weighted_average_transformer.py ├── transformers │ ├── __init__.py │ ├── base.py │ ├── categorical.py │ ├── composite.py │ ├── datetime.py │ ├── decomposition.py │ ├── generator.py │ ├── groupby.py │ ├── image.py │ ├── numeric.py │ ├── seq.py │ ├── text.py │ └── utils.py ├── utils │ ├── __init__.py │ ├── installation.py │ ├── logging.py │ └── timer.py └── validation │ ├── __init__.py │ ├── base.py │ ├── np_iterators.py │ └── utils.py ├── pyproject.toml ├── scripts ├── README.md ├── exp_branch_push.py ├── experiments │ ├── run.py │ ├── run_tabular.py │ └── utils.py ├── poetry_fix.py └── run_tutorials.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── integration │ ├── integration_utils.py │ ├── test_custom_2_level_stacking.py │ ├── test_demo0.py │ ├── test_demo1.py │ ├── test_demo10.py │ ├── test_demo11.py │ ├── test_demo12.py │ ├── test_demo13.py │ ├── test_demo15.py │ ├── test_demo2.py │ ├── test_demo3.py │ ├── test_demo4.py │ ├── test_demo5.py │ ├── test_demo6.py │ ├── test_demo7.py │ └── test_demo8.py └── unit │ ├── __init__.py │ ├── test_addons │ └── __init__.py │ ├── test_automl │ ├── __init__.py │ └── test_presets │ │ ├── __init__.py │ │ ├── presets_utils.py │ │ ├── test_tabularautoml.py │ │ ├── test_tabularautoml_nn.py │ │ ├── test_tabularautoml_xgb.py │ │ ├── test_tabularnlpautoml.py │ │ ├── test_tabularutilizedautoml.py │ │ ├── test_uplift.py │ │ └── test_whiteboxpreset.py │ ├── test_dataset │ └── __init__.py │ ├── test_image │ └── __init__.py │ ├── test_ml_algo │ ├── __init__.py │ └── test_optimization │ │ └── optuna │ │ └── test_optuna_tuner.py │ ├── test_pipelines │ └── __init__.py │ ├── test_reader │ └── __init__.py │ 
├── test_report │ └── __init__.py │ ├── test_tasks │ └── __init__.py │ ├── test_text │ └── __init__.py │ ├── test_transformers │ ├── __init__.py │ └── test_numeric.py │ ├── test_utils │ ├── __init__.py │ └── test_logging.py │ └── test_validation │ └── __init__.py └── tox.ini /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: Create a bug report 4 | title: '' 5 | labels: bug 6 | assignees: 7 | 8 | --- 9 | 10 | ## 🐛 Bug 11 | 12 | 13 | 14 | ## Environment 15 | 1. lightautoml version: 16 | 2. python version: 17 | 3. OS: 18 | 4. pip freeze file: 19 | 20 | ### To Reproduce 21 | Steps to reproduce the behavior: 22 | 1. first step 23 | 2. second step 24 | 25 | 26 | 27 | ### Expected behavior 28 | 29 | 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: ["bug", "triage"] 5 | assignees: 6 | - octocat 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this bug report! 12 | - type: input 13 | id: contact 14 | attributes: 15 | label: Contact Details 16 | description: How can we get in touch with you if we need more info? 17 | placeholder: ex. email@example.com 18 | validations: 19 | required: false 20 | - type: textarea 21 | id: what-happened 22 | attributes: 23 | label: What happened? 24 | description: Also tell us, what did you expect to happen? 25 | placeholder: Tell us what you see! 26 | value: "A bug happened!" 27 | validations: 28 | required: true 29 | - type: dropdown 30 | id: version 31 | attributes: 32 | label: Version 33 | description: What version of our software are you running? 
34 | options: 35 | - 1.0.2 (Default) 36 | - 1.0.3 (Edge) 37 | validations: 38 | required: true 39 | - type: dropdown 40 | id: browsers 41 | attributes: 42 | label: What browsers are you seeing the problem on? 43 | multiple: true 44 | options: 45 | - Firefox 46 | - Chrome 47 | - Safari 48 | - Microsoft Edge 49 | - type: textarea 50 | id: logs 51 | attributes: 52 | label: Relevant log output 53 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 54 | render: shell 55 | - type: checkboxes 56 | id: terms 57 | attributes: 58 | label: Code of Conduct 59 | description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) 60 | options: 61 | - label: I agree to follow this project's Code of Conduct 62 | required: true 63 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a feature to implement 4 | title: '' 5 | labels: enhancement 6 | assignees: 7 | 8 | --- 9 | 10 | ## 🚀 Feature Request 11 | 12 | 13 | 14 | ### Motivation 15 | 16 | 17 | 18 | ### Proposal 19 | 20 | 21 | 22 | ### Alternatives 23 | 24 | 25 | 26 | ### Additional context 27 | 28 | 29 | 30 | ### Checklist 31 | - [ ] feature proposal description 32 | - [ ] motivation 33 | - [ ] additional context / proposal alternatives review 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: How to question 3 | about: Asking how-to questions 4 | title: '' 5 | labels: help wanted, question 6 | assignees: 7 | --- 8 | 9 | ## ❓ Questions and Help 10 | 11 | ### Before asking: 12 | 1. search the issues. 13 | 2. search the docs. 
14 | 15 | 16 | #### What is your question? 17 | 18 | 19 | #### Code 20 | 21 | 22 | 23 | #### What have you tried? 24 | 25 | 26 | ### Additional context 27 | 28 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | # Manually triggerable in github 5 | workflow_dispatch: 6 | 7 | push: 8 | paths-ignore: 9 | - "docs/**" 10 | - "*.md" 11 | - ".github/workflows/mirror.yml" 12 | - ".gitlab/.gitlab-ci.yml" 13 | 14 | pull_request: 15 | paths-ignore: 16 | - "docs/**" 17 | - "*.md" 18 | - ".github/workflows/mirror.yml" 19 | - ".gitlab/.gitlab-ci.yml" 20 | 21 | jobs: 22 | pre-commit: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: checkout 26 | uses: actions/checkout@v4 27 | 28 | - uses: actions/setup-python@v4 29 | with: 30 | python-version: "3.9" 31 | 32 | - name: pre-commit 33 | uses: pre-commit/action@v2.0.3 34 | 35 | linux-py39-tests: 36 | needs: pre-commit 37 | runs-on: ubuntu-latest 38 | if: | 39 | ( github.event_name == 'push' ) && ( needs.pre-commit.result == 'success' ) 40 | || 41 | ( github.event_name == 'pull_request' ) && ( needs.pre-commit.result == 'success' ) 42 | || 43 | ( github.event_name == 'workflow_dispatch' ) && ( needs.pre-commit.result == 'success' ) 44 | 45 | steps: 46 | - uses: actions/checkout@v4 47 | 48 | - name: Set up Python 49 | uses: actions/setup-python@v4 50 | 51 | - uses: Gr1N/setup-poetry@v8 52 | with: 53 | poetry-version: 1.1.7 54 | 55 | # - name: update pip if python 3.12 56 | # run: pip install setuptools && python -m ensurepip --upgrade 57 | 58 | - name: install deps for Ubuntu 59 | run: sudo apt-get install build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info 60 | 61 | - name: install tox 62 | run: | 63 | python3 -m pip install --upgrade pip 64 | pip3 install tox==3.28.0 65 | pip3 install tox-gh-actions==2.12.0 66 
| 67 | - name: test with tox 68 | run: | 69 | tox 70 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | # At 20:59 every tuesday (23:59 MSK) 5 | schedule: 6 | - cron: 59 20 * * 2 7 | 8 | # Manually triggerable in github 9 | workflow_dispatch: 10 | 11 | jobs: 12 | codespell: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: checkout 17 | uses: actions/checkout@v4 18 | 19 | - name: codespell 20 | uses: codespell-project/actions-codespell@v2 21 | 22 | docs: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: checkout 26 | uses: actions/checkout@v4 27 | 28 | - name: setup Python 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: 3.8 32 | 33 | - uses: Gr1N/setup-poetry@v9 34 | with: 35 | poetry-version: 1.1.7 36 | 37 | - name: installation pandoc 38 | run: | 39 | wget https://github.com/jgm/pandoc/releases/download/2.14.0.3/pandoc-2.14.0.3-1-amd64.deb 40 | sudo dpkg -i pandoc-2.14.0.3-1-amd64.deb 41 | 42 | - name: poetry install 43 | run: | 44 | poetry run python scripts/poetry_fix.py -c 45 | poetry install -E all 46 | 47 | - name: make documentations 48 | run: | 49 | cd docs 50 | poetry run make html 51 | -------------------------------------------------------------------------------- /.github/workflows/mirror.yml: -------------------------------------------------------------------------------- 1 | name: Mirror repo 2 | 3 | on: 4 | push: 5 | 6 | # Manually triggerable in github 7 | workflow_dispatch: 8 | 9 | jobs: 10 | mirror: 11 | runs-on: "ubuntu-latest" 12 | steps: 13 | - name: Configure Private Key 14 | env: 15 | SSH_PRIVATE_KEY: ${{ secrets.DESITNATION_REPO_PRIVATE_KEY }} 16 | run: | 17 | mkdir -p ~/.ssh 18 | echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa 19 | chmod 600 ~/.ssh/id_rsa 20 | echo "Host *" >> ~/.ssh/config 21 | echo " StrictHostKeyChecking no" >> ~/.ssh/config 22 
| echo " CheckHostIP no" >> ~/.ssh/config 23 | echo " LogLevel ERROR" >> ~/.ssh/config 24 | echo " UserKnownHostsFile /dev/null" >> ~/.ssh/config 25 | 26 | - name: Push mirror 27 | env: 28 | SOURCE_REPO: "https://github.com/${{ github.repository }}.git" 29 | DESTINATION_REPO: "${{ secrets.DESTINATION_REPO }}" 30 | BASE_REPO: "https://github.com/${{ github.repository }}" 31 | run: | 32 | git clone --quiet "$SOURCE_REPO" && cd `basename "$BASE_REPO"` 33 | 34 | git config --global user.name "${{ github.actor }}" 35 | git config --global user.email "bot@example.com" 36 | 37 | BRANCH=${{ github.head_ref || github.ref_name }} 38 | EVENT_ACTION=${{ github.event_name }} 39 | if [[ "$branch" == "master" ]] || [[ "$branch" == "developer" ]] || [[ "$EVENT_ACTION" == "workflow_dispatch" ]] 40 | then 41 | FORCE_FLAG="--force" 42 | else 43 | FORCE_FLAG="" 44 | fi 45 | 46 | git checkout --quiet $BRANCH 47 | 48 | git remote set-url --push origin "$DESTINATION_REPO" 49 | git push $FORCE_FLAG --quiet -u origin $BRANCH 50 | -------------------------------------------------------------------------------- /.github/workflows/publish_pypi.yml: -------------------------------------------------------------------------------- 1 | name: build and publish to pypi 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | # workflow_dispatch: 8 | # inputs: 9 | # tag: 10 | # description: 'Tag' 11 | # required: true 12 | # default: 'v0.0.0' 13 | 14 | jobs: 15 | deploy: 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: JRubics/poetry-publish@v2.0 23 | with: 24 | pypi_token: ${{ secrets.LAMA_PYPI_TOKEN }} 25 | -------------------------------------------------------------------------------- /.github/workflows/tests_macos.yml: -------------------------------------------------------------------------------- 1 | name: tests_macos 2 | 3 | on: 4 | # # At 20:59 every day (23:59 MSK) 5 | # schedule: 6 | # - cron: 59 20 * * * 7 | 8 | # Manually triggerable in 
github 9 | workflow_dispatch: 10 | 11 | workflow_run: 12 | workflows: ["tests_ubuntu"] 13 | branches: [master] 14 | types: 15 | - completed 16 | 17 | jobs: 18 | macos-tests: 19 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 20 | runs-on: macos-latest 21 | strategy: 22 | fail-fast: true 23 | matrix: 24 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - name: Set up Python (Conda) 30 | uses: conda-incubator/setup-miniconda@v3 31 | with: 32 | auto-update-conda: true 33 | channels: conda-forge 34 | python-version: ${{ matrix.python-version }} 35 | auto-activate-base: true 36 | activate-environment: test 37 | 38 | - name: install deps for MacOS 39 | run: brew update && brew install libomp cairo pango gdk-pixbuf libffi 40 | 41 | - name: install conda dependencies 42 | run: | 43 | conda install pip numpy==1.26.4 44 | 45 | - name: install with pip 46 | run: | 47 | pip install tox==3.28.0 48 | pip install tox-gh-actions==2.12.0 49 | 50 | - name: test with tox 51 | run: | 52 | tox 53 | -------------------------------------------------------------------------------- /.github/workflows/tests_ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: tests_ubuntu 2 | 3 | on: 4 | # # At 20:59 every day (23:59 MSK) 5 | # schedule: 6 | # - cron: 59 20 * * * 7 | 8 | # Manually triggerable in github 9 | workflow_dispatch: 10 | 11 | workflow_run: 12 | workflows: ["CI"] 13 | types: 14 | - completed 15 | 16 | jobs: 17 | ubuntu-tests: 18 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 19 | runs-on: ubuntu-latest 20 | strategy: 21 | fail-fast: true 22 | matrix: 23 | python-version: ["3.8", "3.10", "3.11", "3.12"] # "3.9" is tested in CI 24 | 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | 33 
| - name: install deps for Ubuntu 34 | run: sudo apt-get install build-essential libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 libffi-dev shared-mime-info 35 | 36 | - name: install tox 37 | run: | 38 | python3 -m pip install --upgrade pip 39 | pip3 install tox==3.28.0 40 | pip3 install tox-gh-actions==2.12.0 41 | 42 | - name: test with tox 43 | run: | 44 | tox 45 | -------------------------------------------------------------------------------- /.github/workflows/tests_windows.yml: -------------------------------------------------------------------------------- 1 | name: tests_windows 2 | 3 | on: 4 | # # At 20:59 every day (23:59 MSK) 5 | # schedule: 6 | # - cron: 59 20 * * * 7 | 8 | # Manually triggerable in github 9 | workflow_dispatch: 10 | 11 | workflow_run: 12 | workflows: ["tests_ubuntu"] 13 | branches: [master] 14 | types: 15 | - completed 16 | 17 | jobs: 18 | windows-tests: 19 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 20 | runs-on: windows-latest 21 | strategy: 22 | fail-fast: true 23 | matrix: 24 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | 34 | - name: setup-msys2 35 | uses: msys2/setup-msys2@v2 36 | with: 37 | msystem: MINGW64 38 | update: true 39 | install: >- 40 | mingw-w64-x86_64-cairo 41 | 42 | - name: install deps for Windows 43 | run: pip3 install pycairo 44 | 45 | - name: install tox 46 | run: | 47 | python3 -m pip install --upgrade pip 48 | pip3 install tox==3.28.0 49 | pip3 install tox-gh-actions==2.12.0 50 | 51 | - name: test with tox 52 | run: | 53 | tox 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files 2 | *.csv 3 | *.png 4 | *.pickle 5 | *.html 
6 | *.ipynb 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # DS_store 17 | .DS_Store 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | tabularAutoML_model_report/ 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | cover/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | .pybuilder/ 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | # For a library or package, you might want to ignore these files since the code is 98 | # intended to run in multiple environments; otherwise, check them in: 99 | # .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 109 | __pypackages__/ 110 | 111 | # Celery stuff 112 | celerybeat-schedule 113 | celerybeat.pid 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # Environments 119 | .env 120 | .venv 121 | env/ 122 | venv/ 123 | ENV/ 124 | env.bak/ 125 | venv.bak/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # VSCode 135 | .vscode 136 | 137 | # mkdocs documentation 138 | /site 139 | 140 | # mypy 141 | .mypy_cache/ 142 | .dmypy.json 143 | dmypy.json 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # pytype static type analyzer 149 | .pytype/ 150 | 151 | # Cython debug symbols 152 | cython_debug/ 153 | 154 | # VSCode 155 | .vscode/ 156 | 157 | .idea/ 158 | lama_venv/ 159 | *.db 160 | 161 | temp/ 162 | 163 | poetry.lock 164 | -------------------------------------------------------------------------------- /.gitlab/.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | workflow: 2 | rules: 3 | - if: $CI_PIPELINE_SOURCE == "push" 4 | when: always 5 | - when: never 6 | 7 | .job_template: &ssh_key_configuration 8 | before_script: 9 | - mkdir -p ~/.ssh 10 | - echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa 11 | - chmod 600 ~/.ssh/id_rsa 12 | - echo "Host *" >> ~/.ssh/config 13 | - echo " StrictHostKeyChecking no" >> ~/.ssh/config 14 | - echo " CheckHostIP no" >> ~/.ssh/config 15 | - echo " LogLevel ERROR" >> ~/.ssh/config 16 | - echo " UserKnownHostsFile /dev/null" >> ~/.ssh/config 17 | 18 | cache: 19 | key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" 20 | paths: 21 | - ~/.cache/pip/ 22 | 23 | stages: 24 | - mirror 25 | 
- default 26 | - all_pythons 27 | - docs 28 | 29 | 30 | default: 31 | stage: default 32 | image: python:3.10 33 | before_script: 34 | - pip install tox 35 | script: 36 | - tox -e lint 37 | - tox -e py310 38 | 39 | 40 | all_pythons: 41 | stage: all_pythons 42 | image: python:$PYTHON_VERSION 43 | before_script: 44 | - pip install tox 45 | script: 46 | - tox -e py${PYTHON_VERSION//./} 47 | parallel: 48 | matrix: 49 | - PYTHON_VERSION: ["3.8", "3.9", "3.11", "3.12"] 50 | 51 | docs: 52 | stage: docs 53 | image: python:3.10 54 | before_script: 55 | - pip install tox 56 | script: 57 | - tox -e codespell 58 | 59 | 60 | mirror-code: 61 | <<: *ssh_key_configuration 62 | stage: mirror 63 | script: | 64 | if [[ "${CI_COMMIT_REF_NAME}" == experiment* ]] || [[ "${CI_COMMIT_REF_NAME}" == AUTOML-* ]] 65 | then 66 | : 67 | else 68 | git branch -f ${CI_COMMIT_REF_NAME} 69 | git config remote.github.url >&- || git remote add github "$DESTINATION_REPO" 70 | git push -u github "${CI_COMMIT_REF_NAME}" 71 | fi 72 | -------------------------------------------------------------------------------- /.gitlab/release.yml: -------------------------------------------------------------------------------- 1 | workflow: 2 | rules: 3 | - if: $CI_PIPELINE_SOURCE == "push" 4 | 5 | .job_template: &ssh_key_configuration 6 | before_script: 7 | # TODO: add clearml config 8 | - mkdir -p ~/.ssh 9 | - echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa 10 | - chmod 600 ~/.ssh/id_rsa 11 | - echo "Host *" >> ~/.ssh/config 12 | - echo " StrictHostKeyChecking no" >> ~/.ssh/config 13 | - echo " CheckHostIP no" >> ~/.ssh/config 14 | - echo " LogLevel ERROR" >> ~/.ssh/config 15 | - echo " UserKnownHostsFile /dev/null" >> ~/.ssh/config 16 | 17 | stages: 18 | - run_benchmark 19 | 20 | mirror-code: 21 | <<: *ssh_key_configuration 22 | stage: run_benchmark 23 | script: | 24 | sh ./experiments/run_bench_release.sh Releases 25 | -------------------------------------------------------------------------------- 
/.jupyter/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # Configuration file for jupyter-notebook. 3 | 4 | # timeout of each cell 5 | c.ExecutePreprocessor.timeout = 60 * 15 6 | 7 | # Path to kernel 8 | c.ExecutePreprocessor.kernel_name = "python3" 9 | 10 | # Remove metadata 11 | c.ClearMetadataPreprocessor.enabled = True 12 | c.ClearMetadataPreprocessor.clear_cell_metadata = True 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: 2 | - commit 3 | 4 | repos: 5 | - repo: https://github.com/psf/black 6 | rev: 20.8b1 7 | hooks: 8 | - id: black 9 | args: ["--config=pyproject.toml"] 10 | additional_dependencies: ["click==8.0.4"] 11 | 12 | # - repo: https://github.com/PyCQA/isort 13 | # rev: 5.12.0 14 | # hooks: 15 | # - id: isort 16 | # args: ["--settings-path pyproject.toml"] 17 | 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v3.4.0 20 | hooks: 21 | - id: trailing-whitespace 22 | - id: end-of-file-fixer 23 | - id: debug-statements 24 | - id: check-yaml 25 | 26 | - repo: https://github.com/PyCQA/flake8 27 | rev: 6.1.0 28 | hooks: 29 | - id: flake8 30 | additional_dependencies: [flake8-docstrings] 31 | 32 | - repo: https://github.com/myint/rstcheck 33 | rev: 3f92957478422df87bd730abde66f089cc1ee19b 34 | hooks: 35 | - id: rstcheck 36 | 37 | - repo: local 38 | hooks: 39 | - id: set-py-versions 40 | name: set python versions 41 | description: set python versions := [3.8, 3.13) to `pyproject.toml` 42 | language: python 43 | entry: python scripts/poetry_fix.py -f 44 | pass_filenames: false 45 | 46 | # - repo: https://github.com/python-jsonschema/check-jsonschema 47 | # rev: 0.18.2 48 | # hooks: 49 | # - id: check-github-workflows 50 | 51 | - repo: local 52 | hooks: 53 | - id: exp-branch-push 54 | name: 
experiment branch push 55 | description: prevent pushing 'experiment/*' branches to LAMA github 56 | stages: [push] 57 | language: python 58 | entry: python ./scripts/exp_branch_push.py 59 | pass_filenames: false 60 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.10" 13 | jobs: 14 | pre_create_environment: 15 | - asdf plugin add poetry 16 | - asdf install poetry 1.8.0 17 | - asdf global poetry 1.8.0 18 | - poetry config virtualenvs.create false 19 | - poetry run python scripts/poetry_fix.py -c 20 | post_install: 21 | - poetry install -E all 22 | 23 | # Build documentation in the docs/ directory with Sphinx 24 | sphinx: 25 | configuration: docs/conf.py 26 | 27 | # Optionally build your docs in additional formats such as PDF 28 | formats: all 29 | 30 | # Optionally set the version of Python and requirements required to build your docs 31 | python: 32 | install: 33 | - requirements: docs/requirements.txt 34 | - path: . 
35 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect 2 | -------------------------------------------------------------------------------- /check_docs.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | 5 | logging.basicConfig(format="[%(asctime)s] (%(levelname)s): %(message)s", level=logging.DEBUG) 6 | 7 | logging.debug("Check that all .rst files compile to .html.") 8 | 9 | DOCS_PATH = os.path.join(os.path.dirname(__file__), "docs") 10 | RSTS_PATH = os.path.join(DOCS_PATH, "generated") 11 | HTML_PATH = os.path.join(DOCS_PATH, os.path.join("_build", "html", "generated")) 12 | 13 | if not os.path.exists(RSTS_PATH): 14 | os.makedirs(RSTS_PATH) 15 | if not os.path.exists(HTML_PATH): 16 | os.makedirs(HTML_PATH) 17 | 18 | html_filenames = [os.path.splitext(name)[0] + ".html" for name in os.listdir(RSTS_PATH) if ".rst" in name] 19 | html_filenames = sorted(html_filenames) 20 | logging.debug(".rst filenames: {}".format(html_filenames)) 21 | 22 | for fname in html_filenames: 23 | fpath = os.path.join(HTML_PATH, fname) 24 | logging.debug("Check {}".format(fname)) 25 | assert os.path.exists(fpath), "File {} doesn`t exist.".format(fpath) 26 | 27 | logging.debug("All files exists.") 28 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | clean: 23 | sphinx-build -M clean "$(SOURCEDIR)" "$(BUILDDIR)" 24 | sphinx-build -M clean "$(SOURCEDIR)" "imgs" 25 | sphinx-build -M clean "$(SOURCEDIR)" "pages/modules/generated/" 26 | -------------------------------------------------------------------------------- /docs/_static/style.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: none; 3 | } 4 | 5 | .rst-content code.xref { 6 | /* !important prevents the common CSS stylesheets from overriding 7 | this as on RTD they are loaded after this stylesheet */ 8 | color: #E74C3C 9 | } 10 | 11 | html.writer-html4 .rst-content dl:not(.docutils) dl:not(.field-list)>dt, html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dl:not(.field-list)>dt { 12 | border-left-color: rgb(9, 183, 14) 13 | } 14 | -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | 12 | .. 13 | autogenerated from source/_templates/autosummary/class.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /docs/_templates/autosummary/module.rst: -------------------------------------------------------------------------------- 1 | .. 
role:: hidden 2 | :class: hidden-section 3 | 4 | {{ name | underline }} 5 | 6 | .. automodule:: {{ fullname }} 7 | 8 | {% block classes %} 9 | {% if classes %} 10 | .. rubric:: {{ _('Classes') }} 11 | 12 | .. autosummary:: 13 | :toctree: generated 14 | :nosignatures: 15 | :template: classtemplate.rst 16 | {% for item in classes %} 17 | {{ item }} 18 | {%- endfor %} 19 | {% endif %} 20 | {% endblock %} 21 | 22 | {% block functions %} 23 | {% if functions %} 24 | .. rubric:: {{ _('Functions') }} 25 | 26 | .. autosummary:: 27 | :toctree: generated 28 | :nosignatures: 29 | :template: functiontemplate.rst 30 | {% for item in functions %} 31 | {{ item }} 32 | {%- endfor %} 33 | {% endif %} 34 | {% endblock %} 35 | 36 | 37 | {% block modules %} 38 | {% if modules %} 39 | .. rubric:: {{ _('Modules') }} 40 | 41 | .. autosummary:: 42 | :toctree: 43 | :recursive: 44 | {% for item in modules %} 45 | {{ item }} 46 | {%- endfor %} 47 | {% endif %} 48 | {% endblock %} 49 | -------------------------------------------------------------------------------- /docs/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline }} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | 12 | .. 13 | autogenerated from source/_templates/classtemplate.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /docs/_templates/functiontemplate.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | {{ name | underline }} 6 | 7 | .. autofunction:: {{ fullname }} 8 | 9 | .. 
10 | autogenerated from source/_templates/functiontemplate.rst 11 | note it does not have :inherited-members: 12 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | LightAutoML documentation 2 | ========================= 3 | 4 | `LightAutoML `_ is an open-source Python library aimed at automated machine learning. 5 | It is designed to be lightweight and efficient for various tasks with tabular and text data. 6 | LightAutoML provides easy-to-use pipeline creation that enables: 7 | 8 | - Automatic hyperparameter tuning, data processing. 9 | - Automatic typing, feature selection. 10 | - Automatic time utilization. 11 | - Automatic report creation. 12 | - Easy-to-use modular scheme to create your own pipelines. 13 | 14 | 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | :caption: Contents 19 | 20 | Installation Guide 21 | Tutorials 22 | Kaggle kernel examples of LightAutoML usage 23 | Courses, videos and papers 24 | Python-API 25 | 26 | 27 | 28 | Indices and Tables 29 | ================== 30 | 31 | * :ref:`genindex` 32 | -------------------------------------------------------------------------------- /docs/mock_docs.py: -------------------------------------------------------------------------------- 1 | """A one line summary of the module or program, terminated by a period. 2 | 3 | Leave one blank line. The rest of this docstring should contain an 4 | overall description of the module or program. Optionally, it may also 5 | contain a brief description of exported classes and functions and/or usage 6 | examples. 7 | 8 | Typical usage example: 9 | 10 | >>> print('something') 11 | something 12 | >>> a = MyClass('be', 'or', 'not') 13 | 14 | """ 15 | 16 | import datetime 17 | 18 | 19 | class MyClass: 20 | """Description of class. 21 | 22 | Really do nothing. 23 | 24 | Attributes: 25 | attr1 (str): Description of `attr1`. 
26 | attr2 (str): Description of `attr2`. 27 | 28 | Args: 29 | attr1: Description of `attr1`. 30 | attr2: Description of `attr2`. 31 | 32 | 33 | """ 34 | 35 | def __init__(self, attr1: str, attr2: str): 36 | self.attr1 = attr1 37 | self.attr2 = attr2 38 | date = datetime.datetime.now() 39 | print("{}.{}.{} {}:{}:{}".format(date.day, date.month, date.year, date.hour, date.minute, date.second)) 40 | 41 | 42 | # .. toctree:: 43 | # :glob: 44 | # :maxdepth: 1 45 | # :caption: Tutorials 46 | # 47 | # tutorials/tutor_1.ipynb 48 | # tutorials/tutor_2.ipynb 49 | # tutorials/tutor_3.ipynb 50 | -------------------------------------------------------------------------------- /docs/pages/Installation.rst: -------------------------------------------------------------------------------- 1 | Installation Guide 2 | ================== 3 | 4 | 5 | Basic 6 | ----- 7 | 8 | You can install the `LightAutoML` library from PyPI. 9 | 10 | .. code-block:: bash 11 | 12 | pip install lightautoml 13 | 14 | 15 | Development 16 | ----------- 17 | 18 | You can also clone the repository and install it with poetry. 19 | First, install `poetry `_. 20 | Then, 21 | 22 | .. 
code-block:: bash 23 | 24 | git clone git@github.com:AILab-MLTools/LightAutoML.git 25 | cd LightAutoML 26 | 27 | # Create virtual environment inside your project directory 28 | poetry config virtualenvs.in-project true 29 | 30 | # If you want to update dependencies, run the command: 31 | poetry lock 32 | 33 | # Installation 34 | poetry install 35 | -------------------------------------------------------------------------------- /docs/pages/Kaggle_Kernels.rst: -------------------------------------------------------------------------------- 1 | Kaggle Kernels 2 | ============== 3 | 4 | * `Tabular Playground Series April 2021 competition solution `_ 5 | * `Titanic competition solution (80% accuracy) `_ 6 | * `Titanic **12-code-lines** competition solution (78% accuracy) `_ 7 | * `House prices competition solution `_ 8 | * `Natural Language Processing with Disaster Tweets solution `_ 9 | * `Tabular Playground Series March 2021 competition solution `_ 10 | * `Tabular Playground Series February 2021 competition solution `_ 11 | * `Interpretable WhiteBox solution `_ 12 | * `Custom ML pipeline elements inside existing ones `_ 13 | * `Tabular Playground Series November 2022 competition solution with Neural Networks `_ 14 | -------------------------------------------------------------------------------- /docs/pages/Others.rst: -------------------------------------------------------------------------------- 1 | Others 2 | ====== 3 | 4 | 5 | LightAutoML crash courses 6 | ------------------------- 7 | 8 | `(Russian) AutoML course for OpenDataScience community `_ 9 | 10 | 11 | Video guides 12 | ------------ 13 | 14 | * (Russian) `LightAutoML webinar for Sberloga community `_ (`Alexander Ryzhkov `__, `Dmitry Simakov `__) 15 | * (Russian) `LightAutoML hands-on tutorial in Kaggle Kernels `_ (`Alexander Ryzhkov `__) 16 | * (English) `Automated Machine Learning with LightAutoML: theory and practice `_ (`Alexander Ryzhkov `__) 17 | * (English) `LightAutoML framework general overview, 
benchmarks and advantages for business `_ (`Alexander Ryzhkov `__) 18 | * (English) `LightAutoML practical guide - ML pipeline presets overview `_ (`Dmitry Simakov `__) 19 | 20 | 21 | Papers 22 | ------ 23 | 24 | Anton Vakhrushev, Alexander Ryzhkov, Dmitry Simakov, Rinchin Damdinov, Maxim Savchenko, Alexander Tuzhilin `"LightAutoML: AutoML Solution for a Large Financial Services Ecosystem" `_. arXiv:2109.01528, 2021. 25 | 26 | 27 | Articles about LightAutoML 28 | -------------------------- 29 | 30 | * (English) `LightAutoML vs Titanic: 80% accuracy in several lines of code (Medium) `_ 31 | * (English) `Hands-On Python Guide to LightAutoML – An Automatic ML Model Creation Framework (Analytic Indian Mag) `_ 32 | -------------------------------------------------------------------------------- /docs/pages/Python-API.rst: -------------------------------------------------------------------------------- 1 | Python-API 2 | ========== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | :caption: Main modules 8 | 9 | modules/automl 10 | modules/addons 11 | modules/dataset 12 | modules/image 13 | modules/ml_algo 14 | modules/ml_algo.tuning 15 | modules/ml_algo.torch_based 16 | modules/pipelines 17 | modules/pipelines.selection 18 | modules/pipelines.features 19 | modules/pipelines.ml 20 | modules/reader 21 | modules/report 22 | modules/tasks 23 | modules/tasks.losses 24 | modules/text 25 | modules/transformers 26 | modules/utils 27 | modules/validation 28 | -------------------------------------------------------------------------------- /docs/pages/Tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | This section contains tutorials for both **LightAutoML** and **HypEx**, covering a wide range of use cases from basic model training to advanced hypothesis testing. 5 | 6 | LightAutoML Tutorials 7 | --------------------- 8 | 9 | 10 | .. 
toctree:: 11 | :maxdepth: 1 12 | :caption: Core Features 13 | 14 | tutorials/Tutorial_1_basics.nblink 15 | tutorials/Tutorial_2_WhiteBox_AutoWoE.nblink 16 | tutorials/Tutorial_3_sql_data_source.nblink 17 | tutorials/Tutorial_4_NLP_Interpretation.nblink 18 | 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: Advanced Topics 23 | 24 | tutorials/Tutorial_5_uplift.nblink 25 | tutorials/Tutorial_6_custom_pipeline.nblink 26 | tutorials/Tutorial_7_ICE_and_PDP_interpretation.nblink 27 | tutorials/Tutorial_8_CV_preset.nblink 28 | tutorials/Tutorial_9_neural_networks.nblink 29 | tutorials/Tutorial_10_relational_data_with_star_scheme.nblink 30 | tutorials/Tutorial_11_time_series.nblink 31 | 32 | 33 | HypEx Tutorials 34 | --------------- 35 | 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | :caption: A/B and A/A Testing 40 | 41 | tutorials/Tutorial_12_AA_Test.nblink 42 | tutorials/Tutorial_13_AA_Test_multigroup_split.nblink 43 | tutorials/Tutorial_14_AB_Test.nblink 44 | 45 | 46 | .. toctree:: 47 | :maxdepth: 1 48 | :caption: Matching 49 | 50 | tutorials/Tutorial_15_Matching.nblink 51 | tutorials/Tutorial_16_Matching_without_replacement.nblink 52 | 53 | 54 | .. toctree:: 55 | :maxdepth: 1 56 | :caption: Modeling and Testing Limits 57 | 58 | tutorials/Tutorial_17_Modeling_Limit_Distribution.nblink 59 | tutorials/Tutorial_18_Test_Limit_Distribution.nblink 60 | -------------------------------------------------------------------------------- /docs/pages/modules/addons.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.addons 5 | ================== 6 | 7 | Extensions of core functionality. 8 | 9 | Utilization 10 | ----------- 11 | 12 | .. currentmodule:: lightautoml.addons.utilization 13 | 14 | .. 
autosummary:: 15 | :toctree: ./generated 16 | :nosignatures: 17 | :template: classtemplate.rst 18 | 19 | ~utilization.TimeUtilization 20 | 21 | HypEx -- Hypotheses and Experiments 22 | ------------------------------------- 23 | 24 | The official HypEx documentation can be found at: 25 | 26 | `HypEx Documentation `_ 27 | 28 | For a detailed reference, visit the HypEx API documentation. 29 | -------------------------------------------------------------------------------- /docs/pages/modules/automl.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.automl 5 | ====================== 6 | 7 | The main module, which includes the AutoML class, blenders and ready-made presets. 8 | 9 | .. currentmodule:: lightautoml.automl.base 10 | 11 | .. autosummary:: 12 | :toctree: ./generated 13 | :nosignatures: 14 | :template: classtemplate.rst 15 | 16 | AutoML 17 | 18 | 19 | Presets 20 | ------- 21 | 22 | Presets for end-to-end model training for special tasks. 23 | 24 | .. currentmodule:: lightautoml.automl.presets 25 | 26 | .. autosummary:: 27 | :toctree: ./generated 28 | :nosignatures: 29 | :template: classtemplate.rst 30 | 31 | base.AutoMLPreset 32 | tabular_presets.TabularAutoML 33 | tabular_presets.TabularUtilizedAutoML 34 | .. image_presets.TabularCVAutoML 35 | text_presets.TabularNLPAutoML 36 | whitebox_presets.WhiteBoxPreset 37 | 38 | 39 | Blenders 40 | -------- 41 | 42 | .. currentmodule:: lightautoml.automl.blend 43 | 44 | .. autosummary:: 45 | :toctree: ./generated 46 | :nosignatures: 47 | :template: classtemplate.rst 48 | 49 | Blender 50 | BestModelSelector 51 | MeanBlender 52 | WeightedBlender 53 | -------------------------------------------------------------------------------- /docs/pages/modules/dataset.rst: -------------------------------------------------------------------------------- 1 | ..
role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.dataset 5 | =================== 6 | 7 | Provides an internal interface for working with data. 8 | 9 | Dataset Interfaces 10 | ------------------- 11 | 12 | .. currentmodule:: lightautoml.dataset 13 | 14 | .. autosummary:: 15 | :toctree: ./generated 16 | :nosignatures: 17 | :template: classtemplate.rst 18 | 19 | base.LAMLColumn 20 | base.LAMLDataset 21 | np_pd_dataset.NumpyDataset 22 | np_pd_dataset.PandasDataset 23 | np_pd_dataset.CSRSparseDataset 24 | 25 | Roles 26 | ----------- 27 | 28 | Role contains information about the column, which determines how it is processed. 29 | 30 | .. currentmodule:: lightautoml.dataset.roles 31 | 32 | .. autosummary:: 33 | :toctree: ./generated 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | ColumnRole 38 | NumericRole 39 | CategoryRole 40 | TextRole 41 | DatetimeRole 42 | TargetRole 43 | GroupRole 44 | DropRole 45 | WeightsRole 46 | FoldsRole 47 | PathRole 48 | 49 | 50 | Utils 51 | ------------ 52 | 53 | Utilities for working with the structure of a dataset. 54 | 55 | .. currentmodule:: lightautoml.dataset.utils 56 | 57 | .. autosummary:: 58 | :toctree: ./generated 59 | :nosignatures: 60 | :template: functiontemplate.rst 61 | 62 | roles_parser 63 | get_common_concat 64 | numpy_and_pandas_concat 65 | concatenate 66 | -------------------------------------------------------------------------------- /docs/pages/modules/image.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.image 5 | ================= 6 | 7 | Provides an internal interface for working with image features. 8 | 9 | Image Feature Extractors 10 | ------------------------ 11 | 12 | Image feature extractors based on color histograms and CNN embeddings. 13 | 14 | .. currentmodule:: lightautoml.image.image 15 | 16 | .. 
autosummary:: 17 | :toctree: ./generated 18 | :nosignatures: 19 | :template: classtemplate.rst 20 | 21 | CreateImageFeatures 22 | TimmModelEmbedder 23 | 24 | 25 | PyTorch Image Datasets 26 | ------------------------ 27 | 28 | .. currentmodule:: lightautoml.image.image 29 | 30 | .. autosummary:: 31 | :toctree: ./generated 32 | :nosignatures: 33 | :template: classtemplate.rst 34 | 35 | ImageTimmDataset 36 | DeepTimmImageEmbedder 37 | 38 | 39 | Utils 40 | --------- 41 | 42 | .. currentmodule:: lightautoml.image.utils 43 | 44 | .. autosummary:: 45 | :toctree: ./generated 46 | :nosignatures: 47 | :template: functiontemplate.rst 48 | 49 | pil_loader 50 | -------------------------------------------------------------------------------- /docs/pages/modules/ml_algo.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.ml_algo 5 | =================== 6 | 7 | Models used for machine learning pipelines. 8 | 9 | Base Classes 10 | ------------------------ 11 | 12 | .. currentmodule:: lightautoml.ml_algo.base 13 | 14 | .. autosummary:: 15 | :toctree: ./generated 16 | :nosignatures: 17 | :template: classtemplate.rst 18 | 19 | MLAlgo 20 | TabularMLAlgo 21 | 22 | 23 | Linear Models 24 | ------------------------- 25 | 26 | .. currentmodule:: lightautoml.ml_algo 27 | 28 | .. autosummary:: 29 | :toctree: ./generated 30 | :nosignatures: 31 | :template: classtemplate.rst 32 | 33 | ~linear_sklearn.LinearLBFGS 34 | ~linear_sklearn.LinearL1CD 35 | ~dl_model.TorchModel 36 | 37 | Boosted Trees 38 | ------------------------- 39 | 40 | .. currentmodule:: lightautoml.ml_algo 41 | 42 | .. autosummary:: 43 | :toctree: ./generated 44 | :nosignatures: 45 | :template: classtemplate.rst 46 | 47 | ~boost_lgbm.BoostLGBM 48 | ~boost_cb.BoostCB 49 | 50 | 51 | Neural Networks 52 | ------------------------- 53 | 54 | .. currentmodule:: lightautoml.ml_algo.torch_based 55 | 56 | .. 
autosummary:: 57 | :toctree: ./generated 58 | :nosignatures: 59 | :template: classtemplate.rst 60 | 61 | ~nn_models.MLP 62 | ~nn_models.DenseLightModel 63 | ~nn_models.DenseModel 64 | ~nn_models.ResNetModel 65 | ~nn_models.SNN 66 | 67 | 68 | WhiteBox 69 | ------------------------- 70 | 71 | .. currentmodule:: lightautoml.ml_algo 72 | 73 | .. autosummary:: 74 | :toctree: ./generated 75 | :nosignatures: 76 | :template: classtemplate.rst 77 | 78 | ~whitebox.WbMLAlgo 79 | -------------------------------------------------------------------------------- /docs/pages/modules/ml_algo.torch_based.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.ml_algo 5 | =================== 6 | 7 | Torch utils. 8 | 9 | Pooling Strategies 10 | ------------------------------ 11 | 12 | .. currentmodule:: lightautoml.ml_algo.torch_based.nn_models 13 | 14 | .. autosummary:: 15 | :toctree: ./generated 16 | :nosignatures: 17 | :template: classtemplate.rst 18 | 19 | SequenceAbstractPooler 20 | SequenceClsPooler 21 | SequenceMaxPooler 22 | SequenceSumPooler 23 | SequenceAvgPooler 24 | SequenceIndentityPooler 25 | -------------------------------------------------------------------------------- /docs/pages/modules/ml_algo.tuning.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | lightautoml.ml_algo.tuning 5 | ========================== 6 | 7 | Bunch of classes for hyperparameters tuning. 8 | 9 | Base Classes 10 | ------------------------ 11 | 12 | .. currentmodule:: lightautoml.ml_algo.tuning.base 13 | 14 | .. autosummary:: 15 | :toctree: ./generated 16 | :nosignatures: 17 | :template: classtemplate.rst 18 | 19 | ParamsTuner 20 | DefaultTuner 21 | 22 | 23 | Tuning with Optuna 24 | ------------------ 25 | 26 | .. currentmodule:: lightautoml.ml_algo.tuning.optuna 27 | 28 | .. 
autosummary:: 29 | :toctree: ./generated 30 | :nosignatures: 31 | :template: classtemplate.rst 32 | 33 | OptunaTuner 34 | DLOptunaTuner 35 | -------------------------------------------------------------------------------- /docs/pages/modules/pipelines.features.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.pipelines.features 6 | ============================== 7 | 8 | Pipelines for features generation. 9 | 10 | Base Classes 11 | ----------------- 12 | 13 | .. currentmodule:: lightautoml.pipelines.features.base 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | FeaturesPipeline 21 | EmptyFeaturePipeline 22 | TabularDataFeatures 23 | 24 | 25 | 26 | Feature Pipelines for Boosting Models 27 | ----------------------------------------- 28 | 29 | .. currentmodule:: lightautoml.pipelines.features.lgb_pipeline 30 | 31 | .. autosummary:: 32 | :toctree: ./generated 33 | :nosignatures: 34 | :template: classtemplate.rst 35 | 36 | LGBSimpleFeatures 37 | LGBAdvancedPipeline 38 | 39 | 40 | Feature Pipelines for Linear Models 41 | ----------------------------------- 42 | 43 | .. currentmodule:: lightautoml.pipelines.features.linear_pipeline 44 | 45 | .. autosummary:: 46 | :toctree: ./generated 47 | :nosignatures: 48 | :template: classtemplate.rst 49 | 50 | LinearFeatures 51 | 52 | Feature Pipelines for WhiteBox 53 | ------------------------------ 54 | 55 | .. currentmodule:: lightautoml.pipelines.features.wb_pipeline 56 | 57 | .. autosummary:: 58 | :toctree: ./generated 59 | :nosignatures: 60 | :template: classtemplate.rst 61 | 62 | WBFeatures 63 | 64 | 65 | Image Feature Pipelines 66 | ---------------------------------- 67 | 68 | .. currentmodule:: lightautoml.pipelines.features.image_pipeline 69 | 70 | .. 
autosummary:: 71 | :toctree: ./generated 72 | :nosignatures: 73 | :template: classtemplate.rst 74 | 75 | ImageDataFeatures 76 | ImageSimpleFeatures 77 | ImageAutoFeatures 78 | 79 | 80 | Text Feature Pipelines 81 | ------------------------------ 82 | 83 | .. currentmodule:: lightautoml.pipelines.features.text_pipeline 84 | 85 | .. autosummary:: 86 | :toctree: ./generated 87 | :nosignatures: 88 | :template: classtemplate.rst 89 | 90 | NLPDataFeatures 91 | TextAutoFeatures 92 | NLPTFiDFFeatures 93 | TextBertFeatures 94 | 95 | 96 | Feature Pipelines for Neural Networks Models 97 | ------------------------------------------------------ 98 | 99 | .. currentmodule:: lightautoml.pipelines.features.torch_pipeline 100 | 101 | .. autosummary:: 102 | :toctree: ./generated 103 | :nosignatures: 104 | :template: classtemplate.rst 105 | 106 | TorchSimpleFeatures 107 | -------------------------------------------------------------------------------- /docs/pages/modules/pipelines.ml.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.pipelines.ml 6 | ============================== 7 | 8 | Pipelines that merge together single model training steps. 9 | 10 | Base Classes 11 | ----------------- 12 | 13 | .. currentmodule:: lightautoml.pipelines.ml.base 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | MLPipeline 21 | 22 | 23 | Pipeline for Nested Cross-Validation 24 | ------------------------------------ 25 | 26 | .. currentmodule:: lightautoml.pipelines.ml.nested_ml_pipe 27 | 28 | .. autosummary:: 29 | :toctree: ./generated 30 | :nosignatures: 31 | :template: classtemplate.rst 32 | 33 | NestedTabularMLAlgo 34 | NestedTabularMLPipeline 35 | 36 | Pipeline for WhiteBox 37 | --------------------- 38 | 39 | .. currentmodule:: lightautoml.pipelines.ml.whitebox_ml_pipe 40 | 41 | .. 
autosummary:: 42 | :toctree: ./generated 43 | :nosignatures: 44 | :template: classtemplate.rst 45 | 46 | WBPipeline 47 | -------------------------------------------------------------------------------- /docs/pages/modules/pipelines.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.pipelines 6 | ===================== 7 | 8 | Pipelines for solving different tasks. 9 | 10 | Utils 11 | ------- 12 | 13 | .. currentmodule:: lightautoml.pipelines.utils 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: functiontemplate.rst 19 | 20 | map_pipeline_names 21 | get_columns_by_role 22 | -------------------------------------------------------------------------------- /docs/pages/modules/pipelines.selection.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.pipelines.selection 6 | =============================== 7 | 8 | Feature selection module for ML pipelines. 9 | 10 | Base Classes 11 | ----------------- 12 | 13 | .. currentmodule:: lightautoml.pipelines.selection.base 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | ImportanceEstimator 21 | SelectionPipeline 22 | 23 | Importance Based Selectors 24 | -------------------------- 25 | 26 | .. currentmodule:: lightautoml.pipelines.selection 27 | 28 | .. autosummary:: 29 | :toctree: ./generated 30 | :nosignatures: 31 | :template: classtemplate.rst 32 | 33 | ~importance_based.ModelBasedImportanceEstimator 34 | ~importance_based.ImportanceCutoffSelector 35 | ~permutation_importance_based.NpPermutationImportanceEstimator 36 | ~permutation_importance_based.NpIterativeFeatureSelector 37 | 38 | Other Selectors 39 | ---------------------- 40 | 41 | .. currentmodule:: lightautoml.pipelines.selection 42 | 43 | .. 
autosummary:: 44 | :toctree: ./generated 45 | :nosignatures: 46 | :template: classtemplate.rst 47 | 48 | ~linear_selector.HighCorrRemoval 49 | -------------------------------------------------------------------------------- /docs/pages/modules/reader.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.reader 6 | ===================== 7 | 8 | Utils for reading, training and analysing data. 9 | 10 | Readers 11 | ------------- 12 | 13 | .. currentmodule:: lightautoml.reader.base 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | Reader 21 | PandasToPandasReader 22 | 23 | 24 | Tabular Batch Generators 25 | ----------------------------- 26 | 27 | Batch Handler Classes 28 | ^^^^^^^^^^^^^^^^^^^^^ 29 | 30 | .. currentmodule:: lightautoml.reader.tabular_batch_generator 31 | 32 | .. autosummary:: 33 | :toctree: ./generated 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | Batch 38 | FileBatch 39 | BatchGenerator 40 | DfBatchGenerator 41 | FileBatchGenerator 42 | 43 | Data Read Functions 44 | ^^^^^^^^^^^^^^^^^^^ 45 | 46 | .. currentmodule:: lightautoml.reader.tabular_batch_generator 47 | 48 | .. autosummary:: 49 | :toctree: ./generated 50 | :nosignatures: 51 | :template: functiontemplate.rst 52 | 53 | read_batch 54 | read_data 55 | -------------------------------------------------------------------------------- /docs/pages/modules/report.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.report 6 | ================== 7 | 8 | Report generators and templates. 9 | 10 | .. currentmodule:: lightautoml.report.report_deco 11 | 12 | .. 
autosummary:: 13 | :toctree: ./generated 14 | :nosignatures: 15 | :template: classtemplate.rst 16 | 17 | ReportDeco 18 | ReportDecoWhitebox 19 | -------------------------------------------------------------------------------- /docs/pages/modules/tasks.losses.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.tasks.losses 6 | ============================== 7 | 8 | Wrappers of loss and metric functions for different machine learning algorithms. 9 | 10 | Base Classes 11 | ------------ 12 | 13 | .. currentmodule:: lightautoml.tasks.losses.base 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | MetricFunc 21 | Loss 22 | 23 | 24 | Wrappers for LightGBM 25 | --------------------- 26 | 27 | Classes 28 | ^^^^^^^ 29 | 30 | .. currentmodule:: lightautoml.tasks.losses 31 | 32 | .. autosummary:: 33 | :toctree: ./generated 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | ~lgb.LGBFunc 38 | ~lgb.LGBLoss 39 | 40 | Functions 41 | ^^^^^^^^^ 42 | 43 | .. currentmodule:: lightautoml.tasks.losses 44 | 45 | .. autosummary:: 46 | :toctree: ./generated 47 | :nosignatures: 48 | :template: functiontemplate.rst 49 | 50 | ~lgb_custom.softmax_ax1 51 | ~lgb_custom.lgb_f1_loss_multiclass 52 | 53 | 54 | 55 | Wrappers for CatBoost 56 | --------------------- 57 | 58 | Classes 59 | ^^^^^^^ 60 | 61 | .. currentmodule:: lightautoml.tasks.losses 62 | 63 | .. autosummary:: 64 | :toctree: ./generated 65 | :nosignatures: 66 | :template: classtemplate.rst 67 | 68 | ~cb.CBLoss 69 | ~cb_custom.CBCustomMetric 70 | ~cb_custom.CBRegressionMetric 71 | ~cb_custom.CBClassificationMetric 72 | ~cb_custom.CBMulticlassMetric 73 | 74 | 75 | Functions 76 | ^^^^^^^^^ 77 | 78 | .. currentmodule:: lightautoml.tasks.losses 79 | 80 | .. 
autosummary:: 81 | :toctree: ./generated 82 | :nosignatures: 83 | :template: functiontemplate.rst 84 | 85 | ~cb.cb_str_loss_wrapper 86 | 87 | 88 | Wrappers for Sklearn 89 | --------------------- 90 | 91 | Classes 92 | ^^^^^^^ 93 | 94 | .. currentmodule:: lightautoml.tasks.losses 95 | 96 | .. autosummary:: 97 | :toctree: ./generated 98 | :nosignatures: 99 | :template: classtemplate.rst 100 | 101 | ~sklearn.SKLoss 102 | 103 | 104 | Wrappers for Torch 105 | --------------------- 106 | 107 | Classes 108 | ^^^^^^^ 109 | 110 | .. currentmodule:: lightautoml.tasks.losses 111 | 112 | .. autosummary:: 113 | :toctree: ./generated 114 | :nosignatures: 115 | :template: classtemplate.rst 116 | 117 | ~torch.TorchLossWrapper 118 | ~torch.TORCHLoss 119 | 120 | 121 | Functions 122 | ^^^^^^^^^ 123 | 124 | .. currentmodule:: lightautoml.tasks.losses 125 | 126 | .. autosummary:: 127 | :toctree: ./generated 128 | :nosignatures: 129 | :template: functiontemplate.rst 130 | 131 | ~torch.torch_rmsle 132 | ~torch.torch_quantile 133 | ~torch.torch_fair 134 | ~torch.torch_huber 135 | ~torch.torch_f1 136 | ~torch.torch_mape 137 | -------------------------------------------------------------------------------- /docs/pages/modules/tasks.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.tasks 6 | ============================== 7 | 8 | 9 | Task Class 10 | ---------- 11 | 12 | .. currentmodule:: lightautoml.tasks.base 13 | 14 | .. autosummary:: 15 | :toctree: ./generated 16 | :nosignatures: 17 | :template: classtemplate.rst 18 | 19 | Task 20 | 21 | 22 | 23 | 24 | Common Metrics 25 | ----------------------- 26 | 27 | Classes 28 | ^^^^^^^^^^^ 29 | 30 | .. currentmodule:: lightautoml.tasks.common_metric 31 | 32 | .. 
autosummary:: 33 | :toctree: ./generated 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | F1Factory 38 | BestClassBinaryWrapper 39 | BestClassMulticlassWrapper 40 | 41 | 42 | Functions 43 | ^^^^^^^^^^ 44 | 45 | .. currentmodule:: lightautoml.tasks.common_metric 46 | 47 | .. autosummary:: 48 | :toctree: ./generated 49 | :nosignatures: 50 | :template: functiontemplate.rst 51 | 52 | mean_quantile_error 53 | mean_huber_error 54 | mean_fair_error 55 | mean_absolute_percentage_error 56 | roc_auc_ovr 57 | rmsle 58 | auc_mu 59 | -------------------------------------------------------------------------------- /docs/pages/modules/text.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.text 6 | ============================== 7 | 8 | Provides an internal interface for working with text features. 9 | 10 | Sentence Embedders 11 | ------------------------------ 12 | 13 | .. currentmodule:: lightautoml.text 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | ~dl_transformers.DLTransformer 21 | ~dl_transformers.BOREP 22 | ~dl_transformers.RandomLSTM 23 | ~dl_transformers.BertEmbedder 24 | ~weighted_average_transformer.WeightedAverageTransformer 25 | 26 | 27 | Torch Datasets for Text 28 | ------------------------------ 29 | 30 | .. currentmodule:: lightautoml.text 31 | 32 | .. autosummary:: 33 | :toctree: ./generated 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | ~embed_dataset.BertDataset 38 | ~embed_dataset.EmbedDataset 39 | 40 | 41 | Tokenizers 42 | ------------------------------ 43 | 44 | .. currentmodule:: lightautoml.text 45 | 46 | .. 
autosummary:: 47 | :toctree: ./generated 48 | :nosignatures: 49 | :template: classtemplate.rst 50 | 51 | ~tokenizer.BaseTokenizer 52 | ~tokenizer.SimpleRuTokenizer 53 | ~tokenizer.SimpleEnTokenizer 54 | 55 | 56 | Utils 57 | ------------------------------ 58 | 59 | .. currentmodule:: lightautoml.text 60 | 61 | .. autosummary:: 62 | :toctree: ./generated 63 | :nosignatures: 64 | :template: functiontemplate.rst 65 | 66 | ~utils.seed_everything 67 | ~utils.parse_devices 68 | ~utils.custom_collate 69 | ~utils.single_text_hash 70 | ~utils.get_textarr_hash 71 | -------------------------------------------------------------------------------- /docs/pages/modules/transformers.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.transformers 6 | ============================== 7 | 8 | Basic feature generation steps and helper utils. 9 | 10 | Base Classes 11 | ------------------------------ 12 | 13 | .. currentmodule:: lightautoml.transformers.base 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | LAMLTransformer 21 | SequentialTransformer 22 | UnionTransformer 23 | ColumnsSelector 24 | ColumnwiseUnion 25 | BestOfTransformers 26 | ConvertDataset 27 | ChangeRoles 28 | 29 | 30 | Numeric 31 | ------------------------------ 32 | 33 | .. currentmodule:: lightautoml.transformers.numeric 34 | 35 | .. autosummary:: 36 | :toctree: ./generated 37 | :nosignatures: 38 | :template: classtemplate.rst 39 | 40 | NaNFlags 41 | FillnaMedian 42 | FillnaMean 43 | FillInf 44 | LogOdds 45 | StandardScaler 46 | QuantileBinning 47 | QuantileTransformer 48 | 49 | 50 | Categorical 51 | ------------------------------ 52 | 53 | .. currentmodule:: lightautoml.transformers.categorical 54 | 55 | .. 
autosummary:: 56 | :toctree: ./generated 57 | :nosignatures: 58 | :template: classtemplate.rst 59 | 60 | LabelEncoder 61 | OHEEncoder 62 | FreqEncoder 63 | OrdinalEncoder 64 | TargetEncoder 65 | MultiClassTargetEncoder 66 | CatIntersectstions 67 | 68 | 69 | Datetime 70 | ------------------------------ 71 | 72 | .. currentmodule:: lightautoml.transformers.datetime 73 | 74 | .. autosummary:: 75 | :toctree: ./generated 76 | :nosignatures: 77 | :template: classtemplate.rst 78 | 79 | TimeToNum 80 | BaseDiff 81 | DateSeasons 82 | 83 | 84 | Decompositions 85 | ------------------------------ 86 | 87 | .. currentmodule:: lightautoml.transformers.decomposition 88 | 89 | .. autosummary:: 90 | :toctree: ./generated 91 | :nosignatures: 92 | :template: classtemplate.rst 93 | 94 | PCATransformer 95 | SVDTransformer 96 | 97 | 98 | Text 99 | ------------------------------ 100 | 101 | .. currentmodule:: lightautoml.transformers.text 102 | 103 | .. autosummary:: 104 | :toctree: ./generated 105 | :nosignatures: 106 | :template: classtemplate.rst 107 | 108 | TunableTransformer 109 | TfidfTextTransformer 110 | TokenizerTransformer 111 | OneToOneTransformer 112 | ConcatTextTransformer 113 | AutoNLPWrap 114 | 115 | 116 | Image 117 | ------------------------------ 118 | 119 | .. currentmodule:: lightautoml.transformers.image 120 | 121 | .. autosummary:: 122 | :toctree: ./generated 123 | :nosignatures: 124 | :template: classtemplate.rst 125 | 126 | ImageFeaturesTransformer 127 | AutoCVWrap 128 | -------------------------------------------------------------------------------- /docs/pages/modules/utils.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.utils 6 | ============================== 7 | 8 | Common util tools. 9 | 10 | Timer 11 | ------------------------------ 12 | 13 | 14 | .. currentmodule:: lightautoml.utils.timer 15 | 16 | .. 
autosummary:: 17 | :toctree: ./generated 18 | :nosignatures: 19 | :template: classtemplate.rst 20 | 21 | Timer 22 | PipelineTimer 23 | TaskTimer 24 | -------------------------------------------------------------------------------- /docs/pages/modules/validation.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | 5 | lightautoml.validation 6 | ============================== 7 | 8 | The module provide classes and functions for model validation. 9 | 10 | Iterators 11 | ------------------------------ 12 | 13 | .. currentmodule:: lightautoml.validation 14 | 15 | .. autosummary:: 16 | :toctree: ./generated 17 | :nosignatures: 18 | :template: classtemplate.rst 19 | 20 | ~base.TrainValidIterator 21 | ~base.DummyIterator 22 | ~base.HoldoutIterator 23 | ~base.CustomIterator 24 | ~np_iterators.FoldsIterator 25 | ~np_iterators.TimeSeriesIterator 26 | 27 | 28 | Iterators Getters and Utils 29 | ------------------------------ 30 | 31 | 32 | .. currentmodule:: lightautoml.validation 33 | 34 | .. 
autosummary:: 35 | :toctree: ./generated 36 | :nosignatures: 37 | :template: functiontemplate.rst 38 | 39 | ~utils.create_validation_iterator 40 | ~np_iterators.get_numpy_iterator 41 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_10_relational_data_with_star_scheme.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_10_relational_data_with_star_scheme.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_11_time_series.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_11_time_series.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_12_AA_Test.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_12_AA_Test.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_13_AA_Test_multigroup_split.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_13_AA_Test_multigroup_split.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_14_AB_Test.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_14_AB_Test.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | 
-------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_15_Matching.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_15_Matching.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_16_Matching_without_replacement.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_16_Matching_without_replacement.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_17_Modeling_Limit_Distribution.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_17_Modeling_Limit_Distribution.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_18_Test_Limit_Distribution.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_18_Test_Limit_Distribution.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_1_basics.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_1_basics.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_2_WhiteBox_AutoWoE.nblink: 
-------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_2_WhiteBox_AutoWoE.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_3_sql_data_source.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_3_sql_data_source.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_4_NLP_Interpretation.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_4_NLP_Interpretation.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_5_uplift.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_5_uplift.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_6_custom_pipeline.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_6_custom_pipeline.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_7_ICE_and_PDP_interpretation.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_7_ICE_and_PDP_interpretation.ipynb", 3 | "extra-media": [ 4 | 
"../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_8_CV_preset.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_8_CV_preset.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/pages/tutorials/Tutorial_9_neural_networks.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../examples/tutorials/Tutorial_9_neural_networks.ipynb", 3 | "extra-media": [ 4 | "../../../imgs" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel 2 | nbsphinx 3 | nbsphinx-link 4 | sphinx-autodoc-typehints 5 | pandoc 6 | jupyter 7 | prompt-toolkit<3.0.0,!=3.0.1,>=2.0.0 8 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | ## Competitions 3 | | Place         | Competition   | Description | Solution | 4 | | ------ |:------------- | --------- | --------- | 5 | | 1st | [2024 AutoML Grand Prix](https://www.kaggle.com/automl-grand-prix) | Team "LightAutoML testers" | [1 stage](https://www.kaggle.com/competitions/playground-series-s4e5/discussion/500700), [3 stage](https://www.kaggle.com/competitions/playground-series-s4e7/discussion/516860), [4 stage](https://www.kaggle.com/competitions/playground-series-s4e8/discussion/523732), [5 stage](https://www.kaggle.com/competitions/playground-series-s4e9/discussion/531884) | 6 | 7 | 8 | ## Code snippets 9 | 1. `demo0.py` - building ML pipeline from blocks and fit + predict the pipeline itself. 10 | 2. 
`demo1.py` - several ML pipelines creation (using importances based cutoff feature selector) to build 2 level stacking using AutoML class 11 | 3. `demo2.py` - several ML pipelines creation (using iteartive feature selection algorithm) to build 2 level stacking using AutoML class 12 | 4. `demo3.py` - several ML pipelines creation (using combination of cutoff and iterative FS algos) to build 2 level stacking using AutoML class 13 | 5. `demo4.py` - creation of classification and regression tasks for AutoML with loss and evaluation metric setup 14 | 6. `demo5.py` - 2 level stacking using AutoML class with different algos on first level including LGBM, Linear and LinearL1 15 | 7. `demo6.py` - AutoML with nested CV usage 16 | 8. `demo7.py` - AutoML preset usage for tabular datasets (predefined structure of AutoML pipeline and simple interface for users without building from blocks) 17 | 9. `demo8.py` - creation pipelines from blocks to build AutoML, solving multiclass classification task 18 | 10. `demo9.py` - AutoML time utilization preset usage for tabular datasets (predefined structure of AutoML pipeline and simple interface for users without building from blocks) 19 | 11. `demo10.py` - creation pipelines from blocks (including CatBoost) to build AutoML, solving multiclass classification task 20 | 12. `demo11.py` - AutoML NLP preset usage for tabular datasets with text columns 21 | 13. `demo12.py` - AutoML tabular preset usage with custom validation scheme and multiprocessed inference 22 | 14. `demo13.py` - AutoML TS preset usage with lag and diff transformers' parameters selection 23 | 15. 
`demo14.py` - Groupby features (using TabularAutoML preset and custom pipeline) 24 | -------------------------------------------------------------------------------- /examples/data/meal_delivery_company/fulfilment_center_info.csv: -------------------------------------------------------------------------------- 1 | center_id,city_code,region_code,center_type,op_area 2 | 11,679,56,TYPE_A,3.7 3 | 13,590,56,TYPE_B,6.7 4 | 124,590,56,TYPE_C,4 5 | 66,648,34,TYPE_A,4.1 6 | 94,632,34,TYPE_C,3.6 7 | 64,553,77,TYPE_A,4.4 8 | 129,593,77,TYPE_A,3.9 9 | 139,693,34,TYPE_C,2.8 10 | 88,526,34,TYPE_A,4.1 11 | 143,562,77,TYPE_B,3.8 12 | 101,699,85,TYPE_C,2.8 13 | 86,699,85,TYPE_C,4 14 | 32,526,34,TYPE_A,3.8 15 | 149,478,77,TYPE_A,2.4 16 | 152,576,34,TYPE_B,4 17 | 92,526,34,TYPE_C,2.9 18 | 27,713,85,TYPE_A,4.5 19 | 14,654,56,TYPE_C,2.7 20 | 26,515,77,TYPE_C,3 21 | 104,647,56,TYPE_A,4.5 22 | 77,676,34,TYPE_A,3.8 23 | 23,698,23,TYPE_A,3.4 24 | 97,628,77,TYPE_A,4.6 25 | 146,526,34,TYPE_B,5 26 | 113,680,77,TYPE_C,4 27 | 145,620,77,TYPE_A,3.9 28 | 80,604,56,TYPE_C,5.1 29 | 55,647,56,TYPE_C,2 30 | 186,649,34,TYPE_A,3.4 31 | 99,596,71,TYPE_A,4.5 32 | 91,590,56,TYPE_C,0.9 33 | 20,522,56,TYPE_A,4 34 | 106,675,34,TYPE_A,4 35 | 81,526,34,TYPE_A,4 36 | 73,576,34,TYPE_A,4 37 | 29,526,34,TYPE_C,4 38 | 43,590,56,TYPE_A,5.1 39 | 102,593,77,TYPE_A,2.8 40 | 61,473,77,TYPE_A,4.5 41 | 50,556,77,TYPE_A,4.8 42 | 83,659,77,TYPE_A,5.3 43 | 57,541,77,TYPE_C,2.8 44 | 126,577,56,TYPE_A,2.7 45 | 177,683,56,TYPE_A,3.4 46 | 67,638,56,TYPE_B,7 47 | 174,700,56,TYPE_A,7 48 | 59,456,56,TYPE_A,4.2 49 | 58,695,77,TYPE_C,3.8 50 | 65,602,34,TYPE_A,4.8 51 | 39,526,34,TYPE_C,3.8 52 | 132,522,56,TYPE_A,3.9 53 | 89,703,56,TYPE_A,4.8 54 | 162,526,34,TYPE_C,2 55 | 75,651,77,TYPE_B,4.7 56 | 72,638,56,TYPE_C,3.9 57 | 41,590,56,TYPE_C,1.9 58 | 10,590,56,TYPE_B,6.3 59 | 110,485,77,TYPE_A,3.8 60 | 52,685,56,TYPE_B,5.6 61 | 93,461,34,TYPE_A,3.9 62 | 74,702,35,TYPE_A,2.8 63 | 34,615,34,TYPE_B,4.2 64 | 137,590,56,TYPE_A,4.4 65 | 
153,590,56,TYPE_A,3.9 66 | 24,614,85,TYPE_B,3.6 67 | 109,599,56,TYPE_A,3.6 68 | 108,579,56,TYPE_B,4.4 69 | 36,517,56,TYPE_B,4.4 70 | 157,609,93,TYPE_A,4.1 71 | 17,517,56,TYPE_A,3.2 72 | 161,658,34,TYPE_B,3.9 73 | 42,561,77,TYPE_B,3.9 74 | 53,590,56,TYPE_A,3.8 75 | 30,604,56,TYPE_A,3.5 76 | 76,614,85,TYPE_A,3 77 | 68,676,34,TYPE_B,4.1 78 | 51,638,56,TYPE_A,7 79 | -------------------------------------------------------------------------------- /examples/data/meal_delivery_company/meal_info.csv: -------------------------------------------------------------------------------- 1 | meal_id,category,cuisine 2 | 1885,Beverages,Thai 3 | 1993,Beverages,Thai 4 | 2539,Beverages,Thai 5 | 1248,Beverages,Indian 6 | 2631,Beverages,Indian 7 | 1311,Extras,Thai 8 | 1062,Beverages,Italian 9 | 1778,Beverages,Italian 10 | 1803,Extras,Thai 11 | 1198,Extras,Thai 12 | 2707,Beverages,Italian 13 | 1847,Soup,Thai 14 | 1438,Soup,Thai 15 | 2494,Soup,Thai 16 | 2760,Other Snacks,Thai 17 | 2490,Salad,Italian 18 | 1109,Rice Bowl,Indian 19 | 2290,Rice Bowl,Indian 20 | 1525,Other Snacks,Thai 21 | 2704,Other Snacks,Thai 22 | 1878,Starters,Thai 23 | 2640,Starters,Thai 24 | 2577,Starters,Thai 25 | 1754,Sandwich,Italian 26 | 1971,Sandwich,Italian 27 | 2306,Pasta,Italian 28 | 2139,Beverages,Indian 29 | 2826,Sandwich,Italian 30 | 2664,Salad,Italian 31 | 2569,Salad,Italian 32 | 1230,Beverages,Continental 33 | 1207,Beverages,Continental 34 | 2322,Beverages,Continental 35 | 2492,Desert,Indian 36 | 1216,Pasta,Italian 37 | 1727,Rice Bowl,Indian 38 | 1902,Biryani,Indian 39 | 1247,Biryani,Indian 40 | 2304,Desert,Indian 41 | 1543,Desert,Indian 42 | 1770,Biryani,Indian 43 | 2126,Pasta,Italian 44 | 1558,Pizza,Continental 45 | 2581,Pizza,Continental 46 | 1962,Pizza,Continental 47 | 1571,Fish,Continental 48 | 2956,Fish,Continental 49 | 2104,Fish,Continental 50 | 2444,Seafood,Continental 51 | 2867,Seafood,Continental 52 | 1445,Seafood,Continental 53 | 
-------------------------------------------------------------------------------- /examples/data/meal_delivery_company/relational_main.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/examples/data/meal_delivery_company/relational_main.csv.zip -------------------------------------------------------------------------------- /examples/demo11.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import shutil 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from sklearn.metrics import mean_squared_error 10 | from sklearn.model_selection import train_test_split 11 | 12 | from lightautoml.automl.presets.text_presets import TabularNLPAutoML 13 | from lightautoml.tasks import Task 14 | 15 | 16 | np.random.seed(42) 17 | 18 | data = pd.read_csv("./data/avito1k_train.csv") 19 | 20 | train, test = train_test_split(data, test_size=500, random_state=42) 21 | 22 | roles = { 23 | "target": "deal_probability", 24 | "group": ["user_id"], 25 | "text": ["description", "title", "param_1", "param_2", "param_3"], 26 | } 27 | 28 | task = Task("reg") 29 | 30 | automl = TabularNLPAutoML(task=task, timeout=600) 31 | oof_pred = automl.fit_predict(train, roles=roles) 32 | test_pred = automl.predict(test) 33 | not_nan = np.any(~np.isnan(oof_pred.data), axis=1) 34 | 35 | print("Check scores...") 36 | print("OOF score: {}".format(mean_squared_error(train[roles["target"]].values[not_nan], oof_pred.data[not_nan][:, 0]))) 37 | print("TEST score: {}".format(mean_squared_error(test[roles["target"]].values, test_pred.data[:, 0]))) 38 | 39 | shutil.rmtree("./models", ignore_errors=True) 40 | -------------------------------------------------------------------------------- /examples/demo12.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from sklearn.metrics import roc_auc_score 8 | from sklearn.model_selection import train_test_split 9 | 10 | from lightautoml.automl.presets.tabular_presets import TabularAutoML 11 | from lightautoml.tasks import Task 12 | from lightautoml.validation.np_iterators import TimeSeriesIterator 13 | 14 | 15 | ################################ 16 | # Features: 17 | # - working with np.arrays 18 | # - working with file 19 | # - custom time series split 20 | # - parallel/batch inference 21 | ################################ 22 | 23 | 24 | np.random.seed(42) 25 | 26 | data = pd.read_csv("./data/sampled_app_train.csv") 27 | 28 | data["BIRTH_DATE"] = (np.datetime64("2018-01-01") + data["DAYS_BIRTH"].astype(np.dtype("timedelta64[D]"))).astype(str) 29 | data["EMP_DATE"] = ( 30 | np.datetime64("2018-01-01") + np.clip(data["DAYS_EMPLOYED"], None, 0).astype(np.dtype("timedelta64[D]")) 31 | ).astype(str) 32 | 33 | data["report_dt"] = np.datetime64("2018-01-01") 34 | 35 | data["constant"] = 1 36 | data["allnan"] = np.nan 37 | 38 | data.drop(["DAYS_BIRTH", "DAYS_EMPLOYED"], axis=1, inplace=True) 39 | 40 | train, test = train_test_split(data, test_size=2000, random_state=42) 41 | # create time series iterator that is passed as cv_func 42 | cv_iter = TimeSeriesIterator(train["EMP_DATE"].astype(np.datetime64), n_splits=5, sorted_kfold=False) 43 | 44 | # train dataset may be passed as dict of np.ndarray 45 | train = { 46 | "data": train[["AMT_CREDIT", "AMT_ANNUITY"]].values, 47 | "target": train["TARGET"].values, 48 | } 49 | 50 | task = Task( 51 | "binary", 52 | ) 53 | 54 | automl = TabularAutoML( 55 | task=task, 56 | timeout=200, 57 | ) 58 | oof_pred = automl.fit_predict(train, train_features=["AMT_CREDIT", "AMT_ANNUITY"], cv_iter=cv_iter) 59 | # prediction can be made on file by 60 | test.to_csv("temp_test_data.csv", index=False) 61 | test_pred = automl.predict("temp_test_data.csv", 
batch_size=100, n_jobs=4) 62 | 63 | print("Check scores...") 64 | oof_prediction = oof_pred.data[:, 0] 65 | not_empty = np.logical_not(np.isnan(oof_prediction)) 66 | 67 | print(f'OOF score: {roc_auc_score(train["target"][not_empty], oof_prediction[not_empty])}') 68 | print(f'TEST score: {roc_auc_score(test["TARGET"].values, test_pred.data[:, 0])}') 69 | -------------------------------------------------------------------------------- /examples/demo13.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from sklearn.metrics import mean_absolute_error 5 | 6 | from lightautoml.addons.autots.base import AutoTS 7 | from lightautoml.tasks import Task 8 | 9 | 10 | np.random.seed(42) 11 | 12 | data = pd.read_csv("data/ai92_value_77.csv") 13 | horizon = 30 14 | 15 | train = data[:-horizon] 16 | test = data[-horizon:] 17 | 18 | roles = {"target": "value", "datetime": "date"} 19 | 20 | seq_params = { 21 | "seq0": { 22 | "case": "next_values", 23 | "params": {"n_target": horizon, "history": np.maximum(7, horizon), "step": 1, "test_last": True}, 24 | }, 25 | } 26 | 27 | # True (then set default values) / False; int, list or np.array 28 | # default: lag_features=30, diff_features=7 29 | transformers_params = { 30 | "lag_features": [0, 1, 2, 3, 5, 10], 31 | "lag_time_features": [0, 1, 2], 32 | "diff_features": [0, 1, 3, 4], 33 | } 34 | 35 | task = Task("multi:reg", greater_is_better=False, metric="mae", loss="mae") 36 | 37 | automl = AutoTS( 38 | task, 39 | seq_params=seq_params, 40 | trend_params={ 41 | "trend": False, 42 | }, 43 | transformers_params=transformers_params, 44 | ) 45 | train_pred, _ = automl.fit_predict(train, roles, verbose=4) 46 | forecast, _ = automl.predict(train) 47 | 48 | print("Check scores...") 49 | print("TEST score: {}".format(mean_absolute_error(test[roles["target"]].values, forecast.data))) 50 | 
-------------------------------------------------------------------------------- /examples/demo14.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from lightautoml.addons.hypex import Matcher 4 | 5 | 6 | df = pd.read_csv("data/sampled_matching.csv").drop(["Unnamed: 0"], axis=1) 7 | 8 | print(df.shape) 9 | print(df.columns) 10 | 11 | target = "created_variable" 12 | treatment = "is_tb_pilot" 13 | 14 | 15 | matcher = Matcher(df, target, treatment, is_feature_select=False, quality_check=True) 16 | 17 | matcher.estimate() 18 | 19 | print(matcher.matcher.ATE) 20 | print(matcher.matcher.quality_dict) 21 | -------------------------------------------------------------------------------- /examples/demo6.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | """AutoML with nested CV usage.""" 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from sklearn.metrics import roc_auc_score 11 | from sklearn.model_selection import train_test_split 12 | 13 | from lightautoml.automl.presets.tabular_presets import TabularAutoML 14 | from lightautoml.dataset.roles import DatetimeRole 15 | from lightautoml.tasks import Task 16 | 17 | 18 | np.random.seed(42) 19 | 20 | data = pd.read_csv("./data/sampled_app_train.csv") 21 | 22 | data["BIRTH_DATE"] = (np.datetime64("2018-01-01") + data["DAYS_BIRTH"].astype(np.dtype("timedelta64[D]"))).astype(str) 23 | data["EMP_DATE"] = ( 24 | np.datetime64("2018-01-01") + np.clip(data["DAYS_EMPLOYED"], None, 0).astype(np.dtype("timedelta64[D]")) 25 | ).astype(str) 26 | 27 | data["report_dt"] = np.datetime64("2018-01-01") 28 | 29 | data["constant"] = 1 30 | data["allnan"] = np.nan 31 | 32 | data.drop(["DAYS_BIRTH", "DAYS_EMPLOYED"], axis=1, inplace=True) 33 | 34 | train, test = train_test_split(data, test_size=2000, random_state=42) 35 | 36 | roles = { 37 | "target": "TARGET", 38 | 
DatetimeRole(base_date=True, seasonality=(), base_feats=False): "report_dt", 39 | } 40 | 41 | task = Task( 42 | "binary", 43 | ) 44 | 45 | automl = TabularAutoML( 46 | task=task, 47 | timeout=600, 48 | general_params={ 49 | "use_algos": [ 50 | [ 51 | "linear_l2", 52 | "lgb", 53 | ], 54 | ["linear_l2", "lgb"], 55 | ], 56 | "nested_cv": True, 57 | "skip_conn": True, 58 | }, 59 | nested_cv_params={"cv": 5, "n_folds": None}, 60 | ) 61 | 62 | oof_pred = automl.fit_predict(train, roles=roles) 63 | test_pred = automl.predict(test) 64 | 65 | not_nan = np.any(~np.isnan(oof_pred.data), axis=1) 66 | 67 | print(f"OOF score: {roc_auc_score(train[roles['target']].values[not_nan], oof_pred.data[not_nan][:, 0])}") 68 | print(f"TEST score: {roc_auc_score(test[roles['target']].values, test_pred.data[:, 0])}") 69 | -------------------------------------------------------------------------------- /examples/demo7.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from sklearn.metrics import roc_auc_score 8 | from sklearn.model_selection import train_test_split 9 | 10 | from lightautoml.automl.presets.tabular_presets import TabularAutoML 11 | from lightautoml.dataset.roles import DatetimeRole 12 | from lightautoml.tasks import Task 13 | 14 | 15 | np.random.seed(42) 16 | 17 | data = pd.read_csv("./data/sampled_app_train.csv") 18 | 19 | data["BIRTH_DATE"] = (np.datetime64("2018-01-01") + data["DAYS_BIRTH"].astype(np.dtype("timedelta64[D]"))).astype(str) 20 | data["EMP_DATE"] = ( 21 | np.datetime64("2018-01-01") + np.clip(data["DAYS_EMPLOYED"], None, 0).astype(np.dtype("timedelta64[D]")) 22 | ).astype(str) 23 | 24 | data["report_dt"] = np.datetime64("2018-01-01") 25 | 26 | data["constant"] = 1 27 | data["allnan"] = np.nan 28 | 29 | data.drop(["DAYS_BIRTH", "DAYS_EMPLOYED"], axis=1, inplace=True) 30 | 31 | train, test = train_test_split(data, 
test_size=2000, random_state=42) 32 | 33 | roles = { 34 | "target": "TARGET", 35 | DatetimeRole(base_date=True, seasonality=(), base_feats=False): "report_dt", 36 | } 37 | 38 | task = Task( 39 | "binary", 40 | ) 41 | 42 | automl = TabularAutoML( 43 | task=task, 44 | timeout=3600, 45 | ) 46 | oof_pred = automl.fit_predict(train, roles=roles) 47 | test_pred = automl.predict(test) 48 | 49 | not_nan = np.any(~np.isnan(oof_pred.data), axis=1) 50 | 51 | print("Check scores...") 52 | print("OOF score: {}".format(roc_auc_score(train[roles["target"]].values[not_nan], oof_pred.data[not_nan][:, 0]))) 53 | print("TEST score: {}".format(roc_auc_score(test[roles["target"]].values, test_pred.data[:, 0]))) 54 | -------------------------------------------------------------------------------- /examples/demo9.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """AutoML time utilization preset usage for tabular datasets. 5 | 6 | Predefined structure of AutoML pipeline and simple interface for users without building from blocks. 
7 | 8 | """ 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from sklearn.metrics import roc_auc_score 14 | from sklearn.model_selection import train_test_split 15 | 16 | from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML 17 | from lightautoml.dataset.roles import DatetimeRole 18 | from lightautoml.tasks import Task 19 | 20 | 21 | np.random.seed(42) 22 | 23 | data = pd.read_csv("./data/sampled_app_train.csv") 24 | 25 | data["BIRTH_DATE"] = (np.datetime64("2018-01-01") + data["DAYS_BIRTH"].astype(np.dtype("timedelta64[D]"))).astype(str) 26 | data["EMP_DATE"] = ( 27 | np.datetime64("2018-01-01") + np.clip(data["DAYS_EMPLOYED"], None, 0).astype(np.dtype("timedelta64[D]")) 28 | ).astype(str) 29 | 30 | data["report_dt"] = np.datetime64("2018-01-01") 31 | 32 | data["constant"] = 1 33 | data["allnan"] = np.nan 34 | 35 | data.drop(["DAYS_BIRTH", "DAYS_EMPLOYED"], axis=1, inplace=True) 36 | 37 | train, test = train_test_split(data, test_size=2000, random_state=42) 38 | 39 | roles = { 40 | "target": "TARGET", 41 | DatetimeRole(base_date=True, seasonality=(), base_feats=False): "report_dt", 42 | } 43 | 44 | task = Task("binary") 45 | 46 | automl = TabularUtilizedAutoML( 47 | task=task, 48 | timeout=600, 49 | ) 50 | oof_pred = automl.fit_predict(train, roles=roles) 51 | test_pred = automl.predict(test) 52 | 53 | # use only not nan 54 | not_nan = np.any(~np.isnan(oof_pred.data), axis=1) 55 | 56 | print(f"OOF score: {roc_auc_score(train['TARGET'].values[not_nan], oof_pred.data[not_nan])}") 57 | print(f"TEST score: {roc_auc_score(test[roles['target']].values, test_pred.data[:, 0])}") 58 | -------------------------------------------------------------------------------- /examples/optimization/conditional_parameters.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | """Simple example for conditional parameters with OptunaTuner.""" 4 | 5 | import copy 6 | 7 | import optuna 
def sample(estimated_n_trials: int, trial: "optuna.trial.Trial", suggested_params: dict) -> dict:
    """Sample conditional hyperparameters for one Optuna trial.

    The range of ``min_sum_hessian_in_leaf`` depends on the value sampled
    for ``feature_fraction``.

    Args:
        estimated_n_trials: Upper estimate of the number of trials (part of
            the ``optimization_search_space`` callback signature; unused here).
        trial: Current Optuna trial used to suggest parameter values.
        suggested_params: Default parameters proposed by the model; shallow-copied,
            never mutated.

    Returns:
        Dict with the sampled parameter values merged over the defaults.
    """
    trial_values = copy.copy(suggested_params)
    trial_values["feature_fraction"] = trial.suggest_float("feature_fraction", low=0.5, high=1.0)

    if trial_values["feature_fraction"] > 0.7:
        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float(
            "min_sum_hessian_in_leaf", low=0.5, high=1, log=True
        )
    else:
        # BUGFIX: a log-scaled distribution requires low > 0 — Optuna raises
        # ValueError for suggest_float(..., low=0, ..., log=True). Use a tiny
        # positive lower bound instead of 0.
        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float(
            "min_sum_hessian_in_leaf", low=1e-8, high=0.5, log=True
        )

    return trial_values
# -*- encoding: utf-8 -*-

"""Simple example for binary classification on tabular data."""

import pandas as pd

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

from lightautoml.automl.presets.tabular_presets import TabularAutoML

# BUGFIX: `Distribution` and `SearchSpace` no longer exist in
# lightautoml.ml_algo.tuning.base (the module now exposes Choice/Uniform/Normal),
# so the old imports raised ImportError. Use `Uniform` with `log=True` instead
# of SearchSpace(Distribution.LOGUNIFORM, ...).
from lightautoml.ml_algo.tuning.base import Uniform
from lightautoml.tasks import Task


# load the dataset and carve out a stratified hold-out part
data = pd.read_csv("./data/sampled_app_train.csv")
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data["TARGET"], random_state=42)

# run automl with custom search spaces for the LightGBM models
automl = TabularAutoML(
    task=Task("binary"),
    lgb_params={
        "optimization_search_space": {
            "feature_fraction": Uniform(low=0.5, high=1.0),
            "min_sum_hessian_in_leaf": Uniform(low=1e-3, high=10.0, log=True),
        }
    },
)
oof_predictions = automl.fit_predict(train_data, roles={"target": "TARGET", "drop": ["SK_ID_CURR"]})
te_pred = automl.predict(test_data)

# calculate ROC-AUC on out-of-fold and hold-out predictions
print(f"Score for out-of-fold predictions: {roc_auc_score(train_data['TARGET'].values, oof_predictions.data[:, 0])}")
print(f"Score for hold-out: {roc_auc_score(test_data['TARGET'].values, te_pred.data[:, 0])}")
def sample(estimated_n_trials: int, trial: "optuna.trial.Trial", suggested_params: dict):
    """Yield parameter sets for a sequential sweep over ``feature_fraction``.

    Walks ``feature_fraction`` over {0.0, 0.1, ..., 0.9} and lets Optuna
    suggest ``min_sum_hessian_in_leaf`` at every grid point.

    Args:
        estimated_n_trials: Upper estimate of the number of trials (part of
            the ``optimization_search_space`` callback signature; unused here).
        trial: Current Optuna trial used to suggest parameter values.
        suggested_params: Default parameters proposed by the model; never mutated.

    Yields:
        Independent dict with parameter values for each grid point.
    """
    for step in range(10):
        # BUGFIX: yield a fresh copy per grid point. Previously a single dict
        # was copied once, mutated and re-yielded, so any consumer that kept
        # references to earlier yields silently saw only the last values.
        trial_values = copy.copy(suggested_params)
        trial_values["feature_fraction"] = step / 10
        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float("min_sum_hessian_in_leaf", low=0.5, high=1)
        yield trial_values
20 | oof_predictions = automl.fit_predict(train_data, roles={"target": "TARGET", "drop": ["SK_ID_CURR"]}) 21 | te_pred = automl.predict(test_data) 22 | 23 | # calculate scores 24 | print(f"Score for out-of-fold predictions: {roc_auc_score(train_data['TARGET'].values, oof_predictions.data[:, 0])}") 25 | print(f"Score for hold-out: {roc_auc_score(test_data['TARGET'].values, te_pred.data[:, 0])}") 26 | -------------------------------------------------------------------------------- /imgs/GENERALL2X2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/GENERALL2X2.jpg -------------------------------------------------------------------------------- /imgs/LightAutoML_logo_big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/LightAutoML_logo_big.png -------------------------------------------------------------------------------- /imgs/LightAutoML_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/LightAutoML_logo_small.png -------------------------------------------------------------------------------- /imgs/Star_scheme_tables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/Star_scheme_tables.png -------------------------------------------------------------------------------- /imgs/TabularAutoML_model_descr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/TabularAutoML_model_descr.png 
-------------------------------------------------------------------------------- /imgs/TabularUtilizedAutoML_model_descr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/TabularUtilizedAutoML_model_descr.png -------------------------------------------------------------------------------- /imgs/autoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/autoint.png -------------------------------------------------------------------------------- /imgs/denselight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/denselight.png -------------------------------------------------------------------------------- /imgs/densenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/densenet.png -------------------------------------------------------------------------------- /imgs/fttransformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/fttransformer.png -------------------------------------------------------------------------------- /imgs/lightautoml_icon_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/lightautoml_icon_color.png -------------------------------------------------------------------------------- /imgs/lightautoml_logo_color.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/lightautoml_logo_color.png -------------------------------------------------------------------------------- /imgs/lime.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/lime.jpg -------------------------------------------------------------------------------- /imgs/node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/node.png -------------------------------------------------------------------------------- /imgs/resnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/resnet.png -------------------------------------------------------------------------------- /imgs/swa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/swa.png -------------------------------------------------------------------------------- /imgs/tutorial_11_case_problem_statement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_11_case_problem_statement.png -------------------------------------------------------------------------------- /imgs/tutorial_11_general_problem_statement.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_11_general_problem_statement.png -------------------------------------------------------------------------------- /imgs/tutorial_11_history_step_params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_11_history_step_params.png -------------------------------------------------------------------------------- /imgs/tutorial_11_transformers_params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_11_transformers_params.png -------------------------------------------------------------------------------- /imgs/tutorial_1_initial_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_1_initial_report.png -------------------------------------------------------------------------------- /imgs/tutorial_1_laml_big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_1_laml_big.png -------------------------------------------------------------------------------- /imgs/tutorial_1_ml_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_1_ml_pipeline.png -------------------------------------------------------------------------------- /imgs/tutorial_1_pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_1_pipeline.png -------------------------------------------------------------------------------- /imgs/tutorial_1_unfolded_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_1_unfolded_report.png -------------------------------------------------------------------------------- /imgs/tutorial_2_initial_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_2_initial_report.png -------------------------------------------------------------------------------- /imgs/tutorial_2_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_2_pipeline.png -------------------------------------------------------------------------------- /imgs/tutorial_2_unfolded_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_2_unfolded_report.png -------------------------------------------------------------------------------- /imgs/tutorial_3_initial_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/imgs/tutorial_3_initial_report.png -------------------------------------------------------------------------------- /imgs/tutorial_3_unfolded_report.png: -------------------------------------------------------------------------------- 
"""Package initialization: logging setup, public sub-modules and version."""

import logging
import os
import sys


_root_logger = logging.getLogger()
_logger = logging.getLogger(__name__)
_logger.setLevel(logging.WARNING)

# When the application has already configured the root logger, let our records
# propagate up to it. Otherwise attach a stdout handler and keep records local.
if not _root_logger.hasHandlers():
    _stdout_handler = logging.StreamHandler(sys.stdout)
    _logger.addHandler(_stdout_handler)
    _logger.propagate = False

__all__ = [
    "automl",
    "dataset",
    "ml_algo",
    "pipelines",
    "image",
    "reader",
    "transformers",
    "validation",
    "text",
    "tasks",
    "utils",
    "addons",
    "report",
]

# Resolve the installed version from package metadata, except when building docs.
if os.getenv("DOCUMENTATION_ENV") is None:
    try:
        # stdlib since Python 3.8
        import importlib.metadata as importlib_metadata
    except ModuleNotFoundError:
        # backport package for older interpreters
        import importlib_metadata

    __version__ = importlib_metadata.version(__name__)
"""Model interpretation tools for text models."""

from .l2x import L2XTextExplainer
from .lime import LimeTextExplainer


# BUGFIX: "SSWARM" was listed in __all__ but is neither defined nor imported in
# this module (it lives in lightautoml.addons.tabular_interpretation), so
# `from lightautoml.addons.interpretation import *` raised AttributeError.
__all__ = ["LimeTextExplainer", "L2XTextExplainer"]
-------------------------------------------------------------------------------- 1 | """The main module, which includes the AutoML class, blenders and ready-made presets.""" 2 | 3 | __all__ = ["base", "presets", "blend"] 4 | -------------------------------------------------------------------------------- /lightautoml/automl/presets/__init__.py: -------------------------------------------------------------------------------- 1 | """Presets for end-to-end model training for special tasks.""" 2 | 3 | __all__ = [ 4 | "base", 5 | "tabular_presets", 6 | "image_presets", 7 | "text_presets", 8 | "whitebox_presets", 9 | ] 10 | -------------------------------------------------------------------------------- /lightautoml/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | """Provides an internal interface for working with data.""" 2 | 3 | __all__ = ["base", "roles", "np_pd_dataset", "utils"] 4 | -------------------------------------------------------------------------------- /lightautoml/image/__init__.py: -------------------------------------------------------------------------------- 1 | """Provides an internal interface for working with image features.""" 2 | 3 | 4 | __all__ = ["image"] 5 | -------------------------------------------------------------------------------- /lightautoml/image/utils.py: -------------------------------------------------------------------------------- 1 | """Image utils.""" 2 | 3 | from PIL import Image 4 | 5 | 6 | def pil_loader(path: str) -> Image: 7 | """Load image from paths. 8 | 9 | Args: 10 | path: Image path. 11 | 12 | Returns: 13 | Loaded PIL Image in rgb. 
"""Module for Ghost Batch Norm and variations.

Ghost Batch Norm: https://arxiv.org/pdf/1705.08741.pdf

"""

from math import ceil
from typing import Union

import torch
from torch import Tensor
from torch import nn


class GhostNorm(nn.Module):
    """Ghost Normalization.

    Applies ``inner_norm`` independently to "ghost" chunks of the batch.

    https://arxiv.org/pdf/1705.08741.pdf

    Args:
        inner_norm : torch.nn.Module (initialized)
            examples: `nn.BatchNorm1d`, `nn.LayerNorm`
        virtual_batch_size : int
        device : string or torch.device, optional
            default is "cpu"
    """

    def __init__(
        self,
        inner_norm: nn.Module,
        virtual_batch_size: int,
        device: Union[str, torch.device] = "cpu",
    ):
        super().__init__()
        self.virtual_batch_size = virtual_batch_size
        self.inner_norm = inner_norm
        self.to(device)

    def forward(self, x: Tensor) -> Tensor:
        """Transform the input tensor.

        Args:
            x : torch.Tensor

        Returns:
            torch.Tensor

        """
        # Number of virtual batches; torch.chunk may yield a smaller last chunk.
        chunk_size = int(ceil(x.shape[0] / self.virtual_batch_size))
        chunk_norm = [self.inner_norm(chunk) for chunk in x.chunk(chunk_size, dim=0)]
        return torch.cat(chunk_norm, dim=0)


class GhostBatchNorm(GhostNorm):
    """Ghost Normalization, using BatchNorm1d as inner normalization.

    https://arxiv.org/pdf/1705.08741.pdf

    Args:
        num_features : int
        virtual_batch_size : int, optional
            default is 64
        momentum : float, optional
            default is 0.1
        device : string or torch.device, optional
            default is "cpu"
    """

    def __init__(
        self,
        num_features: int,
        virtual_batch_size: int = 64,
        momentum: float = 0.1,
        device: Union[str, torch.device] = "cpu",
    ):
        super().__init__(
            inner_norm=nn.BatchNorm1d(num_features, momentum=momentum),
            virtual_batch_size=virtual_batch_size,
            # BUGFIX: `device` was accepted but never forwarded, so the module
            # always stayed on the parent default ("cpu") regardless of the
            # argument.
            device=device,
        )
class Normal(DistributionBase):
    """Normal (Gaussian-shaped) search-space distribution.

    Stores the raw search bounds; interpretation (sampling) is left to the
    concrete tuner backend.

    Args:
        low: Lower bound of the search range.
        high: Upper bound of the search range.
        q: Optional discretization step; ``None`` means continuous.
        log: Whether the range is treated on a logarithmic scale.
    """

    def __init__(self, low, high, q=None, log=False) -> None:
        self.low = low
        self.high = high
        self.q = q
        self.log = log
class DefaultTuner(ParamsTuner):
    """Trivial tuner: keep the algorithm's default parameters untouched."""

    _name: str = "DefaultTuner"

    def fit(
        self,
        ml_algo: "MLAlgo",
        train_valid_iterator: Optional[TrainValidIterator] = None,
    ) -> Tuple[None, None]:
        """Store the algorithm's own defaults as the "best" parameters.

        Args:
            ml_algo: Algorithm that is tuned.
            train_valid_iterator: Passed through so data-dependent defaults
                can be inferred.

        Returns:
            Tuple (None, None).
        """
        defaults = ml_algo.init_params_on_input(train_valid_iterator=train_valid_iterator)
        self._best_params = defaults
        return None, None
class WBFeatures(FeaturesPipeline, TabularDataFeatures):
    """Simple WhiteBox pipeline.

    Only date-derived features are generated here; everything else is
    handled inside WhiteBox itself.

    """

    def create_pipeline(self, train: PandasDataset) -> LAMLTransformer:
        """Create pipeline for WhiteBox.

        Args:
            train: Dataset with train features.

        Returns:
            Transformer.

        """
        # categorical and numeric columns are passed through unchanged
        passthrough = get_columns_by_role(train, "Category") + get_columns_by_role(train, "Numeric")

        candidates = [
            self.get_datetime_diffs(train),
            self.get_datetime_seasons(train, NumericRole(np.float32)),
            ColumnsSelector(passthrough),
        ]

        # drop transformers that could not be built (no suitable columns)
        return UnionTransformer([t for t in candidates if t is not None])
class ModelBasedImportanceEstimator(ImportanceEstimator):
    """Base class for performing feature selection using model feature importances."""

    def fit(
        self,
        train_valid: Optional[TrainValidIterator] = None,
        ml_algo: Optional[ImportanceEstimatedAlgo] = None,
        preds: Optional[LAMLDataset] = None,
    ):
        """Find the importances of features.

        Args:
            train_valid: dataset iterator. Not used here.
            ml_algo: ML algorithm used for importance estimation.
                Must already be fitted - its feature scores are taken as-is.
            preds: predicted target values. Not used here.

        """
        # The estimator computes nothing itself: it simply reads the feature
        # scores off the fitted model, so the model argument is mandatory.
        assert (
            ml_algo is not None
        ), "ModelBasedImportanceEstimator: raw importances are None and no MLAlgo to calculate them."
        self.raw_importances = ml_algo.get_features_score()
class ImportanceCutoffSelector(SelectionPipeline):
    """Selector based on importance threshold.

    It is important that data which passed to ``.fit``
    should be ok to fit `ml_algo` or preprocessing pipeline should be defined.

    Args:
        feature_pipeline: Composition of feature transforms.
        ml_algo: Tuple (MlAlgo, ParamsTuner).
        imp_estimator: Feature importance estimator.
        fit_on_holdout: If use the holdout iterator.
        cutoff: Threshold to cut-off features.

    """

    def __init__(
        self,
        feature_pipeline: Optional[FeaturesPipeline],
        ml_algo: MLAlgo,
        imp_estimator: ImportanceEstimator,
        fit_on_holdout: bool = True,
        cutoff: float = 0.0,
    ):
        super().__init__(feature_pipeline, ml_algo, imp_estimator, fit_on_holdout)
        self.cutoff = cutoff

    def perform_selection(self, train_valid: Optional[TrainValidIterator] = None):
        """Keep the features whose mapped importance exceeds ``cutoff``.

        Args:
            train_valid: Not used.

        """
        raw_scores = self.imp_estimator.get_features_score()
        self.map_raw_feature_importances(raw_scores)

        importances = self.mapped_importances
        chosen = importances.index.values[importances.values > self.cutoff]
        # Guard against dropping everything: keep at least the top feature.
        if len(chosen) == 0:
            chosen = importances.index.values[:1]
        self._selected_features = list(chosen)
class HighCorrRemoval(SelectionPipeline):
    """Selector to remove highly correlated features.

    Del totally correlated feats to speedup L1 regression models.
    For sparse data cosine will be used.
    It's not exact, but ok for remove very high correlations.

    Args:
        corr_co: Similarity threshold.
        subsample: Number (int) of samples, or frac (float) from full dataset.
        random_state: Random seed for subsample.
        **kwargs: Additional parameters. Used for initialization of parent class.

    """

    def __init__(self, corr_co: float = 0.98, subsample: Union[int, float] = 100000, random_state: int = 42, **kwargs):
        super().__init__(**kwargs)
        self.corr_co = corr_co
        self.subsample = subsample
        self.random_state = random_state

    def perform_selection(self, train_valid: Optional[TrainValidIterator]):
        """Select features to save in dataset during selection.

        Method is used to perform selection based on features correlation.
        Should save ``_selected_features`` attribute in the end of working.

        Args:
            train_valid: Classic cv-iterator.

        """
        train = train_valid.train.data
        target = train_valid.train.target

        # A single feature has nothing to be correlated with - keep it.
        if train.shape[1] == 1:
            self._selected_features = train_valid.features
            return

        # Optionally subsample rows to bound the cost of the similarity matrix.
        # subsample < 1 is treated as a fraction, otherwise as an absolute count.
        if self.subsample != 1 and self.subsample < train.shape[0]:
            if self.subsample < 1:
                subsample = int(train.shape[0] * self.subsample)
            else:
                subsample = int(self.subsample)

            idx = np.random.RandomState(self.random_state + 1).permutation(train.shape[0])[:subsample]
            train, target = train[idx], target[idx]

        # correlation or cosine
        if type(train) is np.ndarray:
            corr = np.corrcoef(train, rowvar=False)

        else:
            # Sparse input: cosine similarity as a cheap correlation proxy.
            xtx = train.T * train
            norm = sp_linalg.norm(train, axis=0)
            corr = np.array(xtx / (norm[:, np.newaxis] * norm[np.newaxis, :]))
            del xtx

        # Strict upper triangle (k=1) of |corr| > threshold: each True at
        # (row, col) flags a highly similar feature pair with row < col.
        sl = np.triu(np.abs(corr) > self.corr_co, k=1)
        grid_x, grid_y = np.meshgrid(np.arange(sl.shape[0]), np.arange(sl.shape[0]))

        removed = set()

        # For each flagged pair keep one member and drop the other.
        # NOTE(review): with default 'xy' meshgrid indexing, x is the column
        # index and y the row index, so this keeps the later feature of each
        # pair - confirm that is the intended tie-break.
        for x, y in zip(grid_x[sl], grid_y[sl]):
            if x not in removed:
                removed.add(y)

        # NaN on the diagonal of corrcoef marks a constant column - drop those too.
        const = np.arange(corr.shape[0])[np.isnan(np.diagonal(corr))]
        for i in const:
            removed.add(i)

        self._selected_features = [x for (n, x) in enumerate(train_valid.features) if n not in removed]
def map_pipeline_names(input_names: Sequence[str], output_names: Sequence[str]) -> List[Optional[str]]:
    """Pipelines create name in the way 'prefix__feature_name'.

    Multiple pipelines will create names
    in the way 'prefix1__prefix2__feature_name'.
    This function maps initial features names to outputs.
    Result may be not exact in some rare cases,
    but it's ok for real pipelines.

    Args:
        input_names: Initial feature names.
        output_names: Output feature names.

    Returns:
        Mapping between feature names.

    """
    # TODO: Add assert here
    known = set(input_names)
    mapped: List[Optional[str]] = [None] * len(output_names)

    for pos, out_name in enumerate(output_names):
        parts = out_name.split("__")
        # Strip leading prefixes one at a time until a known source name remains.
        for start in range(len(parts)):
            candidate = "__".join(parts[start:])
            if candidate in known:
                mapped[pos] = candidate
                break

    assert None not in mapped, "Can not infer names. For feature selection purposes use simple pipeline (one-to-one)"

    return mapped
def get_columns_by_role(dataset: LAMLDataset, role_name: str, **kwargs: Any) -> List[str]:
    """Search for columns with specific role and attributes when building pipeline.

    Args:
        dataset: Dataset to search.
        role_name: Name of features role.
        **kwargs: Specific parameters values to search.
            Example: search for categories with OHE processing only.

    Returns:
        List of str features names.

    """
    matched: List[str] = []
    inv_roles = dataset.inverse_roles

    for role in inv_roles:
        if role.name != role_name:
            continue
        # Role matches by name - now check every requested attribute.
        ok = True
        for attr_name, required in kwargs.items():
            try:
                actual = getattr(role, attr_name)
            except AttributeError:
                # A role without the attribute cannot satisfy the filter.
                ok = False
                break
            if actual != required:
                ok = False
                break
        if ok:
            matched.extend(inv_roles[role])

    return sorted(matched)
def set_sklearn_folds(
    task: Task,
    target: np.ndarray,
    cv: Union[Callable, int] = 5,
    random_state: int = 42,
    group: Optional[np.ndarray] = None,
) -> Optional[np.ndarray]:
    """Determines the cross-validation splitting strategy.

    Args:
        task: If `'binary'` or `'multiclass'` used stratified cv.
        target: Target values.
        cv: Specifies number of folds.
        random_state: Determines random number generation.
        group: For group k-folding.

    Returns:
        Array with fold indices, or None when ``cv`` is not an int.

    """
    # Only an integer fold count is handled here; any other cv spec
    # is left for the caller to interpret.
    if type(cv) is not int:
        return None

    if group is not None:
        splitter = GroupKFold(cv).split(group, group, group)
    elif task.name in ["binary", "multiclass"]:
        splitter = StratifiedKFold(cv, random_state=random_state, shuffle=True).split(target, target)
    else:
        splitter = KFold(cv, random_state=random_state, shuffle=True).split(target, target)

    # Encode each sample with the index of the fold it validates in.
    folds = np.zeros(target.shape[0], dtype=np.int32)
    for fold_idx, (_, valid_idx) in enumerate(splitter):
        folds[valid_idx] = fold_idx

    return folds
  • 2 |

    {{ title }}

    3 |
      4 |
    • 5 |

      ROC curve

      6 |
        7 |

        AUC valid plot full

        8 |
      9 |
    • 10 |
    • 11 |

      ROC-PR curve

      12 |
        13 |

        PR valid plot full

        14 |
      15 |
    • 16 |
    • 17 |

      Pie F1 metric

      18 |
        19 |

        Pie F1 metric

        20 |
      21 |
    • 22 |
    • 23 |

      Distribution of object predictions by bins

      24 |
        25 |

        preds_distribution_by_bins

        26 |

        distribution_of_logits

        27 |
      28 |
    • 29 |
    • 30 |

      Distribution of Logits by bins

      31 |
        32 | {{ sample_bins_table }} 33 |
      34 |
    • 35 |
    36 |
  • 37 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/feature_importance_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Feature importance

    3 |
      4 | {% if feature_importance is not none %} 5 |

      Feature importance calculation method: {{fi_method}}.

      6 |

      feature_importance

      7 | {% else %} 8 |

      No feature importance provided for original features.

      9 | {% endif %} 10 |
    11 |
  • 12 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/feature_importance_utillized_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Feature importance

    3 |
      4 | {% if feature_importance is not none %} 5 |

      Feature importance calculation method: {{fi_method}}.

      6 |

      feature_importance

      7 | {% else %} 8 |

      No feature importance provided for original features or used presets do not provide feature importance.

      9 |

      Try increasing the timeout to use more presets.

      10 | {% endif %} 11 |
    12 |
  • 13 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/interpretation_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    PDP interpretation

    3 |
      4 | {% if interpretation_top is not none %} 5 | {% for section in interpretation_top %} 6 | {{ section }} 7 | {% endfor %} 8 | {% else %} 9 |

      Pass valid_data to build PDP interpretation section.

      10 | {% endif %} 11 |
    12 |
  • 13 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/interpretation_subsection.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ feature_name }}

    3 |
      4 |

      feature_interpretation_plot

      5 |
    6 |
  • 7 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/lama_base_template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | LAMA report 8 | 9 | 85 | 89 | 90 | 91 |
    92 |

    LAMA report

    93 |
    94 |
      95 | {% for section in sections %} 96 | {{ section }} 97 | {% endfor %} 98 |
    99 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/model_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Model overview

    3 |
      4 |
    • 5 |

      Model parameters

      6 |
        7 |

        The following model is loaded: {{ model_name }}.

        8 |

        Model parameters are described below:

        9 | 10 | {{ model_parameters }} 11 |
      12 |
    • 13 |
    • 14 |

      Summary results

      15 |
        16 | {% if model_summary is not none %} 17 |

        Results for data samples:

        18 | 19 | {{ model_summary }} 20 | {% else %} 21 |

        Use fit_predict() for training model.

        22 | {% endif %} 23 |
      24 |
    • 25 |
    26 |
  • 27 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/model_section_utilized.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Model overview

    3 |
      4 |
    • 5 |

      Summary results

      6 |
        7 | {% if model_summary is not none %} 8 |

        Results for data samples:

        9 | 10 | {{ model_summary }} 11 | {% else %} 12 |

        Use fit_predict() for training model.

        13 | {% endif %} 14 |
      15 |
    • 16 |
    • 17 |

      Prediction formula

      18 |
        19 | {% if pred_formula is not none %} 20 | {{ pred_formula }} 21 | {% else %} 22 |

        Use fit_predict() for training model.

        23 | {% endif %} 24 |
      25 |
    • 26 |
    • 27 |

      Model parameters

      28 |
        29 |

        The following model is loaded: {{ model_name }}.

        30 | 31 | {% if model_presets is not none %} 32 |

        Parameters of applied presets are described below:

        33 | {% for preset in model_presets %} 34 | {{ preset }} 35 | {% endfor %} 36 | {% else %} 37 |

        Use fit_predict() for training model. 38 | Description of applied presets will appear in this section.

        39 | {% endif %} 40 |
      41 |
    • 42 |
    43 |
  • 44 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/multiclass_inference_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ title }}

    3 |
      4 |
    • 5 |

      Metrics for separate classes

      6 |
        7 | {{classification_report}} 8 |
      9 |
    • 10 |
    • 11 |

      Confusion matrix

      12 |
        13 | Confusion matrix is normalized over the true (rows) conditions. 14 |

        confusion_matrix

        15 |
      16 |
    • 17 |
    18 |
  • 19 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/nlp_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    NLP section

    3 |
      4 | {% for section in nlp_subsections %} 5 | {{ section }} 6 | {% endfor %} 7 |
    8 |
  • 9 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/nlp_subsection.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ title }}

    3 |
      4 |

      char_len_histogram

      5 |

      tokens_len_histogram

      6 |
    7 |
  • 8 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/preset_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ preset_name }}

    3 |
      4 | {{ model_parameters }} 5 |
    6 |
  • 7 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/reg_inference_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ title }}

    3 |
      4 |
    • 5 |

      Distribution of targets

      6 |
        7 |

        target_distribution

        8 |
      9 |
    • 10 |
    • 11 |

      Predictions vs labels difference histogram

      12 |
        13 |

        error_hist

        14 |
      15 |
    • 16 |
    • 17 |

      Predictions vs labels scatter plot

      18 |
        19 |

        scatter_plot

        20 |
      21 |
    • 22 |
    23 |
  • 24 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/results_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Detailed model results

    3 |
      4 | {% for section in model_results %} 5 | {{ section }} 6 | {% endfor %} 7 |
    8 |
  • 9 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/train_set_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Data overview

    3 |
      4 |
    • 5 |

      Train data summary

      6 |
        7 | {{ train_data_overview }} 8 |
      9 |
    • 10 |
    • 11 |

      Train data details

      12 |
        13 |
      • 14 |

        Numerical features

        15 |
          16 | {% if numerical_features_table is not none %} 17 | {{ numerical_features_table }} 18 | {% else %} 19 |

          No numerical features.

          20 | {% endif %} 21 |
        22 |
      • 23 |
      • 24 |

        Categorical features

        25 |
          26 | {% if categorical_features_table is not none %} 27 | {{ categorical_features_table }} 28 | {% else %} 29 |

          No categorical features.

          30 | {% endif %} 31 |
        32 |
      • 33 |
      • 34 |

        Datetime features

        35 |
          36 | {% if datetime_features_table is not none %} 37 | {{ datetime_features_table }} 38 | {% else %} 39 |

          No datetime features.

          40 | {% endif %} 41 |
        42 |
      • 43 |
      • 44 |

        Textual features

        45 |
          46 | {% if text_features_table is not none %} 47 | {{ text_features_table }} 48 | {% else %} 49 |

          No textual features.

          50 | {% endif %} 51 |
        52 |
      • 53 |
      • 54 |

        Dropped features

        55 |
          56 | {% if dropped_features_table is not none %} 57 |

          Some features were excluded from the training set.

          58 |

          Except {{ target }} variable which was used as target, also there were excluded variables, in which NaN rate exceeds max_nan_rate = {{ max_nan_rate }}. In addition to this, the variables with max_constant_rate > {{ max_constant_rate }} excluded:

          59 | {{ dropped_features_table }} 60 | {% else %} 61 |

          No dropped features.

          62 | {% endif %} 63 | 64 |
        65 |
      • 66 |
      67 |
    • 68 |
    69 |
  • 70 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/train_set_section_utilized.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Data overview

    3 |
      4 |
    • 5 |

      Roles table

      6 |
        7 |

        Each reader has its own preprocessing configuration and type guessing, thus 8 | each reader recognises feature roles differently. 9 |

        10 |

        Table below summarizes the guessed feature roles for all applied presets.

        11 |

        "N" - numerical, "C" - categorical, "D" - datetime, "T" - textual, "-" - dropped.

        12 | {{ roles_table }} 13 |
      14 |
    • 15 | 16 |
    • 17 |

      Precise description

      18 |
        19 |

        Below is the precise description of training data for each preset.

        20 | {% for section in data_sections %} 21 | {{ section }} 22 | {% endfor %} 23 |
      24 |
    • 25 |
    26 |
  • 27 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/uplift_section.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    Uplift performance

    3 |
      4 | {% for section in uplift_results %} 5 | {{ section }} 6 | {% endfor %} 7 |
    8 |
  • 9 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/uplift_subsection.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ title }}

    3 |
      4 |
    • 5 |

      Test sample summary

      6 |
        7 | {{ test_data_overview }} 8 |
      9 |
    • 10 |
    • 11 |

      Uplift curves

      12 |
        13 |

        Uplift curve

        14 |
      15 |
    • 16 |
    • 17 |

      Uplift distribution KDE

      18 |
        19 |

        preds_distribution_by_bins

        20 |
      21 |
    • 22 |
    • 23 |

      Uplift distribution by bins

      24 |
        25 | {{ uplift_bins_table }} 26 |
      27 |
    • 28 |
    29 |
  • 30 | -------------------------------------------------------------------------------- /lightautoml/report/lama_report_templates/utilized_data_subsections.html: -------------------------------------------------------------------------------- 1 |
  • 2 |

    {{ preset_name }}

    3 |
      4 |
    • 5 |
      Train data summary
      6 |
        7 | {{ train_data_overview }} 8 |
      9 |
    • 10 |
    • 11 |
      Train data details
      12 |
        13 |
      • 14 |
        Numerical features
        15 |
          16 | {% if numerical_features_table is not none %} 17 | {{ numerical_features_table }} 18 | {% else %} 19 |

          No numerical features.

          20 | {% endif %} 21 |
        22 |
      • 23 |
      • 24 |
        Categorical features
        25 |
          26 | {% if categorical_features_table is not none %} 27 | {{ categorical_features_table }} 28 | {% else %} 29 |

          No categorical features.

          30 | {% endif %} 31 |
        32 |
      • 33 |
      • 34 |
        Datetime features
        35 |
          36 | {% if datetime_features_table is not none %} 37 | {{ datetime_features_table }} 38 | {% else %} 39 |

          No datetime features.

          40 | {% endif %} 41 |
        42 |
      • 43 |
      • 44 |
        Textual features
        45 |
          46 | {% if text_features_table is not none %} 47 | {{ text_features_table }} 48 | {% else %} 49 |

          No textual features.

          50 | {% endif %} 51 |
        52 |
      • 53 |
      • 54 |
        Dropped features
        55 |
          56 | {% if dropped_features_table is not none %} 57 |

          Some features were excluded from the training set.

          58 |

          Except {{ target }} variable which was used as target, also there were excluded variables, in which NaN rate exceeds max_nan_rate = {{ max_nan_rate }}. In addition to this, the variables with max_constant_rate > {{ max_constant_rate }} excluded:

          59 | {{ dropped_features_table }} 60 | {% else %} 61 |

          No dropped features.

          62 | {% endif %} 63 | 64 |
        65 |
      • 66 |
      67 |
    • 68 |
    69 |
  • 70 | -------------------------------------------------------------------------------- /lightautoml/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """Define the task to solve its loss, metric.""" 2 | 3 | from .base import Task 4 | 5 | 6 | __all__ = ["losses", "base", "common_metric", "utils", "Task"] 7 | -------------------------------------------------------------------------------- /lightautoml/tasks/losses/__init__.py: -------------------------------------------------------------------------------- 1 | """Set of loss functions for different machine learning algorithms.""" 2 | 3 | from .base import _valid_str_metric_names 4 | from .cb import CBLoss 5 | from .lgb import LGBLoss 6 | from .xgb import XGBLoss 7 | from .sklearn import SKLoss 8 | from .torch import TORCHLoss 9 | from .torch import TorchLossWrapper 10 | 11 | 12 | __all__ = [ 13 | "XGBLoss", 14 | "LGBLoss", 15 | "TORCHLoss", 16 | "SKLoss", 17 | "CBLoss", 18 | "_valid_str_metric_names", 19 | "TorchLossWrapper", 20 | ] 21 | -------------------------------------------------------------------------------- /lightautoml/tasks/losses/lgb_custom.py: -------------------------------------------------------------------------------- 1 | """Custom metrics and loss functions for LightGBM.""" 2 | 3 | from typing import Tuple 4 | 5 | import lightgbm as lgb 6 | import numpy as np 7 | 8 | from scipy.special import softmax 9 | 10 | 11 | def softmax_ax1(x: np.ndarray) -> np.ndarray: 12 | """Softmax columnwise. 13 | 14 | Args: 15 | x: input. 16 | 17 | Returns: 18 | softmax values. 19 | 20 | """ 21 | return softmax(x, axis=1) 22 | 23 | 24 | def lgb_f1_loss_multiclass( 25 | preds: np.ndarray, train_data: lgb.Dataset, clip: float = 1e-5 26 | ) -> Tuple[np.ndarray, np.ndarray]: 27 | """Custom loss for optimizing f1. 28 | 29 | Args: 30 | preds: Predctions. 31 | train_data: Dataset in LightGBM format. 32 | clip: Clump constant. 33 | 34 | Returns: 35 | Gradient, hessian. 
def lgb_f1_loss_multiclass(
    preds: np.ndarray, train_data: lgb.Dataset, clip: float = 1e-5
) -> Tuple[np.ndarray, np.ndarray]:
    """Custom loss for optimizing f1.

    Args:
        preds: Predictions, flattened in Fortran (column-major) order
            as LightGBM supplies them for multiclass objectives.
        train_data: Dataset in LightGBM format.
        clip: Clipping constant for probabilities.

    Returns:
        Gradient, hessian (flattened back to Fortran order).

    """
    y_true = train_data.get_label().astype(np.int32)
    # Restore the (n_samples, n_classes) matrix from the flat F-order array.
    preds = preds.reshape((y_true.shape[0], -1), order="F")
    # softmax
    preds = np.clip(softmax_ax1(preds), clip, 1 - clip)
    # make ohe
    y_ohe = np.zeros_like(preds)
    np.add.at(y_ohe, (np.arange(y_true.shape[0]), y_true), 1)
    # grad
    grad = (preds - y_ohe) * preds
    # hess
    # NOTE(review): the (2*preds - y_ohe) factor is clipped away from zero for
    # stability, so this hessian is an approximation rather than the exact
    # second derivative - confirm intent.
    hess = (1 - preds) * preds * np.clip((2 * preds - y_ohe), 1e-3, np.inf)
    # reshape back preds
    return grad.reshape((-1,), order="F"), hess.reshape((-1,), order="F")
class SKLoss(Loss):
    """Loss used for scikit-learn.

    Args:
        loss: One of default loss function.
            Valid are: 'logloss', 'mse', 'mae', 'crossentropy', 'rmsle'.
        loss_params: Additional loss parameters.
        fw_func: Forward transformation.
            Used for transformation of target and item weights.
        bw_func: backward transformation.
            Used for predict values transformation.

    """

    def __init__(
        self,
        loss: str,
        loss_params: Optional[Dict] = None,
        fw_func: Optional[Callable] = None,
        bw_func: Optional[Callable] = None,
    ):
        assert loss in [
            "logloss",
            "mse",
            "mae",
            "crossentropy",
            "rmsle",
        ], "Not supported in sklearn in general case."
        # mse/rmsle are the regression losses; everything else is classification.
        self.flg_regressor = loss in ["mse", "rmsle"]

        if loss in _sk_loss_mapping:
            # rmsle maps to mse on transformed targets, so the mapping also
            # overrides the forward/backward transformations.
            self.loss, fw_func, bw_func = _sk_loss_mapping[loss]
        else:
            self.loss = loss
        # set forward and backward transformations
        if fw_func is not None:
            self._fw_func = fw_func
        if bw_func is not None:
            self._bw_func = bw_func

        self.loss_params = loss_params

    def set_callback_metric(
        self,
        metric: Union[str, Callable],
        greater_is_better: Optional[bool] = None,
        metric_params: Optional[Dict] = None,
        task_name: Optional[str] = None,
    ):
        """Callback metric setter.

        Uses default callback of parent class `Loss`.

        Args:
            metric: Callback metric.
            greater_is_better: Whether or not higher value is better.
            metric_params: Additional metric parameters.
            task_name: Name of task.

        """
        # Some losses force a specific callback metric (e.g. rmsle -> mse),
        # overriding whatever the caller requested.
        if self.loss in _sk_force_metric:
            metric, greater_is_better, metric_params = _sk_force_metric[self.loss]
            # NOTE(review): `info2` looks like a project-specific logging level -
            # confirm it is registered before this is called.
            logger.info2("For sklearn {0} callback metric switched to {1}".format(self.loss, metric))

        super().set_callback_metric(metric, greater_is_better, metric_params, task_name)
13 | 14 | Returns: 15 | ```True``` if grater is better. 16 | 17 | Raises: 18 | AssertionError: If there is no way to order the predictions. 19 | 20 | """ 21 | label = np.array([0, 1]) 22 | pred = np.array([0.1, 0.9]) 23 | 24 | g_val = metric(label, pred) 25 | b_val = metric(label, pred[::-1]) 26 | 27 | assert g_val != b_val, "Cannot infer greater is better from metric." " Should be set manually." 28 | 29 | return g_val > b_val 30 | 31 | 32 | def infer_gib_multiclass(metric: Callable) -> bool: 33 | """Infer greater is better from metric. 34 | 35 | Args: 36 | metric: Metric function. It must take two arguments y_true, y_pred. 37 | 38 | Returns: 39 | ```True``` if grater is better. 40 | 41 | Raises: 42 | AssertionError: If there is no way to order the predictions. 43 | 44 | """ 45 | label = np.array([0, 1, 2]) 46 | pred = np.array([[0.9, 0.05, 0.05], [0.05, 0.9, 0.05], [0.05, 0.05, 0.9]]) 47 | 48 | g_val = metric(label, pred) 49 | b_val = metric(label, pred[::-1]) 50 | 51 | assert g_val != b_val, "Cannot infer greater is better from metric. " "Should be set manually." 
class BertDataset:
    """Dataset class with transformers tokenization.

    Prepares transformer inputs: each sentence is tokenized, padded and
    truncated to ``max_length`` and returned as a dict of numpy arrays.

    Args:
        sentences: List of tokenized sentences.
        max_length: Max sentence length.
        model_name: Name of transformer model.
        **kwargs: Other.

    """

    def __init__(self, sentences: Sequence[str], max_length: int, model_name: str, **kwargs: Any):
        self.sentences = sentences
        self.max_length = max_length
        # NOTE(review): the slow tokenizer is selected explicitly (use_fast=False).
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

    def __getitem__(self, idx: int) -> Dict[str, np.ndarray]:
        text = self.sentences[idx]
        halves = text.split("[SEP]")
        # A "[SEP]"-joined pair is encoded as two segments, otherwise as one.
        segments = tuple(halves) if len(halves) == 2 else (text,)
        encoded = self.tokenizer.encode_plus(
            *segments,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
        )
        return {name: np.array(field) for name, field in encoded.items()}

    def __len__(self) -> int:
        return len(self.sentences)
class EmbedDataset:
    """Dataset class for extracting word embeddings.

    Transforms a list of tokens into a fixed-size matrix of embeddings plus
    the number of tokens that were actually embedded.

    Args:
        sentences: List of tokenized sentences.
        embedding_model: word2vec, fasstext, etc.
            Should have dict interface {<word>: <embedding>}.
        max_length: Max sentence length.
        embed_size: Size of embedding.
        **kwargs: Not used.

    """

    def __init__(self, sentences: Sequence[str], embedding_model: Dict, max_length: int, embed_size: int, **kwargs):
        self.sentences = sentences
        self.embedding_model = embedding_model
        self.max_length = max_length
        self.embed_size = embed_size

    def __getitem__(self, idx: int) -> Dict[str, Union[Sequence, int]]:
        matrix = np.zeros((self.max_length, self.embed_size))
        n_embedded = 0
        for token in self.sentences[idx]:
            # Out-of-vocabulary tokens are silently skipped.
            if token not in self.embedding_model:
                continue
            matrix[n_embedded, :] = self.embedding_model[token]
            n_embedded += 1
            if n_embedded >= self.max_length:
                break
        # A zero length would break downstream pooling, so report at least 1.
        return {"text": matrix, "length": max(n_embedded, 1)}

    def __len__(self) -> int:
        return len(self.sentences)
def __validate_extra_deps(extra_section: str, error: bool = False) -> None:
    """Check if extra dependencies is installed.

    Args:
        extra_section: Name of extra dependencies
        error: How to process error

    """
    # In the documentation build environment missing extras are tolerated.
    skip_missing = os.environ.get("DOCUMENTATION_ENV", False)

    metadata = distribution("lightautoml").metadata
    extra_marker = 'extra == "{}"'.format(extra_section)
    # Collect package names declared under the requested extra section.
    required = [
        value.split(";")[0].split()[0]
        for key, value in metadata.items()
        if key == "Requires-Dist" and extra_marker in value
    ]

    for requirement in required:
        package_name: str = requirement.split()[0]
        try:
            distribution(package_name)
        except PackageNotFoundError as exc:
            # Always warn; escalate to an exception only when requested.
            logger.warning(
                "'%s' extra dependency package '%s' isn't installed. "
                "Look at README.md in repo 'LightAutoML' for installation instructions.",
                extra_section,
                package_name,
            )

            if not skip_missing and error:
                raise exc
def create_validation_iterator(
    train: LAMLDataset,
    valid: Optional[LAMLDataset] = None,
    n_folds: Optional[int] = None,
    cv_iter: Optional[Callable] = None,
) -> TrainValidIterator:
    """Creates train-validation iterator.

    Dispatch rules:
        - ``PandasDataset`` / ``NumpyDataset`` / ``CSRSparseDataset`` (exact type
          match, not ``isinstance``) are routed to
          :func:`~lightautoml.validation.np_iterators.get_numpy_iterator`;
        - any other dataset type with ``valid`` given gets a holdout-iterator;
        - otherwise a dummy iterator is returned.

    Args:
        train: Dataset to train.
        valid: Optional dataset for validate.
        n_folds: Maximum number of folds to iterate. If ``None`` - iterate through all folds.
        cv_iter: Takes dataset as input and return an iterator of indexes of train/valid for train dataset.

    Returns:
        New iterator.

    """
    common_np_types = (PandasDataset, NumpyDataset, CSRSparseDataset)

    # Exact type membership check, as in the original list-based test;
    # subclasses fall through to the generic branches below.
    if type(train) in common_np_types:
        return get_numpy_iterator(cast(NpDataset, train), cast(NpDataset, valid), n_folds, cv_iter)

    if valid is not None:
        return HoldoutIterator(train, valid)

    return DummyIterator(train)
class Timer:
    """Simple wall-clock timer usable as a context manager.

    Attributes set here:
        start: timestamp captured on ``__enter__`` (0 until entered),
        stop: timestamp captured on ``__exit__`` (0 while still running).
    """

    @staticmethod
    def _zero():
        # Stand-in clock used when the timer is disabled: time never advances,
        # so every measurement comes out as 0.
        return 0

    def __init__(self, clock=time.time, enabled=True):
        # clock: zero-argument callable returning the current time in seconds.
        # enabled: when False, substitute the constant-zero clock (no-op timer).
        self.start = 0
        self.stop = 0
        self._time = clock if enabled else Timer._zero
        self._tick = 0

    def __enter__(self):
        # Start timing and reset the tick reference point.
        self.start = self._tick = self._time()
        return self

    def __exit__(self, *args):
        # Stop timing; afterwards `tick` returns -1 and `duration` is frozen.
        self.stop = self._tick = self._time()

    @property
    def tick(self):
        """Seconds elapsed since the previous tick (or since start); -1 once stopped."""
        if self.stop > 0:
            return -1
        now = self._time()
        tick = now - self._tick
        self._tick = now
        return tick

    @property
    def duration(self):
        """Get duration in seconds (total if stopped, elapsed-so-far otherwise)."""
        if self.stop > 0:
            return self.stop - self.start
        return self._time() - self.start
def _set_version(py_version: Optional[int] = None) -> None:
    """Rewrite the ``python = "..."`` dependency line of pyproject.toml in place.

    Args:
        py_version: Minor version of python 3 to pin (a key of ``PYTHON_DEPS``),
            or ``None`` to restore the permissive ``ALL_PYTHON_DEPS`` range.
    """
    # fileinput with inplace=1 redirects stdout into the file being edited,
    # so every line must be written back - modified or not.
    for line in fileinput.input(PYPROJECT_TOML.name, inplace=1):
        if re.search(PYTHON_DEPS_PATTERN, line):
            if py_version is None:
                version = ALL_PYTHON_DEPS
            else:
                version = PYTHON_DEPS[py_version]
            line = 'python = "{}"\n'.format(version)

        sys.stdout.write(line)
lightautoml/addons/interpretation/*:D100,D101,D102,D200,D205,D212,D415 18 | lightautoml/addons/utilization/*:D102 19 | lightautoml/report/report_deco.py:D101,D102,D103,D205,D212,D412,D415 20 | lightautoml/text/sentence_pooling.py:D102 21 | lightautoml/utils/*:D101,D102,D103 22 | lightautoml/addons/hypex/tests/*:D103,D100 23 | lightautoml/addons/hypex/ABTesting/ab_tester.py:D100,D101,D102,D103 24 | lightautoml/addons/hypex/utils/tutorial_data_creation.py:D100 25 | docs/*:D100,D103 26 | examples/*:D100,D103 27 | tests/*:D100,D101,D102,D103 28 | check_docs.py:D100 29 | exclude = 30 | .git 31 | __pycache__ 32 | setup.py 33 | build 34 | dist 35 | releases 36 | .venv 37 | .tox 38 | .mypy_cache 39 | .pytest_cache 40 | .vscode 41 | .github 42 | 43 | 44 | [rstcheck] 45 | ignore_directives=one,two,three 46 | ignore_roles=src,RFC 47 | ignore_messages=(Duplicate implicit target name|Unknown directive type "autoclass".|No directive entry for "autoclass" in module "docutils.parsers.rst.languages.en".|Unknown directive type "automodule".|Unknown directive type "autofunction".|No directive entry for "autofunction" in module "docutils.parsers.rst.languages.en".|No directive entry for "automodule" in module "docutils.parsers.rst.languages.en".) 
def load_and_test_automl(filename, task, score, pred, data, target_name):
    """Reload a joblib-dumped automl and check it reproduces prior results.

    Args:
        filename: Path to the joblib dump of a fitted automl instance.
        task: Task object; ``task.name`` selects the metric
            ("binary" -> ROC-AUC, "multiclass" -> log-loss, "reg" -> MSE).
        score: Metric value obtained before serialization.
        pred: Predictions obtained before serialization.
        data: Test dataframe to predict on.
        target_name: Name of the target column in ``data``.

    Raises:
        ValueError: If ``task.name`` is not a supported task type.
        AssertionError: If the reloaded model's score or predictions diverge.
    """
    automl = load(filename)

    test_pred_joblib = automl.predict(data)

    if task.name == "binary":
        score_new = roc_auc_score(data[target_name].values, test_pred_joblib.data[:, 0])
    elif task.name == "multiclass":
        score_new = log_loss(data[target_name].map(automl.reader.targets_mapping), test_pred_joblib.data)
    elif task.name == "reg":
        score_new = mean_squared_error(data[target_name].values, test_pred_joblib.data[:, 0])
    else:
        # Previously an unknown task fell through and crashed with a NameError
        # on the unbound `score_new`; fail with an explicit message instead.
        raise ValueError("Unsupported task type: {!r}".format(task.name))

    # Score must match up to rounding; predictions must match elementwise.
    np.testing.assert_almost_equal(score, score_new, decimal=3)
    np.testing.assert_allclose(pred.data[:, 0], test_pred_joblib.data[:, 0])
roles.items(): 29 | if (key == "target") or isinstance(key, TargetRole): 30 | return value 31 | -------------------------------------------------------------------------------- /tests/integration/test_demo10.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | 6 | from sklearn.metrics import log_loss 7 | 8 | from lightautoml.automl.base import AutoML 9 | from lightautoml.automl.blend import WeightedBlender 10 | from lightautoml.ml_algo.boost_cb import BoostCB 11 | from lightautoml.ml_algo.linear_sklearn import LinearLBFGS 12 | from lightautoml.ml_algo.tuning.optuna import OptunaTuner 13 | from lightautoml.pipelines.features.lgb_pipeline import LGBAdvancedPipeline 14 | from lightautoml.pipelines.features.lgb_pipeline import LGBSimpleFeatures 15 | from lightautoml.pipelines.features.linear_pipeline import LinearFeatures 16 | from lightautoml.pipelines.ml.base import MLPipeline 17 | from lightautoml.pipelines.selection.importance_based import ( 18 | ImportanceCutoffSelector, 19 | ModelBasedImportanceEstimator, 20 | ) 21 | from lightautoml.reader.base import PandasToPandasReader 22 | from lightautoml.utils.timer import PipelineTimer 23 | 24 | # demo of timer, blender and multiclass 25 | 26 | np.random.seed(42) 27 | 28 | 29 | def test_some_pipeline(sampled_app_train_test, multiclass_task): 30 | 31 | train, test = sampled_app_train_test 32 | 33 | timer = PipelineTimer(600, mode=2) 34 | 35 | timer_gbm = timer.get_task_timer("gbm") 36 | feat_sel_0 = LGBSimpleFeatures() 37 | mod_sel_0 = BoostCB(timer=timer_gbm) 38 | imp_sel_0 = ModelBasedImportanceEstimator() 39 | selector_0 = ImportanceCutoffSelector( 40 | feat_sel_0, 41 | mod_sel_0, 42 | imp_sel_0, 43 | cutoff=0, 44 | ) 45 | 46 | feats_gbm_0 = LGBAdvancedPipeline(top_intersections=4, feats_imp=imp_sel_0) 47 | timer_gbm_0 = timer.get_task_timer("gbm") 48 | timer_gbm_1 = timer.get_task_timer("gbm") 49 | 50 | gbm_0 = 
def test_tabularnlp(avito1k_train_test, avito1k_roles, regression_task):
    """End-to-end check of TabularNLPAutoML on a regression task.

    Fits the NLP preset on the avito1k sample (fixtures presumably defined
    in conftest - TODO confirm) and asserts both out-of-fold and holdout
    MSE stay below 0.2.
    """
    train, test = avito1k_train_test

    roles = avito1k_roles

    task = regression_task

    automl = TabularNLPAutoML(task=task, timeout=600)
    oof_pred = automl.fit_predict(train, roles=roles)
    test_pred = automl.predict(test)
    # Rows with no out-of-fold prediction (all-NaN) are excluded from scoring.
    not_nan = np.any(~np.isnan(oof_pred.data), axis=1)
    target = roles["target"]

    oof_score = mean_squared_error(train[target].values[not_nan], oof_pred.data[not_nan][:, 0])

    assert oof_score < 0.2

    test_score = mean_squared_error(test[target].values, test_pred.data[:, 0])
    assert test_score < 0.2

    # Clean up model artifacts the NLP preset writes to disk.
    shutil.rmtree("./models", ignore_errors=True)
def test_autots(ai92_value_77_train_test):
    """End-to-end check of the AutoTS addon on a univariate series.

    Trains on the ai92 series and asserts the forecast MAE over the holdout
    horizon stays under the (temporarily loose) threshold.
    """

    train, test, horizon = ai92_value_77_train_test
    roles = {"target": "value", "datetime": "date"}

    # One sequence config: predict `horizon` next values from a history
    # window of at least 7 points.
    seq_params = {
        "seq0": {
            "case": "next_values",
            "params": {"n_target": horizon, "history": np.maximum(7, horizon), "step": 1, "test_last": True},
        },
    }

    # True (then set default values) / False; int, list or np.array
    # default: lag_features=30, diff_features=7
    transformers_params = {
        "lag_features": [0, 1, 2, 3, 5, 10],
        "lag_time_features": [0, 1, 2],
        "diff_features": [0, 1, 3, 4],
    }

    task = Task("multi:reg", greater_is_better=False, metric="mae", loss="mae")

    reader_params = {
        "seq_params": seq_params,
        "transformers_params": transformers_params,
    }
    automl = AutoTS(
        task,
        reader_params=reader_params,
        time_series_trend_params={
            "trend": False,  # disable trend modeling for this series
        },
    )
    automl.fit_predict(train, roles, verbose=4)
    forecast, _ = automl.predict(train)

    test_score = mean_absolute_error(test[roles["target"]].values, forecast)
    assert test_score < 22e4  # TODO: 2e5
def test_tabularautoml_2lvl(sampled_app_train_test, binary_task):
    """Check the TabularAutoML preset with two levels and nested CV.

    Builds a 2-level stack (linear_l2 + lgb on both levels, with skip
    connections) and asserts OOF / holdout ROC-AUC thresholds.
    """

    train, test = sampled_app_train_test

    roles = {
        "target": "TARGET",
        # report_dt is the base date; no seasonal/base features generated from it.
        DatetimeRole(base_date=True, seasonality=(), base_feats=False): "report_dt",
    }

    task = binary_task

    automl = TabularAutoML(
        task=task,
        timeout=600,
        general_params={
            "use_algos": [
                [
                    "linear_l2",
                    "lgb",
                ],
                ["linear_l2", "lgb"],
            ],
            "nested_cv": True,
            "skip_conn": True,
        },
        nested_cv_params={"cv": 5, "n_folds": None},
        debug=True,
    )

    oof_pred = automl.fit_predict(train, roles=roles, verbose=5)
    test_pred = automl.predict(test)

    # Rows with no out-of-fold prediction (all-NaN) are excluded from scoring.
    not_nan = np.any(~np.isnan(oof_pred.data), axis=1)

    oof_score = roc_auc_score(train[roles["target"]].values[not_nan], oof_pred.data[not_nan][:, 0])
    assert oof_score > 0.75

    test_score = roc_auc_score(test[roles["target"]].values, test_pred.data[:, 0])
    assert test_score > 0.7
def test_classic_tabularautoml(sampled_app_train_test, binary_task):
    """Smoke-test the default TabularAutoML preset on the binary sample.

    Fits with default settings and asserts both out-of-fold and holdout
    ROC-AUC exceed 0.7.
    """

    train, test = sampled_app_train_test

    roles = {
        "target": "TARGET",
        # report_dt is the base date; no seasonal/base features generated from it.
        DatetimeRole(base_date=True, seasonality=(), base_feats=False): "report_dt",
    }

    task = binary_task

    automl = TabularAutoML(
        task=task,
        timeout=3600,
        debug=True,
    )
    oof_pred = automl.fit_predict(train, roles=roles, verbose=5)
    test_pred = automl.predict(test)

    # Rows with no out-of-fold prediction (all-NaN) are excluded from scoring.
    not_nan = np.any(~np.isnan(oof_pred.data), axis=1)

    oof_score = roc_auc_score(train[roles["target"]].values[not_nan], oof_pred.data[not_nan][:, 0])
    assert oof_score > 0.7

    test_score = roc_auc_score(test[roles["target"]].values, test_pred.data[:, 0])
    assert test_score > 0.7
def test_lgbm_linear_pipeline(sampled_app_train_test, multiclass_task):
    """Hand-assembled AutoML: LGBM + linear pipelines, weighted blend, multiclass.

    Demonstrates the timer / blender machinery: an importance-based selector,
    a tuned + untuned LGBM pipeline and a linear pipeline on one level,
    blended and scored with log-loss.
    """

    # demo of timer, blender and multiclass
    np.random.seed(42)
    train, test = sampled_app_train_test
    timer = PipelineTimer(600, mode=2)

    # Feature selector: cutoff on model-based importances from a plain LGBM.
    timer_gbm = timer.get_task_timer("gbm")
    feat_sel_0 = LGBSimpleFeatures()
    mod_sel_0 = BoostLGBM(timer=timer_gbm)
    imp_sel_0 = ModelBasedImportanceEstimator()
    selector_0 = ImportanceCutoffSelector(
        feat_sel_0,
        mod_sel_0,
        imp_sel_0,
        cutoff=0,
    )

    feats_gbm_0 = LGBAdvancedPipeline(top_intersections=4, output_categories=True, feats_imp=imp_sel_0)
    timer_gbm_0 = timer.get_task_timer("gbm")
    timer_gbm_1 = timer.get_task_timer("gbm")

    gbm_0 = BoostLGBM(timer=timer_gbm_0)
    gbm_1 = BoostLGBM(timer=timer_gbm_1)

    # First LGBM is Optuna-tuned, second runs with defaults.
    tuner_0 = OptunaTuner(n_trials=10, timeout=10, fit_on_holdout=True)
    gbm_lvl0 = MLPipeline(
        [(gbm_0, tuner_0), gbm_1],
        pre_selection=selector_0,
        features_pipeline=feats_gbm_0,
        post_selection=None,
    )

    feats_reg_0 = LinearFeatures(output_categories=True, sparse_ohe="auto")

    timer_reg = timer.get_task_timer("reg")
    reg_0 = LinearLBFGS(timer=timer_reg)

    reg_lvl0 = MLPipeline([reg_0], pre_selection=None, features_pipeline=feats_reg_0, post_selection=None)

    # Permissive reader settings: keep all columns regardless of NaN/constant rate.
    reader = PandasToPandasReader(
        multiclass_task,
        samples=None,
        max_nan_rate=1,
        max_constant_rate=1,
        advanced_roles=True,
        drop_score_co=-1,
        n_jobs=1,
    )

    blender = WeightedBlender()

    automl = AutoML(
        reader=reader,
        levels=[[gbm_lvl0, reg_lvl0]],
        timer=timer,
        blender=blender,
        debug=True,
        skip_conn=False,
    )
    oof_pred = automl.fit_predict(train, roles={"target": "TARGET"}, verbose=5)
    test_pred = automl.predict(test)

    # Rows with no out-of-fold prediction (all-NaN) are excluded from scoring.
    not_nan = np.any(~np.isnan(oof_pred.data), axis=1)

    oof_score = log_loss(train["TARGET"].values[not_nan], oof_pred.data[not_nan, :])
    assert oof_score < 1

    test_score = log_loss(test["TARGET"].values, test_pred.data)
    assert test_score < 1
def get_target_name(roles):
    """Extract the target column name from a roles mapping.

    Args:
        roles: Mapping from role (the string ``"target"`` or a role instance)
            to column name.

    Returns:
        The column mapped to the target role, or ``None`` when absent.
    """
    return next(
        (column for role, column in roles.items() if role == "target" or isinstance(role, TargetRole)),
        None,
    )
class TestTabularAutoML:
    """Default TabularAutoML preset checked on OOF / holdout ROC-AUC."""

    def test_fit_predict(self, sampled_app_train_test, sampled_app_roles, binary_task):
        """Fit the preset, score both splits, then verify pickling round-trip."""
        # load and prepare data
        train, test = sampled_app_train_test

        # run automl
        automl = TabularAutoML(task=binary_task)
        oof_predictions = automl.fit_predict(train, roles=sampled_app_roles, verbose=10)
        ho_predictions = automl.predict(test)

        # calculate scores
        target_name = get_target_name(sampled_app_roles)
        oof_score = roc_auc_score(train[target_name].values, oof_predictions.data[:, 0])
        ho_score = roc_auc_score(test[target_name].values, ho_predictions.data[:, 0])

        # checks
        assert oof_score > 0.73
        assert ho_score > 0.72

        # Serialized model must reproduce the holdout score after reload.
        check_pickling(automl, ho_score, binary_task, test, target_name)
calculate scores 27 | target_name = get_target_name(sampled_app_roles) 28 | oof_score = roc_auc_score(train[target_name].values, oof_predictions.data[:, 0]) 29 | ho_score = roc_auc_score(test[target_name].values, ho_predictions.data[:, 0]) 30 | 31 | # checks 32 | assert oof_score > 0.58 33 | assert ho_score > 0.58 34 | 35 | check_pickling(automl, ho_score, binary_task, test, target_name) 36 | -------------------------------------------------------------------------------- /tests/unit/test_automl/test_presets/test_tabularautoml_xgb.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score 2 | 3 | from lightautoml.automl.presets.tabular_presets import TabularAutoML 4 | from tests.unit.test_automl.test_presets.presets_utils import check_pickling 5 | from tests.unit.test_automl.test_presets.presets_utils import get_target_name 6 | 7 | 8 | class TestTabularAutoMLXGB: 9 | def test_fit_predict(self, sampled_app_train_test, sampled_app_roles, binary_task): 10 | # load and prepare data 11 | train, test = sampled_app_train_test 12 | 13 | # run automl 14 | automl = TabularAutoML(task=binary_task, general_params={"use_algos": [["xgb"]]}) 15 | oof_predictions = automl.fit_predict(train, roles=sampled_app_roles, verbose=10) 16 | ho_predictions = automl.predict(test) 17 | 18 | # calculate scores 19 | target_name = get_target_name(sampled_app_roles) 20 | oof_score = roc_auc_score(train[target_name].values, oof_predictions.data[:, 0]) 21 | ho_score = roc_auc_score(test[target_name].values, ho_predictions.data[:, 0]) 22 | 23 | # checks 24 | assert oof_score > 0.69 25 | assert ho_score > 0.69 26 | 27 | check_pickling(automl, ho_score, binary_task, test, target_name) 28 | -------------------------------------------------------------------------------- /tests/unit/test_automl/test_presets/test_tabularnlpautoml.py: -------------------------------------------------------------------------------- 1 | import numpy as 
np 2 | 3 | from sklearn.metrics import mean_squared_error 4 | 5 | from lightautoml.automl.presets.text_presets import TabularNLPAutoML 6 | from tests.unit.test_automl.test_presets.presets_utils import check_pickling 7 | from tests.unit.test_automl.test_presets.presets_utils import get_target_name 8 | 9 | 10 | class TestTabularNLPAutoML: 11 | def test_fit_predict(self, avito1k_train_test, avito1k_roles, regression_task): 12 | # load and prepare data 13 | train, test = avito1k_train_test 14 | 15 | # run automl 16 | automl = TabularNLPAutoML(task=regression_task, timeout=600) 17 | oof_pred = automl.fit_predict(train, roles=avito1k_roles, verbose=10) 18 | test_pred = automl.predict(test) 19 | not_nan = np.any(~np.isnan(oof_pred.data), axis=1) 20 | 21 | target_name = get_target_name(avito1k_roles) 22 | oof_score = mean_squared_error(train[target_name].values[not_nan], oof_pred.data[not_nan][:, 0]) 23 | ho_score = mean_squared_error(test[target_name].values, test_pred.data[:, 0]) 24 | 25 | # checks 26 | assert oof_score < 0.7 27 | assert ho_score < 0.7 28 | 29 | check_pickling(automl, ho_score, regression_task, test, target_name) 30 | -------------------------------------------------------------------------------- /tests/unit/test_automl/test_presets/test_tabularutilizedautoml.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score 2 | 3 | from lightautoml.automl.presets.tabular_presets import TabularAutoML 4 | from tests.unit.test_automl.test_presets.presets_utils import check_pickling 5 | from tests.unit.test_automl.test_presets.presets_utils import get_target_name 6 | 7 | 8 | class TabularUtilizedAutoML: 9 | def test_fit_predict(self, sampled_app_train_test, sampled_app_roles, binary_task): 10 | # load and prepare data 11 | train, test = sampled_app_train_test 12 | 13 | # run automl 14 | automl = TabularAutoML(task=binary_task) 15 | oof_predictions = automl.fit_predict(train, 
roles=sampled_app_roles, verbose=10) 16 | ho_predictions = automl.predict(test) 17 | 18 | # calculate scores 19 | target_name = get_target_name(sampled_app_roles) 20 | oof_score = roc_auc_score(train[target_name].values, oof_predictions.data[:, 0]) 21 | ho_score = roc_auc_score(test[target_name].values, ho_predictions.data[:, 0]) 22 | 23 | # checks 24 | assert oof_score > 0.73 25 | assert ho_score > 0.72 26 | 27 | check_pickling(automl, ho_score, binary_task, test, target_name) 28 | -------------------------------------------------------------------------------- /tests/unit/test_automl/test_presets/test_uplift.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score 2 | 3 | # from tests.unit.test_automl.test_presets.presets_utils import check_pickling 4 | 5 | import copy 6 | from lightautoml.addons.uplift.base import AutoUplift 7 | from lightautoml.addons.uplift.metrics import ( 8 | calculate_min_max_uplift_auc, 9 | calculate_uplift_auc, 10 | ) 11 | 12 | 13 | class TestAutoUpliftPreset: 14 | def test_fit_predict(self, uplift_data_train_test, sampled_app_roles, binary_task): 15 | # load and prepare data 16 | train, test, test_target, test_treatment = uplift_data_train_test 17 | 18 | # run automl 19 | autouplift = AutoUplift( 20 | binary_task, 21 | metric="adj_qini", 22 | has_report=True, 23 | test_size=0.2, 24 | timeout=200, 25 | cpu_limit=1, 26 | # gpu_ids=["0"] 27 | # timeout_metalearner=5 28 | ) 29 | 30 | uplift_data_roles = copy.deepcopy(sampled_app_roles) 31 | uplift_data_roles["treatment"] = "CODE_GENDER" 32 | 33 | autouplift.fit(train, uplift_data_roles, verbose=1) 34 | 35 | best_metalearner = autouplift.create_best_metalearner( 36 | update_metalearner_params={"timeout": None}, update_baselearner_params={"timeout": 30} 37 | ) 38 | best_metalearner.fit(train, uplift_data_roles) 39 | _ = best_metalearner.predict(test) 40 | 41 | uplift_pred, treatment_pred, control_pred = 
best_metalearner.predict(test) 42 | uplift_pred = uplift_pred.ravel() 43 | 44 | # calculate scores 45 | roc_auc_treatment = roc_auc_score(test_target[test_treatment == 1], treatment_pred[test_treatment == 1]) 46 | roc_auc_control = roc_auc_score(test_target[test_treatment == 0], control_pred[test_treatment == 0]) 47 | 48 | uplift_auc_algo = calculate_uplift_auc(test_target, uplift_pred, test_treatment, normed=False) 49 | uplift_auc_algo_normed = calculate_uplift_auc(test_target, uplift_pred, test_treatment, normed=True) 50 | auc_base, auc_perfect = calculate_min_max_uplift_auc(test_target, test_treatment) 51 | 52 | print("--- Check scores ---") 53 | print('OOF scores "ROC_AUC":') 54 | print("\tTreatment = {:.5f}".format(roc_auc_treatment)) 55 | print("\tControl = {:.5f}".format(roc_auc_control)) 56 | print('Uplift score of test group (default="adj_qini"):') 57 | print("\tBaseline = {:.5f}".format(auc_base)) 58 | print("\tAlgo (Normed) = {:.5f} ({:.5f})".format(uplift_auc_algo, uplift_auc_algo_normed)) 59 | print("\tPerfect = {:.5f}".format(auc_perfect)) 60 | 61 | # Uplift score of test group (default="adj_qini"): 62 | # Baseline = 0.01340 63 | # Algo (Normed) = 0.03012 (0.20648) 64 | # Perfect = 0.09438 65 | 66 | # checks 67 | assert roc_auc_treatment > 0.68 # 0.69535 68 | assert roc_auc_control > 0.71 # 0.73022 69 | 70 | # check_pickling(autouplift, ho_score, binary_task, test, target_name) 71 | -------------------------------------------------------------------------------- /tests/unit/test_automl/test_presets/test_whiteboxpreset.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score 2 | 3 | from lightautoml.automl.presets.whitebox_presets import WhiteBoxPreset 4 | from tests.unit.test_automl.test_presets.presets_utils import check_pickling 5 | from tests.unit.test_automl.test_presets.presets_utils import get_target_name 6 | 7 | 8 | class TestWhiteBoxPreset: 9 | def test_fit_predict(self, 
jobs_train_test, jobs_roles, binary_task): 10 | # load and prepare data 11 | train, test = jobs_train_test 12 | 13 | # run automl 14 | automl = WhiteBoxPreset(binary_task) 15 | oof_predictions = automl.fit_predict(train.reset_index(drop=True), roles=jobs_roles, verbose=10) 16 | ho_predictions = automl.predict(test) 17 | 18 | # calculate scores 19 | target_name = get_target_name(jobs_roles) 20 | oof_score = roc_auc_score(train[target_name].values, oof_predictions.data[:, 0]) 21 | ho_score = roc_auc_score(test[target_name].values, ho_predictions.data[:, 0]) 22 | 23 | # checks 24 | assert oof_score > 0.75 25 | assert ho_score > 0.75 26 | 27 | check_pickling(automl, ho_score, binary_task, test, target_name) 28 | -------------------------------------------------------------------------------- /tests/unit/test_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_dataset/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_image/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_image/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_ml_algo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_ml_algo/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_ml_algo/test_optimization/optuna/test_optuna_tuner.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 
4 | 5 | from lightautoml.ml_algo.boost_lgbm import BoostLGBM 6 | from lightautoml.ml_algo.tuning.base import Normal 7 | from lightautoml.ml_algo.tuning.base import Uniform 8 | from lightautoml.ml_algo.tuning.optuna import OptunaTuner 9 | 10 | 11 | # from lightautoml.dataset.np_pd_dataset import PandasDataset 12 | # from lightautoml.dataset.utils import roles_parser 13 | # from lightautoml.pipelines.features.lgb_pipeline import LGBSimpleFeatures 14 | # from lightautoml.validation.np_iterators import FoldsIterator 15 | 16 | 17 | # @pytest.mark.parametrize( 18 | # "sampled_app_train_test", 19 | # [ 20 | # (1000), 21 | # ], 22 | # indirect=["sampled_app_train_test"], 23 | # ) 24 | # def test_params_values_ranges( 25 | # sampled_app_train_test, 26 | # sampled_app_roles, 27 | # binary_task, 28 | # ): 29 | 30 | # train, _ = sampled_app_train_test 31 | 32 | # features_pipeline = LGBSimpleFeatures() 33 | # iterator = FoldsIterator( 34 | # PandasDataset( 35 | # data=train, 36 | # roles=roles_parser(sampled_app_roles), 37 | # task=binary_task, 38 | # ) 39 | # ) 40 | 41 | # iterator = iterator.apply_feature_pipeline(features_pipeline) 42 | 43 | # model = BoostLGBM( 44 | # default_params={"num_trees": 1, "random_state": 42}, 45 | # freeze_defaults=True, 46 | # optimization_search_space={ 47 | # "feature_fraction": SearchSpace(Distribution.UNIFORM, low=0.5, high=1.0), 48 | # "min_sum_hessian_in_leaf": SearchSpace(Distribution.CHOICE, choices=[0.5, 0.8]), 49 | # }, 50 | # ) 51 | 52 | # params_tuner = OptunaTuner(n_trials=10, timeout=300) 53 | # params_tuner.fit( 54 | # ml_algo=model, 55 | # train_valid_iterator=iterator, 56 | # ) 57 | 58 | # # check that the hyperparameters values are in the defined search space 59 | # for trial in params_tuner.study.get_trials(): 60 | # assert (trial.params["feature_fraction"] >= 0) and (trial.params["feature_fraction"] <= 1) 61 | # assert trial.params["min_sum_hessian_in_leaf"] in [0.5, 0.8] 62 | 63 | # # check time, n_trials 64 | 65 | # # 
check best params 66 | # assert (params_tuner.best_params["feature_fraction"] == 0.7993292420985183) and ( 67 | # params_tuner.best_params["min_sum_hessian_in_leaf"] == 0.5 68 | # ) 69 | 70 | 71 | def test_invalid_distributions(): 72 | iterator_mock = mock.MagicMock() 73 | 74 | model = BoostLGBM( 75 | default_params={"num_trees": 1, "random_state": 42}, 76 | freeze_defaults=True, 77 | optimization_search_space={ 78 | "feature_fraction": Uniform(low=0.5, high=1.0), 79 | "min_sum_hessian_in_leaf": Normal(low=1, high=2), # distribution is not supported by Optuna 80 | }, 81 | ) 82 | 83 | params_tuner = OptunaTuner(n_trials=10, timeout=300) 84 | 85 | with pytest.raises(Exception): 86 | params_tuner.fit( 87 | ml_algo=model, 88 | train_valid_iterator=iterator_mock, 89 | ) 90 | -------------------------------------------------------------------------------- /tests/unit/test_pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_pipelines/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_reader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_reader/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_report/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_report/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_tasks/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_tasks/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_text/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_transformers/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_transformers/test_numeric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lightautoml.transformers.numeric import FillnaMean 4 | from lightautoml.transformers.numeric import FillnaMedian 5 | from lightautoml.transformers.numeric import QuantileTransformer 6 | 7 | 8 | def test_fillnamean(lamldataset_with_na): 9 | transformer = FillnaMean() 10 | output = transformer.fit_transform(lamldataset_with_na) 11 | 12 | assert output.data[:, 0].mean() == 4 13 | assert output.data[:, 1].mean() == 5 14 | assert output.data[:, 2].mean() == 0 15 | 16 | 17 | def test_fillnamedian(lamldataset_with_na): 18 | transformer = FillnaMedian() 19 | output = transformer.fit_transform(lamldataset_with_na) 20 | 21 | assert output.data[:, 0].mean() == 4 22 | assert output.data[:, 1].mean() == 5 23 | assert output.data[:, 2].mean() == 0 24 | 25 | 26 | def test_quantiletransformer(lamldataset_30_2): 27 | transformer = QuantileTransformer(noise=None) 28 | output = transformer.fit_transform(lamldataset_30_2) 29 | 30 | # 
raise(Exception(output.data)) 31 | np.testing.assert_allclose( 32 | output.data, 33 | np.array( 34 | [ 35 | [-5.19933758, -5.19933758], 36 | [-1.47640435, -1.48183072], 37 | [-1.42177828, -1.44872465], 38 | [-1.24067307, -1.25262296], 39 | [-1.02813514, -1.06089913], 40 | [-0.87314381, -0.95310275], 41 | [-0.86592145, -0.86396215], 42 | [-0.62097828, -0.60156557], 43 | [-0.50478792, -0.5339135], 44 | [-0.50373715, -0.48136567], 45 | [-0.39911771, -0.32828215], 46 | [-0.36893762, -0.30284499], 47 | [-0.18779519, -0.24328491], 48 | [-0.12682175, -0.13728361], 49 | [-0.01319139, 0.02800416], 50 | [0.01861645, 0.04895583], 51 | [0.13783602, 0.13759678], 52 | [0.1464553, 0.23178871], 53 | [0.2576737, 0.38408782], 54 | [0.35208669, 0.41496716], 55 | [0.59203696, 0.44743911], 56 | [0.6766203, 0.46734882], 57 | [0.75052545, 0.50159806], 58 | [0.77324891, 0.80971082], 59 | [0.95569552, 0.86776588], 60 | [1.18959458, 1.09445074], 61 | [1.22743552, 1.26058256], 62 | [1.43696861, 1.27169708], 63 | [1.55529186, 1.77127928], 64 | [5.19933758, 5.19933758], 65 | ] 66 | ), 67 | atol=1e-5, 68 | rtol=1e-5, 69 | ) 70 | -------------------------------------------------------------------------------- /tests/unit/test_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_utils/__init__.py -------------------------------------------------------------------------------- /tests/unit/test_utils/test_logging.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | 6 | import pytest 7 | 8 | from lightautoml.automl.presets.tabular_presets import TabularAutoML 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "sampled_app_train_test, verbose, log_file", 13 | [ 14 | (1000, 0, "log_file.log"), 15 | # (10, 'log_file.log'), 16 | ], 17 | 
17 |     indirect=["sampled_app_train_test"],
18 | )
19 | def test_logging(
20 |     capsys,
21 |     tmp_path,
22 |     sampled_app_train_test,
23 |     sampled_app_roles,
24 |     binary_task,
25 |     verbose,
26 |     log_file,
27 | ):
28 |     train, _ = sampled_app_train_test
29 |
30 |     if log_file:
31 |         log_file = os.path.join(tmp_path, "log_file.log")
32 |
33 |     automl = TabularAutoML(
34 |         task=binary_task,
35 |         tuning_params={"max_tuning_iter": 3, "max_tuning_time": 30},
36 |         lgb_params={"default_params": {"num_trees": 5}},
37 |     )
38 |
39 |     automl.fit_predict(
40 |         train,
41 |         roles=sampled_app_roles,
42 |         verbose=verbose,
43 |         log_file=log_file,
44 |     )
45 |
46 |     sys_out, sys_err = capsys.readouterr()
47 |
48 |     if log_file:
49 |         assert os.path.exists(log_file)
50 |
51 |     if verbose == 0:
52 |         assert sys_out == ""
53 |         assert sys_err == ""
54 |
55 |     # If log_file contains exact same that in stdout at max verbose value
56 |     # if (verbose >= 4) and (log_file is not None):
57 |     #     sys_out_lines = sys_out.split('\n')
58 |     #     with open(log_file) as f:
59 |     #         for line_file, line_stdout in zip(f, sys_out_lines):
60 |     #             # remove message prefixes and compare
61 |     #             assert re.split(r'^(?:[^\t\r\n]+\t){5}([01])(?:\t|$)', line_file) == re.split(r'\s(.*)', line_stdout)
62 |
63 |
64 | # def test_logging_verbose_switching():
65 | # def test_logging_custom_pipeline():
66 |
-------------------------------------------------------------------------------- /tests/unit/test_validation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sb-ai-lab/LightAutoML/b53830f84dc1ceec0112c7905be950304fafaa9f/tests/unit/test_validation/__init__.py
-------------------------------------------------------------------------------- /tox.ini: --------------------------------------------------------------------------------
[tox]
min_version = 3.28.0
isolated_build = True
envlist =
    py{38, 39, 310, 311, 312},
    lint,
    docs,
    ; typing,
    ; build,
    codespell

; [tox:.package]
# note tox will use the same python version as under what tox is installed to package
# so unless this is python 3 you can require a given python version for the packaging
# environment via the basepython key
; basepython = python3
; NOTE(review): basepython is commented out above because its section header
; "[tox:.package]" is commented — left active, the key leaked into the
; preceding [tox] section where it has no meaning.

[gh-actions]
python =
    3.8: py38
    3.9: py39
    3.10: py310
    3.11: py311
    3.12: py312

[gh-actions:env]
PLATFORM =
    ubuntu-latest: linux
    macos-latest: macos
    windows-latest: windows

[testenv]
skip_install = true
allowlist_externals = make
deps =
    -e .[all]
    pytest >= 6.2.5
    jupyter
commands = pytest {posargs} -v --basetemp="{envtmpdir}" --log-level=DEBUG

[testenv:lint]
skip_install = true
deps =
    pre-commit == 2.15.0
commands =
    pre-commit install
    pre-commit run --all-files

[testenv:docs]
requires = python >= 3.8
changedir = docs
deps =
    sphinx == 5.3.0 # extras = ["autdoc", "autosummary", "intersphinx", "napoleon", "viewcode"]
    sphinx-autodoc-typehints >=1.19.5
    sphinx-rtd-theme >=1.1.1
    nbsphinx == 0.8.10
    nbsphinx-link == 1.3.0
    doc8 == 0.10.1
    rstcheck == 3.3.1
    pandoc == 2.0.1
    ipython >=3.8
commands =
    make clean html
    python ../check_docs.py

; [testenv:typing]
; description = run type checks
; deps =
;     mypy >= 0.991
; commands =
;     mypy {posargs:lightautoml tests}

; [testenv:build]
; skip_install = true
; deps =
;     poetry >= 1.1.7
; commands =
;     poetry run python scripts/poetry_fix.py -f
;     poetry build

[testenv:codespell]
skip_install = true
deps =
    codespell
commands =
    codespell --skip="docs,_build,imgs"

# example:
# tox -e exp -- --dataset_project=Datasets_with_metadata --tags=binary openml
# tox -e exp -- --dataset_project=Datasets_with_metadata --dataset=CIFAR_10_openml --queue=gpu_queue
# tox -e exp -- --dataset_project=Datasets_with_metadata --tags=multiclass --queue=gpu_queue --n_datasets=5 --name=mlp --min_num_obs=100000
# Notion: args [--tags=binary openml] means tag is binary OR tag is openml
[testenv:exp]
deps =
    clearml
    pandas
    numpy
commands =
    python scripts/experiments/run.py {posargs}

[testenv:exp_bonus]
deps =
    -e .[all]
    clearml
commands =
    python scripts/experiments/run.py {posargs}