├── sklift ├── tests │ ├── __init__.py │ ├── test_import.py │ ├── conftest.py │ ├── test_models.py │ ├── test_datasets.py │ ├── test_viz.py │ └── test_metrics.py ├── __init__.py ├── utils │ ├── __init__.py │ └── utils.py ├── models │ └── __init__.py ├── datasets │ ├── __init__.py │ └── descr │ │ ├── megafon.rst │ │ ├── x5.rst │ │ ├── criteo.rst │ │ ├── hillstrom.rst │ │ └── lenta.rst ├── viz │ └── __init__.py └── metrics │ └── __init__.py ├── test_requirements.txt ├── docs ├── tutorials.rst ├── requirements.txt ├── _static │ ├── sklift-logo.png │ ├── soc_net_logo.png │ ├── images │ │ ├── SoloModel.png │ │ ├── memchik_RU.png │ │ ├── sklift_404.png │ │ ├── SoloModel_RU.png │ │ ├── readme_img1.png │ │ ├── quick_start_wau.png │ │ ├── x5_table_scheme.png │ │ ├── TwoModels_vanila.png │ │ ├── quick_start_qini.png │ │ ├── quick_start_uplift.png │ │ ├── TwoModels_vanila_RU.png │ │ ├── TwoModels_ddr_control.png │ │ ├── TwoModels_ddr_control_RU.png │ │ ├── user_guide │ │ │ ├── ug_clients_types.jpg │ │ │ ├── ug_data_collection.gif │ │ │ ├── ug_revert_label_mem.png │ │ │ ├── ug_uplift_approaches.png │ │ │ └── ug_comparison_with_other_models.png │ │ └── SoloModel_treatment_intercation.png │ ├── sklift-github-logo.png │ └── css │ │ └── custom.css ├── api │ ├── models │ │ ├── TwoModels.rst │ │ ├── SoloModel.rst │ │ ├── ClassTransformation.rst │ │ ├── ClassTransformationReg.rst │ │ └── index.rst │ ├── metrics │ │ ├── qini_curve.rst │ │ ├── uplift_at_k.rst │ │ ├── uplift_curve.rst │ │ ├── qini_auc_score.rst │ │ ├── uplift_auc_score.rst │ │ ├── max_prof_uplift.rst │ │ ├── perfect_qini_curve.rst │ │ ├── make_uplift_scorer.rst │ │ ├── perfect_uplift_curve.rst │ │ ├── uplift_by_percentile.rst │ │ ├── treatment_balance_curve.rst │ │ ├── weighted_average_uplift.rst │ │ ├── average_squared_deviation.rst │ │ ├── response_rate_by_percentile.rst │ │ └── index.rst │ ├── datasets │ │ ├── get_data_dir.rst │ │ ├── clear_data_dir.rst │ │ ├── index.rst │ │ ├── fetch_hillstrom.rst │ │ ├── 
fetch_lenta.rst │ │ ├── fetch_megafon.rst │ │ ├── fetch_x5.rst │ │ └── fetch_criteo.rst │ ├── viz │ │ ├── plot_qini_curve.rst │ │ ├── plot_uplift_curve.rst │ │ ├── plot_uplift_preds.rst │ │ ├── plot_uplift_by_percentile.rst │ │ ├── plot_treatment_balance_curve.rst │ │ └── index.rst │ └── index.rst ├── user_guide │ ├── introduction │ │ ├── index.rst │ │ ├── data_collection.rst │ │ ├── comparison.rst │ │ ├── clients.rst │ │ └── cate.rst │ ├── models │ │ ├── index.rst │ │ ├── classification.rst │ │ ├── transformed_outcome.rst │ │ ├── solo_model.rst │ │ ├── revert_label.rst │ │ └── two_models.rst │ └── index.rst ├── _templates │ ├── footer.html │ ├── layout.html │ └── breadcrumbs.html ├── refs.bib ├── 404.rst ├── install.rst ├── Readme.rst ├── hall_of_fame.rst ├── Makefile ├── make.bat ├── contributing.md ├── conf.py ├── quick_start.rst ├── index.rst └── changelog.md ├── .coveragerc ├── .gitattributes ├── MANIFEST.in ├── requirements.txt ├── pytest.ini ├── .github ├── ISSUE_TEMPLATE │ ├── doc-report.md │ ├── feature-request.md │ └── bug-report.md ├── workflows │ ├── PyPi_upload.yml │ └── ci-test.yml ├── pull_request_template.md ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md ├── .readthedocs.yml ├── LICENSE ├── setup.py ├── .gitignore ├── notebooks ├── Readme.rst ├── pipeline_usage_EN.ipynb └── pipeline_usage_RU.ipynb └── Readme.rst /sklift/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov -------------------------------------------------------------------------------- /sklift/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.5.1' 2 | -------------------------------------------------------------------------------- /docs/tutorials.rst: 
-------------------------------------------------------------------------------- 1 | .. include:: ../notebooks/Readme.rst -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = sklift/tests/*,*__init__.py* 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py linguist-language=python 2 | *.ipynb linguist-documentation -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | recursive-include sklift/datasets/ *.rst 3 | include MANIFEST.in -------------------------------------------------------------------------------- /sklift/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import check_is_binary 2 | 3 | __all__ = ['check_is_binary'] -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==5.1.1 2 | sphinx-rtd-theme==1.0.0 3 | myst-parser 4 | sphinxcontrib-bibtex -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn>=0.21.0 2 | numpy>=1.16 3 | pandas 4 | matplotlib 5 | requests 6 | tqdm 7 | -------------------------------------------------------------------------------- /docs/_static/sklift-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/sklift-logo.png 
-------------------------------------------------------------------------------- /docs/_static/soc_net_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/soc_net_logo.png -------------------------------------------------------------------------------- /docs/_static/images/SoloModel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/SoloModel.png -------------------------------------------------------------------------------- /docs/_static/images/memchik_RU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/memchik_RU.png -------------------------------------------------------------------------------- /docs/_static/images/sklift_404.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/sklift_404.png -------------------------------------------------------------------------------- /docs/_static/images/SoloModel_RU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/SoloModel_RU.png -------------------------------------------------------------------------------- /docs/_static/images/readme_img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/readme_img1.png -------------------------------------------------------------------------------- /docs/_static/sklift-github-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/sklift-github-logo.png -------------------------------------------------------------------------------- /docs/_static/images/quick_start_wau.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/quick_start_wau.png -------------------------------------------------------------------------------- /docs/_static/images/x5_table_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/x5_table_scheme.png -------------------------------------------------------------------------------- /docs/_static/images/TwoModels_vanila.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_vanila.png -------------------------------------------------------------------------------- /docs/_static/images/quick_start_qini.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/quick_start_qini.png -------------------------------------------------------------------------------- /docs/_static/images/quick_start_uplift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/quick_start_uplift.png -------------------------------------------------------------------------------- /docs/_static/images/TwoModels_vanila_RU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_vanila_RU.png 
-------------------------------------------------------------------------------- /docs/_static/images/TwoModels_ddr_control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_ddr_control.png -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search, .wy-nav-top { 2 | background: #0062a2; 3 | } 4 | 5 | .wy-breadcrumbs { 6 | font-size: 12px; 7 | } -------------------------------------------------------------------------------- /docs/_static/images/TwoModels_ddr_control_RU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_ddr_control_RU.png -------------------------------------------------------------------------------- /docs/_static/images/user_guide/ug_clients_types.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_clients_types.jpg -------------------------------------------------------------------------------- /docs/_static/images/user_guide/ug_data_collection.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_data_collection.gif -------------------------------------------------------------------------------- /docs/_static/images/user_guide/ug_revert_label_mem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_revert_label_mem.png 
-------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --cache-clear --cov-report html --cov-report xml --cov-report term-missing --cov-config=.coveragerc --cov=sklift --junitxml=pytest.xml -------------------------------------------------------------------------------- /docs/_static/images/SoloModel_treatment_intercation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/SoloModel_treatment_intercation.png -------------------------------------------------------------------------------- /docs/_static/images/user_guide/ug_uplift_approaches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_uplift_approaches.png -------------------------------------------------------------------------------- /docs/_static/images/user_guide/ug_comparison_with_other_models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_comparison_with_other_models.png -------------------------------------------------------------------------------- /sklift/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import SoloModel, ClassTransformation, ClassTransformationReg, TwoModels 2 | 3 | __all__ = [SoloModel, ClassTransformation, ClassTransformationReg, TwoModels] 4 | -------------------------------------------------------------------------------- /docs/api/models/TwoModels.rst: -------------------------------------------------------------------------------- 1 | ******************************** 2 | `sklift.models 
<./>`_.TwoModels 3 | ******************************** 4 | 5 | .. autoclass:: sklift.models.models.TwoModels 6 | :members: -------------------------------------------------------------------------------- /docs/api/metrics/qini_curve.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.metrics <./>`_.qini_curve 3 | ***************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.qini_curve -------------------------------------------------------------------------------- /docs/api/metrics/uplift_at_k.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.metrics <./>`_.uplift_at_k 3 | ***************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.uplift_at_k -------------------------------------------------------------------------------- /docs/api/metrics/uplift_curve.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.metrics <./>`_.uplift_curve 3 | ***************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.uplift_curve -------------------------------------------------------------------------------- /docs/api/datasets/get_data_dir.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.datasets <./>`_.get_data_dir 3 | ***************************************** 4 | 5 | .. 
autofunction:: sklift.datasets.datasets.get_data_dir -------------------------------------------------------------------------------- /docs/api/models/SoloModel.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.models <./>`_.SoloModel 3 | ***************************************** 4 | 5 | .. autoclass:: sklift.models.models.SoloModel 6 | :members: -------------------------------------------------------------------------------- /docs/api/datasets/clear_data_dir.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.datasets <./>`_.clear_data_dir 3 | ***************************************** 4 | 5 | .. autofunction:: sklift.datasets.datasets.clear_data_dir -------------------------------------------------------------------------------- /docs/api/metrics/qini_auc_score.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.metrics <./>`_.qini_auc_score 3 | ***************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.qini_auc_score -------------------------------------------------------------------------------- /docs/api/viz/plot_qini_curve.rst: -------------------------------------------------------------------------------- 1 | *********************************************** 2 | `sklift.viz <./>`_.plot_qini_curve 3 | *********************************************** 4 | 5 | .. autofunction:: sklift.viz.base.plot_qini_curve -------------------------------------------------------------------------------- /docs/api/metrics/uplift_auc_score.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.metrics <./>`_.uplift_auc_score 3 | ***************************************** 4 | 5 | .. 
autofunction:: sklift.metrics.metrics.uplift_auc_score -------------------------------------------------------------------------------- /docs/api/viz/plot_uplift_curve.rst: -------------------------------------------------------------------------------- 1 | *********************************************** 2 | `sklift.viz <./>`_.plot_uplift_curve 3 | *********************************************** 4 | 5 | .. autofunction:: sklift.viz.base.plot_uplift_curve -------------------------------------------------------------------------------- /docs/api/viz/plot_uplift_preds.rst: -------------------------------------------------------------------------------- 1 | *********************************************** 2 | `sklift.viz <./>`_.plot_uplift_preds 3 | *********************************************** 4 | 5 | .. autofunction:: sklift.viz.base.plot_uplift_preds -------------------------------------------------------------------------------- /docs/api/metrics/max_prof_uplift.rst: -------------------------------------------------------------------------------- 1 | ********************************************** 2 | `sklift.metrics <./>`_.max_prof_uplift 3 | ********************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.max_prof_uplift -------------------------------------------------------------------------------- /docs/api/metrics/perfect_qini_curve.rst: -------------------------------------------------------------------------------- 1 | ********************************************** 2 | `sklift.metrics <./>`_.perfect_qini_curve 3 | ********************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.perfect_qini_curve -------------------------------------------------------------------------------- /docs/user_guide/introduction/index.rst: -------------------------------------------------------------------------------- 1 | ************* 2 | Introduction 3 | ************* 4 | 5 | .. 
toctree:: 6 | :maxdepth: 2 7 | :caption: Contents 8 | 9 | ./comparison 10 | ./cate 11 | ./data_collection 12 | ./clients 13 | -------------------------------------------------------------------------------- /sklift/tests/test_import.py: -------------------------------------------------------------------------------- 1 | try: 2 | from .. import * # noqa 3 | _top_import_error = None 4 | except Exception as e: 5 | _top_import_error = e 6 | 7 | 8 | def test_import_sklift(): 9 | assert _top_import_error is None 10 | -------------------------------------------------------------------------------- /docs/api/metrics/make_uplift_scorer.rst: -------------------------------------------------------------------------------- 1 | ********************************************** 2 | `sklift.metrics <./>`_.make_uplift_scorer 3 | ********************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.make_uplift_scorer 6 | -------------------------------------------------------------------------------- /docs/api/metrics/perfect_uplift_curve.rst: -------------------------------------------------------------------------------- 1 | ********************************************** 2 | `sklift.metrics <./>`_.perfect_uplift_curve 3 | ********************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.perfect_uplift_curve -------------------------------------------------------------------------------- /docs/api/metrics/uplift_by_percentile.rst: -------------------------------------------------------------------------------- 1 | ********************************************* 2 | `sklift.metrics <./>`_.uplift_by_percentile 3 | ********************************************* 4 | 5 | .. 
autofunction:: sklift.metrics.metrics.uplift_by_percentile -------------------------------------------------------------------------------- /docs/api/models/ClassTransformation.rst: -------------------------------------------------------------------------------- 1 | ***************************************** 2 | `sklift.models <./>`_.ClassTransformation 3 | ***************************************** 4 | 5 | .. autoclass:: sklift.models.models.ClassTransformation 6 | :members: -------------------------------------------------------------------------------- /docs/api/viz/plot_uplift_by_percentile.rst: -------------------------------------------------------------------------------- 1 | *********************************************** 2 | `sklift.viz <./>`_.plot_uplift_by_percentile 3 | *********************************************** 4 | 5 | .. autofunction:: sklift.viz.base.plot_uplift_by_percentile -------------------------------------------------------------------------------- /docs/api/metrics/treatment_balance_curve.rst: -------------------------------------------------------------------------------- 1 | *********************************************** 2 | `sklift.metrics <./>`_.treatment_balance_curve 3 | *********************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.treatment_balance_curve -------------------------------------------------------------------------------- /docs/api/viz/plot_treatment_balance_curve.rst: -------------------------------------------------------------------------------- 1 | *********************************************** 2 | `sklift.viz <./>`_.plot_treatment_balance_curve 3 | *********************************************** 4 | 5 | .. 
autofunction:: sklift.viz.base.plot_treatment_balance_curve -------------------------------------------------------------------------------- /docs/api/metrics/weighted_average_uplift.rst: -------------------------------------------------------------------------------- 1 | ************************************************* 2 | `sklift.metrics <./>`_.weighted_average_uplift 3 | ************************************************* 4 | 5 | .. autofunction:: sklift.metrics.metrics.weighted_average_uplift -------------------------------------------------------------------------------- /docs/api/models/ClassTransformationReg.rst: -------------------------------------------------------------------------------- 1 | ******************************************** 2 | `sklift.models <./>`_.ClassTransformationReg 3 | ******************************************** 4 | 5 | .. autoclass:: sklift.models.models.ClassTransformationReg 6 | :members: -------------------------------------------------------------------------------- /docs/_templates/footer.html: -------------------------------------------------------------------------------- 1 | {% extends "!footer.html" %} 2 | {%- block extrafooter %} 3 |

If you find a mistake in the docs, please create an issue on github.

4 | {{ super() }} 5 | {% endblock %} -------------------------------------------------------------------------------- /docs/api/metrics/average_squared_deviation.rst: -------------------------------------------------------------------------------- 1 | ************************************************* 2 | `sklift.metrics <./>`_.average_squared_deviation 3 | ************************************************* 4 | 5 | .. autofunction:: sklift.metrics.metrics.average_squared_deviation -------------------------------------------------------------------------------- /docs/api/metrics/response_rate_by_percentile.rst: -------------------------------------------------------------------------------- 1 | **************************************************** 2 | `sklift.metrics <./>`_.response_rate_by_percentile 3 | **************************************************** 4 | 5 | .. autofunction:: sklift.metrics.metrics.response_rate_by_percentile -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Documentation" 3 | about: Report an issue related to https://scikit-uplift.readthedocs.io# 4 | --- 5 | 6 | ## 📚 Documentation 7 | 8 | -------------------------------------------------------------------------------- /docs/api/datasets/index.rst: -------------------------------------------------------------------------------- 1 | ************************ 2 | `sklift <../>`_.datasets 3 | ************************ 4 | 5 | .. 
toctree:: 6 | :maxdepth: 3 7 | 8 | ./clear_data_dir 9 | ./get_data_dir 10 | ./fetch_lenta 11 | ./fetch_x5 12 | ./fetch_criteo 13 | ./fetch_hillstrom 14 | ./fetch_megafon -------------------------------------------------------------------------------- /docs/api/viz/index.rst: -------------------------------------------------------------------------------- 1 | ********************** 2 | `sklift <../>`_.viz 3 | ********************** 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | 8 | ./plot_uplift_preds 9 | ./plot_qini_curve 10 | ./plot_uplift_curve 11 | ./plot_treatment_balance_curve 12 | ./plot_uplift_by_percentile 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/api/models/index.rst: -------------------------------------------------------------------------------- 1 | ********************** 2 | `sklift <../>`_.models 3 | ********************** 4 | 5 | See :ref:`Models ` section of the User Guide for further details. 6 | 7 | .. toctree:: 8 | :maxdepth: 3 9 | 10 | ./SoloModel 11 | ./ClassTransformation 12 | ./ClassTransformationReg 13 | ./TwoModels -------------------------------------------------------------------------------- /docs/refs.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{Diemert2018, 2 | author = {{Diemert Eustache, Betlei Artem} and Renaudin, Christophe and Massih-Reza, Amini}, 3 | title={A Large Scale Benchmark for Uplift Modeling}, 4 | publisher = {ACM}, 5 | booktitle = {Proceedings of the AdKDD and TargetAd Workshop, KDD, London,United Kingdom, August, 20, 2018}, 6 | year = {2018} 7 | } -------------------------------------------------------------------------------- /sklift/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import ( 2 | get_data_dir, 3 | clear_data_dir, 4 | fetch_x5, fetch_lenta, 5 | fetch_criteo, fetch_hillstrom, 6 | fetch_megafon 7 | ) 8 | 9 | __all__ = [ 10 
| 'get_data_dir', 11 | 'clear_data_dir', 12 | 'fetch_x5', 'fetch_lenta', 13 | 'fetch_criteo', 'fetch_hillstrom', 14 | 'fetch_megafon' 15 | ] -------------------------------------------------------------------------------- /sklift/viz/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ( 2 | plot_uplift_curve, plot_qini_curve, plot_uplift_preds, 3 | plot_uplift_by_percentile, plot_treatment_balance_curve, 4 | UpliftCurveDisplay 5 | ) 6 | 7 | __all__ = [ 8 | 'plot_uplift_curve', 'plot_qini_curve', 'plot_uplift_preds', 9 | 'plot_uplift_by_percentile', 'plot_treatment_balance_curve', 10 | 'UpliftCurveDisplay' 11 | ] 12 | -------------------------------------------------------------------------------- /docs/404.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | ******************* 4 | 404 Page Not Found 5 | ******************* 6 | 7 | .. image:: _static/images/sklift_404.png 8 | :alt: 404 Page not found 9 | :align: center 10 | :width: 250 px 11 | :height: 250 px 12 | 13 | Sorry, we couldn't find that page. 14 | 15 | Try using the search box or go to the `homepage `__. 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Feature request" 3 | about: Suggest an idea for improve scikit-uplift 4 | --- 5 | 6 | ## 💡 Feature request 7 | 8 | 9 | 10 | ## Motivation 11 | 12 | 13 | 14 | ## Additional context 15 | 16 | -------------------------------------------------------------------------------- /docs/user_guide/models/index.rst: -------------------------------------------------------------------------------- 1 | .. _models: 2 | 3 | .. meta:: 4 | :description lang=en: 5 | Introduction to approaches for building uplift model with examples 6 | on Python using scikit-uplift (sklift) package. 
7 | 8 | ****** 9 | Models 10 | ****** 11 | 12 | .. toctree:: 13 | :maxdepth: 3 14 | :caption: Contents 15 | 16 | ./classification 17 | ./solo_model 18 | ./revert_label 19 | ./transformed_outcome 20 | ./two_models -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description lang=en: 3 | scikit-uplift (sklift) api reference for modeling uplift and evaluate the causal effect of a treatment 4 | in scikit-learn style in python 5 | 6 | ************ 7 | API sklift 8 | ************ 9 | 10 | This is the modules reference of scikit-uplift. 11 | 12 | .. toctree:: 13 | :maxdepth: 3 14 | 15 | ./models/index 16 | ./metrics/index 17 | ./viz/index 18 | ./datasets/index -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | ************* 2 | Installation 3 | ************* 4 | 5 | .. _PyPI: https://pypi.org/project/scikit-uplift/ 6 | .. _source: https://github.com/maks-sh/scikit-uplift 7 | 8 | **Install** the package by the following command from PyPI_: 9 | 10 | .. code-block:: bash 11 | 12 | pip install scikit-uplift 13 | 14 | Or install from source_: 15 | 16 | .. code-block:: bash 17 | 18 | git clone https://github.com/maks-sh/scikit-uplift.git 19 | cd scikit-uplift 20 | python setup.py install -------------------------------------------------------------------------------- /docs/Readme.rst: -------------------------------------------------------------------------------- 1 | .. _uplift-modeling.com: https://www.uplift-modeling.com/en/latest/index.html 2 | 3 | Documentation 4 | =============== 5 | 6 | The full documentation is available at `uplift-modeling.com`_. 7 | 8 | Or you can build the documentation locally using `Sphinx `_ 1.4 or later: 9 | 10 | .. 
code-block:: bash 11 | 12 | cd docs 13 | pip install -r requirements.txt 14 | make html 15 | 16 | And if you now point your browser to ``_build/html/index.html``, you should see a documentation site. 17 | -------------------------------------------------------------------------------- /sklift/utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def check_is_binary(array): 4 | """Checker if array consists of int or float binary values 0 (0.) and 1 (1.) 5 | 6 | Args: 7 | array (1d array-like): Array to check. 8 | """ 9 | 10 | if not np.all(np.unique(array) == np.array([0, 1])): 11 | raise ValueError(f"Input array is not binary. " 12 | f"Array should contain only int or float binary values 0 (or 0.) and 1 (or 1.). " 13 | f"Got values {np.unique(array)}.") 14 | -------------------------------------------------------------------------------- /docs/api/metrics/index.rst: -------------------------------------------------------------------------------- 1 | ************************ 2 | `sklift <../>`_.metrics 3 | ************************ 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | 8 | ./uplift_at_k 9 | ./uplift_curve 10 | ./perfect_uplift_curve 11 | ./uplift_auc_score 12 | ./qini_curve 13 | ./perfect_qini_curve 14 | ./qini_auc_score 15 | ./weighted_average_uplift 16 | ./uplift_by_percentile 17 | ./response_rate_by_percentile 18 | ./treatment_balance_curve 19 | ./average_squared_deviation 20 | ./max_prof_uplift 21 | ./make_uplift_scorer -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {%- block extrahead %} 3 | {{ super() }} 4 | 5 | {% endblock %} 6 | {% block menu %} 7 |
8 | 9 |
10 |
11 | {{ super() }} 12 | {% endblock %} 13 | 14 | -------------------------------------------------------------------------------- /docs/api/datasets/fetch_hillstrom.rst: -------------------------------------------------------------------------------- 1 | .. _Hillstrom: 2 | 3 | **************************************** 4 | `sklift.datasets <./>`_.fetch_hillstrom 5 | **************************************** 6 | 7 | .. autofunction:: sklift.datasets.datasets.fetch_hillstrom 8 | 9 | .. include:: ../../../sklift/datasets/descr/hillstrom.rst 10 | 11 | About Hillstrom 12 | ################## 13 | 14 | The dataset was provided by Kevin Hillstorm. 15 | Kevin is President of MineThatData, a consultancy that helps CEOs understand the complex relationship between Customers, Advertising, Products, Brands, and Channels. 16 | 17 | Link to the blog: https://blog.minethatdata.com/ -------------------------------------------------------------------------------- /docs/_templates/breadcrumbs.html: -------------------------------------------------------------------------------- 1 | {% extends "!breadcrumbs.html" %} 2 | {% block breadcrumbs_aside %} 3 |
4 | Issue 5 | Watch 6 | Fork 7 |
8 | {% endblock %} -------------------------------------------------------------------------------- /docs/hall_of_fame.rst: -------------------------------------------------------------------------------- 1 | ************* 2 | Hall of Fame 3 | ************* 4 | 5 | Here are the links to the competitions, names of the winners and to their solutions, where scikit-uplift was used. 6 | 7 | `X5 Retail Hero: Uplift Modeling for Promotional Campaign `_ 8 | ======================================================================================================================== 9 | 10 | Predict how much the purchase probability could increase as a result of sending an advertising SMS. 11 | 12 | 2. `Kirill Liksakov `_ 13 | `solution `_ 14 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /sklift/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .metrics import ( 2 | uplift_curve, perfect_uplift_curve, uplift_auc_score, 3 | qini_curve, perfect_qini_curve, qini_auc_score, 4 | uplift_at_k, response_rate_by_percentile, 5 | weighted_average_uplift, uplift_by_percentile, treatment_balance_curve, 6 | average_squared_deviation, make_uplift_scorer, max_prof_uplift 7 | ) 8 | 9 | __all__ = [ 10 | 'uplift_curve', 'perfect_uplift_curve', 'uplift_auc_score', 11 | 'qini_curve', 'perfect_qini_curve', 'qini_auc_score', 12 | 'uplift_at_k', 'response_rate_by_percentile', 13 | 'weighted_average_uplift', 'uplift_by_percentile', 'treatment_balance_curve', 14 | 'average_squared_deviation', 'make_uplift_scorer', 'max_prof_uplift' 15 | ] 16 | -------------------------------------------------------------------------------- /.github/workflows/PyPi_upload.yml: -------------------------------------------------------------------------------- 1 | name: Upload to PyPi 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | deploy: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.x' 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install setuptools wheel twine 22 | - name: Build and publish 23 | env: 24 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 25 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 26 | run: | 27 | python setup.py sdist bdist_wheel 28 | twine upload dist/* 29 | -------------------------------------------------------------------------------- /docs/api/datasets/fetch_lenta.rst: 
-------------------------------------------------------------------------------- 1 | .. _Lenta: 2 | 3 | *********************************** 4 | `sklift.datasets <./>`_.fetch_lenta 5 | *********************************** 6 | 7 | .. autofunction:: sklift.datasets.datasets.fetch_lenta 8 | 9 | .. include:: ../../../sklift/datasets/descr/lenta.rst 10 | 11 | About Lenta 12 | ################## 13 | 14 | .. figure:: https://upload.wikimedia.org/wikipedia/commons/7/73/Lenta_logo.svg 15 | 16 | `Lenta (Russian: Лентa) `__ is a Russian super - and hypermarket chain. With 149 locations across the country, 17 | it is one of Russia's largest retail chains in addition to being the country's second largest hypermarket chain. 18 | 19 | Link to the company's website: https://www.lenta.com/ -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the docs/ directory with Sphinx 8 | build: 9 | os: ubuntu-20.04 10 | tools: 11 | python: "3.8" 12 | # jobs: 13 | # pre_build: 14 | # - cp -r notebooks docs/ 15 | 16 | # Build documentation in the docs/ directory with Sphinx 17 | sphinx: 18 | builder: html 19 | configuration: docs/conf.py 20 | fail_on_warning: false 21 | 22 | # Optionally build your docs in additional formats such as PDF and ePub 23 | formats: 24 | - htmlzip 25 | 26 | # Optionally set the version of Python and requirements required to build your docs 27 | python: 28 | install: 29 | - requirements: docs/requirements.txt 30 | - requirements: requirements.txt -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 
"Bug Report" 3 | about: Submit a bug report to help us improve scikit-uplift 4 | --- 5 | 6 | ## 🐛 Bug 7 | 8 | 9 | 10 | ## To Reproduce 11 | 12 | Steps to reproduce the behavior: 13 | 14 | 1. 15 | 1. 16 | 1. 17 | 18 | 19 | 20 | ## Expected behavior 21 | 22 | 23 | 24 | ## Environment 25 | 26 | - scikit-uplift version (e.g., 0.1.2): 27 | - scikit-learn version (e.g., 0.22.2): 28 | - Python version (e.g., 3.7): 29 | - OS (e.g., Linux): 30 | - Any other relevant information: 31 | 32 | ## Additional context 33 | 34 | -------------------------------------------------------------------------------- /sklift/datasets/descr/megafon.rst: -------------------------------------------------------------------------------- 1 | MegaFon Uplift Competition Dataset 2 | ===================================== 3 | 4 | `Machine learning competition website `_. 5 | 6 | Data description 7 | ################ 8 | 9 | The dataset is provided by MegaFon at the MegaFon Uplift Competition hosted in may 2021. 10 | 11 | The dataset contains generated synthetic data, trying to bring them closer to the real case that they encountered. 12 | 13 | 14 | 15 | Fields 16 | ################ 17 | 18 | * X_1...X_50: anonymized feature set 19 | * treatment_group (str): treatment/control group flag 20 | * conversion (binary): customer purchasing 21 | 22 | Key figures 23 | ################ 24 | * Format: CSV 25 | * Size: 554M 26 | * Rows: 600,000 27 | * Response Ratio: .2 28 | * Treatment Ratio: .5 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/api/datasets/fetch_megafon.rst: -------------------------------------------------------------------------------- 1 | .. _Megafon: 2 | 3 | *************************************** 4 | `sklift.datasets <./>`_.fetch_megafon 5 | *************************************** 6 | 7 | .. autofunction:: sklift.datasets.datasets.fetch_megafon 8 | 9 | .. 
include:: ../../../sklift/datasets/descr/megafon.rst 10 | 11 | About MegaFon 12 | ################## 13 | 14 | .. figure:: https://upload.wikimedia.org/wikipedia/commons/9/9e/MegaFon_logo.svg 15 | 16 | `MegaFon (Russian: МегаФон) `__ , previously known as North-West GSM, is the second largest mobile phone operator and the third largest telecom operator in Russia. 17 | It works in the GSM, UMTS and LTE standard. As of June 2012, the company serves 62.1 million subscribers in Russia and 1.6 million in Tajikistan. It is headquartered in Moscow. 18 | 19 | Link to the company's website: https://megafon.ru/ -------------------------------------------------------------------------------- /docs/api/datasets/fetch_x5.rst: -------------------------------------------------------------------------------- 1 | .. _X5: 2 | 3 | *********************************** 4 | `sklift.datasets <./>`_.fetch_x5 5 | *********************************** 6 | 7 | .. autofunction:: sklift.datasets.datasets.fetch_x5 8 | 9 | .. include:: ../../../sklift/datasets/descr/x5.rst 10 | 11 | About X5 12 | ################## 13 | 14 | .. figure:: https://upload.wikimedia.org/wikipedia/en/8/83/X5_Retail_Group_logo_2015.png 15 | 16 | `X5 Group `__ is a leading Russian food retailer. 17 | The Company operates several retail formats: proximity stores under the Pyaterochka brand, 18 | supermarkets under the Perekrestok brand and hypermarkets under the Karusel brand, as well as the Perekrestok.ru online market, 19 | the 5Post parcel and Dostavka.Pyaterochka and Perekrestok. Bystro food delivery services. 20 | 21 | Link to the company's website: https://www.x5.ru/ -------------------------------------------------------------------------------- /docs/api/datasets/fetch_criteo.rst: -------------------------------------------------------------------------------- 1 | .. _Criteo: 2 | 3 | ************************************** 4 | `sklift.datasets <./>`_.fetch_criteo 5 | ************************************** 6 | 7 | .. 
autofunction:: sklift.datasets.datasets.fetch_criteo 8 | 9 | .. include:: ../../../sklift/datasets/descr/criteo.rst 10 | 11 | About Criteo 12 | ################## 13 | 14 | .. figure:: https://upload.wikimedia.org/wikipedia/commons/d/d2/Criteo_logo21.svg 15 | 16 | `Criteo `__ is an advertising company that provides online display advertisements. 17 | The company was founded and is headquartered in Paris, France. Criteo's product is a form of display advertising, 18 | which displays interactive banner advertisements, generated based on the online browsing preferences and behaviour for each customer. 19 | The solution operates on a pay per click/cost per click (CPC) basis. 20 | 21 | Link to the company's website: https://www.criteo.com/ -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Maksim Shevchenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /sklift/datasets/descr/x5.rst: -------------------------------------------------------------------------------- 1 | X5 RetailHero Uplift Modeling Dataset 2 | ===================================== 3 | 4 | The dataset is provided by X5 Retail Group at the RetailHero hackaton hosted in winter 2019. 5 | 6 | The dataset contains raw retail customer purchases, raw information about products and general info about customers. 7 | 8 | 9 | `Machine learning competition website `_. 10 | 11 | Data description 12 | ################ 13 | 14 | Data contains several parts: 15 | 16 | * train.csv: a subset of clients for training. The column *treatment_flg* indicates if there was a communication. The column *target* shows if there was a purchase afterward; 17 | * clients.csv: general info about clients; 18 | * purchases.csv: clients’ purchase history prior to communication. 19 | 20 | .. image:: ../../_static/images/x5_table_scheme.png 21 | :alt: X5 table schema 22 | 23 | Fields 24 | ################ 25 | 26 | * treatment_flg (binary): information on performed communication 27 | * target (binary): customer purchasing 28 | 29 | Key figures 30 | ################ 31 | 32 | * Format: CSV 33 | * Size: 647M (compressed) 4.17GB (uncompressed) 34 | * Rows: 35 | 36 | * in 'clients.csv': 400,162 37 | * in 'purchases.csv': 45,786,568 38 | * in 'uplift_train.csv': 200,039 39 | 40 | * Response Ratio: .62 41 | * Treatment Ratio: .5 42 | 43 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Pull request" 3 | about: Make changes in scikit-uplift 4 | --- 5 | 6 | ## 📑 Description of the Change 7 | 8 | 13 | 14 | ## Verification Process 15 | 16 | 24 | 25 | ## Release Notes 26 | 27 | 42 | 43 | ## Additional info 44 | 45 | 
-------------------------------------------------------------------------------- /docs/user_guide/index.rst: -------------------------------------------------------------------------------- 1 | .. _user_guide: 2 | 3 | ********** 4 | User Guide 5 | ********** 6 | 7 | .. image:: https://habrastorage.org/webt/hf/7i/nu/hf7inuu3agtnwl1yo0g--mznzno.jpeg 8 | :alt: Cover of User Guide for uplift modeling and causal inference 9 | 10 | Uplift modeling estimates the effect of communication action on some customer outcomes and gives an opportunity to efficiently target customers which are most likely to respond to a marketing campaign. 11 | It is relatively easy to implement, but surprisingly poorly covered in the machine learning courses and literature. 12 | This guide is going to shed some light on the essentials of causal inference estimating and uplift modeling. 13 | 14 | .. toctree:: 15 | :maxdepth: 3 16 | :caption: Contents 17 | 18 | ./introduction/index 19 | ./models/index 20 | 21 | Credits 22 | -------- 23 | 24 | **Authors:** 25 | 26 | - `Irina Elisova `_ 27 | - `Maksim Shevchenko `_ 28 | 29 | **Acknowledgements:** 30 | 31 | - `Kirill Liksakov `_ - uplift metrics research 32 | - `Alina Zhukova `_ - artwork: User Guide cover and key pictures 33 | 34 | Citations 35 | ---------- 36 | 37 | If you find this User Guide useful for your research, please consider citing: 38 | 39 | .. 
code:: latex 40 | 41 | @misc{user-guide-for-uplift-modeling, 42 | author = {Maksim Shevchenko, Irina Elisova}, 43 | title = {User Guide for uplift modeling and casual inference}, 44 | year = {2020}, 45 | publisher = {GitHub}, 46 | journal = {GitHub repository}, 47 | howpublished = {\url{https://www.uplift-modeling.com/en/latest/user_guide/index.html}} 48 | } -------------------------------------------------------------------------------- /sklift/datasets/descr/criteo.rst: -------------------------------------------------------------------------------- 1 | Criteo Uplift Modeling Dataset 2 | ================================ 3 | This is a copy of `Criteo AI Lab Uplift Prediction dataset `_. 4 | 5 | Data description 6 | ################ 7 | 8 | This dataset is constructed by assembling data resulting from several incrementality tests, a particular randomized trial procedure where a random part of the population is prevented from being targeted by advertising. 9 | 10 | 11 | Fields 12 | ################ 13 | 14 | Here is a detailed description of the fields (they are comma-separated in the file): 15 | 16 | * **f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11**: feature values (dense, float) 17 | * **treatment**: treatment group. Flag if a company participates in the RTB auction for a particular user (binary: 1 = treated, 0 = control) 18 | * **exposure**: treatment effect, whether the user has been effectively exposed. 
Flag if a company wins in the RTB auction for the user (binary) 19 | * **conversion**: whether a conversion occured for this user (binary, label) 20 | * **visit**: whether a visit occured for this user (binary, label) 21 | 22 | 23 | Key figures 24 | ################ 25 | * Format: CSV 26 | * Size: 297M (compressed) 3,2GB (uncompressed) 27 | * Rows: 13,979,592 28 | * Response Ratio: 29 | 30 | * Average `Visit` Rate: .046992 31 | * Average `Conversion` Rate: .00292 32 | 33 | * Treatment Ratio: .85 34 | 35 | 36 | 37 | This dataset is released along with the paper: 38 | “*A Large Scale Benchmark for Uplift Modeling*" 39 | Eustache Diemert, Artem Betlei, Christophe Renaudin; (Criteo AI Lab), Massih-Reza Amini (LIG, Grenoble INP) 40 | This work was published in: `AdKDD 2018 `_ Workshop, in conjunction with KDD 2018. 41 | -------------------------------------------------------------------------------- /docs/user_guide/models/classification.rst: -------------------------------------------------------------------------------- 1 | *********************** 2 | Approach classification 3 | *********************** 4 | 5 | Uplift modeling techniques can be grouped into :guilabel:`data preprocessing` and :guilabel:`data processing` approaches. 6 | 7 | .. image:: ../../_static/images/user_guide/ug_uplift_approaches.png 8 | :align: center 9 | :alt: Classification of uplift modeling techniques: data preprocessing and data processing 10 | 11 | Data preprocessing 12 | ==================== 13 | 14 | In the :guilabel:`preprocessing` approaches, existing out-of-the-box learning methods are used, after pre- or post-processing of the data and outcomes. 15 | 16 | A popular and generic data preprocessing approach is :ref:`the flipped label approach `, also called class transformation approach. 17 | 18 | Other data preprocessing approaches extend the set of predictor variables to allow for the estimation of uplift. An example is :ref:`the single model with treatment as feature `. 
19 | 20 | Data processing 21 | ==================== 22 | 23 | In the :guilabel:`data processing` approaches, new learning methods and methodologies are developed that aim to optimize expected uplift more directly. 24 | 25 | Data processing techniques include two categories: :guilabel:`indirect` and :guilabel:`direct` estimation approaches. 26 | 27 | :guilabel:`Indirect` estimation approaches include :ref:`the two-model model approach `. 28 | 29 | :guilabel:`Direct` estimation approaches are typically adaptations from decision tree algorithms. The adoptions include modified the splitting criteria and dedicated pruning techniques. 30 | 31 | References 32 | ========== 33 | 34 | 1️⃣ Devriendt, Floris, Tias Guns and Wouter Verbeke. “Learning to rank for uplift modeling.” ArXiv abs/2002.05897 (2020): n. pag. 35 | -------------------------------------------------------------------------------- /docs/user_guide/models/transformed_outcome.rst: -------------------------------------------------------------------------------- 1 | .. _ClassTransformationReg: 2 | 3 | ******************** 4 | Transformed Outcome 5 | ******************** 6 | 7 | Let's redefine target variable, which indicates that treatment make some impact on target or 8 | did target is negative without treatment: 9 | 10 | .. math:: 11 | Z = Y * \frac{(W - p)}{(p * (1 - p))} 12 | 13 | * :math:`Y` - target vector, 14 | * :math:`W` - vector of binary communication flags, and 15 | * :math:`p` is a *propensity score* (the probabilty that each :math:`y_i` is assigned to the treatment group.). 16 | 17 | It is important to note here that it is possible to estimate :math:`p` as the proportion of objects with :math:`W = 1` 18 | in the sample. Or use the method from [2], in which it is proposed to evaluate math:`p` as a function of :math:`X` by 19 | training the classifier on the available data :math:`X = x`, and taking the communication flag vector math:`W` as 20 | the target variable. 21 | 22 | .. 
image:: https://habrastorage.org/r/w1560/webt/35/d2/z_/35d2z_-3yhyqhwtw-mt-npws6xk.png 23 | :align: center 24 | :alt: Transformation of the target in Transformed Outcome approach 25 | 26 | After applying the formula, we get a new target variable :math:`Z_i` and can train a regression model with the error 27 | functional :math:`MSE= \frac{1}{n}\sum_{i=0}^{n} (Z_i - \hat{Z_i})^2`. Since it is precisely when using MSE that the 28 | predictions of the model are the conditional mathematical expectation of the target variable. 29 | 30 | It can be proved that the conditional expectation of the transformed target :math:`Z_i` is the desired causal effect: 31 | 32 | .. math:: 33 | E[Z_i| X_i = x] = Y_i^1 - Y_i^0 = \tau_i 34 | 35 | .. hint:: 36 | In sklift this approach corresponds to the :class:`.ClassTransformationReg` class. 37 | 38 | References 39 | ========== 40 | 41 | 1️⃣ Susan Athey and Guido W Imbens. Machine learning methods for estimating heterogeneouscausal effects. stat, 1050:5, 2015. 42 | 43 | 2️⃣ P. Richard Hahn, Jared S. Murray, and Carlos Carvalho. Bayesian regression tree models for causal inference: regularization, confounding, and heterogeneous effects. 2019. -------------------------------------------------------------------------------- /docs/user_guide/introduction/data_collection.rst: -------------------------------------------------------------------------------- 1 | ********************** 2 | Data collection 3 | ********************** 4 | 5 | We need to evaluate a difference between two events that are mutually exclusive for a particular customer (either we communicate with a person, or we don't; you can't do both actions at the same time). This is why there are additional requirements for collecting data when building an uplift model. 6 | 7 | There are few additional steps different from a standard data collection procedure. You should run an experiment: 8 | 9 | 1. 
Randomly divide a representative part of the customer base into a treatment (receiving communication) and a control (receiving no communication) groups; 10 | 2. Evaluate the marketing experiment for the treatment group. 11 | 12 | Data collected from the marketing experiment consists of the customer's responses to the marketing offer (target). 13 | 14 | The only difference between the experiment and the future uplift model's campaign is a fact that in the first case we choose random customers to make a promotion. In the second case, the choice of a customer to communicate with is based on the predicted value returned by the uplift model. If the marketing campaign significantly differs from the experiment used to collect data, the model will be less accurate. 15 | 16 | There is a trick: before running the marketing campaign, it is recommended to randomly subset a small part of the customer base and divide it into a control and a treatment group again, similar to the previous experiment. Using this data, you will not only be able to accurately evaluate the effectiveness of the campaign but also collect additional data for a further model retraining. 17 | 18 | .. image:: ../../_static/images/user_guide/ug_data_collection.gif 19 | :alt: Animation: Design of a train data collection experiment for uplift modeling 20 | 21 | It is recommended to configure a development of the uplift model and the campaign launch as an iterative process: each iteration will collect new training data. It should consist of a mix of a random customer subset and customers selected by the model. 22 | 23 | References 24 | ========== 25 | 26 | 1️⃣ Verbeke, Wouter & Baesens, Bart & Bravo, Cristián. (2018). Profit Driven Business Analytics: A Practitioner's Guide to Transforming Big Data into Added Value. 
-------------------------------------------------------------------------------- /docs/user_guide/introduction/comparison.rst: -------------------------------------------------------------------------------- 1 | **************************** 2 | Uplift vs other models 3 | **************************** 4 | 5 | Companies use various channels to promote a product to a customer: it can be SMS, push notification, chatbot message in social networks, and many others. 6 | There are several ways to use machine learning to select customers for a marketing campaign: 7 | 8 | .. image:: ../../_static/images/user_guide/ug_comparison_with_other_models.png 9 | :alt: Comparison with other models 10 | 11 | - :guilabel:`The Look-alike model` (or Positive Unlabeled Learning) evaluates a probability that the customer is going to accomplish a target action. A training dataset contains known positive objects (for instance, users who have installed an app) and random negative objects (a random subset of all other customers who have not installed the app). The model searches for customers who are similar to those who made the target action. 12 | - :guilabel:`The Response model` evaluates the probability that the customer is going to accomplish the target action if there was a communication (a.k.a treatment). In this case, the training dataset is data collected after some interaction with the customers. In contrast to the first approach, we have confirmed positive and negative observations at our disposal (for instance, the customer who decides to issue a credit card or to decline an offer). 13 | - :guilabel:`The Uplift model` evaluates the net effect of communication by trying to select only those customers who are going to perform the target action only when there is some advertising exposure presenting to them. The model predicts a difference between the customer's behavior when there is a treatment (communication) and when there is no treatment (no communication). 
14 | 15 | When should we use uplift modeling? 16 | 17 | Uplift modeling is used when the customer's target action is likely to happen without any communication. 18 | For instance, we want to promote a popular product but we don't want to spend our marketing budget on customers who will buy the product anyway with or without communication. 19 | If the product is not popular and it has to be promoted to be bought, then a task turns to the response modeling task. 20 | 21 | References 22 | ========== 23 | 24 | 1️⃣ Radcliffe, N.J. (2007). Using control groups to target on predicted lift: Building and assessing uplift model. Direct Market J Direct Market Assoc Anal Council, 1:14–21, 2007. -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to scikit-uplift 2 | 3 | First off, thanks for taking the time to contribute! 🙌👍🎉 4 | 5 | All development is done on GitHub: https://github.com/maks-sh/scikit-uplift. 6 | 7 | ## Submitting a bug report or a feature request 8 | 9 | We use GitHub issues to track all bugs and feature requests. 10 | Feel free to open an issue if you have found a bug or wish to see a feature implemented at https://github.com/maks-sh/scikit-uplift/issues. 11 | 12 | ## Contributing code 13 | 14 | ### How to contribute 15 | 16 | The code in the master branch should meet the current release. 17 | So, please make a pull request to the ``dev`` branch. 18 | 19 | 1. Fork the [project repository](https://github.com/maks-sh/scikit-uplift). 20 | 2. Clone your fork of the scikit-uplift repo from your GitHub account to your local disk: 21 | ``` bash 22 | $ git clone https://github.com/YourName/scikit-uplift 23 | $ cd scikit-uplift 24 | ``` 25 | 3. Add the upstream remote. 
This saves a reference to the main scikit-uplift repository, which you can use to keep your repository synchronized with the latest changes: 26 | ``` bash 27 | $ git remote add upstream https://github.com/maks-sh/scikit-uplift.git 28 | ``` 29 | 4. Synchronize your ``dev`` branch with the upstream ``dev`` branch: 30 | ``` bash 31 | $ git checkout dev 32 | $ git pull upstream dev 33 | ``` 34 | 5. Create a feature branch to hold your development changes: 35 | ``` bash 36 | $ git checkout -b feature/my_new_feature 37 | ``` 38 | and start making changes. Always use a feature branch. It’s a good practice. 39 | 6. Develop the feature on your feature branch on your computer, using Git to do the version control. When you’re done editing, add changed files using ``git add .`` and then ``git commit`` 40 | Then push the changes to your GitHub account with: 41 | 42 | ``` bash 43 | $ git push -u origin feature/my_new_feature 44 | ``` 45 | 7. Create a pull request from your fork into ``dev`` branch. 46 | 47 | ### Styleguides 48 | 49 | #### Python 50 | 51 | We follow the PEP8 style guide for Python. Docstrings follow google style. 52 | 53 | #### Git Commit Messages 54 | 55 | * Use the present tense ("Add feature" not "Added feature") 56 | * Use the imperative mood ("Move cursor to..." 
import itertools
from collections import defaultdict

import numpy as np
import pandas as pd
import pytest

# Parameter grids shared by the module-scoped random-dataset fixtures below.
n_vals = (100, 1000)
k_vals = (1, 5)
np_types = (np.int32, np.float32, np.float64)
dataset_types = ('numpy', 'pandas')


def group_target_means(treat, y):
    """Return ``(control_mean, treatment_mean)`` of the target grouped by flag.

    Args:
        treat: iterable of 0/1 treatment flags.
        y: iterable of target values, aligned with ``treat``.

    Returns:
        Tuple of the mean target in the control group (flag 0) and in the
        treatment group (flag 1). If a group is empty, ``np.mean`` of the
        empty list yields ``nan`` (with a RuntimeWarning), which callers
        must treat as a degenerate sample.
    """
    groups = defaultdict(list)
    for flag, target in zip(treat, y):
        groups[flag].append(target)
    return np.mean(groups[0]), np.mean(groups[1])


@pytest.fixture
def sensitive_classification_dataset():
    """Tiny fixed classification dataset: (X, y, treat) as pandas objects."""
    df = pd.DataFrame(
        {
            "x1": [1, 0, 1, 0, 1, 0, 1, 1],
            "x2": [0, 0, 0, 0, 0, 1, 1, 1],
            "y": [1, 1, 1, 0, 1, 0, 0, 0],
            "treat": [1, 1, 1, 1, 0, 0, 0, 1]
        }
    )

    return df[["x1", "x2"]], df["y"], df["treat"]


@pytest.fixture(
    scope="module", params=list(itertools.product(n_vals, k_vals, np_types, dataset_types))
)
def random_xy_dataset_regr(request):
    """Reproducible random regression dataset: (X, y, treat).

    Parametrized over size ``n``, feature count ``k``, feature dtype and
    container type (numpy arrays or pandas objects).
    """
    n, k, np_type, dataset_type = request.param
    np.random.seed(42)
    X = np.random.normal(0, 2, (n, k)).astype(np_type)
    y = np.random.normal(0, 2, (n,))
    treat = (np.random.normal(0, 2, (n,)) > 0.0).astype(int)
    if dataset_type == 'numpy':
        return X, y, treat
    return pd.DataFrame(X), pd.Series(y), pd.Series(treat)


@pytest.fixture(
    scope="module", params=list(itertools.product(n_vals, k_vals, np_types, dataset_types))
)
def random_xyt_dataset_clf(request):
    """Reproducible random classification dataset: (X, y, treat).

    The main rule for creating a random dataset is that the average
    conversions in the control and experimental groups should not be
    equal to 0 or 1, so we redraw until both group means are strictly
    inside (0, 1).
    """
    n, k, np_type, dataset_type = request.param
    # BUGFIX: the original seeded *inside* the retry loop, regenerating the
    # identical sample every iteration — the loop could only pass on the
    # first try or spin forever. Seed once here to keep reproducibility
    # while letting each retry draw fresh data.
    np.random.seed(42)
    while True:
        X = np.random.normal(0, 2, (n, k)).astype(np_type)
        y = (np.random.normal(0, 2, (n,)) > 0.0).astype(int)
        treat = (np.random.normal(0, 2, (n,)) > 0.0).astype(int)
        mean_target_ctrl, mean_target_trmnt = group_target_means(treat, y)
        # Strict inequalities also reject an empty group: its mean is nan,
        # which the original ``== 0`` / ``== 1`` checks silently accepted.
        if 0.0 < mean_target_ctrl < 1.0 and 0.0 < mean_target_trmnt < 1.0:
            break

    if dataset_type == 'numpy':
        return X, y, treat
    return pd.DataFrame(X), pd.Series(y), pd.Series(treat)
27 | * Mens: 1/0 indicator, 1 = customer purchased Mens merchandise in the past year. 28 | * Womens: 1/0 indicator, 1 = customer purchased Womens merchandise in the past year. 29 | * Zip_Code: Classifies zip code as Urban, Suburban, or Rural. 30 | * Newbie: 1/0 indicator, 1 = New customer in the past twelve months. 31 | * Channel: Describes the channels the customer purchased from in the past year. 32 | 33 | Another variable describes the e-mail campaign the customer received: 34 | 35 | * Segment 36 | 37 | * Mens E-Mail 38 | * Womens E-Mail 39 | * No E-Mail 40 | 41 | Finally, we have a series of variables describing activity in the two weeks following delivery of the e-mail campaign: 42 | 43 | * Visit: 1/0 indicator, 1 = Customer visited website in the following two weeks. 44 | * Conversion: 1/0 indicator, 1 = Customer purchased merchandise in the following two weeks. 45 | * Spend: Actual dollars spent in the following two weeks. 46 | 47 | Key figures 48 | ################ 49 | 50 | * Format: CSV 51 | * Size: 433KB (compressed) 4,935KB (uncompressed) 52 | * Rows: 64,000 53 | * Response Ratio: 54 | 55 | * Average `visit` Rate: .15, 56 | * Average `conversion` Rate: .009, 57 | * the values in the `spend` column are unevenly distributed from 0.0 to 499.0 58 | 59 | * Treatment Ratio: The parts are distributed evenly between the *three* classes -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | from shutil import rmtree 5 | 6 | from setuptools import Command, find_packages, setup 7 | 8 | # Package meta-data. 
# Package meta-data.
NAME = "scikit-uplift"
DESCRIPTION = "Classic approaches of Uplift modelling in scikit-learn style in python"
MAINTAINER = 'Maksim Shevchenko'
URL = "https://github.com/maks-sh/scikit-uplift"
REQUIRES_PYTHON = ">=3.4.0"

here = os.path.abspath(os.path.dirname(__file__))

# The long description shown on PyPI mirrors the repository README.
with open(os.path.join(here, 'Readme.rst'), encoding="utf-8") as f:
    LONG_DESCRIPTION = f.read()

# What packages are required for this module to be executed?
try:
    with open(os.path.join(here, "requirements.txt"), encoding="utf-8") as f:
        # BUGFIX: plain split("\n") left empty strings (and comment lines)
        # in install_requires; keep only real requirement lines.
        REQUIRED = [
            line.strip()
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]
except FileNotFoundError:
    REQUIRED = []


def get_version():
    """Single-source the package version from ``sklift/__init__.py``."""
    version_file = os.path.join(here, "sklift", "__init__.py")
    with open(version_file, encoding="utf-8") as f:
        return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)


def get_test_requirements():
    """Read test dependencies from ``test_requirements.txt``.

    Previously a dead stub that always returned ``None`` while the ``test``
    extra was hard-coded separately; now it backs the extra, falling back to
    the historical list when the file is absent.
    """
    try:
        with open(os.path.join(here, "test_requirements.txt"), encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        return ["pytest", "pytest-cov"]


# What packages are optional?
EXTRAS = {"test": get_test_requirements()}


class UploadCommand(Command):
    """Support ``setup.py upload``: clean, then build sdist and wheel."""

    description = "Build and publish the package."
    user_options = []

    @staticmethod
    def status(s):
        """Print things in bold."""
        # BUGFIX: the docstring promised bold output but the old code printed
        # plain text; wrap in ANSI bold escapes to match.
        print("\033[1m{0}\033[0m".format(s))

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        try:
            self.status("Removing previous builds...")
            rmtree(os.path.join(here, "dist"))
        except OSError:
            # No previous build directory — nothing to clean.
            pass

        self.status("Building Source and Wheel (universal) distribution...")
        os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))

        sys.exit()


setup(
    name=NAME,
    version=get_version(),
    description=DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/x-rst",
    maintainer=MAINTAINER,
    url=URL,
    packages=find_packages(exclude=["tests", "docs", "images"]),
    include_package_data=True,
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    cmdclass={"upload": UploadCommand},
)
17 | So, please make a pull request to the ``dev`` branch. 18 | 19 | 1. Fork the [project repository](https://github.com/maks-sh/scikit-uplift). 20 | 2. Clone your fork of the scikit-uplift repo from your GitHub account to your local disk: 21 | ``` bash 22 | $ git clone https://github.com/YourName/scikit-uplift 23 | $ cd scikit-uplift 24 | ``` 25 | 3. Add the upstream remote. This saves a reference to the main scikit-uplift repository, which you can use to keep your repository synchronized with the latest changes: 26 | ``` bash 27 | $ git remote add upstream https://github.com/maks-sh/scikit-uplift.git 28 | ``` 29 | 4. Synchronize your ``dev`` branch with the upstream ``dev`` branch: 30 | ``` bash 31 | $ git checkout dev 32 | $ git pull upstream dev 33 | ``` 34 | 5. Create a feature branch to hold your development changes: 35 | ``` bash 36 | $ git checkout -b feature/my_new_feature 37 | ``` 38 | and start making changes. Always use a feature branch. It’s a good practice. 39 | 6. Develop the feature on your feature branch on your computer, using Git to do the version control. When you’re done editing, add changed files using ``git add .`` and then ``git commit`` 40 | Then push the changes to your GitHub account with: 41 | 42 | ``` bash 43 | $ git push -u origin feature/my_new_feature 44 | ``` 45 | 7. Create a pull request from your fork into ``dev`` branch. 46 | 47 | ### Styleguides 48 | 49 | #### Python 50 | 51 | We follow the PEP8 style guide for Python. Docstrings follow [google style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html). 52 | 53 | #### Git Commit Messages 54 | 55 | * Use the present tense ("Add feature" not "Added feature") 56 | * Use the imperative mood ("Move file to..." not "Moves file to...") 57 | * Limit the first line to 72 characters or less 58 | * Reference issues and pull requests liberally after the first line 59 | * If you want to use emojis, use them at the beginning of the line. 
-------------------------------------------------------------------------------- /docs/user_guide/introduction/clients.rst: -------------------------------------------------------------------------------- 1 | ****************************************** 2 | Types of customers 3 | ****************************************** 4 | 5 | We can determine 4 types of customers based on a response to treatment: 6 | 7 | .. image:: ../../_static/images/user_guide/ug_clients_types.jpg 8 | :alt: Classification of customers based on their response to a treatment 9 | :width: 268 px 10 | :height: 282 px 11 | :align: center 12 | 13 | - :guilabel:`Do-Not-Disturbs` *(a.k.a. Sleeping-dogs)* have a strong negative response to marketing communication. They are going to purchase if *NOT* treated and will *NOT* purchase *IF* treated. It is not only a wasted marketing budget but also a negative impact. For instance, customers targeted could result in rejecting current products or services. In terms of math: :math:`W_i = 1, Y_i = 0` or :math:`W_i = 0, Y_i = 1`. 14 | - :guilabel:`Lost Causes` will *NOT* purchase the product *NO MATTER* they are contacted or not. The marketing budget in this case is also wasted because it has no effect. In terms of math: :math:`W_i = 1, Y_i = 0` or :math:`W_i = 0, Y_i = 0`. 15 | - :guilabel:`Sure Things` will purchase *ANYWAY* no matter they are contacted or not. There is no motivation to spend the budget because it also has no effect. In terms of math: :math:`W_i = 1, Y_i = 1` or :math:`W_i = 0, Y_i = 1`. 16 | - :guilabel:`Persuadables` will always respond *POSITIVE* to marketing communication. They are going to purchase *ONLY* if contacted (or sometimes they purchase *MORE* or *EARLIER* only if contacted). This customer's type should be the only target for the marketing campaign. In terms of math: :math:`W_i = 0, Y_i = 0` or :math:`W_i = 1, Y_i = 1`. 
17 | 18 | Because we can't communicate and not communicate with the customer at the same time, we will never be able to observe exactly which type a particular customer belongs to. 19 | 20 | Depends on the product characteristics and the customer base structure some types may be absent. In addition, a customer response depends heavily on various characteristics of the campaign, such as a communication channel or a type and a size of the marketing offer. To maximize profit, these parameters should be selected. 21 | 22 | Thus, when predicting uplift score and selecting a segment by the highest score, we are trying to find the only one type: **persuadables**. 23 | 24 | References 25 | ========== 26 | 27 | 1️⃣ Kane, K., V. S. Y. Lo, and J. Zheng. Mining for the Truly Responsive Customers and Prospects Using True-Lift Modeling: Comparison of New and Existing Methods. Journal of Marketing Analytics 2 (4): 218–238. 2014. 28 | 29 | 2️⃣ Verbeke, Wouter & Baesens, Bart & Bravo, Cristián. (2018). Profit Driven Business Analytics: A Practitioner's Guide to Transforming Big Data into Added Value. -------------------------------------------------------------------------------- /docs/user_guide/models/solo_model.rst: -------------------------------------------------------------------------------- 1 | .. _SoloModel: 2 | 3 | ********************************* 4 | Single model approaches 5 | ********************************* 6 | 7 | Single model with treatment as feature 8 | ======================================== 9 | 10 | The most intuitive and simple uplift modeling technique. A training set consists of two groups: treatment samples and control samples. There is also a binary treatment flag added as a feature to the training set. After the model is trained, at the scoring time it is going to be applied twice: 11 | with the treatment flag equals `1` and with the treatment flag equals `0`. 
Subtracting these model's outcomes for each test sample, we will get an estimate of the uplift. 12 | 13 | .. image:: ../../_static/images/SoloModel.png 14 | :align: center 15 | :alt: Solo model dummy method 16 | 17 | .. hint:: 18 | In sklift this approach corresponds to the :class:`.SoloModel` class and the **dummy** method. 19 | 20 | Treatment interaction 21 | ========================= 22 | 23 | The single model approach has various modifications. For instance, we can update the number of attributes in the training set by adding 24 | the product of each attribute and the treatment flag: 25 | 26 | .. image:: ../../_static/images/SoloModel_treatment_intercation.png 27 | :align: center 28 | :alt: Solo model treatment interaction method 29 | 30 | .. hint:: 31 | In sklift this approach corresponds to the :class:`.SoloModel` class and the **treatment_interaction** method. 32 | 33 | 34 | 35 | References 36 | ========== 37 | 38 | 1️⃣ Lo, Victor. (2002). The True Lift Model - A Novel Data Mining Approach to Response Modeling in Database Marketing. SIGKDD Explorations. 4. 78-86. 39 | 40 | Examples using ``sklift.models.SoloModel`` 41 | ============================================ 42 | 43 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg 44 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 45 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg 46 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 47 | 48 | 1. The overview of the basic approaches to solving the Uplift Modeling problem 49 | 50 | .. 
list-table:: 51 | :align: center 52 | :widths: 12 15 10 8 53 | 54 | * - In English 🇬🇧 55 | - |Open In Colab1| 56 | - `nbviewer `__ 57 | - `github `__ 58 | * - In Russian 🇷🇺 59 | - |Open In Colab2| 60 | - `nbviewer `__ 61 | - `github `__ -------------------------------------------------------------------------------- /docs/user_guide/introduction/cate.rst: -------------------------------------------------------------------------------- 1 | ****************************************** 2 | Causal Inference: Basics 3 | ****************************************** 4 | 5 | In a perfect world, we want to calculate a difference in a person's reaction received communication, and the reaction without receiving any communication. 6 | But there is a problem: we can not make a communication (send an e-mail) and do not make a communication (no e-mail) at the same time. 7 | 8 | .. image:: https://habrastorage.org/webt/fl/fi/dz/flfidz416o7of5j0nmgdjqqkzfe.jpeg 9 | :alt: Joke about Schrodinger's cat 10 | :align: center 11 | 12 | Denoting :math:`Y_i^1` person :math:`i`’s outcome when receives the treatment (a presence of the communication) and :math:`Y_i^0` :math:`i`’s outcome when he receives no treatment (control, no communication), the :guilabel:`causal effect` :math:`\tau_i` of the treatment *vis-a-vis* no treatment is given by: 13 | 14 | .. math:: 15 | \tau_i = Y_i^1 - Y_i^0 16 | 17 | Researchers are typically interested in estimating the :guilabel:`Conditional Average Treatment Effect` (CATE), that is, the expected causal effect of the treatment for a subgroup in the population: 18 | 19 | .. math:: 20 | CATE = E[Y_i^1 \vert X_i] - E[Y_i^0 \vert X_i] 21 | 22 | Where :math:`X_i` - features vector describing :math:`i`-th person. 23 | 24 | We can observe neither causal effect nor CATE for the :math:`i`-th object, and, accordingly, we can't optimize it. 25 | But we can estimate CATE or *uplift* of an object: 26 | 27 | .. 
math:: 28 | \textbf{uplift} = \widehat{CATE} = E[Y_i \vert X_i = x, W_i = 1] - E[Y_i \vert X_i = x, W_i = 0] 29 | 30 | Where: 31 | 32 | - :math:`W_i \in {0, 1}` - a binary variable: 1 if person :math:`i` receives the :guilabel:`treatment group`, and 0 if person :math:`i` receives no treatment :guilabel:`control group`; 33 | - :math:`Y_i` - person :math:`i`’s observed outcome, which is equal: 34 | 35 | .. math:: 36 | Y_i = W_i * Y_i^1 + (1 - W_i) * Y_i^0 = \ 37 | \begin{cases} 38 | Y_i^1, & \mbox{if } W_i = 1 \\ 39 | Y_i^0, & \mbox{if } W_i = 0 \\ 40 | \end{cases} 41 | 42 | This won’t identify the CATE unless one is willing to assume that :math:`W_i` is independent of :math:`Y_i^1` and :math:`Y_i^0` conditional on :math:`X_i`. This assumption is the so-called *Unconfoundedness Assumption* or the *Conditional Independence Assumption* (CIA) found in the social sciences and medical literature. 43 | This assumption holds true when treatment assignment is random conditional on :math:`X_i`. 44 | Briefly, this can be written as: 45 | 46 | .. math:: 47 | CIA : \{Y_i^0, Y_i^1\} \perp \!\!\! \perp W_i \vert X_i 48 | 49 | Also, introduce additional useful notation. 50 | Let us define the :guilabel:`propensity score`, :math:`p(X_i) = P(W_i = 1| X_i)`, i.e. the probability of treatment given :math:`X_i`. 51 | 52 | References 53 | ========== 54 | 55 | 1️⃣ Gutierrez, P., & Gérardy, J. Y. (2017). Causal Inference and Uplift Modelling: A Review of the Literature. In International Conference on Predictive Applications and APIs (pp. 1-13). 
-------------------------------------------------------------------------------- /.github/workflows/ci-test.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request_target: 7 | 8 | jobs: 9 | test: 10 | name: Check tests 11 | runs-on: ${{ matrix.os }} 12 | env: 13 | # fix the python version and the operating system for codecoverage commentator 14 | USING_COVERAGE_PY: '3.8' 15 | USING_COVERAGE_OS: 'ubuntu-latest' 16 | outputs: 17 | # fix the results of pytest for unix 18 | output1: ${{ steps.pytest.outputs.exit_code }} 19 | 20 | strategy: 21 | matrix: 22 | os: ['ubuntu-latest', 'windows-latest', 'macos-latest'] 23 | python-version: ['3.6', '3.7', '3.8', '3.9'] 24 | # GitHub does not cancel all in-progress jobs if any matrix job fails 25 | fail-fast: false 26 | 27 | steps: 28 | - uses: actions/checkout@v2 29 | # Install python 30 | - name: Set up Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v2 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | # Update pip and install dependencies 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install . -r test_requirements.txt -r requirements.txt 39 | # Pytest in windows 40 | - name: Run PyTest windows 41 | if: ${{ matrix.os == 'windows-latest' }} 42 | run: | 43 | pytest | tee pytest-coverage.txt 44 | # Pytest in unix. 
Exit code of this run captures the exit status of tee and not of pytest 45 | # So, use $PIPESTATUS that holds the exit status of each command in pipeline 46 | - name: Run PyTest unix 47 | if: ${{ matrix.os != 'windows-latest' }} 48 | id: pytest 49 | run: | 50 | pytest | tee pytest-coverage.txt; 51 | exit_code=${PIPESTATUS[0]}; 52 | echo "::set-output name=exit_code::$exit_code" 53 | # Сomment on the results of the test coverage 54 | - name: Comment coverage 55 | if: contains(env.USING_COVERAGE_PY, matrix.python-version) && contains(env.USING_COVERAGE_OS, matrix.os) 56 | uses: MishaKav/pytest-coverage-comment@v1.1.6 57 | with: 58 | pytest-coverage-path: ./pytest-coverage.txt 59 | junitxml-path: ./pytest.xml 60 | # For unix workflow should have failed if exit code of pytest were 1 61 | - name: Check fail of pytest unix 62 | if: ${{ matrix.os != 'windows-latest' && steps.pytest.outputs.exit_code == 1 }} 63 | uses: actions/github-script@v3 64 | with: 65 | script: | 66 | core.setFailed('Some tests failed!') 67 | 68 | check_sphinx_build: 69 | name: Check Sphinx build for docs 70 | runs-on: ubuntu-latest 71 | strategy: 72 | matrix: 73 | python-version: [3.8] 74 | steps: 75 | - name: Checkout 76 | uses: actions/checkout@v2 77 | - name: Set up Python 78 | uses: actions/setup-python@v2 79 | with: 80 | python-version: ${{ matrix.python-version }} 81 | - name: Update pip and install dependencies 82 | run: | 83 | python -m pip install --upgrade pip 84 | pip install -r docs/requirements.txt -r requirements.txt 85 | - name: Run Sphinx 86 | run: sphinx-build -W -b html docs /tmp/_docs_build -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import sys 4 | import datetime 5 | 6 | sys.path.insert(0, os.path.abspath("../")) 7 | 8 | 9 | def get_version(): 10 | current_dir = os.path.abspath(os.path.dirname(__file__)) 11 | root = 
os.path.dirname(current_dir) 12 | version_file = os.path.join(root, "sklift", "__init__.py") 13 | with open(version_file) as f: 14 | return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1) 15 | 16 | # Configuration file for the Sphinx documentation builder. 17 | # 18 | # This file only contains a selection of the most common options. For a full 19 | # list see the documentation: 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 21 | 22 | # -- Path setup -------------------------------------------------------------- 23 | 24 | # If extensions (or modules to document with autodoc) are in another directory, 25 | # add these directories to sys.path here. If the directory is relative to the 26 | # documentation root, use os.path.abspath to make it absolute, like shown here. 27 | # 28 | # import os 29 | # import sys 30 | # sys.path.insert(0, os.path.abspath('.')) 31 | 32 | 33 | # -- Project information ----------------------------------------------------- 34 | 35 | project = 'scikit-uplift' 36 | author = 'Maksim Shevchenko and Contributors' 37 | copyright = "{}, {}".format(datetime.datetime.now().year, author) 38 | 39 | # The full version, including alpha/beta/rc tags 40 | release = get_version() 41 | 42 | 43 | # -- General configuration --------------------------------------------------- 44 | 45 | # Add any Sphinx extension module names here, as strings. They can be 46 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 47 | # ones. 48 | extensions = [ 49 | "sphinx.ext.autodoc", 50 | "sphinx.ext.viewcode", 51 | "sphinx.ext.mathjax", 52 | "sphinx.ext.napoleon", 53 | "myst_parser", 54 | "sphinx.ext.intersphinx", 55 | "sphinxcontrib.bibtex" 56 | ] 57 | 58 | bibtex_bibfiles = ['refs.bib'] 59 | bibtex_reference_style = 'author_year' 60 | 61 | master_doc = 'index' 62 | 63 | # Add any paths that contain templates here, relative to this directory. 
64 | templates_path = ['_templates'] 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'Readme.rst'] 70 | 71 | 72 | # -- Options for HTML output ------------------------------------------------- 73 | 74 | # The theme to use for HTML and HTML Help pages. See the documentation for 75 | # a list of builtin themes. 76 | # 77 | html_theme = 'sphinx_rtd_theme' 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named "default.css" will overwrite the builtin "default.css". 82 | html_static_path = ['_static'] 83 | html_css_files = [ 84 | 'css/custom.css', 85 | ] 86 | html_js_files = ['https://buttons.github.io/buttons.js'] 87 | html_logo = "./_static/sklift-logo.png" 88 | 89 | # Removing the view source link 90 | html_show_sourcelink = False 91 | 92 | html_theme_options = { 93 | 'navigation_depth': 3, 94 | } 95 | 96 | trim_footnote_reference_space = True 97 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at team@uplift-modeling.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq -------------------------------------------------------------------------------- /sklift/tests/test_models.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pytest 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn.linear_model import LogisticRegression, LinearRegression 7 | from sklearn.pipeline import Pipeline 8 | from sklearn.preprocessing import StandardScaler 9 | 10 | from ..models import ( 11 | SoloModel, 12 | ClassTransformation, 13 | TwoModels 14 | ) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "model", 19 | [ 20 | SoloModel(LogisticRegression(), method='dummy'), 21 | SoloModel(LogisticRegression(), method='treatment_interaction'), 22 | ClassTransformation(LogisticRegression()), 23 
# NOTE(review): the opening of this parametrize list (decorator header and the
# two SoloModel entries) falls just outside the reviewed chunk; it is
# reconstructed to mirror the regression list below -- confirm against the repo.
@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LogisticRegression(), method='dummy'),
        SoloModel(LogisticRegression(), method='treatment_interaction'),
        TwoModels(LogisticRegression(), LogisticRegression(), method='vanilla'),
        TwoModels(LogisticRegression(), LogisticRegression(), method='ddr_control'),
        TwoModels(LogisticRegression(), LogisticRegression(), method='ddr_treatment'),
    ]
)
def test_shape_classification(model, random_xyt_dataset_clf):
    """Each classification meta-model predicts one uplift score per sample,
    both standalone and inside a scikit-learn Pipeline."""
    X, y, treat = random_xyt_dataset_clf
    n_samples = y.shape[0]
    fitted = model.fit(X, y, treat)
    assert fitted.predict(X).shape[0] == n_samples
    pipe = Pipeline(steps=[("scaler", StandardScaler()), ("clf", model)])
    pipe.fit(X, y, clf__treatment=treat)
    assert pipe.predict(X).shape[0] == n_samples


@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LinearRegression(), method='dummy'),
        SoloModel(LinearRegression(), method='treatment_interaction'),
        TwoModels(LinearRegression(), LinearRegression(), method='vanilla'),
        TwoModels(LinearRegression(), LinearRegression(), method='ddr_control'),
        TwoModels(LinearRegression(), LinearRegression(), method='ddr_treatment'),
    ]
)
def test_shape_regression(model, random_xy_dataset_regr):
    """Each regression meta-model predicts one uplift score per sample,
    both standalone and inside a scikit-learn Pipeline."""
    X, y, treat = random_xy_dataset_regr
    n_samples = y.shape[0]
    fitted = model.fit(X, y, treat)
    assert fitted.predict(X).shape[0] == n_samples
    pipe = Pipeline(steps=[("scaler", StandardScaler()), ("clf", model)])
    pipe.fit(X, y, clf__treatment=treat)
    assert pipe.predict(X).shape[0] == n_samples


@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LogisticRegression(), method='dummy'),
        SoloModel(LogisticRegression(), method='treatment_interaction'),
    ]
)
def test_solomodel_fit_error(model):
    """SoloModel.fit raises TypeError when the target is continuous
    but the underlying estimator is a classifier."""
    X = [[1., 0., 0.], [1., 0., 0.], [1., 0., 0.]]
    y = [1., 2., 3.]
    treatment = [0., 1., 0.]
    with pytest.raises(TypeError):
        model.fit(X, y, treatment)


@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LogisticRegression(), method='dummy'),
        SoloModel(LogisticRegression(), method='treatment_interaction'),
    ]
)
def test_solomodel_pred_error(model):
    """SoloModel.predict raises TypeError on a scalar instead of a 2-D array."""
    X_train = np.array([[5.1, 3.5, 1.4, 0.2],
                        [4.9, 3.0, 1.4, 0.2],
                        [4.7, 3.2, 1.3, 0.2]])
    y_train = np.array([0.0, 0.0, 1.0])
    treat_train = np.array([0.0, 1.0, 1.0])
    model.fit(X_train, y_train, treat_train)
    with pytest.raises(TypeError):
        model.predict(1)


@pytest.mark.parametrize("method", ['method'])
def test_solomodel_method_error(method):
    """An unknown ``method`` name is rejected at construction time."""
    with pytest.raises(ValueError):
        SoloModel(LogisticRegression(), method=method)


def test_classtransformation_fit_error():
    """ClassTransformation.fit rejects a non-binary target with ValueError."""
    X = [[1., 0., 0.], [1., 0., 0.], [1., 0., 0.]]
    y = [1., 2., 3.]
    treatment = [0., 1., 0.]
    with pytest.raises(ValueError):
        ClassTransformation(LogisticRegression()).fit(X, y, treatment)


@pytest.mark.parametrize("method", ['method'])
def test_twomodels_method_error(method):
    """An unknown ``method`` name is rejected at construction time."""
    with pytest.raises(ValueError):
        TwoModels(LinearRegression(), LinearRegression(), method=method)


def test_same_estimator_error():
    """TwoModels refuses a single shared estimator instance for both groups."""
    shared = LinearRegression()
    with pytest.raises(ValueError):
        TwoModels(shared, shared)


@pytest.mark.parametrize(
    "X, y, treatment",
    [
        (pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                      columns=['a', 'b', 'c'], index=[0, 1, 2]),
         pd.Series(np.array([1, 0, 1]), index=[0, 2, 3]),
         pd.Series(np.array([0, 0, 1]), index=[0, 1, 2])),
        (pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                      columns=['a', 'b', 'c'], index=[0, 1, 2]),
         pd.Series(np.array([1, 0, 1]), index=[0, 1, 2]),
         pd.Series(np.array([0, 0, 1]), index=[1, 2, 3])),
    ]
)
def test_input_data(X, y, treatment):
    """A UserWarning is emitted when X, y and treatment carry mismatched
    pandas indices."""
    model = TwoModels(LinearRegression(), LinearRegression())
    with pytest.warns(UserWarning):
        model.fit(X, y, treatment)
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | pytest.xml 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 
105 | #Pipfile.lock 106 | 107 | # celery beat schedule file 108 | celerybeat-schedule 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | ### JetBrains template 141 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 142 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 143 | 144 | # User-specific stuff 145 | .idea/* 146 | .idea/**/workspace.xml 147 | .idea/**/tasks.xml 148 | .idea/**/usage.statistics.xml 149 | .idea/**/dictionaries 150 | .idea/**/shelf 151 | 152 | # Generated files 153 | .idea/**/contentModel.xml 154 | 155 | # Sensitive or high-churn files 156 | .idea/**/dataSources/ 157 | .idea/**/dataSources.ids 158 | .idea/**/dataSources.local.xml 159 | .idea/**/sqlDataSources.xml 160 | .idea/**/dynamic.xml 161 | .idea/**/uiDesigner.xml 162 | .idea/**/dbnavigator.xml 163 | 164 | # Gradle 165 | .idea/**/gradle.xml 166 | .idea/**/libraries 167 | 168 | # Gradle and Maven with auto-import 169 | # When using Gradle or Maven with auto-import, you should exclude module files, 170 | # since they will be recreated, and may cause churn. Uncomment if using 171 | # auto-import. 
172 | # .idea/modules.xml 173 | # .idea/*.iml 174 | # .idea/modules 175 | # *.iml 176 | # *.ipr 177 | 178 | # CMake 179 | cmake-build-*/ 180 | 181 | # Mongo Explorer plugin 182 | .idea/**/mongoSettings.xml 183 | 184 | # File-based project format 185 | *.iws 186 | 187 | # IntelliJ 188 | out/ 189 | 190 | # mpeltonen/sbt-idea plugin 191 | .idea_modules/ 192 | 193 | # JIRA plugin 194 | atlassian-ide-plugin.xml 195 | 196 | # Cursive Clojure plugin 197 | .idea/replstate.xml 198 | 199 | # Crashlytics plugin (for Android Studio and IntelliJ) 200 | com_crashlytics_export_strings.xml 201 | crashlytics.properties 202 | crashlytics-build.properties 203 | fabric.properties 204 | 205 | # Editor-based Rest Client 206 | .idea/httpRequests 207 | 208 | # Android studio 3.1+ serialized cache file 209 | .idea/caches/build_file_checksums.ser 210 | 211 | notebooks/content/* 212 | notebooks/catboost_info 213 | notebooks/*.tmp 214 | 215 | ### PSD logo 216 | *.psd 217 | -------------------------------------------------------------------------------- /docs/quick_start.rst: -------------------------------------------------------------------------------- 1 | .. _RU: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 2 | .. _EN: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 3 | 4 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg 5 | .. _Open In Colab1: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 6 | 7 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg 8 | .. _Open In Colab2: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 9 | 10 | *********** 11 | Quick Start 12 | *********** 13 | 14 | See the **RetailHero tutorial notebook** (`EN`_ |Open In Colab1|_, `RU`_ |Open In Colab2|_) for details. 
15 | 16 | Train and predict your uplift model 17 | ==================================== 18 | 19 | Use the intuitive python API to train uplift models with `sklift.models `__. 20 | 21 | .. code-block:: python 22 | :linenos: 23 | 24 | # import approaches 25 | from sklift.models import SoloModel, ClassTransformation 26 | # import any estimator that adheres to scikit-learn conventions. 27 | from lightgbm import LGBMClassifier 28 | 29 | # define models 30 | estimator = LGBMClassifier(n_estimators=10) 31 | 32 | # define metamodel 33 | slearner = SoloModel(estimator=estimator) 34 | 35 | # fit model 36 | slearner.fit( 37 | X=X_tr, 38 | y=y_tr, 39 | treatment=trmnt_tr, 40 | ) 41 | 42 | # predict uplift 43 | uplift_slearner = slearner.predict(X_val) 44 | 45 | Evaluate your uplift model 46 | =========================== 47 | 48 | Uplift model evaluation metrics are available in `sklift.metrics `__. 49 | 50 | .. code-block:: python 51 | :linenos: 52 | 53 | # import metrics to evaluate your model 54 | from sklift.metrics import ( 55 | uplift_at_k, uplift_auc_score, qini_auc_score, weighted_average_uplift 56 | ) 57 | 58 | 59 | # Uplift@30% 60 | uplift_at_k = uplift_at_k(y_true=y_val, uplift=uplift_slearner, 61 | treatment=trmnt_val, 62 | strategy='overall', k=0.3) 63 | 64 | # Area Under Qini Curve 65 | qini_coef = qini_auc_score(y_true=y_val, uplift=uplift_slearner, 66 | treatment=trmnt_val) 67 | 68 | # Area Under Uplift Curve 69 | uplift_auc = uplift_auc_score(y_true=y_val, uplift=uplift_slearner, 70 | treatment=trmnt_val) 71 | 72 | # Weighted average uplift 73 | wau = weighted_average_uplift(y_true=y_val, uplift=uplift_slearner, 74 | treatment=trmnt_val) 75 | 76 | Visualize the results 77 | ====================== 78 | 79 | Visualize performance metrics with `sklift.viz `__. 80 | 81 | ..
code-block:: python 82 | :linenos: 83 | 84 | from sklift.viz import plot_qini_curve 85 | import matplotlib.pyplot as plt 86 | 87 | fig, ax = plt.subplots(1, 1) 88 | ax.set_title('Qini curves') 89 | 90 | plot_qini_curve( 91 | y_test, uplift_slearner, trmnt_test, 92 | perfect=True, name='Slearner', ax=ax 93 | ); 94 | 95 | plot_qini_curve( 96 | y_test, uplift_revert, trmnt_test, 97 | perfect=False, name='Revert label', ax=ax 98 | ); 99 | 100 | .. image:: _static/images/quick_start_qini.png 101 | :alt: Example of some models qini curves, perfect qini curve and random qini curve 102 | 103 | 104 | .. code-block:: python 105 | :linenos: 106 | 107 | from sklift.viz import plot_uplift_curve 108 | import matplotlib.pyplot as plt 109 | 110 | fig, ax = plt.subplots(1, 1) 111 | ax.set_title('Uplift curves') 112 | 113 | plot_uplift_curve( 114 | y_test, uplift_slearner, trmnt_test, 115 | perfect=True, name='Slearner', ax=ax 116 | ); 117 | 118 | plot_uplift_curve( 119 | y_test, uplift_revert, trmnt_test, 120 | perfect=False, name='Revert label', ax=ax 121 | ); 122 | 123 | .. image:: _static/images/quick_start_uplift.png 124 | :alt: Example of some uplift curves, perfect uplift curve and random uplift curve 125 | 126 | .. code-block:: python 127 | :linenos: 128 | 129 | from sklift.viz import plot_uplift_by_percentile 130 | 131 | plot_uplift_by_percentile(y_true=y_val, uplift=uplift_preds, 132 | treatment=treat_val, kind='bar') 133 | 134 | .. image:: _static/images/quick_start_wau.png 135 | :alt: Uplift by percentile visualization 136 | -------------------------------------------------------------------------------- /sklift/datasets/descr/lenta.rst: -------------------------------------------------------------------------------- 1 | Lenta Uplift Modeling Dataset 2 | ================================ 3 | 4 | Data description 5 | ################ 6 | 7 | An uplift modeling dataset containing data about Lenta's customers grociery shopping and related marketing campaigns. 
8 | 9 | Source: **BigTarget Hackathon** hosted by Lenta and Microsoft in summer 2020. 10 | 11 | Fields 12 | ################ 13 | 14 | Major features: 15 | 16 | * ``group`` (str): treatment/control group flag 17 | * ``response_att`` (binary): target 18 | * ``gender`` (str): customer gender 19 | * ``age`` (float): customer age 20 | * ``main_format`` (int): store type (1 - grociery store, 0 - superstore) 21 | 22 | 23 | .. list-table:: 24 | :align: center 25 | :header-rows: 1 26 | :widths: 5 5 27 | 28 | * - Feature 29 | - Description 30 | * - CardHolder 31 | - customer id 32 | * - customer 33 | - age 34 | * - children 35 | - number of children 36 | * - cheque_count_[3,6,12]m_g* 37 | - number of customer receipts collected within last 3, 6, 12 months 38 | before campaign. g* is a product group 39 | * - crazy_purchases_cheque_count_[1,3,6,12]m 40 | - number of customer receipts with items purchased on "crazy" 41 | marketing campaign collected within last 1, 3, 6, 12 months before campaign 42 | * - crazy_purchases_goods_count_[6,12]m 43 | - items amount purchased on "crazy" marketing campaign collected 44 | within last 6, 12 months before campaign 45 | * - disc_sum_6m_g34 46 | - discount sum for past 6 month on a 34 product group 47 | * - food_share_[15d,1m] 48 | - food share in customer purchases for 15 days, 1 month 49 | * - gender 50 | - customer gender 51 | * - group 52 | - treatment/control group flag 53 | * - k_var_cheque_[15d,3m] 54 | - average check coefficient of variation for 15 days, 3 months 55 | * - k_var_cheque_category_width_15d 56 | - coefficient of variation of the average number of purchased 57 | categories (2nd level of the hierarchy) in one receipt for 15 days 58 | * - k_var_cheque_group_width_15d 59 | - coefficient of variation of the average number of purchased 60 | groups (1st level of the hierarchy) in one receipt for 15 days 61 | * - k_var_count_per_cheque_[15d,1m,3m,6m]_g* 62 | - unique product id (SKU) coefficient of variation for 15 days, 1, 3 
,6 months 63 | for g* product group 64 | * - k_var_days_between_visits_[15d,1m,3m] 65 | - coefficient of variation of the average period between visits 66 | for 15 days, 1 month, 3 months 67 | * - k_var_disc_per_cheque_15d 68 | - discount sum coefficient of variation for 15 days 69 | * - k_var_disc_share_[15d,1m,3m,6m,12m]_g* 70 | - discount amount coefficient of variation for 15 days, 1 month, 3 months, 6 months, 12 months 71 | for g* product group 72 | * - k_var_discount_depth_[15d,1m] 73 | - discount amount coefficient of variation for 15 days, 1 month 74 | * - k_var_sku_per_cheque_15d 75 | - number of unique product ids (SKU) coefficient of variation 76 | for 15 days 77 | * - k_var_sku_price_12m_g* 78 | - price coefficient of variation for 15 days, 3, 6, 12 months 79 | for g* product group 80 | * - main_format 81 | - store type (1 - grociery store, 0 - superstore) 82 | * - mean_discount_depth_15d 83 | - mean discount depth for 15 days 84 | * - months_from_register 85 | - number of months from a moment of register 86 | * - perdelta_days_between_visits_15_30d 87 | - timdelta in percent between visits during the first half 88 | of the month and visits during second half of the month 89 | * - promo_share_15d 90 | - promo goods share in the customer bucket 91 | * - response_att 92 | - binary target variable = store visit 93 | * - response_sms 94 | - share of customer responses to previous SMS. 95 | Response = store visit 96 | * - response_viber 97 | - share of responses to previous Viber messages. 
98 | Response = store visit 99 | * - sale_count_[3,6,12]m_g* 100 | - number of purchased items from the group * for 3, 6, 12 months 101 | * - sale_sum_[3,6,12]m_g* 102 | - sum of sales from the group * for 3, 6, 12 months 103 | * - stdev_days_between_visits_15d 104 | - coefficient of variation of the days between visits for 15 days 105 | * - stdev_discount_depth_[15d,1m] 106 | - discount sum coefficient of variation for 15 days, 1 month 107 | 108 | Key figures 109 | ################ 110 | 111 | * Format: CSV 112 | * Size: 153M (compressed) 567M (uncompressed) 113 | * Rows: 687,029 114 | * Response Ratio: .1 115 | * Treatment Ratio: .75 116 | 117 | -------------------------------------------------------------------------------- /docs/user_guide/models/revert_label.rst: -------------------------------------------------------------------------------- 1 | .. _ClassTransformation: 2 | 3 | ******************** 4 | Class Transformation 5 | ******************** 6 | 7 | .. warning:: 8 | This approach is only suitable for classification problem 9 | 10 | Simple yet powerful and mathematically proven uplift modeling method, presented in 2012. 11 | The main idea is to predict a slightly changed target :math:`Z_i`: 12 | 13 | .. math:: 14 | Z_i = Y_i \cdot W_i + (1 - Y_i) \cdot (1 - W_i), 15 | 16 | * :math:`Z_i` - a new target for the :math:`i` customer; 17 | 18 | * :math:`Y_i` - a previous target for the :math:`i` customer; 19 | 20 | * :math:`W_i` - treatment flag assigned to the :math:`i` customer. 21 | 22 | In other words, the new target equals 1 if a response in the treatment group is as good as a response in the control group and equals 0 otherwise: 23 | 24 | .. math:: 25 | Z_i = \begin{cases} 26 | 1, & \mbox{if } W_i = 1 \mbox{ and } Y_i = 1 \\ 27 | 1, & \mbox{if } W_i = 0 \mbox{ and } Y_i = 0 \\ 28 | 0, & \mbox{otherwise} 29 | \end{cases} 30 | 31 | Let's go deeper and estimate the conditional probability of the target variable: 32 | 33 | .. 
math:: 34 | P(Z=1|X = x) = \\ 35 | = P(Z=1|X = x, W = 1) \cdot P(W = 1|X = x) + \\ 36 | + P(Z=1|X = x, W = 0) \cdot P(W = 0|X = x) = \\ 37 | = P(Y=1|X = x, W = 1) \cdot P(W = 1|X = x) + \\ 38 | + P(Y=0|X = x, W = 0) \cdot P(W = 0|X = x). 39 | 40 | We assume that :math:`W` is independent of :math:`X = x` by design. 41 | Thus we have: :math:`P(W | X = x) = P(W)` and 42 | 43 | .. math:: 44 | P(Z=1|X = x) = \\ 45 | = P^T(Y=1|X = x) \cdot P(W = 1) + \\ 46 | + P^C(Y=0|X = x) \cdot P(W = 0) 47 | 48 | Also, we assume that :math:`P(W = 1) = P(W = 0) = \frac{1}{2}`, which means that during the experiment the control and the treatment groups 49 | were divided in equal proportions. Then we get the following: 50 | 51 | .. math:: 52 | P(Z=1|X = x) = \\ 53 | = P^T(Y=1|X = x) \cdot \frac{1}{2} + P^C(Y=0|X = x) \cdot \frac{1}{2} \Rightarrow \\ 54 | 55 | 2 \cdot P(Z=1|X = x) = \\ 56 | = P^T(Y=1|X = x) + P^C(Y=0|X = x) = \\ 57 | = P^T(Y=1|X = x) + 1 - P^C(Y=1|X = x) \Rightarrow \\ 58 | \Rightarrow P^T(Y=1|X = x) - P^C(Y=1|X = x) = \\ 59 | = uplift = 2 \cdot P(Z=1|X = x) - 1 60 | 61 | .. image:: ../../_static/images/user_guide/ug_revert_label_mem.png 62 | :align: center 63 | :alt: Mem about class transformation approach for uplift modeling 64 | 65 | Thus, by doubling the estimate of the new target :math:`Z` and subtracting one we will get an estimation of the uplift: 66 | 67 | .. math:: 68 | uplift = 2 \cdot P(Z=1) - 1 69 | 70 | 71 | This approach is based on the assumption: :math:`P(W = 1) = P(W = 0) = \frac{1}{2}`. That is the reason that it has to be used 72 | only in cases where the number of treated customers (communication) is equal to the number of control customers (no communication). 73 | 74 | .. hint:: 75 | In sklift this approach corresponds to the :class:`.ClassTransformation` class. 76 | 77 | References 78 | ========== 79 | 80 | 1️⃣ Maciej Jaskowski and Szymon Jaroszewicz. Uplift modeling for clinical trial data. ICML Workshop on Clinical Data Analysis, 2012. 
81 | 82 | Examples using ``sklift.models.ClassTransformation`` 83 | ==================================================== 84 | 85 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg 86 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 87 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg 88 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 89 | 90 | 1. The overview of the basic approaches to the Uplift Modeling problem 91 | 92 | .. list-table:: 93 | :align: center 94 | :widths: 12 15 10 8 95 | 96 | * - In English 🇬🇧 97 | - |Open In Colab1| 98 | - `nbviewer `__ 99 | - `github `__ 100 | * - In Russian 🇷🇺 101 | - |Open In Colab2| 102 | - `nbviewer `__ 103 | - `github `__ 104 | 105 | 2. The 2nd place solution of X5 RetailHero uplift contest by `Kirill Liksakov `_ 106 | 107 | .. list-table:: 108 | :align: center 109 | :widths: 12 10 8 110 | 111 | * - In English 🇬🇧 112 | - `nbviewer `__ 113 | - `github `__ -------------------------------------------------------------------------------- /docs/user_guide/models/two_models.rst: -------------------------------------------------------------------------------- 1 | .. _TwoModels: 2 | 3 | ************************** 4 | Two models approaches 5 | ************************** 6 | 7 | .. _in the scikit-learn documentation: https://scikit-learn.org/stable/modules/calibration.html 8 | 9 | The two models approach can be found in almost every uplift modeling research. It is often used as a baseline model. 10 | 11 | Two independent models 12 | ========================== 13 | 14 | .. hint:: 15 | In sklift this approach corresponds to the :class:`sklift.models.TwoModels` class and the **vanilla** method. 16 | 17 | The main idea is to estimate the conditional probabilities of the treatment and control groups separately. 18 | 19 | 1. 
Train the first model using the treatment set. 20 | 2. Train the second model using the control set. 21 | 3. Inference: subtract the control model scores from the treatment model scores. 22 | 23 | .. image:: ../../_static/images/TwoModels_vanila.png 24 | :align: center 25 | :alt: Two independent models vanilla 26 | 27 | The main disadvantage of this method is that if the uplift signal is weak, it can be lost since both models focus on predicting an original response, not the uplift. 28 | 29 | Two dependent models 30 | ======================== 31 | 32 | The dependent data representation approach is based on the classifier chain method originally developed 33 | for multi-class classification problems. The idea is that if there are :math:`L` different labels, you can build 34 | :math:`L` different classifiers, each of which solves the problem of binary classification and in the learning process, 35 | each subsequent classifier uses the predictions of the previous ones as additional features. 36 | The authors of this method proposed to use the same idea to solve the problem of uplift modeling in two stages. 37 | 38 | .. hint:: 39 | In sklift this approach corresponds to the :class:`.TwoModels` class and the **ddr_control** method. 40 | 41 | At the beginning, we train the classifier based on the control data: 42 | 43 | .. math:: 44 | P^C = P(Y=1| X, W = 0), 45 | 46 | Next, we estimate the :math:`P_C` predictions and use them as a feature for the second classifier. 47 | It effectively reflects a dependency between treatment and control datasets: 48 | 49 | .. math:: 50 | P^T = P(Y=1| X, P_C(X), W = 1) 51 | 52 | To get the uplift for each observation, calculate the difference: 53 | 54 | .. math:: 55 | uplift(x_i) = P^T (x_i, P_C(x_i)) - P^C(x_i) 56 | 57 | Intuitively, the second classifier learns the difference between the expected probability in the treatment and the control sets which is 58 | the uplift. 59 | 60 | .. 
image:: ../../_static/images/TwoModels_ddr_control.png 61 | :align: center 62 | :alt: Two independent models dependent data representation control 63 | 64 | Similarly, you can first train the :math:`P_T` classifier and then use its predictions as a feature for 65 | the :math:`P_C` classifier. 66 | 67 | .. hint:: 68 | In sklift this approach corresponds to the :class:`.TwoModels` class and the **ddr_treatment** method. 69 | 70 | There is an important remark about the data nature. 71 | It is important to calibrate the model's scores into probabilities if treatment and control data have a different nature. 72 | Model calibration techniques are well described `in the scikit-learn documentation`_. 73 | 74 | References 75 | ========== 76 | 77 | 1️⃣ Betlei, Artem & Diemert, Eustache & Amini, Massih-Reza. (2018). Uplift Prediction with Dependent Feature Representation in Imbalanced Treatment and Control Conditions: 25th International Conference, ICONIP 2018, Siem Reap, Cambodia, December 13–16, 2018, Proceedings, Part V. 10.1007/978-3-030-04221-9_5. 78 | 79 | 2️⃣ Zhao, Yan & Fang, Xiao & Simchi-Levi, David. (2017). Uplift Modeling with Multiple Treatments and General Response Types. 10.1137/1.9781611974973.66. 80 | 81 | Examples using ``sklift.models.TwoModels`` 82 | ============================================ 83 | 84 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg 85 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 86 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg 87 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 88 | 89 | 1. The overview of the basic approaches to solving the Uplift Modeling problem 90 | 91 | .. 
import pytest
import sklearn

from functools import partial

from ..datasets import (
    clear_data_dir,
    fetch_lenta, fetch_x5,
    fetch_criteo, fetch_hillstrom,
    fetch_megafon
)


# Every Criteo test in this module uses the reduced 10% sample.
fetch_criteo10 = partial(fetch_criteo, percent10=True)


@pytest.fixture(scope="session", autouse=True)
def clear():
    """Wipe the local data cache once per session so every fetch is exercised."""
    clear_data_dir()


@pytest.fixture
def lenta_dataset() -> dict:
    """Expected Bunch keys and array shapes for the Lenta dataset."""
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (687029, 193),
        'target.shape': (687029,),
        'treatment.shape': (687029,),
    }


def test_fetch_lenta(lenta_dataset):
    """fetch_lenta returns a Bunch with the documented keys and shapes."""
    bunch = fetch_lenta()
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(lenta_dataset['keys'])
    assert bunch.data.shape == lenta_dataset['data.shape']
    assert bunch.target.shape == lenta_dataset['target.shape']
    assert bunch.treatment.shape == lenta_dataset['treatment.shape']


# NOTE(review): the X5 test is kept disabled -- presumably because the dataset
# download is too heavy for CI; confirm before re-enabling.
# @pytest.fixture
# def x5_dataset() -> dict:
#     data = {'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'],
#             'data.keys': ['clients', 'train', 'purchases'], 'clients.shape': (400162, 5),
#             'train.shape': (200039, 1), 'target.shape': (200039,), 'treatment.shape': (200039,)}
#     return data
#
#
# def test_fetch_x5(x5_dataset):
#     data = fetch_x5()
#     assert isinstance(data, sklearn.utils.Bunch)
#     assert set(data.keys()) == set(x5_dataset['keys'])
#     assert set(data.data.keys()) == set(x5_dataset['data.keys'])
#     assert data.data.clients.shape == x5_dataset['clients.shape']
#     assert data.data.train.shape == x5_dataset['train.shape']
#     assert data.target.shape == x5_dataset['target.shape']
#     assert data.treatment.shape == x5_dataset['treatment.shape']


@pytest.fixture
def criteo10_dataset() -> dict:
    """Expected Bunch keys and feature-matrix shape for the 10% Criteo sample."""
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (1397960, 12),
    }


@pytest.mark.parametrize(
    'target_col, target_shape',
    [('visit', (1397960,)),
     ('conversion', (1397960,)),
     ('all', (1397960, 2))]
)
@pytest.mark.parametrize(
    'treatment_col, treatment_shape',
    [('exposure', (1397960,)),
     ('treatment', (1397960,)),
     ('all', (1397960, 2))]
)
def test_fetch_criteo10(
    criteo10_dataset,
    target_col, target_shape,
    treatment_col, treatment_shape
):
    """Every target/treatment column choice yields consistently shaped arrays."""
    bunch = fetch_criteo10(target_col=target_col, treatment_col=treatment_col)
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(criteo10_dataset['keys'])
    assert bunch.data.shape == criteo10_dataset['data.shape']
    assert bunch.target.shape == target_shape
    assert bunch.treatment.shape == treatment_shape


@pytest.mark.parametrize(
    'target_col, treatment_col',
    [('visit', 'new_trmnt'),
     ('new_target', 'treatment')]
)
def test_fetch_criteo_errors(target_col, treatment_col):
    """Unknown target/treatment column names are rejected with ValueError."""
    with pytest.raises(ValueError):
        fetch_criteo(target_col=target_col, treatment_col=treatment_col)


@pytest.fixture
def hillstrom_dataset() -> dict:
    """Expected Bunch keys and array shapes for the Hillstrom dataset."""
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (64000, 8),
        'treatment.shape': (64000,),
    }


@pytest.mark.parametrize(
    'target_col, target_shape',
    [('visit', (64_000,)),
     ('conversion', (64_000,)),
     ('spend', (64_000,)),
     ('all', (64_000, 3))]
)
def test_fetch_hillstrom(
    hillstrom_dataset,
    target_col, target_shape
):
    """Every target column choice yields a Bunch with consistent shapes."""
    bunch = fetch_hillstrom(target_col=target_col)
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(hillstrom_dataset['keys'])
    assert bunch.data.shape == hillstrom_dataset['data.shape']
    assert bunch.target.shape == target_shape
    assert bunch.treatment.shape == hillstrom_dataset['treatment.shape']


def test_fetch_hillstrom_error():
    """An unknown target column name is rejected with ValueError."""
    with pytest.raises(ValueError):
        fetch_hillstrom(target_col='new_target')


@pytest.fixture
def megafon_dataset() -> dict:
    """Expected Bunch keys and array shapes for the MegaFon dataset."""
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (600000, 50),
        'target.shape': (600000,),
        'treatment.shape': (600000,),
    }


def test_fetch_megafon(megafon_dataset):
    """fetch_megafon returns a Bunch with the documented keys and shapes."""
    bunch = fetch_megafon()
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(megafon_dataset['keys'])
    assert bunch.data.shape == megafon_dataset['data.shape']
    assert bunch.target.shape == megafon_dataset['target.shape']
    assert bunch.treatment.shape == megafon_dataset['treatment.shape']


def check_return_X_y_t(bunch, dataset_func):
    """Helper: the (X, y, t) tuple must mirror the default Bunch's shapes."""
    triple = dataset_func(return_X_y_t=True)
    assert isinstance(triple, tuple)
    assert triple[0].shape == bunch.data.shape
    assert triple[1].shape == bunch.target.shape
    assert triple[2].shape == bunch.treatment.shape


@pytest.mark.parametrize("fetch_func", [fetch_hillstrom, fetch_criteo10, fetch_lenta, fetch_megafon])
def test_return_X_y_t(fetch_func):
    """return_X_y_t=True yields a tuple matching the default Bunch output."""
    bunch = fetch_func()
    check_return_X_y_t(bunch, fetch_func)
24 | 25 | * Combine a churn model and an uplift model to offer some bonus to a group of customers who are likely to churn. 26 | 27 | * Select a tiny group of customers in the campaign where a price per customer is high. 28 | 29 | Read more about *uplift modeling* problem in :ref:`the User Guide `. 30 | 31 | Articles in russian on habr.com: `Part 1 `__ , 32 | `Part 2 `__ 33 | and `Part 3 `__. 34 | 35 | Why sklift 36 | ############# 37 | 38 | - Сomfortable and intuitive *scikit-learn*-like API; 39 | 40 | - More uplift metrics than you have ever seen in one place! Include brilliants like *Area Under Uplift Curve* (AUUC) or *Area Under Qini Curve* (Qini coefficient) with ideal cases; 41 | 42 | - Supporting any estimator compatible with scikit-learn (e.g. Xgboost, LightGBM, Catboost, etc.); 43 | 44 | - All approaches can be used in the ``sklearn.pipeline``. See the example of usage on `the Tutorials page `__; 45 | 46 | - Also metrics are compatible with the classes from ``sklearn.model_selection``. See the example of usage on `the Tutorials page `__; 47 | 48 | - Almost all implemented approaches solve classification and regression problems; 49 | 50 | - Nice and useful viz for analysing a performance model. 51 | 52 | 53 | **The package currently supports the following methods:** 54 | 55 | 1. Solo Model (aka S-learner or Treatment Dummy, Treatment interaction) approach 56 | 2. Class Transformation (aka Class Variable Transformation or Revert Label) approach 57 | 3. Two Models (aka X-learner, or naïve approach, or difference score method, or double classifier approach) approach, including Dependent Data Representation 58 | 59 | **And the following metrics:** 60 | 61 | 1. Uplift@k 62 | 2. Area Under Uplift Curve 63 | 3. Area Under Qini Curve 64 | 4. 
Weighted average uplift 65 | 66 | Project info 67 | ############# 68 | 69 | * GitHub repository: https://github.com/maks-sh/scikit-uplift 70 | * Github examples: https://github.com/maks-sh/scikit-uplift/tree/master/notebooks 71 | * Documentation: https://www.uplift-modeling.com/en/latest/index.html 72 | * Contributing guide: https://www.uplift-modeling.com/en/latest/contributing.html 73 | * License: `MIT `__ 74 | 75 | Community 76 | ############# 77 | 78 | Sklift is being actively maintained and welcomes new contributors of all experience levels. 79 | 80 | - Please see our `Contributing Guide `_ for more details. 81 | - By participating in this project, you agree to abide by its `Code of Conduct `__. 82 | 83 | Thanks to all our contributors! 84 | 85 | |Contribs| 86 | 87 | If you have any questions, please contact us at team@uplift-modeling.com 88 | 89 | .. toctree:: 90 | :hidden: 91 | 92 | self 93 | 94 | .. toctree:: 95 | :maxdepth: 2 96 | :caption: Contents 97 | 98 | install 99 | quick_start 100 | user_guide/index 101 | api/index 102 | tutorials 103 | contributing 104 | changelog 105 | hall_of_fame 106 | 107 | 108 | =============== 109 | 110 | Papers and materials 111 | ##################### 112 | 113 | 1. Gutierrez, P., & Gérardy, J. Y. 114 | Causal Inference and Uplift Modelling: A Review of the Literature. 115 | In International Conference on Predictive Applications and APIs (pp. 1-13). 116 | 117 | 2. Artem Betlei, Criteo Research; Eustache Diemert, Criteo Research; Massih-Reza Amini, Univ. Grenoble Alpes 118 | Dependent and Shared Data Representations improve Uplift Prediction in Imbalanced Treatment Conditions 119 | FAIM'18 Workshop on CausalML. 120 | 121 | 3. Eustache Diemert, Artem Betlei, Christophe Renaudin, and Massih-Reza Amini. 2018. 122 | A Large Scale Benchmark for Uplift Modeling. 123 | In Proceedings of AdKDD & TargetAd (ADKDD’18). ACM, New York, NY, USA, 6 pages. 124 | 125 | 4. Athey, Susan, and Imbens, Guido. 2015. 
126 | Machine learning methods for estimating heterogeneous causal effects. 127 | Preprint, arXiv:1504.01132. Google Scholar. 128 | 129 | 5. Oscar Mesalles Naranjo. 2012. 130 | Testing a New Metric for Uplift Models. 131 | Dissertation Presented for the Degree of MSc in Statistics and Operational Research. 132 | 133 | 6. Kane, K., V. S. Y. Lo, and J. Zheng. 2014. 134 | Mining for the Truly Responsive Customers and Prospects Using True-Lift Modeling: 135 | Comparison of New and Existing Methods. 136 | Journal of Marketing Analytics 2 (4): 218–238. 137 | 138 | 7. Maciej Jaskowski and Szymon Jaroszewicz. 139 | Uplift modeling for clinical trial data. 140 | ICML Workshop on Clinical Data Analysis, 2012. 141 | 142 | 8. Lo, Victor. 2002. 143 | The True Lift Model - A Novel Data Mining Approach to Response Modeling in Database Marketing. 144 | SIGKDD Explorations. 4. 78-86. 145 | 146 | 9. Zhao, Yan & Fang, Xiao & Simchi-Levi, David. 2017. 147 | Uplift Modeling with Multiple Treatments and General Response Types. 10.1137/1.9781611974973.66. 148 | 149 | 10. Nicholas J Radcliffe. 2007. 150 | Using control groups to target on predicted lift: Building and assessing uplift model. 151 | Direct Marketing Analytics Journal, (3):14–21, 2007. 152 | 153 | 11. Devriendt, F., Guns, T., & Verbeke, W. 2020. 154 | Learning to rank for uplift modeling. ArXiv, abs/2002.05897. 155 | 156 | =============== 157 | 158 | Tags 159 | ##### 160 | **EN**: uplift modeling, uplift modelling, causal inference, causal effect, causality, individual treatment effect, true lift, net lift, incremental modeling 161 | 162 | **RU**: аплифт моделирование, Uplift модель 163 | 164 | **ZH**: uplift增量建模, 因果推断, 因果效应, 因果关系, 个体干预因果效应, 真实增量, 净增量, 增量建模 165 | -------------------------------------------------------------------------------- /notebooks/Readme.rst: -------------------------------------------------------------------------------- 1 | .. 
_The overview of the basic approaches to solving the Uplift Modeling problem: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 2 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg 3 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 4 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg 5 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 6 | 7 | .. _Example of usage model from sklift.models in sklearn.pipeline: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb 8 | .. |Open In Colab3| image:: https://colab.research.google.com/assets/colab-badge.svg 9 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb 10 | .. |Open In Colab4| image:: https://colab.research.google.com/assets/colab-badge.svg 11 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_RU.ipynb 12 | 13 | .. _Example of usage model from sklift.models in sklearn.model_selection: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb 14 | .. |Open In Colab5| image:: https://colab.research.google.com/assets/colab-badge.svg 15 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb 16 | 17 | .. |Open In Colab6| image:: https://colab.research.google.com/assets/colab-badge.svg 18 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Lenta_dataset.ipynb 19 | 20 | .. _EDA of X5 dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_x5_dataset.ipynb 21 | .. 
|Open In Colab7| image:: https://colab.research.google.com/assets/colab-badge.svg 22 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_x5_dataset.ipynb 23 | 24 | .. _EDA of Criteo dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Criteo_dataset.ipynb 25 | .. |Open In Colab8| image:: https://colab.research.google.com/assets/colab-badge.svg 26 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Criteo_dataset.ipynb 27 | 28 | .. _EDA of Hillstrom dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Hillstrom_dataset.ipynb 29 | .. |Open In Colab9| image:: https://colab.research.google.com/assets/colab-badge.svg 30 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Hillstrom_dataset.ipynb 31 | 32 | .. _EDA of Megafon dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Megafon_dataset.ipynb 33 | .. |Open In Colab10| image:: https://colab.research.google.com/assets/colab-badge.svg 34 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Megafon_dataset.ipynb 35 | 36 | 37 | 38 | ********** 39 | Tutorials 40 | ********** 41 | 42 | Basic 43 | ######## 44 | 45 | It is better to start scikit-uplift from the basic tutorials. 46 | 47 | `The overview of the basic approaches to solving the Uplift Modeling problem`_ 48 | ---------------------------------------------------------------------------------- 49 | 50 | .. 
list-table:: 51 | :align: center 52 | :widths: 12 15 10 8 53 | 54 | * - In English 🇬🇧 55 | - |Open In Colab1| 56 | - `nbviewer `__ 57 | - `github `__ 58 | * - In Russian 🇷🇺 59 | - |Open In Colab2| 60 | - `nbviewer `__ 61 | - `github `__ 62 | 63 | `Uplift modeling metrics`_ 64 | ---------------------------------------------------------------------------------- 65 | 66 | .. list-table:: 67 | :align: center 68 | :widths: 12 15 10 8 69 | 70 | * - In English 🇬🇧 71 | - |Open In Colab1| 72 | - `nbviewer `__ 73 | - `github `__ 74 | 75 | `Example of usage model from sklift.models in sklearn.pipeline`_ 76 | ---------------------------------------------------------------------------------- 77 | 78 | .. list-table:: 79 | :align: center 80 | :widths: 12 15 10 8 81 | 82 | * - In English 🇬🇧 83 | - |Open In Colab3| 84 | - `nbviewer `__ 85 | - `github `__ 86 | * - In Russian 🇷🇺 87 | - |Open In Colab4| 88 | - `nbviewer `__ 89 | - `github `__ 90 | 91 | `Example of usage model from sklift.models in sklearn.model_selection`_ 92 | ---------------------------------------------------------------------------------- 93 | 94 | .. list-table:: 95 | :align: center 96 | :widths: 12 15 10 8 97 | 98 | * - In English 🇬🇧 99 | - |Open In Colab5| 100 | - `nbviewer `__ 101 | - `github `__ 102 | 103 | Exploratory data analysis 104 | ############################ 105 | 106 | The package contains various public datasets for uplift modeling. 107 | Below you find jupyter notebooks with EDA of these datasets and a simple baseline. 108 | 109 | .. 
list-table:: 110 | :align: center 111 | :widths: 30 12 15 10 8 112 | 113 | * - EDA of :ref:`Lenta dataset ` 114 | - In English 🇬🇧 115 | - |Open In Colab6| 116 | - `nbviewer `__ 117 | - `github `__ 118 | * - EDA of :ref:`X5 dataset ` 119 | - In English 🇬🇧 120 | - |Open In Colab7| 121 | - `nbviewer `__ 122 | - `github `__ 123 | * - EDA of :ref:`Criteo dataset ` 124 | - In English 🇬🇧 125 | - |Open In Colab8| 126 | - `nbviewer `__ 127 | - `github `__ 128 | * - EDA of :ref:`Hillstrom dataset ` 129 | - In English 🇬🇧 130 | - |Open In Colab9| 131 | - `nbviewer `__ 132 | - `github `__ 133 | * - EDA of :ref:`Megafon dataset ` 134 | - In English 🇬🇧 135 | - |Open In Colab10| 136 | - `nbviewer `__ 137 | - `github `__ 138 | -------------------------------------------------------------------------------- /sklift/tests/test_viz.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from numpy.testing import assert_allclose 5 | 6 | from ..viz import plot_qini_curve, plot_uplift_curve, plot_uplift_preds, plot_uplift_by_percentile, plot_treatment_balance_curve 7 | from ..metrics import qini_curve, perfect_qini_curve, uplift_curve, perfect_uplift_curve 8 | from ..viz import UpliftCurveDisplay 9 | 10 | from sklearn.tree import DecisionTreeClassifier 11 | from ..models import SoloModel 12 | 13 | import matplotlib as mpl 14 | 15 | def make_predictions(): 16 | X_train, y_train, treat_train = (np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]]), 17 | np.array([0.0, 0.0, 1.0]), np.array([0.0, 1.0, 1.0])) 18 | X_val, y_val, treat_val = (np.array([[5.1, 3.4, 1.5, 0.2], [5.0, 3.5, 1.3, 0.3], [4.5, 2.3, 1.3, 0.3]]), 19 | np.array([0.0, 1.0, 0.0]), np.array([0.0, 1.0, 1.0])) 20 | 21 | model = DecisionTreeClassifier(random_state=0) 22 | 23 | s_model = SoloModel(model) 24 | s_model = s_model.fit(X_train, y_train, treat_train) 25 | uplift_preds = s_model.predict(X_val) 26 | 27 | return y_val, 
uplift_preds, treat_val 28 | 29 | @pytest.mark.parametrize("random", [True, False]) 30 | @pytest.mark.parametrize("perfect", [True, False]) 31 | @pytest.mark.parametrize("negative_effect", [True, False]) 32 | def test_plot_qini_curve(random, perfect, negative_effect): 33 | y_true, uplift, treatment = make_predictions() 34 | 35 | viz = plot_qini_curve(y_true, uplift, treatment, random, perfect, negative_effect) 36 | 37 | x_actual, y_actual = qini_curve(y_true, uplift, treatment) 38 | 39 | assert_allclose(viz.x_actual, x_actual) 40 | assert_allclose(viz.y_actual, y_actual) 41 | 42 | if random: 43 | x_baseline, y_baseline = x_actual, x_actual * y_actual[-1] / len(y_true) 44 | assert_allclose(viz.x_baseline, x_baseline) 45 | assert_allclose(viz.y_baseline, y_baseline) 46 | 47 | if perfect: 48 | x_perfect, y_perfect = perfect_qini_curve( 49 | y_true, treatment, negative_effect) 50 | 51 | assert_allclose(viz.x_perfect, x_perfect) 52 | assert_allclose(viz.y_perfect, y_perfect) 53 | 54 | assert isinstance(viz.line_, mpl.lines.Line2D) 55 | assert isinstance(viz.ax_, mpl.axes.Axes) 56 | assert isinstance(viz.figure_, mpl.figure.Figure) 57 | 58 | 59 | @pytest.mark.parametrize( 60 | "qini_auc, estimator_name, expected_label", 61 | [ 62 | (0.61, None, "plot_qini_curve = 0.61"), 63 | (0.61, "first", "first (plot_qini_curve = 0.61)"), 64 | (None, "None", "None") 65 | ] 66 | ) 67 | def test_default_labels(qini_auc, estimator_name, expected_label): 68 | x_actual = np.array([0, 1, 2, 3, 5, 6]) 69 | y_actual = np.array([0.0, 1.0, 2.0, 3.0, 2.5, 1.5]) 70 | 71 | disp = UpliftCurveDisplay( 72 | x_actual=x_actual, 73 | y_actual=y_actual, 74 | estimator_name=estimator_name 75 | ).plot(qini_auc, title="plot_qini_curve") 76 | 77 | assert disp.line_.get_label() == expected_label 78 | 79 | 80 | @pytest.mark.parametrize("random", [True, False]) 81 | @pytest.mark.parametrize("perfect", [True, False]) 82 | def test_plot_uplift_curve(random, perfect): 83 | y_true, uplift, treatment = 
make_predictions() 84 | 85 | viz = plot_uplift_curve(y_true, uplift, treatment, random, perfect) 86 | 87 | x_actual, y_actual = uplift_curve(y_true, uplift, treatment) 88 | 89 | assert_allclose(viz.x_actual, x_actual) 90 | assert_allclose(viz.y_actual, y_actual) 91 | 92 | if random: 93 | x_baseline, y_baseline = x_actual, x_actual * y_actual[-1] / len(y_true) 94 | assert_allclose(viz.x_baseline, x_baseline) 95 | assert_allclose(viz.y_baseline, y_baseline) 96 | 97 | if perfect: 98 | x_perfect, y_perfect = perfect_uplift_curve( 99 | y_true, treatment) 100 | 101 | assert_allclose(viz.x_perfect, x_perfect) 102 | assert_allclose(viz.y_perfect, y_perfect) 103 | 104 | assert isinstance(viz.line_, mpl.lines.Line2D) 105 | assert isinstance(viz.ax_, mpl.axes.Axes) 106 | assert isinstance(viz.figure_, mpl.figure.Figure) 107 | 108 | 109 | @pytest.mark.parametrize( 110 | "uplift_auc, estimator_name, expected_label", 111 | [ 112 | (0.75, None, "plot_uplift_curve = 0.75"), 113 | (0.75, "first", "first (plot_uplift_curve = 0.75)"), 114 | (None, "None", "None") 115 | ] 116 | ) 117 | def test_default_labels(uplift_auc, estimator_name, expected_label): 118 | x_actual = np.array([0, 1, 2, 3, 5, 6]) 119 | y_actual = np.array([0.0, 1.0, 2.0, 3.0, 2.5, 1.5]) 120 | 121 | disp = UpliftCurveDisplay( 122 | x_actual=x_actual, 123 | y_actual=y_actual, 124 | estimator_name=estimator_name 125 | ).plot(uplift_auc, title="plot_uplift_curve") 126 | 127 | assert disp.line_.get_label() == expected_label 128 | 129 | 130 | def test_plot_uplift_preds(): 131 | trmnt_preds = np.array([1,1,0,1,1,1]) 132 | ctrl_preds = np.array([0,1,0,1,0,1]) 133 | 134 | viz = plot_uplift_preds(trmnt_preds, ctrl_preds, log=True, bins=5) 135 | 136 | assert isinstance(viz[0], mpl.axes.Axes) 137 | assert isinstance(viz[1], mpl.axes.Axes) 138 | assert isinstance(viz[2], mpl.axes.Axes) 139 | 140 | with pytest.raises(ValueError): 141 | plot_uplift_preds(trmnt_preds, ctrl_preds, log=True, bins=0) 142 | 143 | def 
test_plot_uplift_by_percentile(): 144 | y_true, uplift, treatment = make_predictions() 145 | 146 | viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall',kind='line', bins=1, string_percentiles=True) 147 | 148 | assert viz.get_title() == "Uplift by percentile\nweighted average uplift = 0.5000" 149 | assert viz.get_xlabel() == "Percentile" 150 | assert viz.get_ylabel() == "Uplift = treatment response rate - control response rate" 151 | assert isinstance(viz, mpl.axes.Axes) 152 | viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',kind='bar', bins=1, string_percentiles=False) 153 | 154 | assert viz[0].get_title() == "Uplift by percentile\nweighted average uplift = 0.5000" 155 | assert viz[1].get_xlabel() == "Percentile" 156 | assert viz[1].get_title() == "Response rate by percentile" 157 | assert isinstance(viz[0], mpl.axes.Axes) 158 | assert isinstance(viz[1], mpl.axes.Axes) 159 | viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',kind='bar', bins=1, string_percentiles=True) 160 | 161 | assert viz[0].get_title() == "Uplift by percentile\nweighted average uplift = 0.5000" 162 | assert viz[1].get_xlabel() == "Percentile" 163 | assert viz[1].get_title() == "Response rate by percentile" 164 | assert isinstance(viz[0], mpl.axes.Axes) 165 | assert isinstance(viz[1], mpl.axes.Axes) 166 | 167 | viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',kind='line', bins=1, string_percentiles=False) 168 | assert isinstance(viz, mpl.axes.Axes) 169 | 170 | 171 | @pytest.mark.parametrize( 172 | "strategy, kind, bins, string_percentiles", 173 | [ 174 | ("new_strategy", "bar", 1, False), 175 | ("by_group", "new_bar", 1, False), 176 | ("by_group", "bar", 0, False), 177 | ("by_group", "bar", 100, False), 178 | ("by_group", "bar", 1, 5) 179 | 180 | ] 181 | ) 182 | def test_plot_uplift_by_percentile_errors(strategy, kind, bins, string_percentiles): 183 | y_true, uplift, treatment = 
make_predictions() 184 | with pytest.raises(ValueError): 185 | viz = plot_uplift_by_percentile(y_true, uplift, treatment, strategy=strategy, kind=kind, bins=bins, string_percentiles=string_percentiles) 186 | 187 | 188 | def test_plot_treatment_balance_curve(): 189 | y_true, uplift, treatment = make_predictions() 190 | 191 | viz = plot_treatment_balance_curve(uplift, treatment, winsize=0.5) 192 | 193 | assert viz.get_title() == "Treatment balance curve" 194 | assert viz.get_xlabel() == "Percentage targeted" 195 | assert viz.get_ylabel() == "Balance: treatment / (treatment + control)" 196 | assert isinstance(viz, mpl.axes.Axes) 197 | 198 | def test_plot_treatment_balance_errors(): 199 | y_true, uplift, treatment = make_predictions() 200 | with pytest.raises(ValueError): 201 | viz = plot_treatment_balance_curve(uplift, treatment, winsize=5) -------------------------------------------------------------------------------- /Readme.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | |Python3|_ |PyPi|_ |Docs|_ |License|_ 4 | 5 | .. |Python3| image:: https://img.shields.io/badge/python-3-blue.svg 6 | .. _Python3: https://badge.fury.io/py/scikit-uplift 7 | 8 | .. |PyPi| image:: https://badge.fury.io/py/scikit-uplift.svg 9 | .. _PyPi: https://badge.fury.io/py/scikit-uplift 10 | 11 | .. |Docs| image:: https://readthedocs.org/projects/scikit-uplift/badge/?version=latest 12 | .. _Docs: https://www.uplift-modeling.com/en/latest/ 13 | 14 | .. |License| image:: https://img.shields.io/badge/license-MIT-green 15 | .. _License: https://github.com/maks-sh/scikit-uplift/blob/master/LICENSE 16 | 17 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg 18 | .. _Open In Colab1: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb 19 | 20 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg 21 | .. 
_Open In Colab2: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb 22 | 23 | .. |Open In Colab3| image:: https://colab.research.google.com/assets/colab-badge.svg 24 | .. _Open In Colab3: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb 25 | 26 | .. |Open In Colab4| image:: https://colab.research.google.com/assets/colab-badge.svg 27 | .. _Open In Colab4: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_RU.ipynb 28 | 29 | .. _uplift-modeling.com: https://www.uplift-modeling.com/en/latest/ 30 | 31 | .. image:: https://raw.githubusercontent.com/maks-sh/scikit-uplift/dev/docs/_static/sklift-github-logo.png 32 | :align: center 33 | :alt: scikit-uplift: uplift modeling in scikit-learn style in python 34 | 35 | .. |Contribs| image:: https://contrib.rocks/image?repo=maks-sh/scikit-uplift 36 | :target: https://github.com/maks-sh/scikit-uplift/graphs/contributors 37 | :alt: Contributors 38 | 39 | scikit-uplift 40 | =============== 41 | 42 | **scikit-uplift (sklift)** is an uplift modeling python package that provides fast sklearn-style models implementation, evaluation metrics and visualization tools. 43 | 44 | Uplift modeling estimates a causal effect of treatment and uses it to effectively target customers that are most likely to respond to a marketing campaign. 45 | 46 | **Use cases for uplift modeling:** 47 | 48 | * Target customers in the marketing campaign. Quite useful in promotion of some popular product where there is a big part of customers who make a target action by themself without any influence. By modeling uplift you can find customers who are likely to make the target action (for instance, install an app) only when treated (for instance, received a push). 49 | 50 | * Combine a churn model and an uplift model to offer some bonus to a group of customers who are likely to churn. 
51 | 52 | * Select a tiny group of customers in the campaign where a price per customer is high. 53 | 54 | Read more about uplift modeling problem in `User Guide `__. 55 | 56 | Articles in russian on habr.com: `Part 1 `__ , 57 | `Part 2 `__ 58 | and `Part 3 `__. 59 | 60 | Why sklift 61 | ------------- 62 | 63 | - Сomfortable and intuitive *scikit-learn*-like API; 64 | 65 | - More uplift metrics than you have ever seen in one place! Include brilliants like *Area Under Uplift Curve* (AUUC) or *Area Under Qini Curve* (Qini coefficient) with ideal cases; 66 | 67 | - Supporting any estimator compatible with scikit-learn (e.g. Xgboost, LightGBM, Catboost, etc.); 68 | 69 | - All approaches can be used in the ``sklearn.pipeline``. See the example of usage on `the Tutorials page `__; 70 | 71 | - Also metrics are compatible with the classes from ``sklearn.model_selection``. See the example of usage on `the Tutorials page `__; 72 | 73 | - Almost all implemented approaches solve classification and regression problems; 74 | 75 | - Nice and useful viz for analysing a performance model. 76 | 77 | Installation 78 | ------------- 79 | 80 | **Install** the package by the following command from PyPI: 81 | 82 | .. code-block:: bash 83 | 84 | pip install scikit-uplift 85 | 86 | Or install from source: 87 | 88 | .. code-block:: bash 89 | 90 | git clone https://github.com/maks-sh/scikit-uplift.git 91 | cd scikit-uplift 92 | python setup.py install 93 | 94 | Documentation 95 | -------------- 96 | 97 | The full documentation is available at `uplift-modeling.com`_. 98 | 99 | Or you can build the documentation locally using `Sphinx `_ 1.4 or later: 100 | 101 | .. code-block:: bash 102 | 103 | cd docs 104 | pip install -r requirements.txt 105 | make html 106 | 107 | And if you now point your browser to ``_build/html/index.html``, you should see a documentation site. 
108 | 109 | Quick Start 110 | ----------- 111 | 112 | See the **RetailHero tutorial notebook** (`EN `__ |Open In Colab1|_, `RU `__ |Open In Colab2|_) for details. 113 | 114 | **Train and predict uplift model** 115 | 116 | Use the intuitive python API to train uplift models with `sklift.models `__. 117 | 118 | .. code-block:: python 119 | 120 | # import approaches 121 | from sklift.models import SoloModel, ClassTransformation 122 | # import any estimator adheres to scikit-learn conventions. 123 | from lightgbm import LGBMClassifier 124 | 125 | # define models 126 | estimator = LGBMClassifier(n_estimators=10) 127 | 128 | # define metamodel 129 | slearner = SoloModel(estimator=estimator) 130 | 131 | # fit model 132 | slearner.fit( 133 | X=X_tr, 134 | y=y_tr, 135 | treatment=trmnt_tr, 136 | ) 137 | 138 | # predict uplift 139 | uplift_slearner = slearner.predict(X_val) 140 | 141 | **Evaluate your uplift model** 142 | 143 | Uplift model evaluation metrics are available in `sklift.metrics `__. 144 | 145 | .. code-block:: python 146 | 147 | # import metrics to evaluate your model 148 | from sklift.metrics import ( 149 | uplift_at_k, uplift_auc_score, qini_auc_score, weighted_average_uplift 150 | ) 151 | 152 | 153 | # Uplift@30% 154 | uplift_at_k = uplift_at_k(y_true=y_val, uplift=uplift_slearner, 155 | treatment=trmnt_val, 156 | strategy='overall', k=0.3) 157 | 158 | # Area Under Qini Curve 159 | qini_coef = qini_auc_score(y_true=y_val, uplift=uplift_slearner, 160 | treatment=trmnt_val) 161 | 162 | # Area Under Uplift Curve 163 | uplift_auc = uplift_auc_score(y_true=y_val, uplift=uplift_slearner, 164 | treatment=trmnt_val) 165 | 166 | # Weighted average uplift 167 | wau = weighted_average_uplift(y_true=y_val, uplift=uplift_slearner, 168 | treatment=trmnt_val) 169 | 170 | **Vizualize the results** 171 | 172 | Visualize performance metrics with `sklift.viz `__. 173 | 174 | .. 
code-block:: python 175 | 176 | from sklift.viz import plot_qini_curve 177 | import matplotlib.pyplot as plt 178 | 179 | fig, ax = plt.subplots(1, 1) 180 | ax.set_title('Qini curves') 181 | 182 | plot_qini_curve( 183 | y_test, uplift_slearner, trmnt_test, 184 | perfect=True, name='Slearner', ax=ax 185 | ); 186 | 187 | plot_qini_curve( 188 | y_test, uplift_revert, trmnt_test, 189 | perfect=False, name='Revert label', ax=ax 190 | ); 191 | 192 | .. image:: docs/_static/images/quick_start_qini.png 193 | :width: 514px 194 | :height: 400px 195 | :alt: Example of some models qini curves, perfect qini curve and random qini curve 196 | 197 | Development 198 | ----------- 199 | 200 | We welcome new contributors of all experience levels. 201 | 202 | - Please see our `Contributing Guide `_ for more details. 203 | - By participating in this project, you agree to abide by its `Code of Conduct `__. 204 | 205 | Thanks to all our contributors! 206 | 207 | |Contribs| 208 | 209 | If you have any questions, please contact us at team@uplift-modeling.com 210 | 211 | Important links 212 | ~~~~~~~~~~~~~~~ 213 | 214 | - Official source code repo: https://github.com/maks-sh/scikit-uplift/ 215 | - Issue tracker: https://github.com/maks-sh/scikit-uplift/issues 216 | - Documentation: https://www.uplift-modeling.com/en/latest/ 217 | - User Guide: https://www.uplift-modeling.com/en/latest/user_guide/index.html 218 | - Contributing guide: https://www.uplift-modeling.com/en/latest/contributing.html 219 | - Release History: https://www.uplift-modeling.com/en/latest/changelog.html 220 | 221 | =============== 222 | 223 | Papers and materials 224 | --------------------- 225 | 1. Gutierrez, P., & Gérardy, J. Y. 226 | Causal Inference and Uplift Modelling: A Review of the Literature. 227 | In International Conference on Predictive Applications and APIs (pp. 1-13). 228 | 229 | 2. Artem Betlei, Criteo Research; Eustache Diemert, Criteo Research; Massih-Reza Amini, Univ. 
Grenoble Alpes 230 | Dependent and Shared Data Representations improve Uplift Prediction in Imbalanced Treatment Conditions 231 | FAIM'18 Workshop on CausalML. 232 | 233 | 3. Eustache Diemert, Artem Betlei, Christophe Renaudin, and Massih-Reza Amini. 2018. 234 | A Large Scale Benchmark for Uplift Modeling. 235 | In Proceedings of AdKDD & TargetAd (ADKDD’18). ACM, New York, NY, USA, 6 pages. 236 | 237 | 4. Athey, Susan, and Imbens, Guido. 2015. 238 | Machine learning methods for estimating heterogeneous causal effects. 239 | Preprint, arXiv:1504.01132. Google Scholar. 240 | 241 | 5. Oscar Mesalles Naranjo. 2012. 242 | Testing a New Metric for Uplift Models. 243 | Dissertation Presented for the Degree of MSc in Statistics and Operational Research. 244 | 245 | 6. Kane, K., V. S. Y. Lo, and J. Zheng. 2014. 246 | Mining for the Truly Responsive Customers and Prospects Using True-Lift Modeling: 247 | Comparison of New and Existing Methods. 248 | Journal of Marketing Analytics 2 (4): 218–238. 249 | 250 | 7. Maciej Jaskowski and Szymon Jaroszewicz. 251 | Uplift modeling for clinical trial data. 252 | ICML Workshop on Clinical Data Analysis, 2012. 253 | 254 | 8. Lo, Victor. 2002. 255 | The True Lift Model - A Novel Data Mining Approach to Response Modeling in Database Marketing. 256 | SIGKDD Explorations. 4. 78-86. 257 | 258 | 9. Zhao, Yan & Fang, Xiao & Simchi-Levi, David. 2017. 259 | Uplift Modeling with Multiple Treatments and General Response Types. 10.1137/1.9781611974973.66. 260 | 261 | 10. Nicholas J Radcliffe. 2007. 262 | Using control groups to target on predicted lift: Building and assessing uplift model. Direct Marketing Analytics Journal, (3):14–21, 2007. 263 | 264 | 11. Devriendt, F., Guns, T., & Verbeke, W. 2020. 265 | Learning to rank for uplift modeling. ArXiv, abs/2002.05897. 
266 | 267 | =============== 268 | 269 | Tags 270 | ~~~~~~~~~~~~~~~ 271 | **EN**: uplift modeling, uplift modelling, causal inference, causal effect, causality, individual treatment effect, true lift, net lift, incremental modeling 272 | 273 | **RU**: аплифт моделирование, Uplift модель 274 | 275 | **ZH**: uplift增量建模, 因果推断, 因果效应, 因果关系, 个体干预因果效应, 真实增量, 净增量, 增量建模 276 | 277 | -------------------------------------------------------------------------------- /notebooks/pipeline_usage_EN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example of usage model from sklift.models in sklearn.pipeline\n", 8 | "\n", 9 | "
\n", 10 | "
\n", 11 | " \n", 12 | " \n", 13 | " \n", 14 | "
\n", 15 | " SCIKIT-UPLIFT REPO | \n", 16 | " SCIKIT-UPLIFT DOCS | \n", 17 | " USER GUIDE\n", 18 | "
\n", 19 | " RUSSIAN VERSION\n", 20 | "\n", 21 | "
" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "ExecuteTime": { 28 | "end_time": "2020-04-26T12:44:35.435852Z", 29 | "start_time": "2020-04-26T12:44:35.239050Z" 30 | } 31 | }, 32 | "source": [ 33 | "This is a simple example on how to use [sklift.models](https://scikit-uplift.readthedocs.io/en/latest/api/models.html) with [sklearn.pipeline](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.pipeline).\n", 34 | "\n", 35 | "The data is taken from [MineThatData E-Mail Analytics And Data Mining Challenge dataset by Kevin Hillstrom](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html).\n", 36 | "\n", 37 | "This dataset contains 64,000 customers who last purchased within twelve months. The customers were involved in an e-mail test:\n", 38 | "* 1/3 were randomly chosen to receive an e-mail campaign featuring Mens merchandise.\n", 39 | "* 1/3 were randomly chosen to receive an e-mail campaign featuring Womens merchandise.\n", 40 | "* 1/3 were randomly chosen to not receive an e-mail campaign.\n", 41 | "\n", 42 | "During a period of two weeks following the e-mail campaign, results were tracked. 
The task is to tell the world if the Mens or Womens e-mail campaign was successful.\n", 43 | "\n", 44 | "The full description of the dataset can be found at the [link](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html).\n", 45 | "\n", 46 | "Firstly, install the necessary libraries:" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 1, 52 | "metadata": { 53 | "ExecuteTime": { 54 | "end_time": "2021-02-07T01:01:39.897817Z", 55 | "start_time": "2021-02-07T01:01:39.890409Z" 56 | } 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "!pip install scikit-uplift xgboost==1.0.2 category_encoders==2.1.0 -U" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "For simplicity of the example, we will leave only two user segments:\n", 68 | "* those who were sent an e-mail advertising campaign with women's products;\n", 69 | "* those who were not sent out the ad campaign.\n", 70 | "\n", 71 | "We will use the `visit` variable as the target variable." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 2, 77 | "metadata": { 78 | "ExecuteTime": { 79 | "end_time": "2021-02-07T01:01:42.438253Z", 80 | "start_time": "2021-02-07T01:01:39.901510Z" 81 | }, 82 | "scrolled": true 83 | }, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Shape of the dataset before processing: (64000, 8)\n", 90 | "Shape of the dataset after processing: (42693, 8)\n" 91 | ] 92 | }, 93 | { 94 | "data": { 95 | "text/html": [ 96 | "
\n", 97 | "\n", 110 | "\n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
recencyhistory_segmenthistorymenswomenszip_codenewbiechannel
0102) $100 - $200142.4410Surburban0Phone
163) $200 - $350329.0811Rural1Web
272) $100 - $200180.6501Surburban1Web
421) $0 - $10045.3410Urban0Web
562) $100 - $200134.8301Surburban0Phone
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " recency history_segment history mens womens zip_code newbie channel\n", 186 | "0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone\n", 187 | "1 6 3) $200 - $350 329.08 1 1 Rural 1 Web\n", 188 | "2 7 2) $100 - $200 180.65 0 1 Surburban 1 Web\n", 189 | "4 2 1) $0 - $100 45.34 1 0 Urban 0 Web\n", 190 | "5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone" 191 | ] 192 | }, 193 | "execution_count": 2, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "import pandas as pd\n", 200 | "from sklift.datasets import fetch_hillstrom\n", 201 | "\n", 202 | "\n", 203 | "%matplotlib inline\n", 204 | "\n", 205 | "bunch = fetch_hillstrom(target_col='visit')\n", 206 | "\n", 207 | "dataset, target, treatment = bunch['data'], bunch['target'], bunch['treatment']\n", 208 | "\n", 209 | "print(f'Shape of the dataset before processing: {dataset.shape}')\n", 210 | "\n", 211 | "# Selecting two segments\n", 212 | "dataset = dataset[treatment!='Mens E-Mail']\n", 213 | "target = target[treatment!='Mens E-Mail']\n", 214 | "treatment = treatment[treatment!='Mens E-Mail'].map({\n", 215 | " 'Womens E-Mail': 1,\n", 216 | " 'No E-Mail': 0\n", 217 | "})\n", 218 | "\n", 219 | "print(f'Shape of the dataset after processing: {dataset.shape}')\n", 220 | "dataset.head()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "Divide all the data into a training and validation sample:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 3, 233 | "metadata": { 234 | "ExecuteTime": { 235 | "end_time": "2021-02-07T01:01:42.579775Z", 236 | "start_time": "2021-02-07T01:01:42.442595Z" 237 | } 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "from sklearn.model_selection import train_test_split\n", 242 | "\n", 243 | "\n", 244 | "X_tr, X_val, y_tr, y_val, treat_tr, treat_val = train_test_split(\n", 245 | " dataset, target, treatment, test_size=0.5, 
random_state=42\n", 246 | ")" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "Select categorical features:" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 4, 259 | "metadata": { 260 | "ExecuteTime": { 261 | "end_time": "2021-02-07T01:01:42.600915Z", 262 | "start_time": "2021-02-07T01:01:42.585066Z" 263 | } 264 | }, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "['history_segment', 'zip_code', 'channel']\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "cat_cols = X_tr.select_dtypes(include='object').columns.tolist()\n", 276 | "print(cat_cols)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "Create the necessary objects and combining them into a pipieline:" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 5, 289 | "metadata": { 290 | "ExecuteTime": { 291 | "end_time": "2021-02-07T01:01:42.703537Z", 292 | "start_time": "2021-02-07T01:01:42.603875Z" 293 | } 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "from sklearn.pipeline import Pipeline\n", 298 | "from category_encoders import CatBoostEncoder\n", 299 | "from sklift.models import ClassTransformation\n", 300 | "from xgboost import XGBClassifier\n", 301 | "\n", 302 | "\n", 303 | "encoder = CatBoostEncoder(cols=cat_cols)\n", 304 | "estimator = XGBClassifier(max_depth=2, random_state=42)\n", 305 | "ct = ClassTransformation(estimator=estimator)\n", 306 | "\n", 307 | "my_pipeline = Pipeline([\n", 308 | " ('encoder', encoder),\n", 309 | " ('model', ct)\n", 310 | "])" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "Train pipeline as usual, but adding the treatment column in the step model as a parameter `model__treatment`." 
318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 6, 323 | "metadata": { 324 | "ExecuteTime": { 325 | "end_time": "2021-02-07T01:01:44.020040Z", 326 | "start_time": "2021-02-07T01:01:42.707311Z" 327 | } 328 | }, 329 | "outputs": [ 330 | { 331 | "name": "stderr", 332 | "output_type": "stream", 333 | "text": [ 334 | "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/sklearn/pipeline.py:354: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n", 335 | " self._final_estimator.fit(Xt, y, **fit_params)\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "my_pipeline = my_pipeline.fit(\n", 341 | " X=X_tr,\n", 342 | " y=y_tr,\n", 343 | " model__treatment=treat_tr\n", 344 | ")" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": { 350 | "ExecuteTime": { 351 | "end_time": "2020-04-26T18:07:44.970856Z", 352 | "start_time": "2020-04-26T18:07:44.964624Z" 353 | } 354 | }, 355 | "source": [ 356 | "Predict the uplift and calculate the uplift@30%" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 7, 362 | "metadata": { 363 | "ExecuteTime": { 364 | "end_time": "2021-02-07T01:01:44.184968Z", 365 | "start_time": "2021-02-07T01:01:44.047865Z" 366 | } 367 | }, 368 | "outputs": [ 369 | { 370 | "name": "stdout", 371 | "output_type": "stream", 372 | "text": [ 373 | "uplift@30%: 0.0661\n" 374 | ] 375 | } 376 | ], 377 | "source": [ 378 | "from sklift.metrics import uplift_at_k\n", 379 | "\n", 380 | "\n", 381 | "uplift_predictions = my_pipeline.predict(X_val)\n", 382 | "\n", 383 | "uplift_30 = uplift_at_k(y_val, uplift_predictions, treat_val, strategy='overall')\n", 384 | "print(f'uplift@30%: {uplift_30:.4f}')" 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "python 3", 391 | "language": "python", 392 | "name": "python3" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 
397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.6.1" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 2 409 | } 410 | -------------------------------------------------------------------------------- /notebooks/pipeline_usage_RU.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Пример использование подходов из sklift.models в sklearn.pipeline\n", 8 | "\n", 9 | "
\n", 10 | "
\n", 11 | " \n", 12 | " \n", 13 | " \n", 14 | "
\n", 15 | " SCIKIT-UPLIFT REPO | \n", 16 | " SCIKIT-UPLIFT DOCS | \n", 17 | " USER GUIDE\n", 18 | "
\n", 19 | " ENGLISH VERSION\n", 20 | "\n", 21 | "
" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "В данном ноутбуке рассмотрим простой пример применения одного из подходов прогнозирования uplift в [sklearn.pipeline](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.pipeline).\n", 29 | "\n", 30 | "Данные для примера взяты из [MineThatData E-Mail Analytics And Data Mining Challenge dataset by Kevin Hillstrom](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html). Этот набор данных содержит 64 000 клиентов, которые в последний раз совершали покупки в течение двенадцати месяцев. Среди клиентов была проведена рекламная кампания с помощью email рассылки:\n", 31 | "\n", 32 | "* 1/3 клиентов были выбраны случайным образом для получения электронного письма, рекламирующего мужскую продукцию;\n", 33 | "* 1/3 клиентов были выбраны случайным образом для получения электронного письма, рекламирующего женскую продукцию;\n", 34 | "* С оставшейся 1/3 коммуникацию не проводили.\n", 35 | "\n", 36 | "Для каждого клиента из выборки замерили факт перехода по ссылке в письме, факт совершения покупки и сумму трат за две недели, следущими после получения письма.\n", 37 | "\n", 38 | "Полное описание датасета можной найти по [ссылке](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html).\n", 39 | "\n", 40 | "Установим необходимые библиотеки:" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 1, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2021-02-07T01:01:58.302718Z", 49 | "start_time": "2021-02-07T01:01:58.298524Z" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# pip install scikit-uplift xgboost==1.0.2 category_encoders==2.1.0 -U" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "Для простоты примера оставим только два сегмента пользователей:\n", 62 | "* тем, кому рассылалась по электронной почте рекламная кампания с 
участием женских товаров;\n", 63 | "* тем, кому не рассылалась рекламная кампания.\n", 64 | "\n", 65 | "В качестве целевой переменной будем использовать переменную `visit`." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 2, 71 | "metadata": { 72 | "ExecuteTime": { 73 | "end_time": "2021-02-07T01:01:59.884250Z", 74 | "start_time": "2021-02-07T01:01:58.315398Z" 75 | } 76 | }, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Размер датасета до обработки: (64000, 8)\n", 83 | "Размер датасета после обработки: (42693, 8)\n" 84 | ] 85 | }, 86 | { 87 | "data": { 88 | "text/html": [ 89 | "
\n", 90 | "\n", 103 | "\n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
recencyhistory_segmenthistorymenswomenszip_codenewbiechannel
0102) $100 - $200142.4410Surburban0Phone
163) $200 - $350329.0811Rural1Web
272) $100 - $200180.6501Surburban1Web
421) $0 - $10045.3410Urban0Web
562) $100 - $200134.8301Surburban0Phone
\n", 175 | "
" 176 | ], 177 | "text/plain": [ 178 | " recency history_segment history mens womens zip_code newbie channel\n", 179 | "0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone\n", 180 | "1 6 3) $200 - $350 329.08 1 1 Rural 1 Web\n", 181 | "2 7 2) $100 - $200 180.65 0 1 Surburban 1 Web\n", 182 | "4 2 1) $0 - $100 45.34 1 0 Urban 0 Web\n", 183 | "5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone" 184 | ] 185 | }, 186 | "execution_count": 2, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "import pandas as pd\n", 193 | "from sklift.datasets import fetch_hillstrom\n", 194 | "\n", 195 | "\n", 196 | "%matplotlib inline\n", 197 | "\n", 198 | "bunch = fetch_hillstrom(target_col='visit')\n", 199 | "\n", 200 | "dataset, target, treatment = bunch['data'], bunch['target'], bunch['treatment']\n", 201 | "\n", 202 | "print(f'Размер датасета до обработки: {dataset.shape}')\n", 203 | "\n", 204 | "# Selecting two segments\n", 205 | "dataset = dataset[treatment!='Mens E-Mail']\n", 206 | "target = target[treatment!='Mens E-Mail']\n", 207 | "treatment = treatment[treatment!='Mens E-Mail'].map({\n", 208 | " 'Womens E-Mail': 1,\n", 209 | " 'No E-Mail': 0\n", 210 | "})\n", 211 | "\n", 212 | "print(f'Размер датасета после обработки: {dataset.shape}')\n", 213 | "dataset.head()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "Разобъем все данные на обучающую и валидационную выборку:" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 3, 226 | "metadata": { 227 | "ExecuteTime": { 228 | "end_time": "2021-02-07T01:01:59.976727Z", 229 | "start_time": "2021-02-07T01:01:59.889576Z" 230 | } 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "from sklearn.model_selection import train_test_split\n", 235 | "\n", 236 | "\n", 237 | "X_tr, X_val, y_tr, y_val, treat_tr, treat_val = train_test_split(\n", 238 | " dataset, target, treatment, test_size=0.5, random_state=42\n", 239 | 
")" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "Select categorical features:" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 4, 252 | "metadata": { 253 | "ExecuteTime": { 254 | "end_time": "2021-02-07T01:02:00.003357Z", 255 | "start_time": "2021-02-07T01:01:59.983254Z" 256 | } 257 | }, 258 | "outputs": [ 259 | { 260 | "name": "stdout", 261 | "output_type": "stream", 262 | "text": [ 263 | "['history_segment', 'zip_code', 'channel']\n" 264 | ] 265 | } 266 | ], 267 | "source": [ 268 | "cat_cols = X_tr.select_dtypes(include='object').columns.tolist()\n", 269 | "print(cat_cols)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Создадим нужные объекты и объединим их в pipieline." 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 5, 282 | "metadata": { 283 | "ExecuteTime": { 284 | "end_time": "2021-02-07T01:02:00.079199Z", 285 | "start_time": "2021-02-07T01:02:00.009314Z" 286 | } 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "from sklearn.pipeline import Pipeline\n", 291 | "from category_encoders import CatBoostEncoder\n", 292 | "from sklift.models import ClassTransformation\n", 293 | "from xgboost import XGBClassifier\n", 294 | "\n", 295 | "\n", 296 | "encoder = CatBoostEncoder(cols=cat_cols)\n", 297 | "estimator = XGBClassifier(max_depth=2, random_state=42)\n", 298 | "ct = ClassTransformation(estimator=estimator)\n", 299 | "\n", 300 | "my_pipeline = Pipeline([\n", 301 | " ('encoder', encoder),\n", 302 | " ('model', ct)\n", 303 | "])" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": { 309 | "ExecuteTime": { 310 | "end_time": "2020-04-26T18:02:52.236917Z", 311 | "start_time": "2020-04-26T18:02:52.110138Z" 312 | } 313 | }, 314 | "source": [ 315 | "Обучать pipeline будем как обычно, но колонку treatment добавим как параметр шага model: `model__treatment`." 
316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 6, 321 | "metadata": { 322 | "ExecuteTime": { 323 | "end_time": "2021-02-07T01:02:01.332880Z", 324 | "start_time": "2021-02-07T01:02:00.085047Z" 325 | } 326 | }, 327 | "outputs": [ 328 | { 329 | "name": "stderr", 330 | "output_type": "stream", 331 | "text": [ 332 | "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/sklearn/pipeline.py:354: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n", 333 | " self._final_estimator.fit(Xt, y, **fit_params)\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "my_pipeline = my_pipeline.fit(\n", 339 | " X=X_tr,\n", 340 | " y=y_tr,\n", 341 | " model__treatment=treat_tr\n", 342 | ")" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "Предскажем uplift и посчитаем uplift@30%" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 7, 355 | "metadata": { 356 | "ExecuteTime": { 357 | "end_time": "2021-02-07T01:02:01.476617Z", 358 | "start_time": "2021-02-07T01:02:01.335371Z" 359 | } 360 | }, 361 | "outputs": [ 362 | { 363 | "name": "stdout", 364 | "output_type": "stream", 365 | "text": [ 366 | "uplift@30%: 0.0661\n" 367 | ] 368 | } 369 | ], 370 | "source": [ 371 | "from sklift.metrics import uplift_at_k\n", 372 | "\n", 373 | "\n", 374 | "uplift_predictions = my_pipeline.predict(X_val)\n", 375 | "\n", 376 | "uplift_30 = uplift_at_k(y_val, uplift_predictions, treat_val, strategy='overall')\n", 377 | "print(f'uplift@30%: {uplift_30:.4f}')" 378 | ] 379 | } 380 | ], 381 | "metadata": { 382 | "kernelspec": { 383 | "display_name": "python 3", 384 | "language": "python", 385 | "name": "python3" 386 | }, 387 | "language_info": { 388 | "codemirror_mode": { 389 | "name": "ipython", 390 | "version": 3 391 | }, 392 | "file_extension": ".py", 393 | "mimetype": "text/x-python", 394 | "name": "python", 395 | "nbconvert_exporter": 
"python", 396 | "pygments_lexer": "ipython3", 397 | "version": "3.6.1" 398 | }, 399 | "pycharm": { 400 | "stem_cell": { 401 | "cell_type": "raw", 402 | "source": [], 403 | "metadata": { 404 | "collapsed": false 405 | } 406 | } 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 2 411 | } -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Release History 2 | 3 | ## Legend for changelogs 4 | 5 | * 🔥 something big that you couldn’t do before. 6 | * 💥 something that you couldn’t do before. 7 | * 📝 a miscellaneous minor improvement. 8 | * 🔨 something that previously didn’t work as documented – or according to reasonable expectations – should now work. 9 | * ❗️ you will need to change your code to have the same effect in the future; or a feature will be removed in the future. 10 | 11 | ## Version 0.5.1 12 | 13 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.5.1/api/models/index.html) 14 | 15 | * 📝 Add docs page for [ClassTransformationReg](https://www.uplift-modeling.com/en/v0.5.1/api/models/ClassTransformationReg.html) model. 16 | 17 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.5.1/api/datasets/index.html) 18 | 19 | * 🔨 Fix bug in [fetch_x5](https://www.uplift-modeling.com/en/v0.5.1/api/datasets/fetch_x5.html) func. 20 | 21 | ### [User Guide](https://www.uplift-modeling.com/en/v0.5.1/user_guide/index.html) 22 | 23 | * 📝 Add page for [Transformed Outcome](https://www.uplift-modeling.com/en/v0.5.1/user_guide/models/transformed_outcome.html) approach. 24 | 25 | 26 | ## Version 0.5.0 27 | 28 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.5.0/api/models/index.html) 29 | 30 | * 🔥 Add ClassTransformationReg model by [@mcullan](https://github.com/mcullan) and [@ElisovaIra](https://github.com/ElisovaIra). 
31 | * 🔨 Add the ability to process a series with different indexes in the [TwoModels](https://www.uplift-modeling.com/en/v0.5.0/api/models.html#sklift.models.models.TwoModels) by [@flashlight101](https://github.com/flashlight101). 32 | 33 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.5.0/api/index/metrics.html) 34 | 35 | * 🔥 Add new metric [Maximum profit uplift measure](https://www.uplift-modeling.com/en/v0.5.0/api/metrics/max_prof_uplift.html) by [@rooti123](https://github.com/rooti123). 36 | 37 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.5.0/api/datasets/index.html) 38 | 39 | * 💥 Add checker based on hash for all datasets by [@flashlight101](https://github.com/flashlight101) 40 | * 📝 Add [scheme](https://www.uplift-modeling.com/en/v0.5.0/api/datasets/fetch_x5.html) of x5 dataframes. 41 | 42 | ### Miscellaneous 43 | * 📝 Improve Chinese tags by [@00helloworld](https://github.com/00helloworld) 44 | 45 | ## Version 0.4.1 46 | 47 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.1/api/datasets/index.html) 48 | 49 | * 🔨 Fix bug in dataset links. 50 | * 📝 Add about a company section 51 | 52 | ## Version 0.4.0 53 | 54 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.4.0/api/index/metrics.html) 55 | 56 | * 🔥 Add [make_uplift_scorer](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/make_uplift_scorer.html) function for interacting with the module ``sklearn.model_selection`` by [@wrapper228](https://github.com/wrapper228). 57 | * 🔥 Add new metric [average_squared_deviation](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/average_squared_deviation.html) function by [@Mogby](https://github.com/Mogby). 
58 | 59 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.4.0/api/viz/index.html) 60 | 61 | * 🔥 Added the ability to draw multiple plots on the same graph of [plot_uplift_curve](https://www.uplift-modeling.com/en/v0.4.0/api/viz/plot_uplift_curve.html) function and [plot_qini_curve](https://www.uplift-modeling.com/en/v0.4.0/api/viz/plot_qini_curve.html) function by [@flashlight101](https://github.com/flashlight101). 62 | 63 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html) 64 | 65 | * 💥 Add new dataset [fetch_megafon](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/fetch_megafon.html) function by [@ezhdi](https://github.com/ezhdi). 66 | * 📝 Improve documentation of [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html) by [@flashlight101](https://github.com/flashlight101) and [@ezhdi](https://github.com/ezhdi). 67 | 68 | 69 | ### Miscellaneous 70 | 71 | * 💥 Add new tutorial [Example of usage model from sklift.models in sklearn.model_selection](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb) by [@wrapper228](https://github.com/wrapper228). 72 | * 💥 Increased test coverage from 30% to 82% by [@flashlight101](https://github.com/flashlight101) and [@Ksyula](https://github.com/Ksyula) 73 | * 📝 Add EDA of available datasets on [Tutorials](https://www.uplift-modeling.com/en/v0.4.0/tutorials.html) page by [@lyutov89](https://github.com/lyutov89), [@ezhdi](https://github.com/ezhdi), [@patpanda94](https://github.com/patpanda94) and [@Ksyula](https://github.com/Ksyula). 74 | * 📝 Improve ["RetailHero tutorial"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb) by [@Ksyula](https://github.com/Ksyula).
75 | 76 | ## Version 0.3.2 77 | 78 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.2/api/datasets/index.html) 79 | 80 | * 🔨 Fix bug in [fetch_x5](https://www.uplift-modeling.com/en/v0.3.2/api/datasets/fetch_x5.html) function by [@Muhamob](https://github.com/Muhamob). 81 | 82 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.2/api/index/metrics.html) 83 | 84 | * 📝 Fix docstring in [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.3.2/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra). 85 | 86 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.3.2/api/viz/index.html) 87 | 88 | * 🔨 Fix bug in [plot_uplift_preds](https://www.uplift-modeling.com/en/v0.3.2/api/viz/plot_uplift_preds.html) function by [@bwbelljr](https://github.com/bwbelljr). 89 | 90 | ### Miscellaneous 91 | 92 | * 📝 Change some images in ["RetailHero tutorial"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb). 93 | 94 | ## Version 0.3.1 95 | 96 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.1/api/datasets/index.html) 97 | 98 | * 🔨 Fix bugs in [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.1/api/datasets/index.html) 99 | 100 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.1/api/index/metrics.html) 101 | 102 | * 📝 Improve [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.3.1/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra). 103 | 104 | ### Miscellaneous 105 | 106 | * 💥 Add tutorial ["Uplift modeling metrics"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_metrics_tutorial.ipynb) by [@ElisovaIra](https://github.com/ElisovaIra).
107 | 108 | ## Version 0.3.0 109 | 110 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.0/api/datasets/index.html) 111 | 112 | * 🔥 Add [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.0/api/datasets/index.html) by [@ElisovaIra](https://github.com/ElisovaIra), [@RobbStarkk](https://github.com/RobbStarkk), [@acssar](https://github.com/acssar), [@tankudo](https://github.com/tankudo), [@flashlight101](https://github.com/flashlight101), [@semenova-pd](https://github.com/semenova-pd), [@timfex](https://github.com/timfex) 113 | 114 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.3.0/api/models/index.html) 115 | 116 | * 📝 Add different checkers by [@ElisovaIra](https://github.com/ElisovaIra) 117 | 118 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.0/api/metrics/index.html) 119 | 120 | * 📝 Add different checkers by [@ElisovaIra](https://github.com/ElisovaIra) 121 | 122 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.3.0/api/viz/index.html) 123 | 124 | * 📝 Fix conflicting and duplicating default values by [@denniskorablev](https://github.com/denniskorablev) 125 | 126 | ### [User Guide](https://www.uplift-modeling.com/en/v0.3.0/user_guide/index.html) 127 | 128 | * 📝 Fix typos 129 | 130 | ## Version 0.2.0 131 | 132 | ### [User Guide](https://www.uplift-modeling.com/en/v0.2.0/user_guide/index.html) 133 | 134 | * 🔥 Add [User Guide](https://www.uplift-modeling.com/en/v0.2.0/user_guide/index.html) 135 | 136 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.2.0/api/models/index.html) 137 | 138 | * 💥 Add `treatment interaction` method to [SoloModel](https://www.uplift-modeling.com/en/v0.2.0/api/models/SoloModel.html) approach by [@AdiVarma27](https://github.com/AdiVarma27). 
139 | 140 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.2.0/api/index/metrics.html) 141 | 142 | * 💥 Add [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra). 143 | * 💥 Add [weighted_average_uplift](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/weighted_average_uplift.html) function by [@ElisovaIra](https://github.com/ElisovaIra). 144 | * 💥 Add [perfect_uplift_curve](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/perfect_uplift_curve.html) function. 145 | * 💥 Add [perfect_qini_curve](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/perfect_qini_curve.html) function. 146 | * 🔨 Add normalization in [uplift_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/uplift_auc_score.html) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/qini_auc_score.html) functions. 147 | * ❗ Remove metrics `auuc` and `auqc`. In exchange for them use respectively [uplift_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/uplift_auc_score.html) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/qini_auc_score.html) 148 | 149 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.2.0/api/viz/index.html) 150 | 151 | * 💥 Add [plot_uplift_curve](https://www.uplift-modeling.com/en/v0.2.0/api/viz/plot_uplift_curve.html) function. 152 | * 💥 Add [plot_qini_curve](https://www.uplift-modeling.com/en/v0.2.0/api/viz/plot_qini_curve.html) function. 153 | * ❗ Remove `plot_uplift_qini_curves`. 154 | 155 | ### Miscellaneous 156 | 157 | * 💥 Add contributors in main Readme and in main page of docs. 158 | * 💥 Add [contributing guide](https://www.uplift-modeling.com/en/v0.2.0/contributing.html). 159 | * 💥 Add [code of conduct](https://github.com/maks-sh/scikit-uplift/blob/master/.github/CODE_OF_CONDUCT.md). 
160 | * 📝 Reformat [Tutorials](https://www.uplift-modeling.com/en/v0.2.0/tutorials.html) page. 161 | * 📝 Add github buttons in docs. 162 | * 📝 Add logo compatibility with pypi. 163 | 164 | ## Version 0.1.2 165 | 166 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.1.2/api/models.html) 167 | 168 | * 🔨 Fix bugs in [TwoModels](https://www.uplift-modeling.com/en/v0.1.2/api/models.html#sklift.models.models.TwoModels) for regression problem. 169 | * 📝 Minor code refactoring. 170 | 171 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.1.2/api/metrics.html) 172 | 173 | * 📝 Minor code refactoring. 174 | 175 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.1.2/api/viz.html) 176 | 177 | * 💥 Add bar plot in [plot_uplift_by_percentile](https://www.uplift-modeling.com/en/v0.1.2/api/viz.html#sklift.viz.base.plot_uplift_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra). 178 | * 🔨 Fix bug in [plot_uplift_by_percentile](https://www.uplift-modeling.com/en/v0.1.2/api/viz.html#sklift.viz.base.plot_uplift_by_percentile). 179 | * 📝 Minor code refactoring. 180 | 181 | ## Version 0.1.1 182 | 183 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html) 184 | 185 | * 💥 Add [plot_uplift_by_percentile](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html#sklift.viz.base.plot_uplift_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra). 186 | * 🔨 Fix bug with import [plot_treatment_balance_curve](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html#sklift.viz.base.plot_treatment_balance_curve). 187 | 188 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.1.1/api/metrics.html) 189 | 190 | * 💥 Add [response_rate_by_percentile](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html#sklift.metrics.metrics.response_rate_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra). 
191 | * 🔨 Fix bug with import [uplift_auc_score](https://www.uplift-modeling.com/en/v0.1.1/api/metrics.html#sklift.metrics.metrics.uplift_auc_score) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.1.1/metrics.html#sklift.metrics.metrics.qini_auc_score). 192 | * 📝 Fix typos in docstrings. 193 | 194 | ### Miscellaneous 195 | 196 | * 💥 Add tutorial ["Example of usage model from sklift.models in sklearn.pipeline"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb). 197 | * 📝 Add link to Release History in main Readme.md. 198 | 199 | ## Version 0.1.0 200 | 201 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.1.0/api/models.html) 202 | 203 | * 📝 Fix typo in [TwoModels](https://www.uplift-modeling.com/en/v0.1.0/api/models.html#sklift.models.models.TwoModels) docstring by [@spiaz](https://github.com/spiaz). 204 | * 📝 Improve docstrings and add references to all approaches. 205 | 206 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html) 207 | 208 | * 💥 Add [treatment_balance_curve](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html#sklift.metrics.metrics.treatment_balance_curve) by [@spiaz](https://github.com/spiaz). 209 | * ❗️ The metrics `auuc` and `auqc` are now respectively renamed to [uplift_auc_score](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html#sklift.metrics.metrics.uplift_auc_score) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.1.0/metrics.html#sklift.metrics.metrics.qini_auc_score). So, `auuc` and `auqc` will be removed in 0.2.0. 210 | * ❗️ Add a new parameter `startegy` in [uplift_at_k](https://www.uplift-modeling.com/en/v0.1.0/metrics.html#sklift.metrics.metrics.uplift_at_k). 
211 | 212 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.1.0/api/viz.html) 213 | 214 | * 💥 Add [plot_treatment_balance_curve](https://www.uplift-modeling.com/en/v0.1.0/api/viz.html#sklift.viz.base.plot_treatment_balance_curve) by [@spiaz](https://github.com/spiaz). 215 | * 📝 fix typo in [plot_uplift_qini_curves](https://www.uplift-modeling.com/en/v0.1.0/api/viz.html#sklift.viz.base.plot_uplift_qini_curves) by [@spiaz](https://github.com/spiaz). 216 | 217 | ### Miscellaneous 218 | 219 | * ❗️ Remove sklift.preprocess submodule. 220 | * 💥 Add compatibility of tutorials with colab and add colab buttons by [@ElMaxuno](https://github.com/ElMaxuno). 221 | * 💥 Add Changelog. 222 | * 📝 Change the documentation structure. Add next pages: [Tutorials](https://www.uplift-modeling.com/en/v0.1.0/tutorials.html), [Release History](https://www.uplift-modeling.com/en/v0.1.0/changelog.html) and [Hall of fame](https://www.uplift-modeling.com/en/v0.1.0/hall_of_fame.html). -------------------------------------------------------------------------------- /sklift/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | from sklearn.tree import DecisionTreeClassifier 6 | from ..models import SoloModel 7 | 8 | from sklearn.utils._testing import assert_array_almost_equal 9 | 10 | from ..metrics import make_uplift_scorer 11 | from ..metrics import uplift_curve, uplift_auc_score, perfect_uplift_curve 12 | from ..metrics import qini_curve, qini_auc_score, perfect_qini_curve 13 | from ..metrics import (uplift_at_k, response_rate_by_percentile, 14 | weighted_average_uplift, uplift_by_percentile, treatment_balance_curve, average_squared_deviation) 15 | 16 | 17 | def make_predictions(binary): 18 | X_train, y_train, treat_train = (np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]]), 19 | np.array([0.0, 0.0, 1.0]), np.array([0.0, 1.0, 1.0])) 20 | X_val, y_val, 
treat_val = (np.array([[5.1, 3.4, 1.5, 0.2], [5.0, 3.5, 1.3, 0.3], [4.5, 2.3, 1.3, 0.3]]), 21 | np.array([0.0, 1.0, 0.0]), np.array([0.0, 1.0, 1.0])) 22 | 23 | if not binary: 24 | y_train, y_val = (np.array([2.0, 0.0, 1.0]), np.array([0.0, 1.0, 2.0])) 25 | 26 | model = DecisionTreeClassifier(random_state=0) 27 | 28 | s_model = SoloModel(model) 29 | s_model = s_model.fit(X_train, y_train, treat_train) 30 | uplift_preds = s_model.predict(X_val) 31 | 32 | return y_val, uplift_preds, treat_val 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "binary, test_x_actual, test_y_actual", 37 | [ 38 | (True, np.array([0, 3]), np.array([0, 1.5, ])), 39 | (False, np.array([0, 2, 3]), np.array([0.0, 3, 4.5])) 40 | ] 41 | ) 42 | def test_uplift_curve(binary, test_x_actual, test_y_actual): 43 | y_true, uplift, treatment = make_predictions(binary) 44 | 45 | if binary == False: 46 | with pytest.raises(Exception): 47 | x_actual, y_actual = uplift_curve(y_true, uplift, treatment) 48 | else: 49 | x_actual, y_actual = uplift_curve(y_true, uplift, treatment) 50 | 51 | assert_array_almost_equal(x_actual, test_x_actual) 52 | assert_array_almost_equal(y_actual, test_y_actual) 53 | assert x_actual.shape == y_actual.shape 54 | 55 | 56 | def test_uplift_curve_hard(): 57 | with pytest.raises(Exception): 58 | y_true, uplift, treatment = make_predictions(binary=True) 59 | y_true = np.zeros(y_true.shape) 60 | 61 | x_actual, y_actual = uplift_curve(y_true, uplift, treatment) 62 | 63 | assert_array_almost_equal(x_actual, np.array([0, 3])) 64 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0])) 65 | 66 | y_true = np.ones(y_true.shape) 67 | 68 | x_actual, y_actual = uplift_curve(y_true, uplift, treatment) 69 | 70 | assert_array_almost_equal(x_actual, np.array([0, 3])) 71 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0])) 72 | 73 | 74 | @pytest.mark.parametrize( 75 | "binary, test_x_actual, test_y_actual", 76 | [ 77 | (True, np.array([0, 1, 2, 3]), np.array([0., 1., 2., 1.5])), 78 | (False, 
np.array([0, 1, 2, 3]), np.array([0., 1., 2., 4.5])) 79 | ] 80 | ) 81 | def test_perfect_uplift_curve(binary, test_x_actual, test_y_actual): 82 | y_true, uplift, treatment = make_predictions(binary) 83 | if binary == False: 84 | with pytest.raises(Exception): 85 | x_actual, y_actual = perfect_uplift_curve(y_true, treatment) 86 | else: 87 | x_actual, y_actual = perfect_uplift_curve(y_true, treatment) 88 | assert_array_almost_equal(x_actual, test_x_actual) 89 | assert_array_almost_equal(y_actual, test_y_actual) 90 | assert x_actual.shape == y_actual.shape 91 | 92 | 93 | def test_perfect_uplift_curve_hard(): 94 | with pytest.raises(Exception): 95 | y_true, uplift, treatment = make_predictions(binary=True) 96 | y_true = np.zeros(y_true.shape) 97 | 98 | x_actual, y_actual = perfect_uplift_curve(y_true, treatment) 99 | 100 | assert_array_almost_equal(x_actual, np.array([0, 1, 3])) 101 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0])) 102 | 103 | y_true = np.ones(y_true.shape) 104 | 105 | x_actual, y_actual = perfect_uplift_curve(y_true, treatment) 106 | 107 | assert_array_almost_equal(x_actual, np.array([0, 2, 3])) 108 | assert_array_almost_equal(y_actual, np.array([0.0, 2.0, 0.0])) 109 | 110 | 111 | def test_uplift_auc_score(): 112 | y_true = [0, 1] 113 | uplift = [0.1, 0.3] 114 | treatment = [1, 0] 115 | assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 0.) 116 | 117 | y_true = [1, 0] 118 | uplift = [0.1, 0.3] 119 | treatment = [0, 1] 120 | assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 1.) 121 | 122 | with pytest.raises(Exception): 123 | y_true = [1, 1] 124 | uplift = [0.1, 0.3] 125 | treatment = [0, 1] 126 | assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 1.) 127 | 128 | y_true = [1, 1] 129 | uplift = [0.1, 0.3] 130 | treatment = [1, 0] 131 | assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), -1.) 
132 | 133 | y_true = [0, 1, 2] 134 | uplift = [0.1, 0.3, 0.9] 135 | treatment = [0, 1, 0] 136 | assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), -1.333333) 137 | 138 | y_true = [0, 1, 2] 139 | uplift = [0.1, 0.3, 0.9] 140 | treatment = [1, 0, 1] 141 | assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 1.333333) 142 | 143 | 144 | @pytest.mark.parametrize( 145 | "binary, test_x_actual, test_y_actual", 146 | [ 147 | (True, np.array([0, 3]), np.array([0, 1., ])), 148 | (False, np.array([0, 2, 3]), np.array([0., 3, 3.])) 149 | ] 150 | ) 151 | def test_qini_curve(binary, test_x_actual, test_y_actual): 152 | y_true, uplift, treatment = make_predictions(binary) 153 | 154 | if binary == False: 155 | with pytest.raises(Exception): 156 | x_actual, y_actual = qini_curve(y_true, uplift, treatment) 157 | else: 158 | x_actual, y_actual = qini_curve(y_true, uplift, treatment) 159 | assert_array_almost_equal(x_actual, test_x_actual) 160 | assert_array_almost_equal(y_actual, test_y_actual) 161 | assert x_actual.shape == y_actual.shape 162 | 163 | 164 | def test_qini_curve_hard(): 165 | with pytest.raises(Exception): 166 | y_true, uplift, treatment = make_predictions(binary=True) 167 | y_true = np.zeros(y_true.shape) 168 | 169 | x_actual, y_actual = qini_curve(y_true, uplift, treatment) 170 | 171 | assert_array_almost_equal(x_actual, np.array([0, 3])) 172 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0])) 173 | 174 | y_true = np.ones(y_true.shape) 175 | 176 | x_actual, y_actual = qini_curve(y_true, uplift, treatment) 177 | 178 | assert_array_almost_equal(x_actual, np.array([0, 3])) 179 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0])) 180 | 181 | 182 | @pytest.mark.parametrize( 183 | "binary, negative_effect, test_x_actual, test_y_actual", 184 | [ 185 | (True, True, np.array([0, 1, 3]), np.array([0., 1., 1.])), 186 | (True, False, np.array([0., 1., 3.]), np.array([0., 1., 1.])), 187 | ] 188 | ) 189 | def 
test_perfect_qini_curve(binary, negative_effect, test_x_actual, test_y_actual): 190 | y_true, uplift, treatment = make_predictions(binary) 191 | 192 | x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=negative_effect) 193 | 194 | assert_array_almost_equal(x_actual, test_x_actual) 195 | assert_array_almost_equal(y_actual, test_y_actual) 196 | assert x_actual.shape == y_actual.shape 197 | 198 | 199 | def test_perfect_qini_curve_hard(): 200 | with pytest.raises(Exception): 201 | y_true, uplift, treatment = make_predictions(binary=True) 202 | y_true = np.zeros(y_true.shape) 203 | 204 | x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=True) 205 | 206 | assert_array_almost_equal(x_actual, np.array([0, 3])) 207 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0])) 208 | 209 | x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=False) 210 | 211 | assert_array_almost_equal(x_actual, np.array([0., 0., 3.])) 212 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0])) 213 | 214 | y_true = np.ones(y_true.shape) 215 | 216 | x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=True) 217 | 218 | assert_array_almost_equal(x_actual, np.array([0, 2, 3])) 219 | assert_array_almost_equal(y_actual, np.array([0.0, 2.0, 0.0])) 220 | 221 | x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=False) 222 | 223 | assert_array_almost_equal(x_actual, np.array([0., 0., 3.])) 224 | assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0])) 225 | 226 | def test_perfect_qini_curve_error(): 227 | y_true, uplift, treatment = make_predictions(binary=True) 228 | with pytest.raises(TypeError): 229 | perfect_qini_curve(y_true, treatment, negative_effect=5) 230 | 231 | 232 | 233 | def test_qini_auc_score(): 234 | y_true = [0, 1] 235 | uplift = [0.1, 0.3] 236 | treatment = [1, 0] 237 | assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 1.) 
238 | 239 | y_true = [1, 0] 240 | uplift = [0.1, 0.3] 241 | treatment = [0, 1] 242 | assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 1.) 243 | 244 | with pytest.raises(Exception): 245 | y_true = [1, 1] 246 | uplift = [0.1, 0.3] 247 | treatment = [0, 1] 248 | assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 1.) 249 | 250 | y_true = [1, 1] 251 | uplift = [0.1, 0.3] 252 | treatment = [1, 0] 253 | assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 0.) 254 | 255 | y_true = [0, 1, 2] 256 | uplift = [0.1, 0.3, 0.9] 257 | treatment = [0, 1, 0] 258 | assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), -0.5) 259 | 260 | y_true = [0, 1, 2] 261 | uplift = [0.1, 0.3, 0.9] 262 | treatment = [1, 0, 1] 263 | assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 0.75) 264 | 265 | def test_qini_auc_score_error(): 266 | y_true = [1, 0] 267 | uplift = [0.1, 0.3] 268 | treatment = [0, 1] 269 | with pytest.raises(TypeError): 270 | qini_auc_score(y_true, uplift, treatment, negative_effect=5) 271 | 272 | 273 | def test_uplift_at_k(): 274 | y_true, uplift, treatment = make_predictions(binary=True) 275 | 276 | assert_array_almost_equal(uplift_at_k(y_true, uplift, treatment, strategy='by_group', k=1), np.array([0.])) 277 | #assert_array_almost_equal(uplift_at_k(y_true, uplift, treatment, strategy='overall', k=2), np.array([0.])) 278 | 279 | @pytest.mark.parametrize( 280 | "strategy, k", 281 | [ 282 | ('new_strategy', 1), 283 | ('by_group', -0.5), 284 | ('by_group', '1'), 285 | ('by_group', 2) 286 | ] 287 | ) 288 | def test_uplift_at_k_errors(strategy, k): 289 | y_true, uplift, treatment = make_predictions(binary=True) 290 | with pytest.raises(ValueError): 291 | uplift_at_k(y_true, uplift, treatment, strategy, k) 292 | 293 | 294 | @pytest.mark.parametrize( 295 | "strategy, group, response_rate", 296 | [ 297 | ('overall', 'treatment', np.array([[0.5], [0.125], [2.]])), 298 | ('by_group', 'treatment', 
np.array([[0.5], [0.125], [2.]])), 299 | ('overall', 'control', np.array([[0.], [0.], [1.]])), 300 | ('by_group', 'control', np.array([[0.], [0.], [1.]])) 301 | ] 302 | ) 303 | def test_response_rate_by_percentile(strategy, group, response_rate): 304 | y_true, uplift, treatment = make_predictions(binary=True) 305 | 306 | assert_array_almost_equal(response_rate_by_percentile(y_true, uplift, treatment, group, strategy, bins=1), 307 | response_rate) 308 | 309 | @pytest.mark.parametrize( 310 | "strategy, group, bins", 311 | [ 312 | ('new_strategy', 'control', 1), 313 | ('by_group', 'ctrl', 1), 314 | ('by_group', 'control', 0.5), 315 | ('by_group', 'control', 9999) 316 | ] 317 | ) 318 | def test_response_rate_by_percentile_errors(strategy, group, bins): 319 | y_true, uplift, treatment = make_predictions(binary=True) 320 | with pytest.raises(ValueError): 321 | response_rate_by_percentile(y_true, uplift, treatment, group=group, strategy=strategy, bins=bins) 322 | 323 | @pytest.mark.parametrize( 324 | "strategy, weighted_average", 325 | [ 326 | ('overall', 0.5), 327 | ('by_group', 0.5) 328 | ] 329 | ) 330 | def test_weighted_average_uplift(strategy, weighted_average): 331 | y_true, uplift, treatment = make_predictions(binary=True) 332 | 333 | assert_array_almost_equal(weighted_average_uplift(y_true, uplift, treatment, strategy, bins=1), weighted_average) 334 | 335 | 336 | @pytest.mark.parametrize( 337 | "strategy, bins", 338 | [ 339 | ('new_strategy', 1), 340 | ('by_group', 0.5), 341 | ('by_group', 9999) 342 | ] 343 | ) 344 | def test_weighted_average_uplift_errors(strategy, bins): 345 | y_true, uplift, treatment = make_predictions(binary=True) 346 | with pytest.raises(ValueError): 347 | weighted_average_uplift(y_true, uplift, treatment, strategy=strategy, bins=bins) 348 | 349 | 350 | @pytest.mark.parametrize( 351 | "strategy, bins, std, total, string_percentiles, data", 352 | [ 353 | ('overall', 1, False, False, False, np.array([[2., 1., 0.5, 0., 0.5]])), 354 | 
('overall', 1, True, True, True, np.array([[2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553], 355 | [2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553]])), 356 | ('by_group', 1, False, False, False, np.array([[2., 1., 0.5, 0., 0.5]])), 357 | ('by_group', 1, True, True, True, np.array([[2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553], 358 | [2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553]])) 359 | ] 360 | ) 361 | def test_uplift_by_percentile(strategy, bins, std, total, string_percentiles, data): 362 | y_true, uplift, treatment = make_predictions(binary=True) 363 | 364 | assert_array_almost_equal( 365 | uplift_by_percentile(y_true, uplift, treatment, strategy, bins, std, total, string_percentiles), data) 366 | 367 | @pytest.mark.parametrize( 368 | "strategy, bins, std, total, string_percentiles", 369 | [ 370 | ('new_strategy', 1, True, True, True), 371 | ('by_group', 0.5, True, True, True), 372 | ('by_group', 9999, True, True, True), 373 | ('by_group', 1, 2, True, True), 374 | ('by_group', 1, True, True, 2), 375 | ('by_group', 1, True, 2, True) 376 | ] 377 | ) 378 | def test_uplift_by_percentile_errors(strategy, bins, std, total, string_percentiles): 379 | y_true, uplift, treatment = make_predictions(binary=True) 380 | with pytest.raises(ValueError): 381 | uplift_by_percentile(y_true, uplift, treatment, strategy, bins, std, total, string_percentiles) 382 | 383 | 384 | def test_treatment_balance_curve(): 385 | y_true, uplift, treatment = make_predictions(binary=True) 386 | 387 | idx, balance = treatment_balance_curve(uplift, treatment, winsize=2) 388 | assert_array_almost_equal(idx, np.array([1., 100.])) 389 | assert_array_almost_equal(balance, np.array([1., 0.5])) 390 | 391 | @pytest.mark.parametrize( 392 | "strategy", 393 | [ 394 | ('overall'), 395 | ('by_group') 396 | ] 397 | ) 398 | def test_average_squared_deviation(strategy): 399 | y_true, uplift, treatment = make_predictions(binary=True) 400 | assert (average_squared_deviation(y_true, uplift, treatment, y_true, uplift, 
treatment, strategy, bins=1) == 0) 401 | 402 | @pytest.mark.parametrize( 403 | "strategy, bins", 404 | [ 405 | ('new_strategy', 1), 406 | ('by_group', 0.5), 407 | ('by_group', 9999) 408 | ] 409 | ) 410 | def test_average_squared_deviation_errors(strategy, bins): 411 | y_true, uplift, treatment = make_predictions(binary=True) 412 | with pytest.raises(ValueError): 413 | average_squared_deviation(y_true, uplift, treatment, y_true, uplift, treatment, strategy=strategy, bins=bins) 414 | 415 | def test_metric_name_error(): 416 | with pytest.raises(ValueError): 417 | make_uplift_scorer('new_scorer', [0, 1]) 418 | 419 | def test_make_scorer_error(): 420 | with pytest.raises(TypeError): 421 | make_uplift_scorer('qini_auc_score', []) 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | --------------------------------------------------------------------------------