├── sklift
├── tests
│ ├── __init__.py
│ ├── test_import.py
│ ├── conftest.py
│ ├── test_models.py
│ ├── test_datasets.py
│ ├── test_viz.py
│ └── test_metrics.py
├── __init__.py
├── utils
│ ├── __init__.py
│ └── utils.py
├── models
│ └── __init__.py
├── datasets
│ ├── __init__.py
│ └── descr
│ │ ├── megafon.rst
│ │ ├── x5.rst
│ │ ├── criteo.rst
│ │ ├── hillstrom.rst
│ │ └── lenta.rst
├── viz
│ └── __init__.py
└── metrics
│ └── __init__.py
├── test_requirements.txt
├── docs
├── tutorials.rst
├── requirements.txt
├── _static
│ ├── sklift-logo.png
│ ├── soc_net_logo.png
│ ├── images
│ │ ├── SoloModel.png
│ │ ├── memchik_RU.png
│ │ ├── sklift_404.png
│ │ ├── SoloModel_RU.png
│ │ ├── readme_img1.png
│ │ ├── quick_start_wau.png
│ │ ├── x5_table_scheme.png
│ │ ├── TwoModels_vanila.png
│ │ ├── quick_start_qini.png
│ │ ├── quick_start_uplift.png
│ │ ├── TwoModels_vanila_RU.png
│ │ ├── TwoModels_ddr_control.png
│ │ ├── TwoModels_ddr_control_RU.png
│ │ ├── user_guide
│ │ │ ├── ug_clients_types.jpg
│ │ │ ├── ug_data_collection.gif
│ │ │ ├── ug_revert_label_mem.png
│ │ │ ├── ug_uplift_approaches.png
│ │ │ └── ug_comparison_with_other_models.png
│ │ └── SoloModel_treatment_intercation.png
│ ├── sklift-github-logo.png
│ └── css
│ │ └── custom.css
├── api
│ ├── models
│ │ ├── TwoModels.rst
│ │ ├── SoloModel.rst
│ │ ├── ClassTransformation.rst
│ │ ├── ClassTransformationReg.rst
│ │ └── index.rst
│ ├── metrics
│ │ ├── qini_curve.rst
│ │ ├── uplift_at_k.rst
│ │ ├── uplift_curve.rst
│ │ ├── qini_auc_score.rst
│ │ ├── uplift_auc_score.rst
│ │ ├── max_prof_uplift.rst
│ │ ├── perfect_qini_curve.rst
│ │ ├── make_uplift_scorer.rst
│ │ ├── perfect_uplift_curve.rst
│ │ ├── uplift_by_percentile.rst
│ │ ├── treatment_balance_curve.rst
│ │ ├── weighted_average_uplift.rst
│ │ ├── average_squared_deviation.rst
│ │ ├── response_rate_by_percentile.rst
│ │ └── index.rst
│ ├── datasets
│ │ ├── get_data_dir.rst
│ │ ├── clear_data_dir.rst
│ │ ├── index.rst
│ │ ├── fetch_hillstrom.rst
│ │ ├── fetch_lenta.rst
│ │ ├── fetch_megafon.rst
│ │ ├── fetch_x5.rst
│ │ └── fetch_criteo.rst
│ ├── viz
│ │ ├── plot_qini_curve.rst
│ │ ├── plot_uplift_curve.rst
│ │ ├── plot_uplift_preds.rst
│ │ ├── plot_uplift_by_percentile.rst
│ │ ├── plot_treatment_balance_curve.rst
│ │ └── index.rst
│ └── index.rst
├── user_guide
│ ├── introduction
│ │ ├── index.rst
│ │ ├── data_collection.rst
│ │ ├── comparison.rst
│ │ ├── clients.rst
│ │ └── cate.rst
│ ├── models
│ │ ├── index.rst
│ │ ├── classification.rst
│ │ ├── transformed_outcome.rst
│ │ ├── solo_model.rst
│ │ ├── revert_label.rst
│ │ └── two_models.rst
│ └── index.rst
├── _templates
│ ├── footer.html
│ ├── layout.html
│ └── breadcrumbs.html
├── refs.bib
├── 404.rst
├── install.rst
├── Readme.rst
├── hall_of_fame.rst
├── Makefile
├── make.bat
├── contributing.md
├── conf.py
├── quick_start.rst
├── index.rst
└── changelog.md
├── .coveragerc
├── .gitattributes
├── MANIFEST.in
├── requirements.txt
├── pytest.ini
├── .github
├── ISSUE_TEMPLATE
│ ├── doc-report.md
│ ├── feature-request.md
│ └── bug-report.md
├── workflows
│ ├── PyPi_upload.yml
│ └── ci-test.yml
├── pull_request_template.md
├── CONTRIBUTING.md
└── CODE_OF_CONDUCT.md
├── .readthedocs.yml
├── LICENSE
├── setup.py
├── .gitignore
├── notebooks
├── Readme.rst
├── pipeline_usage_EN.ipynb
└── pipeline_usage_RU.ipynb
└── Readme.rst
/sklift/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
--------------------------------------------------------------------------------
/sklift/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.5.1'
2 |
--------------------------------------------------------------------------------
/docs/tutorials.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../notebooks/Readme.rst
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = sklift/tests/*,*__init__.py*
3 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py linguist-language=python
2 | *.ipynb linguist-documentation
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst
2 | recursive-include sklift/datasets/ *.rst
3 | include MANIFEST.in
--------------------------------------------------------------------------------
/sklift/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import check_is_binary
2 |
3 | __all__ = ['check_is_binary']
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==5.1.1
2 | sphinx-rtd-theme==1.0.0
3 | myst-parser
4 | sphinxcontrib-bibtex
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn>=0.21.0
2 | numpy>=1.16
3 | pandas
4 | matplotlib
5 | requests
6 | tqdm
7 |
--------------------------------------------------------------------------------
/docs/_static/sklift-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/sklift-logo.png
--------------------------------------------------------------------------------
/docs/_static/soc_net_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/soc_net_logo.png
--------------------------------------------------------------------------------
/docs/_static/images/SoloModel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/SoloModel.png
--------------------------------------------------------------------------------
/docs/_static/images/memchik_RU.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/memchik_RU.png
--------------------------------------------------------------------------------
/docs/_static/images/sklift_404.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/sklift_404.png
--------------------------------------------------------------------------------
/docs/_static/images/SoloModel_RU.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/SoloModel_RU.png
--------------------------------------------------------------------------------
/docs/_static/images/readme_img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/readme_img1.png
--------------------------------------------------------------------------------
/docs/_static/sklift-github-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/sklift-github-logo.png
--------------------------------------------------------------------------------
/docs/_static/images/quick_start_wau.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/quick_start_wau.png
--------------------------------------------------------------------------------
/docs/_static/images/x5_table_scheme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/x5_table_scheme.png
--------------------------------------------------------------------------------
/docs/_static/images/TwoModels_vanila.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_vanila.png
--------------------------------------------------------------------------------
/docs/_static/images/quick_start_qini.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/quick_start_qini.png
--------------------------------------------------------------------------------
/docs/_static/images/quick_start_uplift.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/quick_start_uplift.png
--------------------------------------------------------------------------------
/docs/_static/images/TwoModels_vanila_RU.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_vanila_RU.png
--------------------------------------------------------------------------------
/docs/_static/images/TwoModels_ddr_control.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_ddr_control.png
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | .wy-side-nav-search, .wy-nav-top {
2 | background: #0062a2;
3 | }
4 |
5 | .wy-breadcrumbs {
6 | font-size: 12px;
7 | }
--------------------------------------------------------------------------------
/docs/_static/images/TwoModels_ddr_control_RU.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/TwoModels_ddr_control_RU.png
--------------------------------------------------------------------------------
/docs/_static/images/user_guide/ug_clients_types.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_clients_types.jpg
--------------------------------------------------------------------------------
/docs/_static/images/user_guide/ug_data_collection.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_data_collection.gif
--------------------------------------------------------------------------------
/docs/_static/images/user_guide/ug_revert_label_mem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_revert_label_mem.png
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = --cache-clear --cov-report html --cov-report xml --cov-report term-missing --cov-config=.coveragerc --cov=sklift --junitxml=pytest.xml
--------------------------------------------------------------------------------
/docs/_static/images/SoloModel_treatment_intercation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/SoloModel_treatment_intercation.png
--------------------------------------------------------------------------------
/docs/_static/images/user_guide/ug_uplift_approaches.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_uplift_approaches.png
--------------------------------------------------------------------------------
/docs/_static/images/user_guide/ug_comparison_with_other_models.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maks-sh/scikit-uplift/master/docs/_static/images/user_guide/ug_comparison_with_other_models.png
--------------------------------------------------------------------------------
/sklift/models/__init__.py:
--------------------------------------------------------------------------------
from .models import SoloModel, ClassTransformation, ClassTransformationReg, TwoModels

# __all__ must contain the *names* of the public objects as strings, not the
# objects themselves: `from sklift.models import *` looks each entry up with
# getattr(module, name), and tooling (linters, Sphinx) expects strings too.
__all__ = ['SoloModel', 'ClassTransformation', 'ClassTransformationReg', 'TwoModels']
4 |
--------------------------------------------------------------------------------
/docs/api/models/TwoModels.rst:
--------------------------------------------------------------------------------
1 | ********************************
2 | `sklift.models <./>`_.TwoModels
3 | ********************************
4 |
5 | .. autoclass:: sklift.models.models.TwoModels
6 | :members:
--------------------------------------------------------------------------------
/docs/api/metrics/qini_curve.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.metrics <./>`_.qini_curve
3 | *****************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.qini_curve
--------------------------------------------------------------------------------
/docs/api/metrics/uplift_at_k.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.metrics <./>`_.uplift_at_k
3 | *****************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.uplift_at_k
--------------------------------------------------------------------------------
/docs/api/metrics/uplift_curve.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.metrics <./>`_.uplift_curve
3 | *****************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.uplift_curve
--------------------------------------------------------------------------------
/docs/api/datasets/get_data_dir.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.datasets <./>`_.get_data_dir
3 | *****************************************
4 |
5 | .. autofunction:: sklift.datasets.datasets.get_data_dir
--------------------------------------------------------------------------------
/docs/api/models/SoloModel.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.models <./>`_.SoloModel
3 | *****************************************
4 |
5 | .. autoclass:: sklift.models.models.SoloModel
6 | :members:
--------------------------------------------------------------------------------
/docs/api/datasets/clear_data_dir.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.datasets <./>`_.clear_data_dir
3 | *****************************************
4 |
5 | .. autofunction:: sklift.datasets.datasets.clear_data_dir
--------------------------------------------------------------------------------
/docs/api/metrics/qini_auc_score.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.metrics <./>`_.qini_auc_score
3 | *****************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.qini_auc_score
--------------------------------------------------------------------------------
/docs/api/viz/plot_qini_curve.rst:
--------------------------------------------------------------------------------
1 | ***********************************************
2 | `sklift.viz <./>`_.plot_qini_curve
3 | ***********************************************
4 |
5 | .. autofunction:: sklift.viz.base.plot_qini_curve
--------------------------------------------------------------------------------
/docs/api/metrics/uplift_auc_score.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.metrics <./>`_.uplift_auc_score
3 | *****************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.uplift_auc_score
--------------------------------------------------------------------------------
/docs/api/viz/plot_uplift_curve.rst:
--------------------------------------------------------------------------------
1 | ***********************************************
2 | `sklift.viz <./>`_.plot_uplift_curve
3 | ***********************************************
4 |
5 | .. autofunction:: sklift.viz.base.plot_uplift_curve
--------------------------------------------------------------------------------
/docs/api/viz/plot_uplift_preds.rst:
--------------------------------------------------------------------------------
1 | ***********************************************
2 | `sklift.viz <./>`_.plot_uplift_preds
3 | ***********************************************
4 |
5 | .. autofunction:: sklift.viz.base.plot_uplift_preds
--------------------------------------------------------------------------------
/docs/api/metrics/max_prof_uplift.rst:
--------------------------------------------------------------------------------
1 | **********************************************
2 | `sklift.metrics <./>`_.max_prof_uplift
3 | **********************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.max_prof_uplift
--------------------------------------------------------------------------------
/docs/api/metrics/perfect_qini_curve.rst:
--------------------------------------------------------------------------------
1 | **********************************************
2 | `sklift.metrics <./>`_.perfect_qini_curve
3 | **********************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.perfect_qini_curve
--------------------------------------------------------------------------------
/docs/user_guide/introduction/index.rst:
--------------------------------------------------------------------------------
1 | *************
2 | Introduction
3 | *************
4 |
5 | .. toctree::
6 | :maxdepth: 2
7 | :caption: Contents
8 |
9 | ./comparison
10 | ./cate
11 | ./data_collection
12 | ./clients
13 |
--------------------------------------------------------------------------------
/sklift/tests/test_import.py:
--------------------------------------------------------------------------------
# Capture any exception raised while importing the top-level sklift package,
# so the test below reports it as an assertion failure rather than a
# collection-time error.
_top_import_error = None
try:
    from .. import *  # noqa
except Exception as exc:
    _top_import_error = exc


def test_import_sklift():
    """The sklift package must import cleanly (smoke test)."""
    assert _top_import_error is None
10 |
--------------------------------------------------------------------------------
/docs/api/metrics/make_uplift_scorer.rst:
--------------------------------------------------------------------------------
1 | **********************************************
2 | `sklift.metrics <./>`_.make_uplift_scorer
3 | **********************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.make_uplift_scorer
6 |
--------------------------------------------------------------------------------
/docs/api/metrics/perfect_uplift_curve.rst:
--------------------------------------------------------------------------------
1 | **********************************************
2 | `sklift.metrics <./>`_.perfect_uplift_curve
3 | **********************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.perfect_uplift_curve
--------------------------------------------------------------------------------
/docs/api/metrics/uplift_by_percentile.rst:
--------------------------------------------------------------------------------
1 | *********************************************
2 | `sklift.metrics <./>`_.uplift_by_percentile
3 | *********************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.uplift_by_percentile
--------------------------------------------------------------------------------
/docs/api/models/ClassTransformation.rst:
--------------------------------------------------------------------------------
1 | *****************************************
2 | `sklift.models <./>`_.ClassTransformation
3 | *****************************************
4 |
5 | .. autoclass:: sklift.models.models.ClassTransformation
6 | :members:
--------------------------------------------------------------------------------
/docs/api/viz/plot_uplift_by_percentile.rst:
--------------------------------------------------------------------------------
1 | ***********************************************
2 | `sklift.viz <./>`_.plot_uplift_by_percentile
3 | ***********************************************
4 |
5 | .. autofunction:: sklift.viz.base.plot_uplift_by_percentile
--------------------------------------------------------------------------------
/docs/api/metrics/treatment_balance_curve.rst:
--------------------------------------------------------------------------------
1 | ***********************************************
2 | `sklift.metrics <./>`_.treatment_balance_curve
3 | ***********************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.treatment_balance_curve
--------------------------------------------------------------------------------
/docs/api/viz/plot_treatment_balance_curve.rst:
--------------------------------------------------------------------------------
1 | ***********************************************
2 | `sklift.viz <./>`_.plot_treatment_balance_curve
3 | ***********************************************
4 |
5 | .. autofunction:: sklift.viz.base.plot_treatment_balance_curve
--------------------------------------------------------------------------------
/docs/api/metrics/weighted_average_uplift.rst:
--------------------------------------------------------------------------------
1 | *************************************************
2 | `sklift.metrics <./>`_.weighted_average_uplift
3 | *************************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.weighted_average_uplift
--------------------------------------------------------------------------------
/docs/api/models/ClassTransformationReg.rst:
--------------------------------------------------------------------------------
1 | ********************************************
2 | `sklift.models <./>`_.ClassTransformationReg
3 | ********************************************
4 |
5 | .. autoclass:: sklift.models.models.ClassTransformationReg
6 | :members:
--------------------------------------------------------------------------------
/docs/_templates/footer.html:
--------------------------------------------------------------------------------
1 | {% extends "!footer.html" %}
2 | {%- block extrafooter %}
3 |
If you find a mistake in the docs, please create an issue on github.
4 | {{ super() }}
5 | {% endblock %}
--------------------------------------------------------------------------------
/docs/api/metrics/average_squared_deviation.rst:
--------------------------------------------------------------------------------
1 | *************************************************
2 | `sklift.metrics <./>`_.average_squared_deviation
3 | *************************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.average_squared_deviation
--------------------------------------------------------------------------------
/docs/api/metrics/response_rate_by_percentile.rst:
--------------------------------------------------------------------------------
1 | ****************************************************
2 | `sklift.metrics <./>`_.response_rate_by_percentile
3 | ****************************************************
4 |
5 | .. autofunction:: sklift.metrics.metrics.response_rate_by_percentile
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/doc-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Documentation"
3 | about: Report an issue related to https://scikit-uplift.readthedocs.io#
4 | ---
5 |
6 | ## 📚 Documentation
7 |
8 |
--------------------------------------------------------------------------------
/docs/api/datasets/index.rst:
--------------------------------------------------------------------------------
1 | ************************
2 | `sklift <../>`_.datasets
3 | ************************
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 |
8 | ./clear_data_dir
9 | ./get_data_dir
10 | ./fetch_lenta
11 | ./fetch_x5
12 | ./fetch_criteo
13 | ./fetch_hillstrom
14 | ./fetch_megafon
--------------------------------------------------------------------------------
/docs/api/viz/index.rst:
--------------------------------------------------------------------------------
1 | **********************
2 | `sklift <../>`_.viz
3 | **********************
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 |
8 | ./plot_uplift_preds
9 | ./plot_qini_curve
10 | ./plot_uplift_curve
11 | ./plot_treatment_balance_curve
12 | ./plot_uplift_by_percentile
13 |
14 |
15 |
--------------------------------------------------------------------------------
/docs/api/models/index.rst:
--------------------------------------------------------------------------------
1 | **********************
2 | `sklift <../>`_.models
3 | **********************
4 |
5 | See :ref:`Models <models>` section of the User Guide for further details.
6 |
7 | .. toctree::
8 | :maxdepth: 3
9 |
10 | ./SoloModel
11 | ./ClassTransformation
12 | ./ClassTransformationReg
13 | ./TwoModels
--------------------------------------------------------------------------------
/docs/refs.bib:
--------------------------------------------------------------------------------
1 | @inproceedings{Diemert2018,
2 | author = {{Diemert Eustache, Betlei Artem} and Renaudin, Christophe and Massih-Reza, Amini},
3 | title={A Large Scale Benchmark for Uplift Modeling},
4 | publisher = {ACM},
5 | booktitle = {Proceedings of the AdKDD and TargetAd Workshop, KDD, London,United Kingdom, August, 20, 2018},
6 | year = {2018}
7 | }
--------------------------------------------------------------------------------
/sklift/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets import (
2 | get_data_dir,
3 | clear_data_dir,
4 | fetch_x5, fetch_lenta,
5 | fetch_criteo, fetch_hillstrom,
6 | fetch_megafon
7 | )
8 |
9 | __all__ = [
10 | 'get_data_dir',
11 | 'clear_data_dir',
12 | 'fetch_x5', 'fetch_lenta',
13 | 'fetch_criteo', 'fetch_hillstrom',
14 | 'fetch_megafon'
15 | ]
--------------------------------------------------------------------------------
/sklift/viz/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import (
2 | plot_uplift_curve, plot_qini_curve, plot_uplift_preds,
3 | plot_uplift_by_percentile, plot_treatment_balance_curve,
4 | UpliftCurveDisplay
5 | )
6 |
7 | __all__ = [
8 | 'plot_uplift_curve', 'plot_qini_curve', 'plot_uplift_preds',
9 | 'plot_uplift_by_percentile', 'plot_treatment_balance_curve',
10 | 'UpliftCurveDisplay'
11 | ]
12 |
--------------------------------------------------------------------------------
/docs/404.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | *******************
4 | 404 Page Not Found
5 | *******************
6 |
7 | .. image:: _static/images/sklift_404.png
8 | :alt: 404 Page not found
9 | :align: center
10 | :width: 250 px
11 | :height: 250 px
12 |
13 | Sorry, we couldn't find that page.
14 |
15 | Try using the search box or go to the `homepage <https://www.uplift-modeling.com>`__.
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Feature request"
3 | about: Suggest an idea to improve scikit-uplift
4 | ---
5 |
6 | ## 💡 Feature request
7 |
8 |
9 |
10 | ## Motivation
11 |
12 |
13 |
14 | ## Additional context
15 |
16 |
--------------------------------------------------------------------------------
/docs/user_guide/models/index.rst:
--------------------------------------------------------------------------------
1 | .. _models:
2 |
3 | .. meta::
4 | :description lang=en:
5 | Introduction to approaches for building uplift model with examples
6 | on Python using scikit-uplift (sklift) package.
7 |
8 | ******
9 | Models
10 | ******
11 |
12 | .. toctree::
13 | :maxdepth: 3
14 | :caption: Contents
15 |
16 | ./classification
17 | ./solo_model
18 | ./revert_label
19 | ./transformed_outcome
20 | ./two_models
--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | .. meta::
2 | :description lang=en:
3 | scikit-uplift (sklift) api reference for modeling uplift and evaluate the causal effect of a treatment
4 | in scikit-learn style in python
5 |
6 | ************
7 | API sklift
8 | ************
9 |
10 | This is the modules reference of scikit-uplift.
11 |
12 | .. toctree::
13 | :maxdepth: 3
14 |
15 | ./models/index
16 | ./metrics/index
17 | ./viz/index
18 | ./datasets/index
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | *************
2 | Installation
3 | *************
4 |
5 | .. _PyPI: https://pypi.org/project/scikit-uplift/
6 | .. _source: https://github.com/maks-sh/scikit-uplift
7 |
8 | **Install** the package by the following command from PyPI_:
9 |
10 | .. code-block:: bash
11 |
12 | pip install scikit-uplift
13 |
14 | Or install from source_:
15 |
16 | .. code-block:: bash
17 |
18 | git clone https://github.com/maks-sh/scikit-uplift.git
19 | cd scikit-uplift
20 | python setup.py install
--------------------------------------------------------------------------------
/docs/Readme.rst:
--------------------------------------------------------------------------------
1 | .. _uplift-modeling.com: https://www.uplift-modeling.com/en/latest/index.html
2 |
3 | Documentation
4 | ===============
5 |
6 | The full documentation is available at `uplift-modeling.com`_.
7 |
8 | Or you can build the documentation locally using `Sphinx <https://www.sphinx-doc.org/>`_ 1.4 or later:
9 |
10 | .. code-block:: bash
11 |
12 | cd docs
13 | pip install -r requirements.txt
14 | make html
15 |
16 | And if you now point your browser to ``_build/html/index.html``, you should see a documentation site.
17 |
--------------------------------------------------------------------------------
/sklift/utils/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
def check_is_binary(array):
    """Check that an array consists only of the int or float binary values 0 (0.) and 1 (1.).

    Args:
        array (1d array-like): Array to check.

    Raises:
        ValueError: If the array contains any value other than 0/0. and 1/1.,
            or does not contain both values.
    """
    unique_values = np.unique(array)
    # np.array_equal handles arbitrary lengths safely; a naive elementwise
    # `==` against np.array([0, 1]) raises a broadcasting error whenever
    # np.unique() returns more (or fewer) than two values.
    if not np.array_equal(unique_values, [0, 1]):
        raise ValueError(f"Input array is not binary. "
                         f"Array should contain only int or float binary values 0 (or 0.) and 1 (or 1.). "
                         f"Got values {unique_values}.")
14 |
--------------------------------------------------------------------------------
/docs/api/metrics/index.rst:
--------------------------------------------------------------------------------
1 | ************************
2 | `sklift <../>`_.metrics
3 | ************************
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 |
8 | ./uplift_at_k
9 | ./uplift_curve
10 | ./perfect_uplift_curve
11 | ./uplift_auc_score
12 | ./qini_curve
13 | ./perfect_qini_curve
14 | ./qini_auc_score
15 | ./weighted_average_uplift
16 | ./uplift_by_percentile
17 | ./response_rate_by_percentile
18 | ./treatment_balance_curve
19 | ./average_squared_deviation
20 | ./max_prof_uplift
21 | ./make_uplift_scorer
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 | {%- block extrahead %}
3 | {{ super() }}
4 |
5 | {% endblock %}
6 | {% block menu %}
7 |
8 |
9 |
10 |
11 | {{ super() }}
12 | {% endblock %}
13 |
14 |
--------------------------------------------------------------------------------
/docs/api/datasets/fetch_hillstrom.rst:
--------------------------------------------------------------------------------
1 | .. _Hillstrom:
2 |
3 | ****************************************
4 | `sklift.datasets <./>`_.fetch_hillstrom
5 | ****************************************
6 |
7 | .. autofunction:: sklift.datasets.datasets.fetch_hillstrom
8 |
9 | .. include:: ../../../sklift/datasets/descr/hillstrom.rst
10 |
11 | About Hillstrom
12 | ##################
13 |
14 | The dataset was provided by Kevin Hillstrom.
15 | Kevin is President of MineThatData, a consultancy that helps CEOs understand the complex relationship between Customers, Advertising, Products, Brands, and Channels.
16 |
17 | Link to the blog: https://blog.minethatdata.com/
--------------------------------------------------------------------------------
/docs/_templates/breadcrumbs.html:
--------------------------------------------------------------------------------
1 | {% extends "!breadcrumbs.html" %}
2 | {% block breadcrumbs_aside %}
3 |
8 | {% endblock %}
--------------------------------------------------------------------------------
/docs/hall_of_fame.rst:
--------------------------------------------------------------------------------
1 | *************
2 | Hall of Fame
3 | *************
4 |
5 | Here are the links to the competitions, names of the winners and to their solutions, where scikit-uplift was used.
6 |
7 | `X5 Retail Hero: Uplift Modeling for Promotional Campaign `_
8 | ========================================================================================================================
9 |
10 | Predict how much the purchase probability could increase as a result of sending an advertising SMS.
11 |
12 | 2. `Kirill Liksakov `_
13 | `solution `_
14 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/sklift/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .metrics import (
2 | uplift_curve, perfect_uplift_curve, uplift_auc_score,
3 | qini_curve, perfect_qini_curve, qini_auc_score,
4 | uplift_at_k, response_rate_by_percentile,
5 | weighted_average_uplift, uplift_by_percentile, treatment_balance_curve,
6 | average_squared_deviation, make_uplift_scorer, max_prof_uplift
7 | )
8 |
9 | __all__ = [
10 | 'uplift_curve', 'perfect_uplift_curve', 'uplift_auc_score',
11 | 'qini_curve', 'perfect_qini_curve', 'qini_auc_score',
12 | 'uplift_at_k', 'response_rate_by_percentile',
13 | 'weighted_average_uplift', 'uplift_by_percentile', 'treatment_balance_curve',
14 | 'average_squared_deviation', 'make_uplift_scorer', 'max_prof_uplift'
15 | ]
16 |
--------------------------------------------------------------------------------
/.github/workflows/PyPi_upload.yml:
--------------------------------------------------------------------------------
1 | name: Upload to PyPi
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | deploy:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v2
14 | - name: Set up Python
15 | uses: actions/setup-python@v2
16 | with:
17 | python-version: '3.x'
18 | - name: Install dependencies
19 | run: |
20 | python -m pip install --upgrade pip
21 | pip install setuptools wheel twine
22 | - name: Build and publish
23 | env:
24 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
25 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
26 | run: |
27 | python setup.py sdist bdist_wheel
28 | twine upload dist/*
29 |
--------------------------------------------------------------------------------
/docs/api/datasets/fetch_lenta.rst:
--------------------------------------------------------------------------------
1 | .. _Lenta:
2 |
3 | ***********************************
4 | `sklift.datasets <./>`_.fetch_lenta
5 | ***********************************
6 |
7 | .. autofunction:: sklift.datasets.datasets.fetch_lenta
8 |
9 | .. include:: ../../../sklift/datasets/descr/lenta.rst
10 |
11 | About Lenta
12 | ##################
13 |
14 | .. figure:: https://upload.wikimedia.org/wikipedia/commons/7/73/Lenta_logo.svg
15 |
16 | `Lenta (Russian: Лентa) `__ is a Russian supermarket and hypermarket chain. With 149 locations across the country,
17 | it is one of Russia's largest retail chains in addition to being the country's second largest hypermarket chain.
18 |
19 | Link to the company's website: https://www.lenta.com/
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Build documentation in the docs/ directory with Sphinx
8 | build:
9 | os: ubuntu-20.04
10 | tools:
11 | python: "3.8"
12 | # jobs:
13 | # pre_build:
14 | # - cp -r notebooks docs/
15 |
16 | # Build documentation in the docs/ directory with Sphinx
17 | sphinx:
18 | builder: html
19 | configuration: docs/conf.py
20 | fail_on_warning: false
21 |
22 | # Optionally build your docs in additional formats such as PDF and ePub
23 | formats:
24 | - htmlzip
25 |
26 | # Optionally set the version of Python and requirements required to build your docs
27 | python:
28 | install:
29 | - requirements: docs/requirements.txt
30 | - requirements: requirements.txt
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Bug Report"
3 | about: Submit a bug report to help us improve scikit-uplift
4 | ---
5 |
6 | ## 🐛 Bug
7 |
8 |
9 |
10 | ## To Reproduce
11 |
12 | Steps to reproduce the behavior:
13 |
14 | 1.
15 | 1.
16 | 1.
17 |
18 |
19 |
20 | ## Expected behavior
21 |
22 |
23 |
24 | ## Environment
25 |
26 | - scikit-uplift version (e.g., 0.1.2):
27 | - scikit-learn version (e.g., 0.22.2):
28 | - Python version (e.g., 3.7):
29 | - OS (e.g., Linux):
30 | - Any other relevant information:
31 |
32 | ## Additional context
33 |
34 |
--------------------------------------------------------------------------------
/sklift/datasets/descr/megafon.rst:
--------------------------------------------------------------------------------
1 | MegaFon Uplift Competition Dataset
2 | =====================================
3 |
4 | `Machine learning competition website `_.
5 |
6 | Data description
7 | ################
8 |
 9 | The dataset is provided by MegaFon at the MegaFon Uplift Competition hosted in May 2021.
10 |
11 | The dataset contains synthetic data generated to resemble a real case that the company encountered.
12 |
13 |
14 |
15 | Fields
16 | ################
17 |
18 | * X_1...X_50: anonymized feature set
19 | * treatment_group (str): treatment/control group flag
20 | * conversion (binary): customer purchasing
21 |
22 | Key figures
23 | ################
24 | * Format: CSV
25 | * Size: 554M
26 | * Rows: 600,000
27 | * Response Ratio: .2
28 | * Treatment Ratio: .5
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/api/datasets/fetch_megafon.rst:
--------------------------------------------------------------------------------
1 | .. _Megafon:
2 |
3 | ***************************************
4 | `sklift.datasets <./>`_.fetch_megafon
5 | ***************************************
6 |
7 | .. autofunction:: sklift.datasets.datasets.fetch_megafon
8 |
9 | .. include:: ../../../sklift/datasets/descr/megafon.rst
10 |
11 | About MegaFon
12 | ##################
13 |
14 | .. figure:: https://upload.wikimedia.org/wikipedia/commons/9/9e/MegaFon_logo.svg
15 |
16 | `MegaFon (Russian: МегаФон) `__ , previously known as North-West GSM, is the second largest mobile phone operator and the third largest telecom operator in Russia.
17 | It works in the GSM, UMTS and LTE standard. As of June 2012, the company serves 62.1 million subscribers in Russia and 1.6 million in Tajikistan. It is headquartered in Moscow.
18 |
19 | Link to the company's website: https://megafon.ru/
--------------------------------------------------------------------------------
/docs/api/datasets/fetch_x5.rst:
--------------------------------------------------------------------------------
1 | .. _X5:
2 |
3 | ***********************************
4 | `sklift.datasets <./>`_.fetch_x5
5 | ***********************************
6 |
7 | .. autofunction:: sklift.datasets.datasets.fetch_x5
8 |
9 | .. include:: ../../../sklift/datasets/descr/x5.rst
10 |
11 | About X5
12 | ##################
13 |
14 | .. figure:: https://upload.wikimedia.org/wikipedia/en/8/83/X5_Retail_Group_logo_2015.png
15 |
16 | `X5 Group `__ is a leading Russian food retailer.
17 | The Company operates several retail formats: proximity stores under the Pyaterochka brand,
18 | supermarkets under the Perekrestok brand and hypermarkets under the Karusel brand, as well as the Perekrestok.ru online market,
19 | the 5Post parcel and Dostavka.Pyaterochka and Perekrestok. Bystro food delivery services.
20 |
21 | Link to the company's website: https://www.x5.ru/
--------------------------------------------------------------------------------
/docs/api/datasets/fetch_criteo.rst:
--------------------------------------------------------------------------------
1 | .. _Criteo:
2 |
3 | **************************************
4 | `sklift.datasets <./>`_.fetch_criteo
5 | **************************************
6 |
7 | .. autofunction:: sklift.datasets.datasets.fetch_criteo
8 |
9 | .. include:: ../../../sklift/datasets/descr/criteo.rst
10 |
11 | About Criteo
12 | ##################
13 |
14 | .. figure:: https://upload.wikimedia.org/wikipedia/commons/d/d2/Criteo_logo21.svg
15 |
16 | `Criteo `__ is an advertising company that provides online display advertisements.
17 | The company was founded and is headquartered in Paris, France. Criteo's product is a form of display advertising,
18 | which displays interactive banner advertisements, generated based on the online browsing preferences and behaviour for each customer.
19 | The solution operates on a pay per click/cost per click (CPC) basis.
20 |
21 | Link to the company's website: https://www.criteo.com/
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Maksim Shevchenko
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/sklift/datasets/descr/x5.rst:
--------------------------------------------------------------------------------
1 | X5 RetailHero Uplift Modeling Dataset
2 | =====================================
3 |
 4 | The dataset is provided by X5 Retail Group at the RetailHero hackathon hosted in winter 2019.
5 |
6 | The dataset contains raw retail customer purchases, raw information about products and general info about customers.
7 |
8 |
9 | `Machine learning competition website `_.
10 |
11 | Data description
12 | ################
13 |
14 | Data contains several parts:
15 |
16 | * train.csv: a subset of clients for training. The column *treatment_flg* indicates if there was a communication. The column *target* shows if there was a purchase afterward;
17 | * clients.csv: general info about clients;
18 | * purchases.csv: clients’ purchase history prior to communication.
19 |
20 | .. image:: ../../_static/images/x5_table_scheme.png
21 | :alt: X5 table schema
22 |
23 | Fields
24 | ################
25 |
26 | * treatment_flg (binary): information on performed communication
27 | * target (binary): customer purchasing
28 |
29 | Key figures
30 | ################
31 |
32 | * Format: CSV
33 | * Size: 647M (compressed) 4.17GB (uncompressed)
34 | * Rows:
35 |
36 | * in 'clients.csv': 400,162
37 | * in 'purchases.csv': 45,786,568
38 | * in 'uplift_train.csv': 200,039
39 |
40 | * Response Ratio: .62
41 | * Treatment Ratio: .5
42 |
43 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "Pull request"
3 | about: Make changes in scikit-uplift
4 | ---
5 |
6 | ## 📑 Description of the Change
7 |
8 |
13 |
14 | ## Verification Process
15 |
16 |
24 |
25 | ## Release Notes
26 |
27 |
42 |
43 | ## Additional info
44 |
45 |
--------------------------------------------------------------------------------
/docs/user_guide/index.rst:
--------------------------------------------------------------------------------
1 | .. _user_guide:
2 |
3 | **********
4 | User Guide
5 | **********
6 |
7 | .. image:: https://habrastorage.org/webt/hf/7i/nu/hf7inuu3agtnwl1yo0g--mznzno.jpeg
8 | :alt: Cover of User Guide for uplift modeling and causal inference
9 |
10 | Uplift modeling estimates the effect of communication action on some customer outcomes and gives an opportunity to efficiently target customers which are most likely to respond to a marketing campaign.
11 | It is relatively easy to implement, but surprisingly poorly covered in the machine learning courses and literature.
12 | This guide is going to shed some light on the essentials of causal inference estimating and uplift modeling.
13 |
14 | .. toctree::
15 | :maxdepth: 3
16 | :caption: Contents
17 |
18 | ./introduction/index
19 | ./models/index
20 |
21 | Credits
22 | --------
23 |
24 | **Authors:**
25 |
26 | - `Irina Elisova `_
27 | - `Maksim Shevchenko `_
28 |
29 | **Acknowledgements:**
30 |
31 | - `Kirill Liksakov `_ - uplift metrics research
32 | - `Alina Zhukova `_ - artwork: User Guide cover and key pictures
33 |
34 | Citations
35 | ----------
36 |
37 | If you find this User Guide useful for your research, please consider citing:
38 |
39 | .. code:: latex
40 |
41 | @misc{user-guide-for-uplift-modeling,
42 | author = {Maksim Shevchenko, Irina Elisova},
43 |            title = {User Guide for uplift modeling and causal inference},
44 | year = {2020},
45 | publisher = {GitHub},
46 | journal = {GitHub repository},
47 | howpublished = {\url{https://www.uplift-modeling.com/en/latest/user_guide/index.html}}
48 | }
--------------------------------------------------------------------------------
/sklift/datasets/descr/criteo.rst:
--------------------------------------------------------------------------------
1 | Criteo Uplift Modeling Dataset
2 | ================================
3 | This is a copy of `Criteo AI Lab Uplift Prediction dataset `_.
4 |
5 | Data description
6 | ################
7 |
8 | This dataset is constructed by assembling data resulting from several incrementality tests, a particular randomized trial procedure where a random part of the population is prevented from being targeted by advertising.
9 |
10 |
11 | Fields
12 | ################
13 |
14 | Here is a detailed description of the fields (they are comma-separated in the file):
15 |
16 | * **f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11**: feature values (dense, float)
17 | * **treatment**: treatment group. Flag if a company participates in the RTB auction for a particular user (binary: 1 = treated, 0 = control)
18 | * **exposure**: treatment effect, whether the user has been effectively exposed. Flag if a company wins in the RTB auction for the user (binary)
19 | * **conversion**: whether a conversion occurred for this user (binary, label)
20 | * **visit**: whether a visit occurred for this user (binary, label)
21 |
22 |
23 | Key figures
24 | ################
25 | * Format: CSV
26 | * Size: 297M (compressed) 3,2GB (uncompressed)
27 | * Rows: 13,979,592
28 | * Response Ratio:
29 |
30 | * Average `Visit` Rate: .046992
31 | * Average `Conversion` Rate: .00292
32 |
33 | * Treatment Ratio: .85
34 |
35 |
36 |
37 | This dataset is released along with the paper:
38 | “*A Large Scale Benchmark for Uplift Modeling*"
39 | Eustache Diemert, Artem Betlei, Christophe Renaudin; (Criteo AI Lab), Massih-Reza Amini (LIG, Grenoble INP)
40 | This work was published in: `AdKDD 2018 `_ Workshop, in conjunction with KDD 2018.
41 |
--------------------------------------------------------------------------------
/docs/user_guide/models/classification.rst:
--------------------------------------------------------------------------------
1 | ***********************
2 | Approach classification
3 | ***********************
4 |
5 | Uplift modeling techniques can be grouped into :guilabel:`data preprocessing` and :guilabel:`data processing` approaches.
6 |
7 | .. image:: ../../_static/images/user_guide/ug_uplift_approaches.png
8 | :align: center
9 | :alt: Classification of uplift modeling techniques: data preprocessing and data processing
10 |
11 | Data preprocessing
12 | ====================
13 |
14 | In the :guilabel:`preprocessing` approaches, existing out-of-the-box learning methods are used, after pre- or post-processing of the data and outcomes.
15 |
16 | A popular and generic data preprocessing approach is :ref:`the flipped label approach `, also called class transformation approach.
17 |
18 | Other data preprocessing approaches extend the set of predictor variables to allow for the estimation of uplift. An example is :ref:`the single model with treatment as feature `.
19 |
20 | Data processing
21 | ====================
22 |
23 | In the :guilabel:`data processing` approaches, new learning methods and methodologies are developed that aim to optimize expected uplift more directly.
24 |
25 | Data processing techniques include two categories: :guilabel:`indirect` and :guilabel:`direct` estimation approaches.
26 |
27 | :guilabel:`Indirect` estimation approaches include :ref:`the two-model model approach `.
28 |
29 | :guilabel:`Direct` estimation approaches are typically adaptations of decision tree algorithms. These adaptations include modified splitting criteria and dedicated pruning techniques.
30 |
31 | References
32 | ==========
33 |
34 | 1️⃣ Devriendt, Floris, Tias Guns and Wouter Verbeke. “Learning to rank for uplift modeling.” ArXiv abs/2002.05897 (2020): n. pag.
35 |
--------------------------------------------------------------------------------
/docs/user_guide/models/transformed_outcome.rst:
--------------------------------------------------------------------------------
1 | .. _ClassTransformationReg:
2 |
3 | ********************
4 | Transformed Outcome
5 | ********************
6 |
 7 | Let's redefine the target variable so that it indicates whether the treatment had an impact on the target,
 8 | i.e. whether the target would have been negative without the treatment:
9 |
10 | .. math::
11 | Z = Y * \frac{(W - p)}{(p * (1 - p))}
12 |
13 | * :math:`Y` - target vector,
14 | * :math:`W` - vector of binary communication flags, and
15 | * :math:`p` is a *propensity score* (the probability that each :math:`y_i` is assigned to the treatment group).
16 |
17 | It is important to note here that it is possible to estimate :math:`p` as the proportion of objects with :math:`W = 1`
18 | in the sample. Or use the method from [2], in which it is proposed to evaluate :math:`p` as a function of :math:`X` by
19 | training the classifier on the available data :math:`X = x`, and taking the communication flag vector :math:`W` as
20 | the target variable.
21 |
22 | .. image:: https://habrastorage.org/r/w1560/webt/35/d2/z_/35d2z_-3yhyqhwtw-mt-npws6xk.png
23 | :align: center
24 | :alt: Transformation of the target in Transformed Outcome approach
25 |
26 | After applying the formula, we get a new target variable :math:`Z_i` and can train a regression model with the error
27 | functional :math:`MSE= \frac{1}{n}\sum_{i=0}^{n} (Z_i - \hat{Z_i})^2`. Since it is precisely when using MSE that the
28 | predictions of the model are the conditional mathematical expectation of the target variable.
29 |
30 | It can be proved that the conditional expectation of the transformed target :math:`Z_i` is the desired causal effect:
31 |
32 | .. math::
33 | E[Z_i| X_i = x] = Y_i^1 - Y_i^0 = \tau_i
34 |
35 | .. hint::
36 | In sklift this approach corresponds to the :class:`.ClassTransformationReg` class.
37 |
38 | References
39 | ==========
40 |
41 | 1️⃣ Susan Athey and Guido W Imbens. Machine learning methods for estimating heterogeneouscausal effects. stat, 1050:5, 2015.
42 |
43 | 2️⃣ P. Richard Hahn, Jared S. Murray, and Carlos Carvalho. Bayesian regression tree models for causal inference: regularization, confounding, and heterogeneous effects. 2019.
--------------------------------------------------------------------------------
/docs/user_guide/introduction/data_collection.rst:
--------------------------------------------------------------------------------
1 | **********************
2 | Data collection
3 | **********************
4 |
5 | We need to evaluate a difference between two events that are mutually exclusive for a particular customer (either we communicate with a person, or we don't; you can't do both actions at the same time). This is why there are additional requirements for collecting data when building an uplift model.
6 |
7 | There are few additional steps different from a standard data collection procedure. You should run an experiment:
8 |
9 | 1. Randomly divide a representative part of the customer base into a treatment (receiving communication) and a control (receiving no communication) groups;
10 | 2. Evaluate the marketing experiment for the treatment group.
11 |
12 | Data collected from the marketing experiment consists of the customer's responses to the marketing offer (target).
13 |
14 | The only difference between the experiment and the future uplift model's campaign is a fact that in the first case we choose random customers to make a promotion. In the second case, the choice of a customer to communicate with is based on the predicted value returned by the uplift model. If the marketing campaign significantly differs from the experiment used to collect data, the model will be less accurate.
15 |
16 | There is a trick: before running the marketing campaign, it is recommended to randomly subset a small part of the customer base and divide it into a control and a treatment group again, similar to the previous experiment. Using this data, you will not only be able to accurately evaluate the effectiveness of the campaign but also collect additional data for a further model retraining.
17 |
18 | .. image:: ../../_static/images/user_guide/ug_data_collection.gif
19 | :alt: Animation: Design of a train data collection experiment for uplift modeling
20 |
21 | It is recommended to configure a development of the uplift model and the campaign launch as an iterative process: each iteration will collect new training data. It should consist of a mix of a random customer subset and customers selected by the model.
22 |
23 | References
24 | ==========
25 |
26 | 1️⃣ Verbeke, Wouter & Baesens, Bart & Bravo, Cristián. (2018). Profit Driven Business Analytics: A Practitioner's Guide to Transforming Big Data into Added Value.
--------------------------------------------------------------------------------
/docs/user_guide/introduction/comparison.rst:
--------------------------------------------------------------------------------
1 | ****************************
2 | Uplift vs other models
3 | ****************************
4 |
5 | Companies use various channels to promote a product to a customer: it can be SMS, push notification, chatbot message in social networks, and many others.
6 | There are several ways to use machine learning to select customers for a marketing campaign:
7 |
8 | .. image:: ../../_static/images/user_guide/ug_comparison_with_other_models.png
9 | :alt: Comparison with other models
10 |
11 | - :guilabel:`The Look-alike model` (or Positive Unlabeled Learning) evaluates a probability that the customer is going to accomplish a target action. A training dataset contains known positive objects (for instance, users who have installed an app) and random negative objects (a random subset of all other customers who have not installed the app). The model searches for customers who are similar to those who made the target action.
12 | - :guilabel:`The Response model` evaluates the probability that the customer is going to accomplish the target action if there was a communication (a.k.a treatment). In this case, the training dataset is data collected after some interaction with the customers. In contrast to the first approach, we have confirmed positive and negative observations at our disposal (for instance, the customer who decides to issue a credit card or to decline an offer).
13 | - :guilabel:`The Uplift model` evaluates the net effect of communication by trying to select only those customers who are going to perform the target action only when there is some advertising exposure presenting to them. The model predicts a difference between the customer's behavior when there is a treatment (communication) and when there is no treatment (no communication).
14 |
15 | When should we use uplift modeling?
16 |
17 | Uplift modeling is used when the customer's target action is likely to happen without any communication.
18 | For instance, we want to promote a popular product but we don't want to spend our marketing budget on customers who will buy the product anyway with or without communication.
19 | If the product is not popular and it has to be promoted to be bought, then a task turns to the response modeling task.
20 |
21 | References
22 | ==========
23 |
24 | 1️⃣ Radcliffe, N.J. (2007). Using control groups to target on predicted lift: Building and assessing uplift model. Direct Market J Direct Market Assoc Anal Council, 1:14–21, 2007.
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to scikit-uplift
2 |
3 | First off, thanks for taking the time to contribute! 🙌👍🎉
4 |
5 | All development is done on GitHub: https://github.com/maks-sh/scikit-uplift.
6 |
7 | ## Submitting a bug report or a feature request
8 |
9 | We use GitHub issues to track all bugs and feature requests.
10 | Feel free to open an issue if you have found a bug or wish to see a feature implemented at https://github.com/maks-sh/scikit-uplift/issues.
11 |
12 | ## Contributing code
13 |
14 | ### How to contribute
15 |
16 | The code in the master branch should meet the current release.
17 | So, please make a pull request to the ``dev`` branch.
18 |
19 | 1. Fork the [project repository](https://github.com/maks-sh/scikit-uplift).
20 | 2. Clone your fork of the scikit-uplift repo from your GitHub account to your local disk:
21 | ``` bash
22 | $ git clone https://github.com/YourName/scikit-uplift
23 | $ cd scikit-uplift
24 | ```
25 | 3. Add the upstream remote. This saves a reference to the main scikit-uplift repository, which you can use to keep your repository synchronized with the latest changes:
26 | ``` bash
27 | $ git remote add upstream https://github.com/maks-sh/scikit-uplift.git
28 | ```
29 | 4. Synchronize your ``dev`` branch with the upstream ``dev`` branch:
30 | ``` bash
31 | $ git checkout dev
32 | $ git pull upstream dev
33 | ```
34 | 5. Create a feature branch to hold your development changes:
35 | ``` bash
36 | $ git checkout -b feature/my_new_feature
37 | ```
38 | and start making changes. Always use a feature branch. It’s a good practice.
39 | 6. Develop the feature on your feature branch on your computer, using Git to do the version control. When you’re done editing, add changed files using ``git add .`` and then ``git commit``
40 | Then push the changes to your GitHub account with:
41 |
42 | ``` bash
43 | $ git push -u origin feature/my_new_feature
44 | ```
45 | 7. Create a pull request from your fork into ``dev`` branch.
46 |
47 | ### Styleguides
48 |
49 | #### Python
50 |
We follow the PEP8 style guide for Python. Docstrings follow the [Google style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html).
52 |
53 | #### Git Commit Messages
54 |
55 | * Use the present tense ("Add feature" not "Added feature")
56 | * Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
57 | * Limit the first line to 72 characters or less
58 | * Reference issues and pull requests liberally after the first line
59 |
--------------------------------------------------------------------------------
/sklift/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from collections import defaultdict
3 |
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 |
# Parameter grids for the parametrized dataset fixtures below.
n_vals = (100, 1000)  # number of samples
k_vals = (1, 5)  # number of features
np_types = (np.int32, np.float32, np.float64)  # feature dtypes to exercise
dataset_types = ('numpy', 'pandas')  # container type of the returned data
12 |
13 |
@pytest.fixture
def sensitive_classification_dataset():
    """Hand-crafted eight-row binary classification dataset.

    Returns:
        tuple: ``(features, target, treatment)`` -- a two-column
        DataFrame ("x1", "x2") and two Series ("y", "treat").
    """
    features = pd.DataFrame({
        "x1": [1, 0, 1, 0, 1, 0, 1, 1],
        "x2": [0, 0, 0, 0, 0, 1, 1, 1],
    })
    target = pd.Series([1, 1, 1, 0, 1, 0, 0, 0], name="y")
    treatment = pd.Series([1, 1, 1, 1, 0, 0, 0, 1], name="treat")
    return features, target, treatment
26 |
27 |
@pytest.fixture(
    scope="module", params=list(itertools.product(n_vals, k_vals, np_types, dataset_types))
)
def random_xy_dataset_regr(request):
    """Deterministic random regression dataset ``(X, y, treat)``.

    Parametrized over sample count, feature count, dtype and container
    type (numpy arrays or pandas objects).
    """
    n, k, np_type, dataset_type = request.param
    np.random.seed(42)  # fixed seed keeps the fixture reproducible
    features = np.random.normal(0, 2, (n, k)).astype(np_type)
    target = np.random.normal(0, 2, (n,))
    treatment = (np.random.normal(0, 2, (n,)) > 0.0).astype(int)
    if dataset_type == 'pandas':
        return pd.DataFrame(features), pd.Series(target), pd.Series(treatment)
    return features, target, treatment
40 |
41 |
@pytest.fixture(
    scope="module", params=list(itertools.product(n_vals, k_vals, np_types, dataset_types))
)
def random_xyt_dataset_clf(request):
    """Deterministic random binary-classification dataset ``(X, y, treat)``.

    The average conversion in the control group and in the treatment
    group must not be 0 or 1; draws violating that rule are rejected
    and regenerated.

    Returns:
        tuple: ``(X, y, treat)`` as numpy arrays or pandas objects,
        depending on the ``dataset_type`` parameter.
    """
    n, k, np_type, dataset_type = request.param
    # Seed once, *outside* the rejection loop. Seeding inside the loop
    # (the previous behavior) would regenerate the exact same sample on
    # every iteration, turning a single rejected draw into an infinite
    # loop. The first draw is unchanged, so accepted data is identical.
    np.random.seed(42)
    X = y = treat = None
    mean_target_ctrl, mean_target_trmnt = 0, 0
    while ((mean_target_ctrl == 0) or (mean_target_ctrl == 1) or
           (mean_target_trmnt == 0) or (mean_target_trmnt == 1)):
        X = np.random.normal(0, 2, (n, k)).astype(np_type)
        y = (np.random.normal(0, 2, (n,)) > 0.0).astype(int)
        treat = (np.random.normal(0, 2, (n,)) > 0.0).astype(int)
        # Per-group conversion rates via boolean masks (treat is 0/1).
        mean_target_ctrl = np.mean(y[treat == 0])
        mean_target_trmnt = np.mean(y[treat == 1])

    if dataset_type == 'numpy':
        return X, y, treat
    return pd.DataFrame(X), pd.Series(y), pd.Series(treat)
69 |
70 |
--------------------------------------------------------------------------------
/sklift/datasets/descr/hillstrom.rst:
--------------------------------------------------------------------------------
1 | Kevin Hillstrom Dataset: MineThatData
2 | =====================================
3 |
4 | Data description
5 | ################
6 |
This is a copy of the `MineThatData E-Mail Analytics And Data Mining Challenge dataset <https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html>`_.
8 |
9 | This dataset contains 64,000 customers who last purchased within twelve months.
10 | The customers were involved in an e-mail test.
11 |
12 | * 1/3 were randomly chosen to receive an e-mail campaign featuring Mens merchandise.
13 | * 1/3 were randomly chosen to receive an e-mail campaign featuring Womens merchandise.
14 | * 1/3 were randomly chosen to not receive an e-mail campaign.
15 |
16 | During a period of two weeks following the e-mail campaign, results were tracked.
17 | Your job is to tell the world if the Mens or Womens e-mail campaign was successful.
18 |
19 | Fields
20 | ################
21 |
22 | Historical customer attributes at your disposal include:
23 |
24 | * Recency: Months since last purchase.
25 | * History_Segment: Categorization of dollars spent in the past year.
26 | * History: Actual dollar value spent in the past year.
27 | * Mens: 1/0 indicator, 1 = customer purchased Mens merchandise in the past year.
28 | * Womens: 1/0 indicator, 1 = customer purchased Womens merchandise in the past year.
29 | * Zip_Code: Classifies zip code as Urban, Suburban, or Rural.
30 | * Newbie: 1/0 indicator, 1 = New customer in the past twelve months.
31 | * Channel: Describes the channels the customer purchased from in the past year.
32 |
33 | Another variable describes the e-mail campaign the customer received:
34 |
35 | * Segment
36 |
37 | * Mens E-Mail
38 | * Womens E-Mail
39 | * No E-Mail
40 |
41 | Finally, we have a series of variables describing activity in the two weeks following delivery of the e-mail campaign:
42 |
43 | * Visit: 1/0 indicator, 1 = Customer visited website in the following two weeks.
44 | * Conversion: 1/0 indicator, 1 = Customer purchased merchandise in the following two weeks.
45 | * Spend: Actual dollars spent in the following two weeks.
46 |
47 | Key figures
48 | ################
49 |
50 | * Format: CSV
51 | * Size: 433KB (compressed) 4,935KB (uncompressed)
52 | * Rows: 64,000
53 | * Response Ratio:
54 |
55 | * Average `visit` Rate: .15,
56 | * Average `conversion` Rate: .009,
57 | * the values in the `spend` column are unevenly distributed from 0.0 to 499.0
58 |
59 | * Treatment Ratio: The parts are distributed evenly between the *three* classes
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import sys
4 | from shutil import rmtree
5 |
6 | from setuptools import Command, find_packages, setup
7 |
# Package meta-data.
NAME = "scikit-uplift"
DESCRIPTION = "Classic approaches of Uplift modelling in scikit-learn style in python"
MAINTAINER = 'Maksim Shevchenko'
URL = "https://github.com/maks-sh/scikit-uplift"
REQUIRES_PYTHON = ">=3.4.0"

here = os.path.abspath(os.path.dirname(__file__))

# The long description comes straight from the README; a missing README
# is a packaging error, so let FileNotFoundError propagate here.
with open(os.path.join(here, 'Readme.rst'), encoding="utf-8") as f:
    LONG_DESCRIPTION = f.read()

# What packages are required for this module to be executed?
# Strip whitespace and drop blank lines: a plain split("\n") leaves empty
# strings that end up as bogus entries in install_requires.
try:
    with open(os.path.join(here, "requirements.txt"), encoding="utf-8") as f:
        REQUIRED = [line.strip() for line in f if line.strip()]
except FileNotFoundError:
    REQUIRED = []

# What packages are optional?
EXTRAS = {"test": ["pytest", "pytest-cov"]}
29 |
30 |
def get_version():
    """Return the ``__version__`` string parsed from ``sklift/__init__.py``."""
    init_path = os.path.join(here, "sklift", "__init__.py")
    with open(init_path, encoding="utf-8") as f:
        contents = f.read()
    match = re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', contents, re.M)
    return match.group(1)
35 |
36 |
def get_test_requirements():
    """Placeholder for collecting test requirements.

    NOTE(review): unused stub -- never called in this file and implicitly
    returns ``None``; presumably meant to read ``test_requirements.txt``
    one day. Confirm before relying on it.
    """
    pass
39 |
40 |
class UploadCommand(Command):
    """Custom ``setup.py upload`` command: clean old builds, then rebuild."""

    description = "Build and publish the package."
    user_options = []

    @staticmethod
    def status(s):
        """Print a status message."""
        print(s)

    def initialize_options(self):
        # Required by the Command interface; nothing to initialize.
        pass

    def finalize_options(self):
        # Required by the Command interface; nothing to finalize.
        pass

    def run(self):
        dist_dir = os.path.join(here, "dist")
        try:
            self.status("Removing previous builds...")
            rmtree(dist_dir)
        except OSError:
            # No previous build directory -- nothing to remove.
            pass

        self.status("Building Source and Wheel (universal) distribution...")
        os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))

        sys.exit()
69 |
70 |
setup(
    name=NAME,
    version=get_version(),
    description=DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/x-rst",
    maintainer=MAINTAINER,
    url=URL,
    # REQUIRES_PYTHON was defined above but never wired in; without this
    # line pip would happily install on unsupported interpreters.
    python_requires=REQUIRES_PYTHON,
    packages=find_packages(exclude=["tests", "docs", "images"]),
    include_package_data=True,
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    # Registers the custom build/publish command: `python setup.py upload`.
    cmdclass={"upload": UploadCommand},
)
89 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing to scikit-uplift
2 |
3 | First off, thanks for taking the time to contribute! 🙌👍🎉
4 |
5 | All development is done on GitHub: [https://github.com/maks-sh/scikit-uplift](https://github.com/maks-sh/scikit-uplift).
6 |
7 | ## Submitting a bug report or a feature request
8 |
9 | We use GitHub issues to track all bugs and feature requests.
10 | Feel free to open an issue if you have found a bug or wish to see a feature implemented at [https://github.com/maks-sh/scikit-uplift/issues](https://github.com/maks-sh/scikit-uplift/issues).
11 |
12 | ## Contributing code
13 |
14 | ### How to contribute
15 |
16 | The code in the master branch should meet the current release.
17 | So, please make a pull request to the ``dev`` branch.
18 |
19 | 1. Fork the [project repository](https://github.com/maks-sh/scikit-uplift).
20 | 2. Clone your fork of the scikit-uplift repo from your GitHub account to your local disk:
21 | ``` bash
22 | $ git clone https://github.com/YourName/scikit-uplift
23 | $ cd scikit-uplift
24 | ```
25 | 3. Add the upstream remote. This saves a reference to the main scikit-uplift repository, which you can use to keep your repository synchronized with the latest changes:
26 | ``` bash
27 | $ git remote add upstream https://github.com/maks-sh/scikit-uplift.git
28 | ```
29 | 4. Synchronize your ``dev`` branch with the upstream ``dev`` branch:
30 | ``` bash
31 | $ git checkout dev
32 | $ git pull upstream dev
33 | ```
34 | 5. Create a feature branch to hold your development changes:
35 | ``` bash
36 | $ git checkout -b feature/my_new_feature
37 | ```
38 | and start making changes. Always use a feature branch. It’s a good practice.
39 | 6. Develop the feature on your feature branch on your computer, using Git to do the version control. When you’re done editing, add changed files using ``git add .`` and then ``git commit``
40 | Then push the changes to your GitHub account with:
41 |
42 | ``` bash
43 | $ git push -u origin feature/my_new_feature
44 | ```
45 | 7. Create a pull request from your fork into ``dev`` branch.
46 |
47 | ### Styleguides
48 |
49 | #### Python
50 |
51 | We follow the PEP8 style guide for Python. Docstrings follow [google style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html).
52 |
53 | #### Git Commit Messages
54 |
55 | * Use the present tense ("Add feature" not "Added feature")
56 | * Use the imperative mood ("Move file to..." not "Moves file to...")
57 | * Limit the first line to 72 characters or less
58 | * Reference issues and pull requests liberally after the first line
59 | * If you want to use emojis, use them at the beginning of the line.
--------------------------------------------------------------------------------
/docs/user_guide/introduction/clients.rst:
--------------------------------------------------------------------------------
1 | ******************************************
2 | Types of customers
3 | ******************************************
4 |
5 | We can determine 4 types of customers based on a response to treatment:
6 |
7 | .. image:: ../../_static/images/user_guide/ug_clients_types.jpg
8 | :alt: Classification of customers based on their response to a treatment
9 | :width: 268 px
10 | :height: 282 px
11 | :align: center
12 |
13 | - :guilabel:`Do-Not-Disturbs` *(a.k.a. Sleeping-dogs)* have a strong negative response to marketing communication. They are going to purchase if *NOT* treated and will *NOT* purchase *IF* treated. It is not only a wasted marketing budget but also a negative impact. For instance, customers targeted could result in rejecting current products or services. In terms of math: :math:`W_i = 1, Y_i = 0` or :math:`W_i = 0, Y_i = 1`.
14 | - :guilabel:`Lost Causes` will *NOT* purchase the product *NO MATTER* they are contacted or not. The marketing budget in this case is also wasted because it has no effect. In terms of math: :math:`W_i = 1, Y_i = 0` or :math:`W_i = 0, Y_i = 0`.
15 | - :guilabel:`Sure Things` will purchase *ANYWAY* no matter they are contacted or not. There is no motivation to spend the budget because it also has no effect. In terms of math: :math:`W_i = 1, Y_i = 1` or :math:`W_i = 0, Y_i = 1`.
16 | - :guilabel:`Persuadables` will always respond *POSITIVE* to marketing communication. They are going to purchase *ONLY* if contacted (or sometimes they purchase *MORE* or *EARLIER* only if contacted). This customer's type should be the only target for the marketing campaign. In terms of math: :math:`W_i = 0, Y_i = 0` or :math:`W_i = 1, Y_i = 1`.
17 |
18 | Because we can't communicate and not communicate with the customer at the same time, we will never be able to observe exactly which type a particular customer belongs to.
19 |
Depending on the product characteristics and the customer base structure, some types may be absent. In addition, a customer response depends heavily on various characteristics of the campaign, such as a communication channel or a type and a size of the marketing offer. To maximize profit, these parameters should be selected.
21 |
22 | Thus, when predicting uplift score and selecting a segment by the highest score, we are trying to find the only one type: **persuadables**.
23 |
24 | References
25 | ==========
26 |
27 | 1️⃣ Kane, K., V. S. Y. Lo, and J. Zheng. Mining for the Truly Responsive Customers and Prospects Using True-Lift Modeling: Comparison of New and Existing Methods. Journal of Marketing Analytics 2 (4): 218–238. 2014.
28 |
29 | 2️⃣ Verbeke, Wouter & Baesens, Bart & Bravo, Cristián. (2018). Profit Driven Business Analytics: A Practitioner's Guide to Transforming Big Data into Added Value.
--------------------------------------------------------------------------------
/docs/user_guide/models/solo_model.rst:
--------------------------------------------------------------------------------
1 | .. _SoloModel:
2 |
3 | *********************************
4 | Single model approaches
5 | *********************************
6 |
7 | Single model with treatment as feature
8 | ========================================
9 |
10 | The most intuitive and simple uplift modeling technique. A training set consists of two groups: treatment samples and control samples. There is also a binary treatment flag added as a feature to the training set. After the model is trained, at the scoring time it is going to be applied twice:
11 | with the treatment flag equals `1` and with the treatment flag equals `0`. Subtracting these model's outcomes for each test sample, we will get an estimate of the uplift.
12 |
13 | .. image:: ../../_static/images/SoloModel.png
14 | :align: center
15 | :alt: Solo model dummy method
16 |
17 | .. hint::
18 | In sklift this approach corresponds to the :class:`.SoloModel` class and the **dummy** method.
19 |
20 | Treatment interaction
21 | =========================
22 |
23 | The single model approach has various modifications. For instance, we can update the number of attributes in the training set by adding
24 | the product of each attribute and the treatment flag:
25 |
26 | .. image:: ../../_static/images/SoloModel_treatment_intercation.png
27 | :align: center
28 | :alt: Solo model treatment interaction method
29 |
30 | .. hint::
31 | In sklift this approach corresponds to the :class:`.SoloModel` class and the **treatment_interaction** method.
32 |
33 |
34 |
35 | References
36 | ==========
37 |
38 | 1️⃣ Lo, Victor. (2002). The True Lift Model - A Novel Data Mining Approach to Response Modeling in Database Marketing. SIGKDD Explorations. 4. 78-86.
39 |
40 | Examples using ``sklift.models.SoloModel``
41 | ============================================
42 |
43 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg
44 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
45 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg
46 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
47 |
48 | 1. The overview of the basic approaches to solving the Uplift Modeling problem
49 |
50 | .. list-table::
51 | :align: center
52 | :widths: 12 15 10 8
53 |
54 | * - In English 🇬🇧
55 | - |Open In Colab1|
      - `nbviewer <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb>`__
      - `github <https://github.com/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb>`__
58 | * - In Russian 🇷🇺
59 | - |Open In Colab2|
      - `nbviewer <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb>`__
      - `github <https://github.com/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb>`__
--------------------------------------------------------------------------------
/docs/user_guide/introduction/cate.rst:
--------------------------------------------------------------------------------
1 | ******************************************
2 | Causal Inference: Basics
3 | ******************************************
4 |
In a perfect world, we want to calculate the difference between a person's reaction after receiving a communication and the same person's reaction without receiving any communication.
6 | But there is a problem: we can not make a communication (send an e-mail) and do not make a communication (no e-mail) at the same time.
7 |
8 | .. image:: https://habrastorage.org/webt/fl/fi/dz/flfidz416o7of5j0nmgdjqqkzfe.jpeg
9 | :alt: Joke about Schrodinger's cat
10 | :align: center
11 |
12 | Denoting :math:`Y_i^1` person :math:`i`’s outcome when receives the treatment (a presence of the communication) and :math:`Y_i^0` :math:`i`’s outcome when he receives no treatment (control, no communication), the :guilabel:`causal effect` :math:`\tau_i` of the treatment *vis-a-vis* no treatment is given by:
13 |
14 | .. math::
15 | \tau_i = Y_i^1 - Y_i^0
16 |
17 | Researchers are typically interested in estimating the :guilabel:`Conditional Average Treatment Effect` (CATE), that is, the expected causal effect of the treatment for a subgroup in the population:
18 |
19 | .. math::
20 | CATE = E[Y_i^1 \vert X_i] - E[Y_i^0 \vert X_i]
21 |
22 | Where :math:`X_i` - features vector describing :math:`i`-th person.
23 |
24 | We can observe neither causal effect nor CATE for the :math:`i`-th object, and, accordingly, we can't optimize it.
25 | But we can estimate CATE or *uplift* of an object:
26 |
27 | .. math::
28 | \textbf{uplift} = \widehat{CATE} = E[Y_i \vert X_i = x, W_i = 1] - E[Y_i \vert X_i = x, W_i = 0]
29 |
30 | Where:
31 |
32 | - :math:`W_i \in {0, 1}` - a binary variable: 1 if person :math:`i` receives the :guilabel:`treatment group`, and 0 if person :math:`i` receives no treatment :guilabel:`control group`;
33 | - :math:`Y_i` - person :math:`i`’s observed outcome, which is equal:
34 |
35 | .. math::
36 | Y_i = W_i * Y_i^1 + (1 - W_i) * Y_i^0 = \
37 | \begin{cases}
38 | Y_i^1, & \mbox{if } W_i = 1 \\
39 | Y_i^0, & \mbox{if } W_i = 0 \\
40 | \end{cases}
41 |
42 | This won’t identify the CATE unless one is willing to assume that :math:`W_i` is independent of :math:`Y_i^1` and :math:`Y_i^0` conditional on :math:`X_i`. This assumption is the so-called *Unconfoundedness Assumption* or the *Conditional Independence Assumption* (CIA) found in the social sciences and medical literature.
43 | This assumption holds true when treatment assignment is random conditional on :math:`X_i`.
44 | Briefly, this can be written as:
45 |
46 | .. math::
47 | CIA : \{Y_i^0, Y_i^1\} \perp \!\!\! \perp W_i \vert X_i
48 |
49 | Also, introduce additional useful notation.
50 | Let us define the :guilabel:`propensity score`, :math:`p(X_i) = P(W_i = 1| X_i)`, i.e. the probability of treatment given :math:`X_i`.
51 |
52 | References
53 | ==========
54 |
55 | 1️⃣ Gutierrez, P., & Gérardy, J. Y. (2017). Causal Inference and Uplift Modelling: A Review of the Literature. In International Conference on Predictive Applications and APIs (pp. 1-13).
--------------------------------------------------------------------------------
/.github/workflows/ci-test.yml:
--------------------------------------------------------------------------------
1 | name: Python package
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request_target:
7 |
8 | jobs:
9 | test:
10 | name: Check tests
11 | runs-on: ${{ matrix.os }}
12 | env:
13 | # fix the python version and the operating system for codecoverage commentator
14 | USING_COVERAGE_PY: '3.8'
15 | USING_COVERAGE_OS: 'ubuntu-latest'
16 | outputs:
17 | # fix the results of pytest for unix
18 | output1: ${{ steps.pytest.outputs.exit_code }}
19 |
20 | strategy:
21 | matrix:
22 | os: ['ubuntu-latest', 'windows-latest', 'macos-latest']
23 | python-version: ['3.6', '3.7', '3.8', '3.9']
24 | # GitHub does not cancel all in-progress jobs if any matrix job fails
25 | fail-fast: false
26 |
27 | steps:
28 | - uses: actions/checkout@v2
29 | # Install python
30 | - name: Set up Python ${{ matrix.python-version }}
31 | uses: actions/setup-python@v2
32 | with:
33 | python-version: ${{ matrix.python-version }}
34 | # Update pip and install dependencies
35 | - name: Install dependencies
36 | run: |
37 | python -m pip install --upgrade pip
38 | pip install . -r test_requirements.txt -r requirements.txt
39 | # Pytest in windows
40 | - name: Run PyTest windows
41 | if: ${{ matrix.os == 'windows-latest' }}
42 | run: |
43 | pytest | tee pytest-coverage.txt
44 | # Pytest in unix. Exit code of this run captures the exit status of tee and not of pytest
45 | # So, use $PIPESTATUS that holds the exit status of each command in pipeline
46 | - name: Run PyTest unix
47 | if: ${{ matrix.os != 'windows-latest' }}
48 | id: pytest
49 | run: |
50 | pytest | tee pytest-coverage.txt;
51 | exit_code=${PIPESTATUS[0]};
52 | echo "::set-output name=exit_code::$exit_code"
      # Comment on the results of the test coverage
54 | - name: Comment coverage
55 | if: contains(env.USING_COVERAGE_PY, matrix.python-version) && contains(env.USING_COVERAGE_OS, matrix.os)
56 | uses: MishaKav/pytest-coverage-comment@v1.1.6
57 | with:
58 | pytest-coverage-path: ./pytest-coverage.txt
59 | junitxml-path: ./pytest.xml
60 | # For unix workflow should have failed if exit code of pytest were 1
61 | - name: Check fail of pytest unix
62 | if: ${{ matrix.os != 'windows-latest' && steps.pytest.outputs.exit_code == 1 }}
63 | uses: actions/github-script@v3
64 | with:
65 | script: |
66 | core.setFailed('Some tests failed!')
67 |
68 | check_sphinx_build:
69 | name: Check Sphinx build for docs
70 | runs-on: ubuntu-latest
71 | strategy:
72 | matrix:
73 | python-version: [3.8]
74 | steps:
75 | - name: Checkout
76 | uses: actions/checkout@v2
77 | - name: Set up Python
78 | uses: actions/setup-python@v2
79 | with:
80 | python-version: ${{ matrix.python-version }}
81 | - name: Update pip and install dependencies
82 | run: |
83 | python -m pip install --upgrade pip
84 | pip install -r docs/requirements.txt -r requirements.txt
85 | - name: Run Sphinx
86 | run: sphinx-build -W -b html docs /tmp/_docs_build
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | import sys
4 | import datetime
5 |
6 | sys.path.insert(0, os.path.abspath("../"))
7 |
8 |
def get_version():
    """Return the ``__version__`` string parsed from ``sklift/__init__.py``.

    The file is parsed with a regex rather than imported, so the docs
    build does not execute the package.
    """
    current_dir = os.path.abspath(os.path.dirname(__file__))
    root = os.path.dirname(current_dir)
    version_file = os.path.join(root, "sklift", "__init__.py")
    # Explicit encoding: without it the read uses the platform default
    # (e.g. cp1252 on Windows). This also matches setup.py's reader.
    with open(version_file, encoding="utf-8") as f:
        return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)
15 |
16 | # Configuration file for the Sphinx documentation builder.
17 | #
18 | # This file only contains a selection of the most common options. For a full
19 | # list see the documentation:
20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
21 |
22 | # -- Path setup --------------------------------------------------------------
23 |
24 | # If extensions (or modules to document with autodoc) are in another directory,
25 | # add these directories to sys.path here. If the directory is relative to the
26 | # documentation root, use os.path.abspath to make it absolute, like shown here.
27 | #
28 | # import os
29 | # import sys
30 | # sys.path.insert(0, os.path.abspath('.'))
31 |
32 |
33 | # -- Project information -----------------------------------------------------
34 |
project = 'scikit-uplift'
author = 'Maksim Shevchenko and Contributors'
# Copyright year is computed at build time so it never goes stale.
copyright = "{}, {}".format(datetime.datetime.now().year, author)

# The full version, including alpha/beta/rc tags
release = get_version()


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.viewcode",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "myst_parser",
    "sphinx.ext.intersphinx",
    "sphinxcontrib.bibtex"
]

# Bibliography configuration for sphinxcontrib-bibtex citations.
bibtex_bibfiles = ['refs.bib']
bibtex_reference_style = 'author_year'

master_doc = 'index'

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'Readme.rst']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = [
    'css/custom.css',
]
# Script that renders the GitHub buttons used in the docs.
html_js_files = ['https://buttons.github.io/buttons.js']
html_logo = "./_static/sklift-logo.png"

# Removing the view source link
html_show_sourcelink = False

# Sidebar navigation depth for the Read the Docs theme.
html_theme_options = {
    'navigation_depth': 3,
}

trim_footnote_reference_space = True
97 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at team@uplift-modeling.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
--------------------------------------------------------------------------------
/sklift/tests/test_models.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import pytest
4 | import numpy as np
5 | import pandas as pd
6 | from sklearn.linear_model import LogisticRegression, LinearRegression
7 | from sklearn.pipeline import Pipeline
8 | from sklearn.preprocessing import StandardScaler
9 |
10 | from ..models import (
11 | SoloModel,
12 | ClassTransformation,
13 | TwoModels
14 | )
15 |
16 |
@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LogisticRegression(), method='dummy'),
        SoloModel(LogisticRegression(), method='treatment_interaction'),
        ClassTransformation(LogisticRegression()),
        TwoModels(LogisticRegression(), LogisticRegression(), method='vanilla'),
        TwoModels(LogisticRegression(), LogisticRegression(), method='ddr_control'),
        TwoModels(LogisticRegression(), LogisticRegression(), method='ddr_treatment'),
    ]
)
def test_shape_classification(model, random_xyt_dataset_clf):
    """Prediction length matches target length, standalone and in a Pipeline."""
    X, y, treat = random_xyt_dataset_clf
    fitted = model.fit(X, y, treat)
    assert fitted.predict(X).shape[0] == y.shape[0]
    pipeline = Pipeline(steps=[("scaler", StandardScaler()), ("clf", model)])
    fitted_pipeline = pipeline.fit(X, y, clf__treatment=treat)
    assert fitted_pipeline.predict(X).shape[0] == y.shape[0]
33 |
34 |
@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LinearRegression(), method='dummy'),
        SoloModel(LinearRegression(), method='treatment_interaction'),
        TwoModels(LinearRegression(), LinearRegression(), method='vanilla'),
        TwoModels(LinearRegression(), LinearRegression(), method='ddr_control'),
        TwoModels(LinearRegression(), LinearRegression(), method='ddr_treatment'),
    ]
)
def test_shape_regression(model, random_xy_dataset_regr):
    """Each regression meta-model yields one prediction per sample,
    both standalone and as the final step of a sklearn Pipeline."""
    X, y, treat = random_xy_dataset_regr
    n_samples = y.shape[0]

    fitted = model.fit(X, y, treat)
    assert fitted.predict(X).shape[0] == n_samples

    pipe = Pipeline(steps=[("scaler", StandardScaler()), ("clf", model)])
    pipe.fit(X, y, clf__treatment=treat)
    assert pipe.predict(X).shape[0] == n_samples
50 |
@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LogisticRegression(), method='dummy'),
        SoloModel(LogisticRegression(), method='treatment_interaction'),
    ]
)
def test_solomodel_fit_error(model):
    """SoloModel.fit must raise TypeError for this invalid input
    (target has three distinct values, i.e. is not binary)."""
    X = [[1., 0., 0.], [1., 0., 0.], [1., 0., 0.]]
    y = [1., 2., 3.]  # non-binary target — presumably the cause of the error; TODO confirm
    treatment = [0., 1., 0.]
    with pytest.raises(TypeError):
        model.fit(X, y, treatment)
62 |
@pytest.mark.parametrize(
    "model",
    [
        SoloModel(LogisticRegression(), method='dummy'),
        SoloModel(LogisticRegression(), method='treatment_interaction'),
    ]
)
def test_solomodel_pred_error(model):
    """After a successful fit, predict() must raise TypeError when handed
    a scalar instead of a 2-D feature array."""
    X_train = np.array([
        [5.1, 3.5, 1.4, 0.2],
        [4.9, 3.0, 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
    ])
    y_train = np.array([0.0, 0.0, 1.0])
    treat_train = np.array([0.0, 1.0, 1.0])

    model.fit(X_train, y_train, treat_train)
    with pytest.raises(TypeError):
        model.predict(1)
76 |
@pytest.mark.parametrize("method", ['method'])
def test_solomodel_method_error(method):
    """An unrecognized ``method`` name must be rejected with ValueError."""
    estimator = LogisticRegression()
    with pytest.raises(ValueError):
        SoloModel(estimator, method=method)
81 |
def test_classtransformation_fit_error():
    """ClassTransformation.fit must raise ValueError for this invalid input
    (target has three distinct values, i.e. is not binary)."""
    X = [[1., 0., 0.], [1., 0., 0.], [1., 0., 0.]]
    y = [1., 2., 3.]  # non-binary target
    treatment = [0., 1., 0.]
    model = ClassTransformation(LogisticRegression())
    with pytest.raises(ValueError):
        model.fit(X, y, treatment)
86 |
@pytest.mark.parametrize("method", ['method'])
def test_twomodels_method_error(method):
    """An unrecognized ``method`` name must be rejected with ValueError."""
    with pytest.raises(ValueError):
        TwoModels(
            LinearRegression(),
            LinearRegression(),
            method=method,
        )
91 |
def test_same_estimator_error():
    """Passing one shared estimator instance for both sub-models must raise
    ValueError (each sub-model needs its own estimator object)."""
    shared = LinearRegression()
    with pytest.raises(ValueError):
        TwoModels(shared, shared)
96 |
@pytest.mark.parametrize(
    "X, y, treatment",
    [
        # y index misaligned with X/treatment
        (pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                      columns=['a', 'b', 'c'], index=[0, 1, 2]),
         pd.Series(np.array([1, 0, 1]), index=[0, 2, 3]),
         pd.Series(np.array([0, 0, 1]), index=[0, 1, 2])),
        # treatment index misaligned with X/y
        (pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                      columns=['a', 'b', 'c'], index=[0, 1, 2]),
         pd.Series(np.array([1, 0, 1]), index=[0, 1, 2]),
         pd.Series(np.array([0, 0, 1]), index=[1, 2, 3])),
    ]
)
def test_input_data(X, y, treatment):
    """Misaligned pandas indices among X, y and treatment must trigger a UserWarning."""
    model = TwoModels(LinearRegression(), LinearRegression())
    with pytest.warns(UserWarning):
        model.fit(X, y, treatment)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### JupyterNotebooks template
3 | # gitignore template for Jupyter Notebooks
4 | # website: http://jupyter.org/
5 |
6 | .ipynb_checkpoints
7 | */.ipynb_checkpoints/*
8 |
9 | # Remove previous ipynb_checkpoints
10 | # git rm -r .ipynb_checkpoints/
11 | #
12 |
13 | ### Python template
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | pip-wheel-metadata/
37 | share/python-wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | MANIFEST
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | .hypothesis/
64 | .pytest_cache/
65 | pytest.xml
66 |
67 | # Translations
68 | *.mo
69 | *.pot
70 |
71 | # Django stuff:
72 | *.log
73 | local_settings.py
74 | db.sqlite3
75 | db.sqlite3-journal
76 |
77 | # Flask stuff:
78 | instance/
79 | .webassets-cache
80 |
81 | # Scrapy stuff:
82 | .scrapy
83 |
84 | # Sphinx documentation
85 | docs/_build/
86 |
87 | # PyBuilder
88 | target/
89 |
90 | # Jupyter Notebook
91 | .ipynb_checkpoints
92 |
93 | # IPython
94 | profile_default/
95 | ipython_config.py
96 |
97 | # pyenv
98 | .python-version
99 |
100 | # pipenv
101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
104 | # install all needed dependencies.
105 | #Pipfile.lock
106 |
107 | # celery beat schedule file
108 | celerybeat-schedule
109 |
110 | # SageMath parsed files
111 | *.sage.py
112 |
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 |
137 | # Pyre type checker
138 | .pyre/
139 |
140 | ### JetBrains template
141 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
142 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
143 |
144 | # User-specific stuff
145 | .idea/*
146 | .idea/**/workspace.xml
147 | .idea/**/tasks.xml
148 | .idea/**/usage.statistics.xml
149 | .idea/**/dictionaries
150 | .idea/**/shelf
151 |
152 | # Generated files
153 | .idea/**/contentModel.xml
154 |
155 | # Sensitive or high-churn files
156 | .idea/**/dataSources/
157 | .idea/**/dataSources.ids
158 | .idea/**/dataSources.local.xml
159 | .idea/**/sqlDataSources.xml
160 | .idea/**/dynamic.xml
161 | .idea/**/uiDesigner.xml
162 | .idea/**/dbnavigator.xml
163 |
164 | # Gradle
165 | .idea/**/gradle.xml
166 | .idea/**/libraries
167 |
168 | # Gradle and Maven with auto-import
169 | # When using Gradle or Maven with auto-import, you should exclude module files,
170 | # since they will be recreated, and may cause churn. Uncomment if using
171 | # auto-import.
172 | # .idea/modules.xml
173 | # .idea/*.iml
174 | # .idea/modules
175 | # *.iml
176 | # *.ipr
177 |
178 | # CMake
179 | cmake-build-*/
180 |
181 | # Mongo Explorer plugin
182 | .idea/**/mongoSettings.xml
183 |
184 | # File-based project format
185 | *.iws
186 |
187 | # IntelliJ
188 | out/
189 |
190 | # mpeltonen/sbt-idea plugin
191 | .idea_modules/
192 |
193 | # JIRA plugin
194 | atlassian-ide-plugin.xml
195 |
196 | # Cursive Clojure plugin
197 | .idea/replstate.xml
198 |
199 | # Crashlytics plugin (for Android Studio and IntelliJ)
200 | com_crashlytics_export_strings.xml
201 | crashlytics.properties
202 | crashlytics-build.properties
203 | fabric.properties
204 |
205 | # Editor-based Rest Client
206 | .idea/httpRequests
207 |
208 | # Android studio 3.1+ serialized cache file
209 | .idea/caches/build_file_checksums.ser
210 |
211 | notebooks/content/*
212 | notebooks/catboost_info
213 | notebooks/*.tmp
214 |
215 | ### PSD logo
216 | *.psd
217 |
--------------------------------------------------------------------------------
/docs/quick_start.rst:
--------------------------------------------------------------------------------
1 | .. _RU: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
2 | .. _EN: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
3 |
4 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg
5 | .. _Open In Colab1: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
6 |
7 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg
8 | .. _Open In Colab2: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
9 |
10 | ***********
11 | Quick Start
12 | ***********
13 |
14 | See the **RetailHero tutorial notebook** (`EN`_ |Open In Colab1|_, `RU`_ |Open In Colab2|_) for details.
15 |
16 | Train and predict your uplift model
17 | ====================================
18 |
Use the intuitive python API to train uplift models with `sklift.models <https://www.uplift-modeling.com/en/latest/api/models/index.html>`__.
20 |
21 | .. code-block:: python
22 | :linenos:
23 |
24 | # import approaches
25 | from sklift.models import SoloModel, ClassTransformation
# import any estimator that adheres to scikit-learn conventions.
27 | from lightgbm import LGBMClassifier
28 |
29 | # define models
30 | estimator = LGBMClassifier(n_estimators=10)
31 |
32 | # define metamodel
33 | slearner = SoloModel(estimator=estimator)
34 |
35 | # fit model
36 | slearner.fit(
37 | X=X_tr,
38 | y=y_tr,
39 | treatment=trmnt_tr,
40 | )
41 |
42 | # predict uplift
43 | uplift_slearner = slearner.predict(X_val)
44 |
45 | Evaluate your uplift model
46 | ===========================
47 |
Uplift model evaluation metrics are available in `sklift.metrics <https://www.uplift-modeling.com/en/latest/api/metrics/index.html>`__.
49 |
50 | .. code-block:: python
51 | :linenos:
52 |
53 | # import metrics to evaluate your model
54 | from sklift.metrics import (
55 | uplift_at_k, uplift_auc_score, qini_auc_score, weighted_average_uplift
56 | )
57 |
58 |
59 | # Uplift@30%
60 | uplift_at_k = uplift_at_k(y_true=y_val, uplift=uplift_slearner,
61 | treatment=trmnt_val,
62 | strategy='overall', k=0.3)
63 |
64 | # Area Under Qini Curve
65 | qini_coef = qini_auc_score(y_true=y_val, uplift=uplift_slearner,
66 | treatment=trmnt_val)
67 |
68 | # Area Under Uplift Curve
69 | uplift_auc = uplift_auc_score(y_true=y_val, uplift=uplift_slearner,
70 | treatment=trmnt_val)
71 |
72 | # Weighted average uplift
73 | wau = weighted_average_uplift(y_true=y_val, uplift=uplift_slearner,
74 | treatment=trmnt_val)
75 |
Visualize the results
77 | ======================
78 |
Visualize performance metrics with `sklift.viz <https://www.uplift-modeling.com/en/latest/api/viz/index.html>`__.
80 |
81 | .. code-block:: python
82 | :linenos:
83 |
84 | from sklift.viz import plot_qini_curve
85 | import matplotlib.pyplot as plt
86 |
87 | fig, ax = plt.subplots(1, 1)
88 | ax.set_title('Qini curves')
89 |
90 | plot_qini_curve(
91 | y_test, uplift_slearner, trmnt_test,
92 | perfect=True, name='Slearner', ax=ax
93 | );
94 |
95 | plot_qini_curve(
96 | y_test, uplift_revert, trmnt_test,
97 | perfect=False, name='Revert label', ax=ax
98 | );
99 |
100 | .. image:: _static/images/quick_start_qini.png
101 | :alt: Example of some models qini curves, perfect qini curve and random qini curve
102 |
103 |
104 | .. code-block:: python
105 | :linenos:
106 |
107 | from sklift.viz import plot_uplift_curve
108 | import matplotlib.pyplot as plt
109 |
110 | fig, ax = plt.subplots(1, 1)
111 | ax.set_title('Uplift curves')
112 |
113 | plot_uplift_curve(
114 | y_test, uplift_slearner, trmnt_test,
115 | perfect=True, name='Slearner', ax=ax
116 | );
117 |
118 | plot_uplift_curve(
119 | y_test, uplift_revert, trmnt_test,
120 | perfect=False, name='Revert label', ax=ax
121 | );
122 |
123 | .. image:: _static/images/quick_start_uplift.png
124 | :alt: Example of some uplift curves, perfect uplift curve and random uplift curve
125 |
126 | .. code-block:: python
127 | :linenos:
128 |
129 | from sklift.viz import plot_uplift_by_percentile
130 |
131 | plot_uplift_by_percentile(y_true=y_val, uplift=uplift_preds,
132 | treatment=treat_val, kind='bar')
133 |
134 | .. image:: _static/images/quick_start_wau.png
135 | :alt: Uplift by percentile visualization
136 |
--------------------------------------------------------------------------------
/sklift/datasets/descr/lenta.rst:
--------------------------------------------------------------------------------
1 | Lenta Uplift Modeling Dataset
2 | ================================
3 |
4 | Data description
5 | ################
6 |
An uplift modeling dataset containing data about Lenta's customers' grocery shopping and related marketing campaigns.
8 |
9 | Source: **BigTarget Hackathon** hosted by Lenta and Microsoft in summer 2020.
10 |
11 | Fields
12 | ################
13 |
14 | Major features:
15 |
16 | * ``group`` (str): treatment/control group flag
17 | * ``response_att`` (binary): target
18 | * ``gender`` (str): customer gender
19 | * ``age`` (float): customer age
* ``main_format`` (int): store type (1 - grocery store, 0 - superstore)
21 |
22 |
23 | .. list-table::
24 | :align: center
25 | :header-rows: 1
26 | :widths: 5 5
27 |
28 | * - Feature
29 | - Description
30 | * - CardHolder
31 | - customer id
32 | * - customer
33 | - age
34 | * - children
35 | - number of children
36 | * - cheque_count_[3,6,12]m_g*
37 | - number of customer receipts collected within last 3, 6, 12 months
38 | before campaign. g* is a product group
39 | * - crazy_purchases_cheque_count_[1,3,6,12]m
40 | - number of customer receipts with items purchased on "crazy"
41 | marketing campaign collected within last 1, 3, 6, 12 months before campaign
42 | * - crazy_purchases_goods_count_[6,12]m
43 | - items amount purchased on "crazy" marketing campaign collected
44 | within last 6, 12 months before campaign
45 | * - disc_sum_6m_g34
46 | - discount sum for past 6 month on a 34 product group
47 | * - food_share_[15d,1m]
48 | - food share in customer purchases for 15 days, 1 month
49 | * - gender
50 | - customer gender
51 | * - group
52 | - treatment/control group flag
53 | * - k_var_cheque_[15d,3m]
54 | - average check coefficient of variation for 15 days, 3 months
55 | * - k_var_cheque_category_width_15d
56 | - coefficient of variation of the average number of purchased
57 | categories (2nd level of the hierarchy) in one receipt for 15 days
58 | * - k_var_cheque_group_width_15d
59 | - coefficient of variation of the average number of purchased
60 | groups (1st level of the hierarchy) in one receipt for 15 days
61 | * - k_var_count_per_cheque_[15d,1m,3m,6m]_g*
62 | - unique product id (SKU) coefficient of variation for 15 days, 1, 3 ,6 months
63 | for g* product group
64 | * - k_var_days_between_visits_[15d,1m,3m]
65 | - coefficient of variation of the average period between visits
66 | for 15 days, 1 month, 3 months
67 | * - k_var_disc_per_cheque_15d
68 | - discount sum coefficient of variation for 15 days
69 | * - k_var_disc_share_[15d,1m,3m,6m,12m]_g*
70 | - discount amount coefficient of variation for 15 days, 1 month, 3 months, 6 months, 12 months
71 | for g* product group
72 | * - k_var_discount_depth_[15d,1m]
73 | - discount amount coefficient of variation for 15 days, 1 month
74 | * - k_var_sku_per_cheque_15d
75 | - number of unique product ids (SKU) coefficient of variation
76 | for 15 days
77 | * - k_var_sku_price_12m_g*
78 | - price coefficient of variation for 15 days, 3, 6, 12 months
79 | for g* product group
80 | * - main_format
     - store type (1 - grocery store, 0 - superstore)
82 | * - mean_discount_depth_15d
83 | - mean discount depth for 15 days
84 | * - months_from_register
85 | - number of months from a moment of register
86 | * - perdelta_days_between_visits_15_30d
     - timedelta in percent between visits during the first half
       of the month and visits during the second half of the month
89 | * - promo_share_15d
90 | - promo goods share in the customer bucket
91 | * - response_att
92 | - binary target variable = store visit
93 | * - response_sms
94 | - share of customer responses to previous SMS.
95 | Response = store visit
96 | * - response_viber
97 | - share of responses to previous Viber messages.
98 | Response = store visit
99 | * - sale_count_[3,6,12]m_g*
100 | - number of purchased items from the group * for 3, 6, 12 months
101 | * - sale_sum_[3,6,12]m_g*
102 | - sum of sales from the group * for 3, 6, 12 months
103 | * - stdev_days_between_visits_15d
104 | - coefficient of variation of the days between visits for 15 days
105 | * - stdev_discount_depth_[15d,1m]
106 | - discount sum coefficient of variation for 15 days, 1 month
107 |
108 | Key figures
109 | ################
110 |
111 | * Format: CSV
112 | * Size: 153M (compressed) 567M (uncompressed)
113 | * Rows: 687,029
114 | * Response Ratio: .1
115 | * Treatment Ratio: .75
116 |
117 |
--------------------------------------------------------------------------------
/docs/user_guide/models/revert_label.rst:
--------------------------------------------------------------------------------
1 | .. _ClassTransformation:
2 |
3 | ********************
4 | Class Transformation
5 | ********************
6 |
7 | .. warning::
8 | This approach is only suitable for classification problem
9 |
10 | Simple yet powerful and mathematically proven uplift modeling method, presented in 2012.
11 | The main idea is to predict a slightly changed target :math:`Z_i`:
12 |
13 | .. math::
14 | Z_i = Y_i \cdot W_i + (1 - Y_i) \cdot (1 - W_i),
15 |
16 | * :math:`Z_i` - a new target for the :math:`i` customer;
17 |
18 | * :math:`Y_i` - a previous target for the :math:`i` customer;
19 |
20 | * :math:`W_i` - treatment flag assigned to the :math:`i` customer.
21 |
In other words, the new target equals 1 if the customer was treated and made the target action, or was not treated and did not make the target action; it equals 0 otherwise:
23 |
24 | .. math::
25 | Z_i = \begin{cases}
26 | 1, & \mbox{if } W_i = 1 \mbox{ and } Y_i = 1 \\
27 | 1, & \mbox{if } W_i = 0 \mbox{ and } Y_i = 0 \\
28 | 0, & \mbox{otherwise}
29 | \end{cases}
30 |
31 | Let's go deeper and estimate the conditional probability of the target variable:
32 |
33 | .. math::
34 | P(Z=1|X = x) = \\
35 | = P(Z=1|X = x, W = 1) \cdot P(W = 1|X = x) + \\
36 | + P(Z=1|X = x, W = 0) \cdot P(W = 0|X = x) = \\
37 | = P(Y=1|X = x, W = 1) \cdot P(W = 1|X = x) + \\
38 | + P(Y=0|X = x, W = 0) \cdot P(W = 0|X = x).
39 |
40 | We assume that :math:`W` is independent of :math:`X = x` by design.
41 | Thus we have: :math:`P(W | X = x) = P(W)` and
42 |
43 | .. math::
44 | P(Z=1|X = x) = \\
45 | = P^T(Y=1|X = x) \cdot P(W = 1) + \\
46 | + P^C(Y=0|X = x) \cdot P(W = 0)
47 |
48 | Also, we assume that :math:`P(W = 1) = P(W = 0) = \frac{1}{2}`, which means that during the experiment the control and the treatment groups
49 | were divided in equal proportions. Then we get the following:
50 |
51 | .. math::
52 | P(Z=1|X = x) = \\
53 | = P^T(Y=1|X = x) \cdot \frac{1}{2} + P^C(Y=0|X = x) \cdot \frac{1}{2} \Rightarrow \\
54 |
55 | 2 \cdot P(Z=1|X = x) = \\
56 | = P^T(Y=1|X = x) + P^C(Y=0|X = x) = \\
57 | = P^T(Y=1|X = x) + 1 - P^C(Y=1|X = x) \Rightarrow \\
58 | \Rightarrow P^T(Y=1|X = x) - P^C(Y=1|X = x) = \\
59 | = uplift = 2 \cdot P(Z=1|X = x) - 1
60 |
61 | .. image:: ../../_static/images/user_guide/ug_revert_label_mem.png
62 | :align: center
63 | :alt: Mem about class transformation approach for uplift modeling
64 |
65 | Thus, by doubling the estimate of the new target :math:`Z` and subtracting one we will get an estimation of the uplift:
66 |
67 | .. math::
68 | uplift = 2 \cdot P(Z=1) - 1
69 |
70 |
71 | This approach is based on the assumption: :math:`P(W = 1) = P(W = 0) = \frac{1}{2}`. That is the reason that it has to be used
72 | only in cases where the number of treated customers (communication) is equal to the number of control customers (no communication).
73 |
74 | .. hint::
75 | In sklift this approach corresponds to the :class:`.ClassTransformation` class.
76 |
77 | References
78 | ==========
79 |
80 | 1️⃣ Maciej Jaskowski and Szymon Jaroszewicz. Uplift modeling for clinical trial data. ICML Workshop on Clinical Data Analysis, 2012.
81 |
82 | Examples using ``sklift.models.ClassTransformation``
83 | ====================================================
84 |
85 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg
86 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
87 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg
88 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
89 |
90 | 1. The overview of the basic approaches to the Uplift Modeling problem
91 |
92 | .. list-table::
93 | :align: center
94 | :widths: 12 15 10 8
95 |
   * - In English 🇬🇧
     - |Open In Colab1|
     - `nbviewer <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb>`__
     - `github <https://github.com/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb>`__
   * - In Russian 🇷🇺
     - |Open In Colab2|
     - `nbviewer <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb>`__
     - `github <https://github.com/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb>`__
104 |
105 | 2. The 2nd place solution of X5 RetailHero uplift contest by `Kirill Liksakov `_
106 |
107 | .. list-table::
108 | :align: center
109 | :widths: 12 10 8
110 |
111 | * - In English 🇬🇧
112 | - `nbviewer `__
113 | - `github `__
--------------------------------------------------------------------------------
/docs/user_guide/models/two_models.rst:
--------------------------------------------------------------------------------
1 | .. _TwoModels:
2 |
3 | **************************
4 | Two models approaches
5 | **************************
6 |
7 | .. _in the scikit-learn documentation: https://scikit-learn.org/stable/modules/calibration.html
8 |
9 | The two models approach can be found in almost every uplift modeling research. It is often used as a baseline model.
10 |
11 | Two independent models
12 | ==========================
13 |
14 | .. hint::
15 | In sklift this approach corresponds to the :class:`sklift.models.TwoModels` class and the **vanilla** method.
16 |
17 | The main idea is to estimate the conditional probabilities of the treatment and control groups separately.
18 |
19 | 1. Train the first model using the treatment set.
20 | 2. Train the second model using the control set.
21 | 3. Inference: subtract the control model scores from the treatment model scores.
22 |
23 | .. image:: ../../_static/images/TwoModels_vanila.png
24 | :align: center
25 | :alt: Two independent models vanilla
26 |
27 | The main disadvantage of this method is that if the uplift signal is weak, it can be lost since both models focus on predicting an original response, not the uplift.
28 |
29 | Two dependent models
30 | ========================
31 |
32 | The dependent data representation approach is based on the classifier chain method originally developed
33 | for multi-class classification problems. The idea is that if there are :math:`L` different labels, you can build
34 | :math:`L` different classifiers, each of which solves the problem of binary classification and in the learning process,
35 | each subsequent classifier uses the predictions of the previous ones as additional features.
36 | The authors of this method proposed to use the same idea to solve the problem of uplift modeling in two stages.
37 |
38 | .. hint::
39 | In sklift this approach corresponds to the :class:`.TwoModels` class and the **ddr_control** method.
40 |
41 | At the beginning, we train the classifier based on the control data:
42 |
43 | .. math::
44 | P^C = P(Y=1| X, W = 0),
45 |
46 | Next, we estimate the :math:`P_C` predictions and use them as a feature for the second classifier.
47 | It effectively reflects a dependency between treatment and control datasets:
48 |
49 | .. math::
50 | P^T = P(Y=1| X, P_C(X), W = 1)
51 |
52 | To get the uplift for each observation, calculate the difference:
53 |
54 | .. math::
55 | uplift(x_i) = P^T (x_i, P_C(x_i)) - P^C(x_i)
56 |
57 | Intuitively, the second classifier learns the difference between the expected probability in the treatment and the control sets which is
58 | the uplift.
59 |
60 | .. image:: ../../_static/images/TwoModels_ddr_control.png
61 | :align: center
62 | :alt: Two independent models dependent data representation control
63 |
64 | Similarly, you can first train the :math:`P_T` classifier and then use its predictions as a feature for
65 | the :math:`P_C` classifier.
66 |
67 | .. hint::
68 | In sklift this approach corresponds to the :class:`.TwoModels` class and the **ddr_treatment** method.
69 |
70 | There is an important remark about the data nature.
71 | It is important to calibrate the model's scores into probabilities if treatment and control data have a different nature.
72 | Model calibration techniques are well described `in the scikit-learn documentation`_.
73 |
74 | References
75 | ==========
76 |
77 | 1️⃣ Betlei, Artem & Diemert, Eustache & Amini, Massih-Reza. (2018). Uplift Prediction with Dependent Feature Representation in Imbalanced Treatment and Control Conditions: 25th International Conference, ICONIP 2018, Siem Reap, Cambodia, December 13–16, 2018, Proceedings, Part V. 10.1007/978-3-030-04221-9_5.
78 |
79 | 2️⃣ Zhao, Yan & Fang, Xiao & Simchi-Levi, David. (2017). Uplift Modeling with Multiple Treatments and General Response Types. 10.1137/1.9781611974973.66.
80 |
81 | Examples using ``sklift.models.TwoModels``
82 | ============================================
83 |
84 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg
85 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
86 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg
87 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
88 |
89 | 1. The overview of the basic approaches to solving the Uplift Modeling problem
90 |
91 | .. list-table::
92 | :align: center
93 | :widths: 12 15 10 8
94 |
   * - In English 🇬🇧
     - |Open In Colab1|
     - `nbviewer <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb>`__
     - `github <https://github.com/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb>`__
   * - In Russian 🇷🇺
     - |Open In Colab2|
     - `nbviewer <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb>`__
     - `github <https://github.com/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb>`__
--------------------------------------------------------------------------------
/sklift/tests/test_datasets.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import sklearn
3 |
4 | from functools import partial
5 |
6 | from ..datasets import (
7 | clear_data_dir,
8 | fetch_lenta, fetch_x5,
9 | fetch_criteo, fetch_hillstrom,
10 | fetch_megafon
11 | )
12 |
13 |
14 | fetch_criteo10 = partial(fetch_criteo, percent10=True)
15 |
@pytest.fixture(scope="session", autouse=True)
def clear():
    # Runs once before the whole session (autouse): clear the shared data
    # directory so the fetch_* tests below start from a clean state.
    clear_data_dir()
20 |
21 |
@pytest.fixture
def lenta_dataset() -> dict:
    """Expected bunch keys and array shapes for the Lenta dataset."""
    n_rows = 687029
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR',
                 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (n_rows, 193),
        'target.shape': (n_rows,),
        'treatment.shape': (n_rows,),
    }
27 |
28 |
def test_fetch_lenta(lenta_dataset):
    """fetch_lenta returns a Bunch with the expected keys and shapes."""
    bunch = fetch_lenta()
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(lenta_dataset['keys'])
    assert bunch.data.shape == lenta_dataset['data.shape']
    assert bunch.target.shape == lenta_dataset['target.shape']
    assert bunch.treatment.shape == lenta_dataset['treatment.shape']
36 |
37 | #@pytest.fixture
38 | #def x5_dataset() -> dict:
39 | # data = {'keys': ['data', 'target', 'treatment', 'DESCR', 'feature_names', 'target_name', 'treatment_name'],
40 | # 'data.keys': ['clients', 'train', 'purchases'], 'clients.shape': (400162, 5),
41 | # 'train.shape': (200039, 1), 'target.shape': (200039,), 'treatment.shape': (200039,)}
42 | # return data
43 |
44 | #
45 | #def test_fetch_x5(x5_dataset):
46 | # data = fetch_x5()
47 | # assert isinstance(data, sklearn.utils.Bunch)
48 | # assert set(data.keys()) == set(x5_dataset['keys'])
49 | # assert set(data.data.keys()) == set(x5_dataset['data.keys'])
50 | # assert data.data.clients.shape == x5_dataset['clients.shape']
51 | # assert data.data.train.shape == x5_dataset['train.shape']
52 | # assert data.target.shape == x5_dataset['target.shape']
53 | # assert data.treatment.shape == x5_dataset['treatment.shape']
54 |
55 |
@pytest.fixture
def criteo10_dataset() -> dict:
    """Expected bunch keys and feature-matrix shape for the 10% Criteo sample."""
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR',
                 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (1397960, 12),
    }
61 |
62 |
@pytest.mark.parametrize(
    'target_col, target_shape',
    [('visit', (1397960,)),
     ('conversion', (1397960,)),
     ('all', (1397960, 2))]
)
@pytest.mark.parametrize(
    'treatment_col, treatment_shape',
    [('exposure', (1397960,)),
     ('treatment', (1397960,)),
     ('all', (1397960, 2))]
)
def test_fetch_criteo10(
    criteo10_dataset,
    target_col, target_shape,
    treatment_col, treatment_shape
):
    """Every valid target/treatment column combination yields the expected shapes.

    'all' selects both available columns, hence the 2-D target/treatment shapes.
    """
    bunch = fetch_criteo10(target_col=target_col, treatment_col=treatment_col)
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(criteo10_dataset['keys'])
    assert bunch.data.shape == criteo10_dataset['data.shape']
    assert bunch.target.shape == target_shape
    assert bunch.treatment.shape == treatment_shape
86 |
@pytest.mark.parametrize(
    'target_col, treatment_col',
    [('visit', 'new_trmnt'), ('new_target', 'treatment')]
)
def test_fetch_criteo_errors(target_col, treatment_col):
    """Unknown target or treatment column names must raise ValueError."""
    with pytest.raises(ValueError):
        fetch_criteo(target_col=target_col, treatment_col=treatment_col)
94 |
95 |
@pytest.fixture
def hillstrom_dataset() -> dict:
    """Expected bunch keys and shapes for the Hillstrom dataset."""
    n_rows = 64000
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR',
                 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (n_rows, 8),
        'treatment.shape': (n_rows,),
    }
101 |
102 |
@pytest.mark.parametrize(
    'target_col, target_shape',
    [('visit', (64_000,)),
     ('conversion', (64_000,)),
     ('spend', (64_000,)),
     ('all', (64_000, 3))]
)
def test_fetch_hillstrom(hillstrom_dataset, target_col, target_shape):
    """Each selectable target column yields the expected keys and shapes.

    'all' selects the three available target columns at once.
    """
    bunch = fetch_hillstrom(target_col=target_col)
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(hillstrom_dataset['keys'])
    assert bunch.data.shape == hillstrom_dataset['data.shape']
    assert bunch.target.shape == target_shape
    assert bunch.treatment.shape == hillstrom_dataset['treatment.shape']
120 |
def test_fetch_hillstrom_error():
    """An unknown target column name must raise ValueError."""
    with pytest.raises(ValueError):
        fetch_hillstrom(target_col='new_target')
124 |
125 |
@pytest.fixture
def megafon_dataset() -> dict:
    """Expected bunch keys and array shapes for the MegaFon dataset."""
    n_rows = 600000
    return {
        'keys': ['data', 'target', 'treatment', 'DESCR',
                 'feature_names', 'target_name', 'treatment_name'],
        'data.shape': (n_rows, 50),
        'target.shape': (n_rows,),
        'treatment.shape': (n_rows,),
    }
131 |
132 |
def test_fetch_megafon(megafon_dataset):
    """fetch_megafon returns a Bunch with the expected keys and shapes."""
    bunch = fetch_megafon()
    assert isinstance(bunch, sklearn.utils.Bunch)
    assert set(bunch.keys()) == set(megafon_dataset['keys'])
    assert bunch.data.shape == megafon_dataset['data.shape']
    assert bunch.target.shape == megafon_dataset['target.shape']
    assert bunch.treatment.shape == megafon_dataset['treatment.shape']
140 |
141 |
def check_return_X_y_t(bunch, dataset_func):
    """Check that the tuple form of a fetcher mirrors a previously fetched bunch.

    Args:
        bunch: previously fetched dataset object with ``data``, ``target`` and
            ``treatment`` attributes (provides the reference shapes).
        dataset_func: fetch_* callable, invoked here with ``return_X_y_t=True``.
    """
    result = dataset_func(return_X_y_t=True)
    assert isinstance(result, tuple)
    # Tuple order is (X, y, treatment); shapes must match the bunch attributes.
    assert result[0].shape == bunch.data.shape
    assert result[1].shape == bunch.target.shape
    assert result[2].shape == bunch.treatment.shape
148 |
149 |
@pytest.mark.parametrize("fetch_func", [fetch_hillstrom, fetch_criteo10, fetch_lenta, fetch_megafon])
def test_return_X_y_t(fetch_func):
    """Every dataset fetcher must honour the ``return_X_y_t`` flag."""
    bunch = fetch_func()
    check_return_X_y_t(bunch, fetch_func)
154 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. |Open In Colab3| image:: https://colab.research.google.com/assets/colab-badge.svg
2 | .. _Open In Colab3: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb
3 |
4 | .. |Open In Colab4| image:: https://colab.research.google.com/assets/colab-badge.svg
5 | .. _Open In Colab4: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb
6 |
7 | .. |Contribs| image:: https://contrib.rocks/image?repo=maks-sh/scikit-uplift
8 | :target: https://github.com/maks-sh/scikit-uplift/graphs/contributors
9 | :alt: Contributors
10 |
11 | **************
12 | scikit-uplift
13 | **************
14 |
15 | **scikit-uplift (sklift)** is an uplift modeling python package that provides fast sklearn-style models implementation, evaluation metrics and visualization tools.
16 |
17 | The main idea is to provide easy-to-use and fast python package for uplift modeling. It delivers the model interface with the familiar scikit-learn API. One can use any popular estimator (for instance, from the Catboost library).
18 |
19 | *Uplift modeling* estimates a causal effect of treatment and uses it to effectively target customers that are most likely to respond to a marketing campaign.
20 |
21 | **Use cases for uplift modeling:**
22 |
* Target customers in a marketing campaign. Quite useful in the promotion of a popular product where a large share of customers make the target action by themselves without any influence. By modeling uplift you can find customers who are likely to make the target action (for instance, install an app) only when treated (for instance, received a push).
24 |
25 | * Combine a churn model and an uplift model to offer some bonus to a group of customers who are likely to churn.
26 |
27 | * Select a tiny group of customers in the campaign where a price per customer is high.
28 |
29 | Read more about *uplift modeling* problem in :ref:`the User Guide `.
30 |
Articles in Russian on habr.com: `Part 1 `__ ,
32 | `Part 2 `__
33 | and `Part 3 `__.
34 |
35 | Why sklift
36 | #############
37 |
- Comfortable and intuitive *scikit-learn*-like API;
39 |
40 | - More uplift metrics than you have ever seen in one place! Include brilliants like *Area Under Uplift Curve* (AUUC) or *Area Under Qini Curve* (Qini coefficient) with ideal cases;
41 |
42 | - Supporting any estimator compatible with scikit-learn (e.g. Xgboost, LightGBM, Catboost, etc.);
43 |
44 | - All approaches can be used in the ``sklearn.pipeline``. See the example of usage on `the Tutorials page `__;
45 |
46 | - Also metrics are compatible with the classes from ``sklearn.model_selection``. See the example of usage on `the Tutorials page `__;
47 |
48 | - Almost all implemented approaches solve classification and regression problems;
49 |
- Nice and useful viz for analysing model performance.
51 |
52 |
53 | **The package currently supports the following methods:**
54 |
55 | 1. Solo Model (aka S-learner or Treatment Dummy, Treatment interaction) approach
56 | 2. Class Transformation (aka Class Variable Transformation or Revert Label) approach
57 | 3. Two Models (aka X-learner, or naïve approach, or difference score method, or double classifier approach) approach, including Dependent Data Representation
58 |
59 | **And the following metrics:**
60 |
61 | 1. Uplift@k
62 | 2. Area Under Uplift Curve
63 | 3. Area Under Qini Curve
64 | 4. Weighted average uplift
65 |
66 | Project info
67 | #############
68 |
69 | * GitHub repository: https://github.com/maks-sh/scikit-uplift
70 | * Github examples: https://github.com/maks-sh/scikit-uplift/tree/master/notebooks
71 | * Documentation: https://www.uplift-modeling.com/en/latest/index.html
72 | * Contributing guide: https://www.uplift-modeling.com/en/latest/contributing.html
73 | * License: `MIT `__
74 |
75 | Community
76 | #############
77 |
78 | Sklift is being actively maintained and welcomes new contributors of all experience levels.
79 |
80 | - Please see our `Contributing Guide `_ for more details.
81 | - By participating in this project, you agree to abide by its `Code of Conduct `__.
82 |
83 | Thanks to all our contributors!
84 |
85 | |Contribs|
86 |
87 | If you have any questions, please contact us at team@uplift-modeling.com
88 |
89 | .. toctree::
90 | :hidden:
91 |
92 | self
93 |
94 | .. toctree::
95 | :maxdepth: 2
96 | :caption: Contents
97 |
98 | install
99 | quick_start
100 | user_guide/index
101 | api/index
102 | tutorials
103 | contributing
104 | changelog
105 | hall_of_fame
106 |
107 |
108 | ===============
109 |
110 | Papers and materials
111 | #####################
112 |
113 | 1. Gutierrez, P., & Gérardy, J. Y.
114 | Causal Inference and Uplift Modelling: A Review of the Literature.
115 | In International Conference on Predictive Applications and APIs (pp. 1-13).
116 |
117 | 2. Artem Betlei, Criteo Research; Eustache Diemert, Criteo Research; Massih-Reza Amini, Univ. Grenoble Alpes
118 | Dependent and Shared Data Representations improve Uplift Prediction in Imbalanced Treatment Conditions
119 | FAIM'18 Workshop on CausalML.
120 |
121 | 3. Eustache Diemert, Artem Betlei, Christophe Renaudin, and Massih-Reza Amini. 2018.
122 | A Large Scale Benchmark for Uplift Modeling.
123 | In Proceedings of AdKDD & TargetAd (ADKDD’18). ACM, New York, NY, USA, 6 pages.
124 |
125 | 4. Athey, Susan, and Imbens, Guido. 2015.
126 | Machine learning methods for estimating heterogeneous causal effects.
127 | Preprint, arXiv:1504.01132. Google Scholar.
128 |
129 | 5. Oscar Mesalles Naranjo. 2012.
130 | Testing a New Metric for Uplift Models.
131 | Dissertation Presented for the Degree of MSc in Statistics and Operational Research.
132 |
133 | 6. Kane, K., V. S. Y. Lo, and J. Zheng. 2014.
134 | Mining for the Truly Responsive Customers and Prospects Using True-Lift Modeling:
135 | Comparison of New and Existing Methods.
136 | Journal of Marketing Analytics 2 (4): 218–238.
137 |
138 | 7. Maciej Jaskowski and Szymon Jaroszewicz.
139 | Uplift modeling for clinical trial data.
140 | ICML Workshop on Clinical Data Analysis, 2012.
141 |
142 | 8. Lo, Victor. 2002.
143 | The True Lift Model - A Novel Data Mining Approach to Response Modeling in Database Marketing.
144 | SIGKDD Explorations. 4. 78-86.
145 |
146 | 9. Zhao, Yan & Fang, Xiao & Simchi-Levi, David. 2017.
147 | Uplift Modeling with Multiple Treatments and General Response Types. 10.1137/1.9781611974973.66.
148 |
149 | 10. Nicholas J Radcliffe. 2007.
Using control groups to target on predicted lift: Building and assessing uplift models.
151 | Direct Marketing Analytics Journal, (3):14–21, 2007.
152 |
153 | 11. Devriendt, F., Guns, T., & Verbeke, W. 2020.
154 | Learning to rank for uplift modeling. ArXiv, abs/2002.05897.
155 |
156 | ===============
157 |
158 | Tags
159 | #####
160 | **EN**: uplift modeling, uplift modelling, causal inference, causal effect, causality, individual treatment effect, true lift, net lift, incremental modeling
161 |
162 | **RU**: аплифт моделирование, Uplift модель
163 |
164 | **ZH**: uplift增量建模, 因果推断, 因果效应, 因果关系, 个体干预因果效应, 真实增量, 净增量, 增量建模
165 |
--------------------------------------------------------------------------------
/notebooks/Readme.rst:
--------------------------------------------------------------------------------
1 | .. _The overview of the basic approaches to solving the Uplift Modeling problem: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
2 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg
3 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
4 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg
5 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
6 |
7 | .. _Example of usage model from sklift.models in sklearn.pipeline: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb
8 | .. |Open In Colab3| image:: https://colab.research.google.com/assets/colab-badge.svg
9 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb
10 | .. |Open In Colab4| image:: https://colab.research.google.com/assets/colab-badge.svg
11 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_RU.ipynb
12 |
13 | .. _Example of usage model from sklift.models in sklearn.model_selection: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb
14 | .. |Open In Colab5| image:: https://colab.research.google.com/assets/colab-badge.svg
15 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb
16 |
17 | .. |Open In Colab6| image:: https://colab.research.google.com/assets/colab-badge.svg
18 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Lenta_dataset.ipynb
19 |
20 | .. _EDA of X5 dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_x5_dataset.ipynb
21 | .. |Open In Colab7| image:: https://colab.research.google.com/assets/colab-badge.svg
22 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_x5_dataset.ipynb
23 |
24 | .. _EDA of Criteo dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Criteo_dataset.ipynb
25 | .. |Open In Colab8| image:: https://colab.research.google.com/assets/colab-badge.svg
26 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Criteo_dataset.ipynb
27 |
28 | .. _EDA of Hillstrom dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Hillstrom_dataset.ipynb
29 | .. |Open In Colab9| image:: https://colab.research.google.com/assets/colab-badge.svg
30 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Hillstrom_dataset.ipynb
31 |
32 | .. _EDA of Megafon dataset: https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Megafon_dataset.ipynb
33 | .. |Open In Colab10| image:: https://colab.research.google.com/assets/colab-badge.svg
34 | :target: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/EDA_Megafon_dataset.ipynb
35 |
36 |
37 |
38 | **********
39 | Tutorials
40 | **********
41 |
42 | Basic
43 | ########
44 |
It is best to start learning scikit-uplift with the basic tutorials.
46 |
47 | `The overview of the basic approaches to solving the Uplift Modeling problem`_
48 | ----------------------------------------------------------------------------------
49 |
50 | .. list-table::
51 | :align: center
52 | :widths: 12 15 10 8
53 |
54 | * - In English 🇬🇧
55 | - |Open In Colab1|
56 | - `nbviewer `__
57 | - `github `__
58 | * - In Russian 🇷🇺
59 | - |Open In Colab2|
60 | - `nbviewer `__
61 | - `github `__
62 |
63 | `Uplift modeling metrics`_
64 | ----------------------------------------------------------------------------------
65 |
66 | .. list-table::
67 | :align: center
68 | :widths: 12 15 10 8
69 |
70 | * - In English 🇬🇧
71 | - |Open In Colab1|
72 | - `nbviewer `__
73 | - `github `__
74 |
75 | `Example of usage model from sklift.models in sklearn.pipeline`_
76 | ----------------------------------------------------------------------------------
77 |
78 | .. list-table::
79 | :align: center
80 | :widths: 12 15 10 8
81 |
82 | * - In English 🇬🇧
83 | - |Open In Colab3|
84 | - `nbviewer `__
85 | - `github `__
86 | * - In Russian 🇷🇺
87 | - |Open In Colab4|
88 | - `nbviewer `__
89 | - `github `__
90 |
91 | `Example of usage model from sklift.models in sklearn.model_selection`_
92 | ----------------------------------------------------------------------------------
93 |
94 | .. list-table::
95 | :align: center
96 | :widths: 12 15 10 8
97 |
98 | * - In English 🇬🇧
99 | - |Open In Colab5|
100 | - `nbviewer `__
101 | - `github `__
102 |
103 | Exploratory data analysis
104 | ############################
105 |
106 | The package contains various public datasets for uplift modeling.
Below you will find Jupyter notebooks with an EDA of these datasets and a simple baseline.
108 |
109 | .. list-table::
110 | :align: center
111 | :widths: 30 12 15 10 8
112 |
113 | * - EDA of :ref:`Lenta dataset `
114 | - In English 🇬🇧
115 | - |Open In Colab6|
116 | - `nbviewer `__
117 | - `github `__
118 | * - EDA of :ref:`X5 dataset `
119 | - In English 🇬🇧
120 | - |Open In Colab7|
121 | - `nbviewer `__
122 | - `github `__
123 | * - EDA of :ref:`Criteo dataset `
124 | - In English 🇬🇧
125 | - |Open In Colab8|
126 | - `nbviewer `__
127 | - `github `__
128 | * - EDA of :ref:`Hillstrom dataset `
129 | - In English 🇬🇧
130 | - |Open In Colab9|
131 | - `nbviewer `__
132 | - `github `__
133 | * - EDA of :ref:`Megafon dataset `
134 | - In English 🇬🇧
135 | - |Open In Colab10|
136 | - `nbviewer `__
137 | - `github `__
138 |
--------------------------------------------------------------------------------
/sklift/tests/test_viz.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from numpy.testing import assert_allclose
5 |
6 | from ..viz import plot_qini_curve, plot_uplift_curve, plot_uplift_preds, plot_uplift_by_percentile, plot_treatment_balance_curve
7 | from ..metrics import qini_curve, perfect_qini_curve, uplift_curve, perfect_uplift_curve
8 | from ..viz import UpliftCurveDisplay
9 |
10 | from sklearn.tree import DecisionTreeClassifier
11 | from ..models import SoloModel
12 |
13 | import matplotlib as mpl
14 |
def make_predictions():
    """Fit a tiny SoloModel on toy data.

    Returns a ``(y_val, uplift_preds, treat_val)`` triple suitable for
    feeding the visualisation helpers under test.
    """
    X_train = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]])
    y_train = np.array([0.0, 0.0, 1.0])
    treat_train = np.array([0.0, 1.0, 1.0])

    X_val = np.array([[5.1, 3.4, 1.5, 0.2], [5.0, 3.5, 1.3, 0.3], [4.5, 2.3, 1.3, 0.3]])
    y_val = np.array([0.0, 1.0, 0.0])
    treat_val = np.array([0.0, 1.0, 1.0])

    # Fixed random_state keeps predictions deterministic across test runs.
    estimator = DecisionTreeClassifier(random_state=0)
    uplift_model = SoloModel(estimator).fit(X_train, y_train, treat_train)

    return y_val, uplift_model.predict(X_val), treat_val
28 |
@pytest.mark.parametrize("random", [True, False])
@pytest.mark.parametrize("perfect", [True, False])
@pytest.mark.parametrize("negative_effect", [True, False])
def test_plot_qini_curve(random, perfect, negative_effect):
    """``plot_qini_curve`` must expose the same curves as ``qini_curve``."""
    y_true, uplift, treatment = make_predictions()

    viz = plot_qini_curve(y_true, uplift, treatment, random, perfect, negative_effect)
    x_actual, y_actual = qini_curve(y_true, uplift, treatment)

    assert_allclose(viz.x_actual, x_actual)
    assert_allclose(viz.y_actual, y_actual)

    if random:
        # Random baseline: a straight line from origin to the curve endpoint.
        assert_allclose(viz.x_baseline, x_actual)
        assert_allclose(viz.y_baseline, x_actual * y_actual[-1] / len(y_true))

    if perfect:
        x_perfect, y_perfect = perfect_qini_curve(y_true, treatment, negative_effect)
        assert_allclose(viz.x_perfect, x_perfect)
        assert_allclose(viz.y_perfect, y_perfect)

    assert isinstance(viz.line_, mpl.lines.Line2D)
    assert isinstance(viz.ax_, mpl.axes.Axes)
    assert isinstance(viz.figure_, mpl.figure.Figure)
57 |
58 |
@pytest.mark.parametrize(
    "qini_auc, estimator_name, expected_label",
    [
        (0.61, None, "plot_qini_curve = 0.61"),
        (0.61, "first", "first (plot_qini_curve = 0.61)"),
        (None, "None", "None")
    ]
)
def test_default_labels_qini(qini_auc, estimator_name, expected_label):
    """``UpliftCurveDisplay.plot`` builds the qini legend label from the
    estimator name and score.

    Renamed from ``test_default_labels``: this module defined a second test
    function with that exact name further down, so this earlier definition
    was shadowed and silently never collected by pytest.
    """
    x_actual = np.array([0, 1, 2, 3, 5, 6])
    y_actual = np.array([0.0, 1.0, 2.0, 3.0, 2.5, 1.5])

    disp = UpliftCurveDisplay(
        x_actual=x_actual,
        y_actual=y_actual,
        estimator_name=estimator_name
    ).plot(qini_auc, title="plot_qini_curve")

    assert disp.line_.get_label() == expected_label
78 |
79 |
@pytest.mark.parametrize("random", [True, False])
@pytest.mark.parametrize("perfect", [True, False])
def test_plot_uplift_curve(random, perfect):
    """``plot_uplift_curve`` must expose the same curves as ``uplift_curve``."""
    y_true, uplift, treatment = make_predictions()

    viz = plot_uplift_curve(y_true, uplift, treatment, random, perfect)
    x_actual, y_actual = uplift_curve(y_true, uplift, treatment)

    assert_allclose(viz.x_actual, x_actual)
    assert_allclose(viz.y_actual, y_actual)

    if random:
        # Random baseline: a straight line from origin to the curve endpoint.
        assert_allclose(viz.x_baseline, x_actual)
        assert_allclose(viz.y_baseline, x_actual * y_actual[-1] / len(y_true))

    if perfect:
        x_perfect, y_perfect = perfect_uplift_curve(y_true, treatment)
        assert_allclose(viz.x_perfect, x_perfect)
        assert_allclose(viz.y_perfect, y_perfect)

    assert isinstance(viz.line_, mpl.lines.Line2D)
    assert isinstance(viz.ax_, mpl.axes.Axes)
    assert isinstance(viz.figure_, mpl.figure.Figure)
107 |
108 |
@pytest.mark.parametrize(
    "uplift_auc, estimator_name, expected_label",
    [
        (0.75, None, "plot_uplift_curve = 0.75"),
        (0.75, "first", "first (plot_uplift_curve = 0.75)"),
        (None, "None", "None")
    ]
)
def test_default_labels_uplift(uplift_auc, estimator_name, expected_label):
    """``UpliftCurveDisplay.plot`` builds the uplift legend label from the
    estimator name and score.

    Renamed from ``test_default_labels``: this module defined two test
    functions with that exact name, so Python module semantics made the
    later definition replace the earlier (qini) one and pytest ran only
    one of the two.
    """
    x_actual = np.array([0, 1, 2, 3, 5, 6])
    y_actual = np.array([0.0, 1.0, 2.0, 3.0, 2.5, 1.5])

    disp = UpliftCurveDisplay(
        x_actual=x_actual,
        y_actual=y_actual,
        estimator_name=estimator_name
    ).plot(uplift_auc, title="plot_uplift_curve")

    assert disp.line_.get_label() == expected_label
128 |
129 |
def test_plot_uplift_preds():
    """``plot_uplift_preds`` yields three axes and rejects ``bins=0``."""
    trmnt_preds = np.array([1, 1, 0, 1, 1, 1])
    ctrl_preds = np.array([0, 1, 0, 1, 0, 1])

    axes = plot_uplift_preds(trmnt_preds, ctrl_preds, log=True, bins=5)
    for idx in range(3):
        assert isinstance(axes[idx], mpl.axes.Axes)

    with pytest.raises(ValueError):
        plot_uplift_preds(trmnt_preds, ctrl_preds, log=True, bins=0)
142 |
def test_plot_uplift_by_percentile():
    """Smoke-test ``plot_uplift_by_percentile`` across strategies and kinds."""
    y_true, uplift, treatment = make_predictions()
    uplift_title = "Uplift by percentile\nweighted average uplift = 0.5000"

    # 'overall' strategy, line plot: a single Axes is returned.
    ax = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall',
                                   kind='line', bins=1, string_percentiles=True)
    assert isinstance(ax, mpl.axes.Axes)
    assert ax.get_title() == uplift_title
    assert ax.get_xlabel() == "Percentile"
    assert ax.get_ylabel() == "Uplift = treatment response rate - control response rate"

    # 'by_group' strategy, bar plot: a pair of Axes is returned,
    # with and without human-readable percentile labels.
    for string_percentiles in (False, True):
        axes = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',
                                         kind='bar', bins=1,
                                         string_percentiles=string_percentiles)
        assert isinstance(axes[0], mpl.axes.Axes)
        assert isinstance(axes[1], mpl.axes.Axes)
        assert axes[0].get_title() == uplift_title
        assert axes[1].get_xlabel() == "Percentile"
        assert axes[1].get_title() == "Response rate by percentile"

    # 'by_group' strategy, line plot: collapses back to a single Axes.
    ax = plot_uplift_by_percentile(y_true, uplift, treatment, strategy='by_group',
                                   kind='line', bins=1, string_percentiles=False)
    assert isinstance(ax, mpl.axes.Axes)
169 |
170 |
@pytest.mark.parametrize(
    "strategy, kind, bins, string_percentiles",
    [
        ("new_strategy", "bar", 1, False),
        ("by_group", "new_bar", 1, False),
        ("by_group", "bar", 0, False),
        ("by_group", "bar", 100, False),
        ("by_group", "bar", 1, 5)

    ]
)
def test_plot_uplift_by_percentile_errors(strategy, kind, bins, string_percentiles):
    """Invalid argument combinations must raise ``ValueError``."""
    y_true, uplift, treatment = make_predictions()

    with pytest.raises(ValueError):
        plot_uplift_by_percentile(y_true, uplift, treatment, strategy=strategy,
                                  kind=kind, bins=bins,
                                  string_percentiles=string_percentiles)
186 |
187 |
def test_plot_treatment_balance_curve():
    """The balance curve gets the documented title and axis labels."""
    _, uplift, treatment = make_predictions()

    ax = plot_treatment_balance_curve(uplift, treatment, winsize=0.5)

    assert isinstance(ax, mpl.axes.Axes)
    assert ax.get_title() == "Treatment balance curve"
    assert ax.get_xlabel() == "Percentage targeted"
    assert ax.get_ylabel() == "Balance: treatment / (treatment + control)"
197 |
def test_plot_treatment_balance_errors():
    """A window size above 1 must be rejected with ``ValueError``."""
    _, uplift, treatment = make_predictions()
    with pytest.raises(ValueError):
        plot_treatment_balance_curve(uplift, treatment, winsize=5)
--------------------------------------------------------------------------------
/Readme.rst:
--------------------------------------------------------------------------------
1 | .. -*- mode: rst -*-
2 |
3 | |Python3|_ |PyPi|_ |Docs|_ |License|_
4 |
5 | .. |Python3| image:: https://img.shields.io/badge/python-3-blue.svg
6 | .. _Python3: https://badge.fury.io/py/scikit-uplift
7 |
8 | .. |PyPi| image:: https://badge.fury.io/py/scikit-uplift.svg
9 | .. _PyPi: https://badge.fury.io/py/scikit-uplift
10 |
11 | .. |Docs| image:: https://readthedocs.org/projects/scikit-uplift/badge/?version=latest
12 | .. _Docs: https://www.uplift-modeling.com/en/latest/
13 |
14 | .. |License| image:: https://img.shields.io/badge/license-MIT-green
15 | .. _License: https://github.com/maks-sh/scikit-uplift/blob/master/LICENSE
16 |
17 | .. |Open In Colab1| image:: https://colab.research.google.com/assets/colab-badge.svg
18 | .. _Open In Colab1: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb
19 |
20 | .. |Open In Colab2| image:: https://colab.research.google.com/assets/colab-badge.svg
21 | .. _Open In Colab2: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero.ipynb
22 |
23 | .. |Open In Colab3| image:: https://colab.research.google.com/assets/colab-badge.svg
24 | .. _Open In Colab3: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb
25 |
26 | .. |Open In Colab4| image:: https://colab.research.google.com/assets/colab-badge.svg
27 | .. _Open In Colab4: https://colab.research.google.com/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_RU.ipynb
28 |
29 | .. _uplift-modeling.com: https://www.uplift-modeling.com/en/latest/
30 |
31 | .. image:: https://raw.githubusercontent.com/maks-sh/scikit-uplift/dev/docs/_static/sklift-github-logo.png
32 | :align: center
33 | :alt: scikit-uplift: uplift modeling in scikit-learn style in python
34 |
35 | .. |Contribs| image:: https://contrib.rocks/image?repo=maks-sh/scikit-uplift
36 | :target: https://github.com/maks-sh/scikit-uplift/graphs/contributors
37 | :alt: Contributors
38 |
39 | scikit-uplift
40 | ===============
41 |
42 | **scikit-uplift (sklift)** is an uplift modeling python package that provides fast sklearn-style models implementation, evaluation metrics and visualization tools.
43 |
44 | Uplift modeling estimates a causal effect of treatment and uses it to effectively target customers that are most likely to respond to a marketing campaign.
45 |
46 | **Use cases for uplift modeling:**
47 |
* Target customers in a marketing campaign. Quite useful in the promotion of a popular product where a large share of customers make the target action by themselves without any influence. By modeling uplift you can find customers who are likely to make the target action (for instance, install an app) only when treated (for instance, received a push).
49 |
50 | * Combine a churn model and an uplift model to offer some bonus to a group of customers who are likely to churn.
51 |
52 | * Select a tiny group of customers in the campaign where a price per customer is high.
53 |
54 | Read more about uplift modeling problem in `User Guide `__.
55 |
Articles in Russian on habr.com: `Part 1 `__ ,
57 | `Part 2 `__
58 | and `Part 3 `__.
59 |
60 | Why sklift
61 | -------------
62 |
- Comfortable and intuitive *scikit-learn*-like API;
64 |
65 | - More uplift metrics than you have ever seen in one place! Include brilliants like *Area Under Uplift Curve* (AUUC) or *Area Under Qini Curve* (Qini coefficient) with ideal cases;
66 |
67 | - Supporting any estimator compatible with scikit-learn (e.g. Xgboost, LightGBM, Catboost, etc.);
68 |
69 | - All approaches can be used in the ``sklearn.pipeline``. See the example of usage on `the Tutorials page `__;
70 |
71 | - Also metrics are compatible with the classes from ``sklearn.model_selection``. See the example of usage on `the Tutorials page `__;
72 |
73 | - Almost all implemented approaches solve classification and regression problems;
74 |
- Nice and useful viz for analysing model performance.
76 |
77 | Installation
78 | -------------
79 |
80 | **Install** the package by the following command from PyPI:
81 |
82 | .. code-block:: bash
83 |
84 | pip install scikit-uplift
85 |
86 | Or install from source:
87 |
88 | .. code-block:: bash
89 |
90 | git clone https://github.com/maks-sh/scikit-uplift.git
91 | cd scikit-uplift
92 | python setup.py install
93 |
94 | Documentation
95 | --------------
96 |
97 | The full documentation is available at `uplift-modeling.com`_.
98 |
99 | Or you can build the documentation locally using `Sphinx `_ 1.4 or later:
100 |
101 | .. code-block:: bash
102 |
103 | cd docs
104 | pip install -r requirements.txt
105 | make html
106 |
107 | And if you now point your browser to ``_build/html/index.html``, you should see a documentation site.
108 |
109 | Quick Start
110 | -----------
111 |
112 | See the **RetailHero tutorial notebook** (`EN `__ |Open In Colab1|_, `RU `__ |Open In Colab2|_) for details.
113 |
114 | **Train and predict uplift model**
115 |
116 | Use the intuitive python API to train uplift models with `sklift.models `__.
117 |
118 | .. code-block:: python
119 |
120 | # import approaches
121 | from sklift.models import SoloModel, ClassTransformation
122 | # import any estimator adheres to scikit-learn conventions.
123 | from lightgbm import LGBMClassifier
124 |
125 | # define models
126 | estimator = LGBMClassifier(n_estimators=10)
127 |
128 | # define metamodel
129 | slearner = SoloModel(estimator=estimator)
130 |
131 | # fit model
132 | slearner.fit(
133 | X=X_tr,
134 | y=y_tr,
135 | treatment=trmnt_tr,
136 | )
137 |
138 | # predict uplift
139 | uplift_slearner = slearner.predict(X_val)
140 |
141 | **Evaluate your uplift model**
142 |
143 | Uplift model evaluation metrics are available in `sklift.metrics `__.
144 |
145 | .. code-block:: python
146 |
147 | # import metrics to evaluate your model
148 | from sklift.metrics import (
149 | uplift_at_k, uplift_auc_score, qini_auc_score, weighted_average_uplift
150 | )
151 |
152 |
153 | # Uplift@30%
154 | uplift_at_k = uplift_at_k(y_true=y_val, uplift=uplift_slearner,
155 | treatment=trmnt_val,
156 | strategy='overall', k=0.3)
157 |
158 | # Area Under Qini Curve
159 | qini_coef = qini_auc_score(y_true=y_val, uplift=uplift_slearner,
160 | treatment=trmnt_val)
161 |
162 | # Area Under Uplift Curve
163 | uplift_auc = uplift_auc_score(y_true=y_val, uplift=uplift_slearner,
164 | treatment=trmnt_val)
165 |
166 | # Weighted average uplift
167 | wau = weighted_average_uplift(y_true=y_val, uplift=uplift_slearner,
168 | treatment=trmnt_val)
169 |
**Visualize the results**
171 |
172 | Visualize performance metrics with `sklift.viz `__.
173 |
174 | .. code-block:: python
175 |
176 | from sklift.viz import plot_qini_curve
177 | import matplotlib.pyplot as plt
178 |
179 | fig, ax = plt.subplots(1, 1)
180 | ax.set_title('Qini curves')
181 |
182 | plot_qini_curve(
183 | y_test, uplift_slearner, trmnt_test,
184 | perfect=True, name='Slearner', ax=ax
185 | );
186 |
187 | plot_qini_curve(
188 | y_test, uplift_revert, trmnt_test,
189 | perfect=False, name='Revert label', ax=ax
190 | );
191 |
192 | .. image:: docs/_static/images/quick_start_qini.png
193 | :width: 514px
194 | :height: 400px
195 | :alt: Example of some models qini curves, perfect qini curve and random qini curve
196 |
197 | Development
198 | -----------
199 |
200 | We welcome new contributors of all experience levels.
201 |
202 | - Please see our `Contributing Guide `_ for more details.
203 | - By participating in this project, you agree to abide by its `Code of Conduct `__.
204 |
205 | Thanks to all our contributors!
206 |
207 | |Contribs|
208 |
209 | If you have any questions, please contact us at team@uplift-modeling.com
210 |
211 | Important links
212 | ~~~~~~~~~~~~~~~
213 |
214 | - Official source code repo: https://github.com/maks-sh/scikit-uplift/
215 | - Issue tracker: https://github.com/maks-sh/scikit-uplift/issues
216 | - Documentation: https://www.uplift-modeling.com/en/latest/
217 | - User Guide: https://www.uplift-modeling.com/en/latest/user_guide/index.html
218 | - Contributing guide: https://www.uplift-modeling.com/en/latest/contributing.html
219 | - Release History: https://www.uplift-modeling.com/en/latest/changelog.html
220 |
221 | ===============
222 |
223 | Papers and materials
224 | ---------------------
225 | 1. Gutierrez, P., & Gérardy, J. Y.
226 | Causal Inference and Uplift Modelling: A Review of the Literature.
227 | In International Conference on Predictive Applications and APIs (pp. 1-13).
228 |
229 | 2. Artem Betlei, Criteo Research; Eustache Diemert, Criteo Research; Massih-Reza Amini, Univ. Grenoble Alpes
230 | Dependent and Shared Data Representations improve Uplift Prediction in Imbalanced Treatment Conditions
231 | FAIM'18 Workshop on CausalML.
232 |
233 | 3. Eustache Diemert, Artem Betlei, Christophe Renaudin, and Massih-Reza Amini. 2018.
234 | A Large Scale Benchmark for Uplift Modeling.
235 | In Proceedings of AdKDD & TargetAd (ADKDD’18). ACM, New York, NY, USA, 6 pages.
236 |
237 | 4. Athey, Susan, and Imbens, Guido. 2015.
238 | Machine learning methods for estimating heterogeneous causal effects.
239 | Preprint, arXiv:1504.01132. Google Scholar.
240 |
241 | 5. Oscar Mesalles Naranjo. 2012.
242 | Testing a New Metric for Uplift Models.
243 | Dissertation Presented for the Degree of MSc in Statistics and Operational Research.
244 |
245 | 6. Kane, K., V. S. Y. Lo, and J. Zheng. 2014.
246 | Mining for the Truly Responsive Customers and Prospects Using True-Lift Modeling:
247 | Comparison of New and Existing Methods.
248 | Journal of Marketing Analytics 2 (4): 218–238.
249 |
250 | 7. Maciej Jaskowski and Szymon Jaroszewicz.
251 | Uplift modeling for clinical trial data.
252 | ICML Workshop on Clinical Data Analysis, 2012.
253 |
254 | 8. Lo, Victor. 2002.
255 | The True Lift Model - A Novel Data Mining Approach to Response Modeling in Database Marketing.
256 | SIGKDD Explorations. 4. 78-86.
257 |
258 | 9. Zhao, Yan & Fang, Xiao & Simchi-Levi, David. 2017.
259 | Uplift Modeling with Multiple Treatments and General Response Types. 10.1137/1.9781611974973.66.
260 |
261 | 10. Nicholas J Radcliffe. 2007.
262 | Using control groups to target on predicted lift: Building and assessing uplift models. Direct Marketing Analytics Journal, (3):14–21, 2007.
263 |
264 | 11. Devriendt, F., Guns, T., & Verbeke, W. 2020.
265 | Learning to rank for uplift modeling. ArXiv, abs/2002.05897.
266 |
267 | ===============
268 |
269 | Tags
270 | ~~~~~~~~~~~~~~~
271 | **EN**: uplift modeling, uplift modelling, causal inference, causal effect, causality, individual treatment effect, true lift, net lift, incremental modeling
272 |
273 | **RU**: аплифт моделирование, Uplift модель
274 |
275 | **ZH**: uplift增量建模, 因果推断, 因果效应, 因果关系, 个体干预因果效应, 真实增量, 净增量, 增量建模
276 |
277 |
--------------------------------------------------------------------------------
/notebooks/pipeline_usage_EN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Example of usage model from sklift.models in sklearn.pipeline\n",
8 | "\n",
9 | "
\n",
10 | "\n",
11 | " \n",
12 | "
\n",
13 | " \n",
14 | "
\n",
15 | " SCIKIT-UPLIFT REPO | \n",
16 | " SCIKIT-UPLIFT DOCS | \n",
17 | " USER GUIDE\n",
18 | "
\n",
19 | " RUSSIAN VERSION\n",
20 | "\n",
21 | ""
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {
27 | "ExecuteTime": {
28 | "end_time": "2020-04-26T12:44:35.435852Z",
29 | "start_time": "2020-04-26T12:44:35.239050Z"
30 | }
31 | },
32 | "source": [
33 | "This is a simple example on how to use [sklift.models](https://scikit-uplift.readthedocs.io/en/latest/api/models.html) with [sklearn.pipeline](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.pipeline).\n",
34 | "\n",
35 | "The data is taken from [MineThatData E-Mail Analytics And Data Mining Challenge dataset by Kevin Hillstrom](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html).\n",
36 | "\n",
37 | "This dataset contains 64,000 customers who last purchased within twelve months. The customers were involved in an e-mail test:\n",
38 | "* 1/3 were randomly chosen to receive an e-mail campaign featuring Mens merchandise.\n",
39 | "* 1/3 were randomly chosen to receive an e-mail campaign featuring Womens merchandise.\n",
40 | "* 1/3 were randomly chosen to not receive an e-mail campaign.\n",
41 | "\n",
42 | "During a period of two weeks following the e-mail campaign, results were tracked. The task is to tell the world if the Mens or Womens e-mail campaign was successful.\n",
43 | "\n",
44 | "The full description of the dataset can be found at the [link](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html).\n",
45 | "\n",
46 | "Firstly, install the necessary libraries:"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 1,
52 | "metadata": {
53 | "ExecuteTime": {
54 | "end_time": "2021-02-07T01:01:39.897817Z",
55 | "start_time": "2021-02-07T01:01:39.890409Z"
56 | }
57 | },
58 | "outputs": [],
59 | "source": [
60 | "!pip install scikit-uplift xgboost==1.0.2 category_encoders==2.1.0 -U"
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "For simplicity of the example, we will leave only two user segments:\n",
68 | "* those who were sent an e-mail advertising campaign with women's products;\n",
69 | "* those who were not sent the ad campaign.\n",
70 | "\n",
71 | "We will use the `visit` variable as the target variable."
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 2,
77 | "metadata": {
78 | "ExecuteTime": {
79 | "end_time": "2021-02-07T01:01:42.438253Z",
80 | "start_time": "2021-02-07T01:01:39.901510Z"
81 | },
82 | "scrolled": true
83 | },
84 | "outputs": [
85 | {
86 | "name": "stdout",
87 | "output_type": "stream",
88 | "text": [
89 | "Shape of the dataset before processing: (64000, 8)\n",
90 | "Shape of the dataset after processing: (42693, 8)\n"
91 | ]
92 | },
93 | {
94 | "data": {
95 | "text/html": [
96 | "\n",
97 | "\n",
110 | "
\n",
111 | " \n",
112 | " \n",
113 | " | \n",
114 | " recency | \n",
115 | " history_segment | \n",
116 | " history | \n",
117 | " mens | \n",
118 | " womens | \n",
119 | " zip_code | \n",
120 | " newbie | \n",
121 | " channel | \n",
122 | "
\n",
123 | " \n",
124 | " \n",
125 | " \n",
126 | " | 0 | \n",
127 | " 10 | \n",
128 | " 2) $100 - $200 | \n",
129 | " 142.44 | \n",
130 | " 1 | \n",
131 | " 0 | \n",
132 | " Surburban | \n",
133 | " 0 | \n",
134 | " Phone | \n",
135 | "
\n",
136 | " \n",
137 | " | 1 | \n",
138 | " 6 | \n",
139 | " 3) $200 - $350 | \n",
140 | " 329.08 | \n",
141 | " 1 | \n",
142 | " 1 | \n",
143 | " Rural | \n",
144 | " 1 | \n",
145 | " Web | \n",
146 | "
\n",
147 | " \n",
148 | " | 2 | \n",
149 | " 7 | \n",
150 | " 2) $100 - $200 | \n",
151 | " 180.65 | \n",
152 | " 0 | \n",
153 | " 1 | \n",
154 | " Surburban | \n",
155 | " 1 | \n",
156 | " Web | \n",
157 | "
\n",
158 | " \n",
159 | " | 4 | \n",
160 | " 2 | \n",
161 | " 1) $0 - $100 | \n",
162 | " 45.34 | \n",
163 | " 1 | \n",
164 | " 0 | \n",
165 | " Urban | \n",
166 | " 0 | \n",
167 | " Web | \n",
168 | "
\n",
169 | " \n",
170 | " | 5 | \n",
171 | " 6 | \n",
172 | " 2) $100 - $200 | \n",
173 | " 134.83 | \n",
174 | " 0 | \n",
175 | " 1 | \n",
176 | " Surburban | \n",
177 | " 0 | \n",
178 | " Phone | \n",
179 | "
\n",
180 | " \n",
181 | "
\n",
182 | "
"
183 | ],
184 | "text/plain": [
185 | " recency history_segment history mens womens zip_code newbie channel\n",
186 | "0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone\n",
187 | "1 6 3) $200 - $350 329.08 1 1 Rural 1 Web\n",
188 | "2 7 2) $100 - $200 180.65 0 1 Surburban 1 Web\n",
189 | "4 2 1) $0 - $100 45.34 1 0 Urban 0 Web\n",
190 | "5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone"
191 | ]
192 | },
193 | "execution_count": 2,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "import pandas as pd\n",
200 | "from sklift.datasets import fetch_hillstrom\n",
201 | "\n",
202 | "\n",
203 | "%matplotlib inline\n",
204 | "\n",
205 | "bunch = fetch_hillstrom(target_col='visit')\n",
206 | "\n",
207 | "dataset, target, treatment = bunch['data'], bunch['target'], bunch['treatment']\n",
208 | "\n",
209 | "print(f'Shape of the dataset before processing: {dataset.shape}')\n",
210 | "\n",
211 | "# Selecting two segments\n",
212 | "dataset = dataset[treatment!='Mens E-Mail']\n",
213 | "target = target[treatment!='Mens E-Mail']\n",
214 | "treatment = treatment[treatment!='Mens E-Mail'].map({\n",
215 | " 'Womens E-Mail': 1,\n",
216 | " 'No E-Mail': 0\n",
217 | "})\n",
218 | "\n",
219 | "print(f'Shape of the dataset after processing: {dataset.shape}')\n",
220 | "dataset.head()"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "Divide all the data into training and validation samples:"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 3,
233 | "metadata": {
234 | "ExecuteTime": {
235 | "end_time": "2021-02-07T01:01:42.579775Z",
236 | "start_time": "2021-02-07T01:01:42.442595Z"
237 | }
238 | },
239 | "outputs": [],
240 | "source": [
241 | "from sklearn.model_selection import train_test_split\n",
242 | "\n",
243 | "\n",
244 | "X_tr, X_val, y_tr, y_val, treat_tr, treat_val = train_test_split(\n",
245 | " dataset, target, treatment, test_size=0.5, random_state=42\n",
246 | ")"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "Select categorical features:"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 4,
259 | "metadata": {
260 | "ExecuteTime": {
261 | "end_time": "2021-02-07T01:01:42.600915Z",
262 | "start_time": "2021-02-07T01:01:42.585066Z"
263 | }
264 | },
265 | "outputs": [
266 | {
267 | "name": "stdout",
268 | "output_type": "stream",
269 | "text": [
270 | "['history_segment', 'zip_code', 'channel']\n"
271 | ]
272 | }
273 | ],
274 | "source": [
275 | "cat_cols = X_tr.select_dtypes(include='object').columns.tolist()\n",
276 | "print(cat_cols)"
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {},
282 | "source": [
283 | "Create the necessary objects and combine them into a pipeline:"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 5,
289 | "metadata": {
290 | "ExecuteTime": {
291 | "end_time": "2021-02-07T01:01:42.703537Z",
292 | "start_time": "2021-02-07T01:01:42.603875Z"
293 | }
294 | },
295 | "outputs": [],
296 | "source": [
297 | "from sklearn.pipeline import Pipeline\n",
298 | "from category_encoders import CatBoostEncoder\n",
299 | "from sklift.models import ClassTransformation\n",
300 | "from xgboost import XGBClassifier\n",
301 | "\n",
302 | "\n",
303 | "encoder = CatBoostEncoder(cols=cat_cols)\n",
304 | "estimator = XGBClassifier(max_depth=2, random_state=42)\n",
305 | "ct = ClassTransformation(estimator=estimator)\n",
306 | "\n",
307 | "my_pipeline = Pipeline([\n",
308 | " ('encoder', encoder),\n",
309 | " ('model', ct)\n",
310 | "])"
311 | ]
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "metadata": {},
316 | "source": [
317 | "Train the pipeline as usual, adding the treatment column to the model step as the parameter `model__treatment`."
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 6,
323 | "metadata": {
324 | "ExecuteTime": {
325 | "end_time": "2021-02-07T01:01:44.020040Z",
326 | "start_time": "2021-02-07T01:01:42.707311Z"
327 | }
328 | },
329 | "outputs": [
330 | {
331 | "name": "stderr",
332 | "output_type": "stream",
333 | "text": [
334 | "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/sklearn/pipeline.py:354: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n",
335 | " self._final_estimator.fit(Xt, y, **fit_params)\n"
336 | ]
337 | }
338 | ],
339 | "source": [
340 | "my_pipeline = my_pipeline.fit(\n",
341 | " X=X_tr,\n",
342 | " y=y_tr,\n",
343 | " model__treatment=treat_tr\n",
344 | ")"
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {
350 | "ExecuteTime": {
351 | "end_time": "2020-04-26T18:07:44.970856Z",
352 | "start_time": "2020-04-26T18:07:44.964624Z"
353 | }
354 | },
355 | "source": [
356 | "Predict the uplift and calculate the uplift@30%"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 7,
362 | "metadata": {
363 | "ExecuteTime": {
364 | "end_time": "2021-02-07T01:01:44.184968Z",
365 | "start_time": "2021-02-07T01:01:44.047865Z"
366 | }
367 | },
368 | "outputs": [
369 | {
370 | "name": "stdout",
371 | "output_type": "stream",
372 | "text": [
373 | "uplift@30%: 0.0661\n"
374 | ]
375 | }
376 | ],
377 | "source": [
378 | "from sklift.metrics import uplift_at_k\n",
379 | "\n",
380 | "\n",
381 | "uplift_predictions = my_pipeline.predict(X_val)\n",
382 | "\n",
383 | "uplift_30 = uplift_at_k(y_val, uplift_predictions, treat_val, strategy='overall')\n",
384 | "print(f'uplift@30%: {uplift_30:.4f}')"
385 | ]
386 | }
387 | ],
388 | "metadata": {
389 | "kernelspec": {
390 | "display_name": "python 3",
391 | "language": "python",
392 | "name": "python3"
393 | },
394 | "language_info": {
395 | "codemirror_mode": {
396 | "name": "ipython",
397 | "version": 3
398 | },
399 | "file_extension": ".py",
400 | "mimetype": "text/x-python",
401 | "name": "python",
402 | "nbconvert_exporter": "python",
403 | "pygments_lexer": "ipython3",
404 | "version": "3.6.1"
405 | }
406 | },
407 | "nbformat": 4,
408 | "nbformat_minor": 2
409 | }
410 |
--------------------------------------------------------------------------------
/notebooks/pipeline_usage_RU.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Пример использование подходов из sklift.models в sklearn.pipeline\n",
8 | "\n",
9 | "
\n",
10 | "\n",
11 | " \n",
12 | "
\n",
13 | " \n",
14 | "
\n",
15 | " SCIKIT-UPLIFT REPO | \n",
16 | " SCIKIT-UPLIFT DOCS | \n",
17 | " USER GUIDE\n",
18 | "
\n",
19 | " ENGLISH VERSION\n",
20 | "\n",
21 | ""
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "В данном ноутбуке рассмотрим простой пример применения одного из подходов прогнозирования uplift в [sklearn.pipeline](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.pipeline).\n",
29 | "\n",
30 | "Данные для примера взяты из [MineThatData E-Mail Analytics And Data Mining Challenge dataset by Kevin Hillstrom](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html). Этот набор данных содержит 64 000 клиентов, которые в последний раз совершали покупки в течение двенадцати месяцев. Среди клиентов была проведена рекламная кампания с помощью email рассылки:\n",
31 | "\n",
32 | "* 1/3 клиентов были выбраны случайным образом для получения электронного письма, рекламирующего мужскую продукцию;\n",
33 | "* 1/3 клиентов были выбраны случайным образом для получения электронного письма, рекламирующего женскую продукцию;\n",
34 | "* С оставшейся 1/3 коммуникацию не проводили.\n",
35 | "\n",
36 | "Для каждого клиента из выборки замерили факт перехода по ссылке в письме, факт совершения покупки и сумму трат за две недели, следующие после получения письма.\n",
37 | "\n",
38 | "Полное описание датасета можно найти по [ссылке](https://blog.minethatdata.com/2008/03/minethatdata-e-mail-analytics-and-data.html).\n",
39 | "\n",
40 | "Установим необходимые библиотеки:"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 1,
46 | "metadata": {
47 | "ExecuteTime": {
48 | "end_time": "2021-02-07T01:01:58.302718Z",
49 | "start_time": "2021-02-07T01:01:58.298524Z"
50 | }
51 | },
52 | "outputs": [],
53 | "source": [
54 | "# pip install scikit-uplift xgboost==1.0.2 category_encoders==2.1.0 -U"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "Для простоты примера оставим только два сегмента пользователей:\n",
62 | "* тем, кому рассылалась по электронной почте рекламная кампания с участием женских товаров;\n",
63 | "* тем, кому не рассылалась рекламная кампания.\n",
64 | "\n",
65 | "В качестве целевой переменной будем использовать переменную `visit`."
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 2,
71 | "metadata": {
72 | "ExecuteTime": {
73 | "end_time": "2021-02-07T01:01:59.884250Z",
74 | "start_time": "2021-02-07T01:01:58.315398Z"
75 | }
76 | },
77 | "outputs": [
78 | {
79 | "name": "stdout",
80 | "output_type": "stream",
81 | "text": [
82 | "Размер датасета до обработки: (64000, 8)\n",
83 | "Размер датасета после обработки: (42693, 8)\n"
84 | ]
85 | },
86 | {
87 | "data": {
88 | "text/html": [
89 | "\n",
90 | "\n",
103 | "
\n",
104 | " \n",
105 | " \n",
106 | " | \n",
107 | " recency | \n",
108 | " history_segment | \n",
109 | " history | \n",
110 | " mens | \n",
111 | " womens | \n",
112 | " zip_code | \n",
113 | " newbie | \n",
114 | " channel | \n",
115 | "
\n",
116 | " \n",
117 | " \n",
118 | " \n",
119 | " | 0 | \n",
120 | " 10 | \n",
121 | " 2) $100 - $200 | \n",
122 | " 142.44 | \n",
123 | " 1 | \n",
124 | " 0 | \n",
125 | " Surburban | \n",
126 | " 0 | \n",
127 | " Phone | \n",
128 | "
\n",
129 | " \n",
130 | " | 1 | \n",
131 | " 6 | \n",
132 | " 3) $200 - $350 | \n",
133 | " 329.08 | \n",
134 | " 1 | \n",
135 | " 1 | \n",
136 | " Rural | \n",
137 | " 1 | \n",
138 | " Web | \n",
139 | "
\n",
140 | " \n",
141 | " | 2 | \n",
142 | " 7 | \n",
143 | " 2) $100 - $200 | \n",
144 | " 180.65 | \n",
145 | " 0 | \n",
146 | " 1 | \n",
147 | " Surburban | \n",
148 | " 1 | \n",
149 | " Web | \n",
150 | "
\n",
151 | " \n",
152 | " | 4 | \n",
153 | " 2 | \n",
154 | " 1) $0 - $100 | \n",
155 | " 45.34 | \n",
156 | " 1 | \n",
157 | " 0 | \n",
158 | " Urban | \n",
159 | " 0 | \n",
160 | " Web | \n",
161 | "
\n",
162 | " \n",
163 | " | 5 | \n",
164 | " 6 | \n",
165 | " 2) $100 - $200 | \n",
166 | " 134.83 | \n",
167 | " 0 | \n",
168 | " 1 | \n",
169 | " Surburban | \n",
170 | " 0 | \n",
171 | " Phone | \n",
172 | "
\n",
173 | " \n",
174 | "
\n",
175 | "
"
176 | ],
177 | "text/plain": [
178 | " recency history_segment history mens womens zip_code newbie channel\n",
179 | "0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone\n",
180 | "1 6 3) $200 - $350 329.08 1 1 Rural 1 Web\n",
181 | "2 7 2) $100 - $200 180.65 0 1 Surburban 1 Web\n",
182 | "4 2 1) $0 - $100 45.34 1 0 Urban 0 Web\n",
183 | "5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone"
184 | ]
185 | },
186 | "execution_count": 2,
187 | "metadata": {},
188 | "output_type": "execute_result"
189 | }
190 | ],
191 | "source": [
192 | "import pandas as pd\n",
193 | "from sklift.datasets import fetch_hillstrom\n",
194 | "\n",
195 | "\n",
196 | "%matplotlib inline\n",
197 | "\n",
198 | "bunch = fetch_hillstrom(target_col='visit')\n",
199 | "\n",
200 | "dataset, target, treatment = bunch['data'], bunch['target'], bunch['treatment']\n",
201 | "\n",
202 | "print(f'Размер датасета до обработки: {dataset.shape}')\n",
203 | "\n",
204 | "# Selecting two segments\n",
205 | "dataset = dataset[treatment!='Mens E-Mail']\n",
206 | "target = target[treatment!='Mens E-Mail']\n",
207 | "treatment = treatment[treatment!='Mens E-Mail'].map({\n",
208 | " 'Womens E-Mail': 1,\n",
209 | " 'No E-Mail': 0\n",
210 | "})\n",
211 | "\n",
212 | "print(f'Размер датасета после обработки: {dataset.shape}')\n",
213 | "dataset.head()"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "Разобьем все данные на обучающую и валидационную выборку:"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 3,
226 | "metadata": {
227 | "ExecuteTime": {
228 | "end_time": "2021-02-07T01:01:59.976727Z",
229 | "start_time": "2021-02-07T01:01:59.889576Z"
230 | }
231 | },
232 | "outputs": [],
233 | "source": [
234 | "from sklearn.model_selection import train_test_split\n",
235 | "\n",
236 | "\n",
237 | "X_tr, X_val, y_tr, y_val, treat_tr, treat_val = train_test_split(\n",
238 | " dataset, target, treatment, test_size=0.5, random_state=42\n",
239 | ")"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "Выберем категориальные признаки:"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 4,
252 | "metadata": {
253 | "ExecuteTime": {
254 | "end_time": "2021-02-07T01:02:00.003357Z",
255 | "start_time": "2021-02-07T01:01:59.983254Z"
256 | }
257 | },
258 | "outputs": [
259 | {
260 | "name": "stdout",
261 | "output_type": "stream",
262 | "text": [
263 | "['history_segment', 'zip_code', 'channel']\n"
264 | ]
265 | }
266 | ],
267 | "source": [
268 | "cat_cols = X_tr.select_dtypes(include='object').columns.tolist()\n",
269 | "print(cat_cols)"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "Создадим нужные объекты и объединим их в pipeline."
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 5,
282 | "metadata": {
283 | "ExecuteTime": {
284 | "end_time": "2021-02-07T01:02:00.079199Z",
285 | "start_time": "2021-02-07T01:02:00.009314Z"
286 | }
287 | },
288 | "outputs": [],
289 | "source": [
290 | "from sklearn.pipeline import Pipeline\n",
291 | "from category_encoders import CatBoostEncoder\n",
292 | "from sklift.models import ClassTransformation\n",
293 | "from xgboost import XGBClassifier\n",
294 | "\n",
295 | "\n",
296 | "encoder = CatBoostEncoder(cols=cat_cols)\n",
297 | "estimator = XGBClassifier(max_depth=2, random_state=42)\n",
298 | "ct = ClassTransformation(estimator=estimator)\n",
299 | "\n",
300 | "my_pipeline = Pipeline([\n",
301 | " ('encoder', encoder),\n",
302 | " ('model', ct)\n",
303 | "])"
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "metadata": {
309 | "ExecuteTime": {
310 | "end_time": "2020-04-26T18:02:52.236917Z",
311 | "start_time": "2020-04-26T18:02:52.110138Z"
312 | }
313 | },
314 | "source": [
315 | "Обучать pipeline будем как обычно, но колонку treatment добавим как параметр шага model: `model__treatment`."
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": 6,
321 | "metadata": {
322 | "ExecuteTime": {
323 | "end_time": "2021-02-07T01:02:01.332880Z",
324 | "start_time": "2021-02-07T01:02:00.085047Z"
325 | }
326 | },
327 | "outputs": [
328 | {
329 | "name": "stderr",
330 | "output_type": "stream",
331 | "text": [
332 | "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/sklearn/pipeline.py:354: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n",
333 | " self._final_estimator.fit(Xt, y, **fit_params)\n"
334 | ]
335 | }
336 | ],
337 | "source": [
338 | "my_pipeline = my_pipeline.fit(\n",
339 | " X=X_tr,\n",
340 | " y=y_tr,\n",
341 | " model__treatment=treat_tr\n",
342 | ")"
343 | ]
344 | },
345 | {
346 | "cell_type": "markdown",
347 | "metadata": {},
348 | "source": [
349 | "Предскажем uplift и посчитаем uplift@30%"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 7,
355 | "metadata": {
356 | "ExecuteTime": {
357 | "end_time": "2021-02-07T01:02:01.476617Z",
358 | "start_time": "2021-02-07T01:02:01.335371Z"
359 | }
360 | },
361 | "outputs": [
362 | {
363 | "name": "stdout",
364 | "output_type": "stream",
365 | "text": [
366 | "uplift@30%: 0.0661\n"
367 | ]
368 | }
369 | ],
370 | "source": [
371 | "from sklift.metrics import uplift_at_k\n",
372 | "\n",
373 | "\n",
374 | "uplift_predictions = my_pipeline.predict(X_val)\n",
375 | "\n",
376 | "uplift_30 = uplift_at_k(y_val, uplift_predictions, treat_val, strategy='overall')\n",
377 | "print(f'uplift@30%: {uplift_30:.4f}')"
378 | ]
379 | }
380 | ],
381 | "metadata": {
382 | "kernelspec": {
383 | "display_name": "python 3",
384 | "language": "python",
385 | "name": "python3"
386 | },
387 | "language_info": {
388 | "codemirror_mode": {
389 | "name": "ipython",
390 | "version": 3
391 | },
392 | "file_extension": ".py",
393 | "mimetype": "text/x-python",
394 | "name": "python",
395 | "nbconvert_exporter": "python",
396 | "pygments_lexer": "ipython3",
397 | "version": "3.6.1"
398 | },
399 | "pycharm": {
400 | "stem_cell": {
401 | "cell_type": "raw",
402 | "source": [],
403 | "metadata": {
404 | "collapsed": false
405 | }
406 | }
407 | }
408 | },
409 | "nbformat": 4,
410 | "nbformat_minor": 2
411 | }
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | # Release History
2 |
3 | ## Legend for changelogs
4 |
5 | * 🔥 something big that you couldn’t do before.
6 | * 💥 something that you couldn’t do before.
7 | * 📝 a miscellaneous minor improvement.
8 | * 🔨 something that previously didn’t work as documented – or according to reasonable expectations – should now work.
9 | * ❗️ you will need to change your code to have the same effect in the future; or a feature will be removed in the future.
10 |
11 | ## Version 0.5.1
12 |
13 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.5.1/api/models/index.html)
14 |
15 | * 📝 Add docs page for [ClassTransformationReg](https://www.uplift-modeling.com/en/v0.5.1/api/models/ClassTransformationReg.html) model.
16 |
17 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.5.1/api/datasets/index.html)
18 |
19 | * 🔨 Fix bug in [fetch_x5](https://www.uplift-modeling.com/en/v0.5.1/api/datasets/fetch_x5.html) func.
20 |
21 | ### [User Guide](https://www.uplift-modeling.com/en/v0.5.1/user_guide/index.html)
22 |
23 | * 📝 Add page for [Transformed Outcome](https://www.uplift-modeling.com/en/v0.5.1/user_guide/models/transformed_outcome.html) approach.
24 |
25 |
26 | ## Version 0.5.0
27 |
28 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.5.0/api/models/index.html)
29 |
30 | * 🔥 Add ClassTransformationReg model by [@mcullan](https://github.com/mcullan) and [@ElisovaIra](https://github.com/ElisovaIra).
31 | * 🔨 Add the ability to process a series with different indexes in the [TwoModels](https://www.uplift-modeling.com/en/v0.5.0/api/models.html#sklift.models.models.TwoModels) by [@flashlight101](https://github.com/flashlight101).
32 |
33 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.5.0/api/index/metrics.html)
34 |
35 | * 🔥 Add new metric [Maximum profit uplift measure](https://www.uplift-modeling.com/en/v0.5.0/api/metrics/max_prof_uplift.html) by [@rooti123](https://github.com/rooti123).
36 |
37 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.5.0/api/datasets/index.html)
38 |
39 | * 💥 Add checker based on hash for all datasets by [@flashlight101](https://github.com/flashlight101)
40 | * 📝 Add [scheme](https://www.uplift-modeling.com/en/v0.5.0/api/datasets/fetch_x5.html) of x5 dataframes.
41 |
42 | ### Miscellaneous
43 | * 📝 Improve Chinese tags by [@00helloworld](https://github.com/00helloworld)
44 |
45 | ## Version 0.4.1
46 |
47 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.1/api/datasets/index.html)
48 |
49 | * 🔨 Fix bug in dataset links.
50 | * 📝 Add about a company section
51 |
52 | ## Version 0.4.0
53 |
54 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.4.0/api/index/metrics.html)
55 |
56 | * 🔥 Add [make_uplift_scorer](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/make_uplift_scorer.html) function for interacting with the module ``sklearn.model_selection`` by [@wrapper228](https://github.com/wrapper228).
57 | * 🔥 Add new metric [average_squared_deviation](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/average_squared_deviation.html) function by [@Mogby](https://github.com/Mogby).
58 |
59 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.4.0/api/viz/index.html)
60 |
61 | * 🔥 Add the ability to draw multiple plots on the same graph of [plot_uplift_curve](https://www.uplift-modeling.com/en/v0.4.0/api/viz/plot_uplift_curve.html) function and [plot_qini_curve](https://www.uplift-modeling.com/en/v0.4.0/api/viz/plot_qini_curve.html) function by [@flashlight101](https://github.com/flashlight101).
62 |
63 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html)
64 |
65 | * 💥 Add new dataset [fetch_megafon](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/fetch_megafon.html) function by [@ezhdi](https://github.com/ezhdi).
66 | * 📝 Improve documentation of [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html) by [@flashlight101](https://github.com/flashlight101) and [@ezhdi](https://github.com/ezhdi).
67 |
68 |
69 | ### Miscellaneous
70 |
71 | * 💥 Add new tutorial [Example of usage model from sklift.models in sklearn.model_selection](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb) by [@wrapper228](https://github.com/wrapper228).
72 | * 💥 Increased test coverage from 30% to 82% by [@flashlight101](https://github.com/flashlight101) and [@Ksyula](https://github.com/Ksyula)
73 | * 📝 Add EDA of available datasets on [Tutorials](https://www.uplift-modeling.com/en/v0.4.0/tutorials.html) page by [@lyutov89](https://github.com/lyutov89), [@ezhdi](https://github.com/ezhdi), [@patpanda94](https://github.com/patpanda94) and [@Ksyula](https://github.com/Ksyula).
74 | * 📝 Improve ["RetailHero tutorial"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb) by [@Ksyula](https://github.com/Ksyula).
75 |
76 | ## Version 0.3.2
77 |
78 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.2/api/datasets/index.html)
79 |
80 | * 🔨 Fix bug in [fetch_x5](https://www.uplift-modeling.com/en/v0.3.2/api/datasets/fetch_x5.html) function by [@Muhamob](https://github.com/Muhamob).
81 |
82 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.2/api/index/metrics.html)
83 |
84 | * 📝 Fix docstring in [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.3.2/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra).
85 |
86 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.3.2/api/viz/index.html)
87 |
88 | * 🔨 Fix bug in [plot_uplift_preds](https://www.uplift-modeling.com/en/v0.3.2/api/viz/plot_uplift_preds.html) function by [@bwbelljr](https://github.com/bwbelljr).
89 |
90 | ### Miscellaneous
91 |
92 | * 📝 Change some images in ["RetailHero tutorial"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb).
93 |
94 | ## Version 0.3.1
95 |
96 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.1/api/datasets/index.html)
97 |
98 | * 🔨 Fix bugs in [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.1/api/datasets/index.html)
99 |
100 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.1/api/index/metrics.html)
101 |
102 | * 📝 Improve [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.3.1/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra).
103 |
104 | ### Miscellaneous
105 |
106 | * 💥 Add tutorial ["Uplift modeling metrics"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_metrics_tutorial.ipynb) by [@ElisovaIra](https://github.com/ElisovaIra).
107 |
108 | ## Version 0.3.0
109 |
110 | ### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.0/api/datasets/index.html)
111 |
112 | * 🔥 Add [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.0/api/datasets/index.html) by [@ElisovaIra](https://github.com/ElisovaIra), [@RobbStarkk](https://github.com/RobbStarkk), [@acssar](https://github.com/acssar), [@tankudo](https://github.com/tankudo), [@flashlight101](https://github.com/flashlight101), [@semenova-pd](https://github.com/semenova-pd), [@timfex](https://github.com/timfex)
113 |
114 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.3.0/api/models/index.html)
115 |
116 | * 📝 Add different checkers by [@ElisovaIra](https://github.com/ElisovaIra)
117 |
118 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.0/api/metrics/index.html)
119 |
120 | * 📝 Add different checkers by [@ElisovaIra](https://github.com/ElisovaIra)
121 |
122 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.3.0/api/viz/index.html)
123 |
124 | * 📝 Fix conflicting and duplicating default values by [@denniskorablev](https://github.com/denniskorablev)
125 |
126 | ### [User Guide](https://www.uplift-modeling.com/en/v0.3.0/user_guide/index.html)
127 |
128 | * 📝 Fix typos
129 |
130 | ## Version 0.2.0
131 |
132 | ### [User Guide](https://www.uplift-modeling.com/en/v0.2.0/user_guide/index.html)
133 |
134 | * 🔥 Add [User Guide](https://www.uplift-modeling.com/en/v0.2.0/user_guide/index.html)
135 |
136 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.2.0/api/models/index.html)
137 |
138 | * 💥 Add `treatment interaction` method to [SoloModel](https://www.uplift-modeling.com/en/v0.2.0/api/models/SoloModel.html) approach by [@AdiVarma27](https://github.com/AdiVarma27).
139 |
140 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.2.0/api/index/metrics.html)
141 |
142 | * 💥 Add [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra).
143 | * 💥 Add [weighted_average_uplift](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/weighted_average_uplift.html) function by [@ElisovaIra](https://github.com/ElisovaIra).
144 | * 💥 Add [perfect_uplift_curve](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/perfect_uplift_curve.html) function.
145 | * 💥 Add [perfect_qini_curve](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/perfect_qini_curve.html) function.
146 | * 🔨 Add normalization in [uplift_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/uplift_auc_score.html) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/qini_auc_score.html) functions.
147 | * ❗ Remove metrics `auuc` and `auqc`. In exchange for them use respectively [uplift_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/uplift_auc_score.html) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.2.0/api/metrics/qini_auc_score.html)
148 |
149 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.2.0/api/viz/index.html)
150 |
151 | * 💥 Add [plot_uplift_curve](https://www.uplift-modeling.com/en/v0.2.0/api/viz/plot_uplift_curve.html) function.
152 | * 💥 Add [plot_qini_curve](https://www.uplift-modeling.com/en/v0.2.0/api/viz/plot_qini_curve.html) function.
153 | * ❗ Remove `plot_uplift_qini_curves`.
154 |
155 | ### Miscellaneous
156 |
157 | * 💥 Add contributors in main Readme and in main page of docs.
158 | * 💥 Add [contributing guide](https://www.uplift-modeling.com/en/v0.2.0/contributing.html).
159 | * 💥 Add [code of conduct](https://github.com/maks-sh/scikit-uplift/blob/master/.github/CODE_OF_CONDUCT.md).
160 | * 📝 Reformat [Tutorials](https://www.uplift-modeling.com/en/v0.2.0/tutorials.html) page.
161 | * 📝 Add github buttons in docs.
162 | * 📝 Add logo compatibility with pypi.
163 |
164 | ## Version 0.1.2
165 |
166 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.1.2/api/models.html)
167 |
168 | * 🔨 Fix bugs in [TwoModels](https://www.uplift-modeling.com/en/v0.1.2/api/models.html#sklift.models.models.TwoModels) for regression problem.
169 | * 📝 Minor code refactoring.
170 |
171 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.1.2/api/metrics.html)
172 |
173 | * 📝 Minor code refactoring.
174 |
175 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.1.2/api/viz.html)
176 |
177 | * 💥 Add bar plot in [plot_uplift_by_percentile](https://www.uplift-modeling.com/en/v0.1.2/api/viz.html#sklift.viz.base.plot_uplift_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra).
178 | * 🔨 Fix bug in [plot_uplift_by_percentile](https://www.uplift-modeling.com/en/v0.1.2/api/viz.html#sklift.viz.base.plot_uplift_by_percentile).
179 | * 📝 Minor code refactoring.
180 |
181 | ## Version 0.1.1
182 |
183 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html)
184 |
185 | * 💥 Add [plot_uplift_by_percentile](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html#sklift.viz.base.plot_uplift_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra).
186 | * 🔨 Fix bug with import [plot_treatment_balance_curve](https://www.uplift-modeling.com/en/v0.1.1/api/viz.html#sklift.viz.base.plot_treatment_balance_curve).
187 |
188 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.1.1/api/metrics.html)
189 |
190 | * 💥 Add [response_rate_by_percentile](https://www.uplift-modeling.com/en/v0.1.1/api/metrics.html#sklift.metrics.metrics.response_rate_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra).
191 | * 🔨 Fix bug with import [uplift_auc_score](https://www.uplift-modeling.com/en/v0.1.1/api/metrics.html#sklift.metrics.metrics.uplift_auc_score) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.1.1/api/metrics.html#sklift.metrics.metrics.qini_auc_score).
192 | * 📝 Fix typos in docstrings.
193 |
194 | ### Miscellaneous
195 |
196 | * 💥 Add tutorial ["Example of usage model from sklift.models in sklearn.pipeline"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb).
197 | * 📝 Add link to Release History in main Readme.md.
198 |
199 | ## Version 0.1.0
200 |
201 | ### [sklift.models](https://www.uplift-modeling.com/en/v0.1.0/api/models.html)
202 |
203 | * 📝 Fix typo in [TwoModels](https://www.uplift-modeling.com/en/v0.1.0/api/models.html#sklift.models.models.TwoModels) docstring by [@spiaz](https://github.com/spiaz).
204 | * 📝 Improve docstrings and add references to all approaches.
205 |
206 | ### [sklift.metrics](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html)
207 |
208 | * 💥 Add [treatment_balance_curve](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html#sklift.metrics.metrics.treatment_balance_curve) by [@spiaz](https://github.com/spiaz).
209 | * ❗️ The metrics `auuc` and `auqc` are now respectively renamed to [uplift_auc_score](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html#sklift.metrics.metrics.uplift_auc_score) and [qini_auc_score](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html#sklift.metrics.metrics.qini_auc_score). So, `auuc` and `auqc` will be removed in 0.2.0.
210 | * ❗️ Add a new parameter `startegy` in [uplift_at_k](https://www.uplift-modeling.com/en/v0.1.0/api/metrics.html#sklift.metrics.metrics.uplift_at_k).
211 |
212 | ### [sklift.viz](https://www.uplift-modeling.com/en/v0.1.0/api/viz.html)
213 |
214 | * 💥 Add [plot_treatment_balance_curve](https://www.uplift-modeling.com/en/v0.1.0/api/viz.html#sklift.viz.base.plot_treatment_balance_curve) by [@spiaz](https://github.com/spiaz).
215 | * 📝 fix typo in [plot_uplift_qini_curves](https://www.uplift-modeling.com/en/v0.1.0/api/viz.html#sklift.viz.base.plot_uplift_qini_curves) by [@spiaz](https://github.com/spiaz).
216 |
217 | ### Miscellaneous
218 |
219 | * ❗️ Remove sklift.preprocess submodule.
220 | * 💥 Add compatibility of tutorials with colab and add colab buttons by [@ElMaxuno](https://github.com/ElMaxuno).
221 | * 💥 Add Changelog.
222 | * 📝 Change the documentation structure. Add next pages: [Tutorials](https://www.uplift-modeling.com/en/v0.1.0/tutorials.html), [Release History](https://www.uplift-modeling.com/en/v0.1.0/changelog.html) and [Hall of fame](https://www.uplift-modeling.com/en/v0.1.0/hall_of_fame.html).
--------------------------------------------------------------------------------
/sklift/tests/test_metrics.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | from sklearn.tree import DecisionTreeClassifier
6 | from ..models import SoloModel
7 |
8 | from sklearn.utils._testing import assert_array_almost_equal
9 |
10 | from ..metrics import make_uplift_scorer
11 | from ..metrics import uplift_curve, uplift_auc_score, perfect_uplift_curve
12 | from ..metrics import qini_curve, qini_auc_score, perfect_qini_curve
13 | from ..metrics import (uplift_at_k, response_rate_by_percentile,
14 | weighted_average_uplift, uplift_by_percentile, treatment_balance_curve, average_squared_deviation)
15 |
16 |
def make_predictions(binary):
    """Fit a SoloModel on a tiny fixed dataset and return validation data.

    Returns a ``(y_val, uplift_preds, treat_val)`` triple: true validation
    targets, predicted uplift scores and treatment flags.  When *binary* is
    False the targets are replaced with three-class labels.
    """
    X_train = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]])
    y_train = np.array([0.0, 0.0, 1.0])
    treat_train = np.array([0.0, 1.0, 1.0])

    X_val = np.array([[5.1, 3.4, 1.5, 0.2], [5.0, 3.5, 1.3, 0.3], [4.5, 2.3, 1.3, 0.3]])
    y_val = np.array([0.0, 1.0, 0.0])
    treat_val = np.array([0.0, 1.0, 1.0])

    if not binary:
        y_train = np.array([2.0, 0.0, 1.0])
        y_val = np.array([0.0, 1.0, 2.0])

    # Deterministic base estimator so every test sees identical predictions.
    estimator = SoloModel(DecisionTreeClassifier(random_state=0))
    estimator.fit(X_train, y_train, treat_train)

    return y_val, estimator.predict(X_val), treat_val
33 |
34 |
@pytest.mark.parametrize(
    "binary, test_x_actual, test_y_actual",
    [
        (True, np.array([0, 3]), np.array([0, 1.5, ])),
        (False, np.array([0, 2, 3]), np.array([0.0, 3, 4.5]))
    ]
)
def test_uplift_curve(binary, test_x_actual, test_y_actual):
    """uplift_curve returns the reference points for binary targets and
    raises for non-binary targets."""
    y_true, uplift, treatment = make_predictions(binary)

    if not binary:
        # Non-binary targets are expected to be rejected, so there are no
        # curve points to compare against.
        with pytest.raises(Exception):
            uplift_curve(y_true, uplift, treatment)
    else:
        x_actual, y_actual = uplift_curve(y_true, uplift, treatment)

        # BUG FIX: these assertions previously sat outside the if/else, so
        # the binary=False case crashed with NameError (x_actual was never
        # assigned once pytest.raises swallowed the exception).  They now
        # run only on the success path, matching test_qini_curve and
        # test_perfect_uplift_curve.
        assert_array_almost_equal(x_actual, test_x_actual)
        assert_array_almost_equal(y_actual, test_y_actual)
        assert x_actual.shape == y_actual.shape
54 |
55 |
def test_uplift_curve_hard():
    """Degenerate targets (all zeros, then all ones) fed to uplift_curve.

    NOTE(review): the whole body sits inside one ``pytest.raises`` block, so
    execution stops at the FIRST statement that raises and everything after
    it never runs.  The test therefore only verifies that *some* statement
    below raises an Exception -- presumably the first ``uplift_curve`` call
    on a constant target; confirm against the metric's input validation.
    """
    with pytest.raises(Exception):
        y_true, uplift, treatment = make_predictions(binary=True)
        y_true = np.zeros(y_true.shape)

        x_actual, y_actual = uplift_curve(y_true, uplift, treatment)

        assert_array_almost_equal(x_actual, np.array([0, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0]))

        y_true = np.ones(y_true.shape)

        x_actual, y_actual = uplift_curve(y_true, uplift, treatment)

        assert_array_almost_equal(x_actual, np.array([0, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0]))
72 |
73 |
@pytest.mark.parametrize(
    "binary, test_x_actual, test_y_actual",
    [
        (True, np.array([0, 1, 2, 3]), np.array([0., 1., 2., 1.5])),
        (False, np.array([0, 1, 2, 3]), np.array([0., 1., 2., 4.5]))
    ]
)
def test_perfect_uplift_curve(binary, test_x_actual, test_y_actual):
    """perfect_uplift_curve matches the reference points for binary targets
    and raises for non-binary targets."""
    targets, _, treat = make_predictions(binary)
    if not binary:
        with pytest.raises(Exception):
            perfect_uplift_curve(targets, treat)
    else:
        xs, ys = perfect_uplift_curve(targets, treat)
        assert_array_almost_equal(xs, test_x_actual)
        assert_array_almost_equal(ys, test_y_actual)
        assert xs.shape == ys.shape
91 |
92 |
def test_perfect_uplift_curve_hard():
    """Degenerate targets (all zeros, then all ones) for perfect_uplift_curve.

    NOTE(review): the whole body is wrapped in a single ``pytest.raises``
    block, so execution stops at the first raising statement and later lines
    never run.  Only "some statement below raises an Exception" is actually
    checked -- presumably the first curve call on a constant target; confirm
    against the metric's input validation.
    """
    with pytest.raises(Exception):
        y_true, uplift, treatment = make_predictions(binary=True)
        y_true = np.zeros(y_true.shape)

        x_actual, y_actual = perfect_uplift_curve(y_true, treatment)

        assert_array_almost_equal(x_actual, np.array([0, 1, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0]))

        y_true = np.ones(y_true.shape)

        x_actual, y_actual = perfect_uplift_curve(y_true, treatment)

        assert_array_almost_equal(x_actual, np.array([0, 2, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 2.0, 0.0]))
109 |
110 |
def test_uplift_auc_score():
    """uplift_auc_score on tiny hand-checkable inputs.

    NOTE(review): the ``pytest.raises`` block below wraps four separate
    stanzas; execution stops at the first statement that raises, so the later
    checks inside it never run.  Only "some wrapped statement raises" is
    verified -- presumably the first scored call; confirm against the
    metric's input validation.
    """
    y_true = [0, 1]
    uplift = [0.1, 0.3]
    treatment = [1, 0]
    assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 0.)

    y_true = [1, 0]
    uplift = [0.1, 0.3]
    treatment = [0, 1]
    assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 1.)

    with pytest.raises(Exception):
        y_true = [1, 1]
        uplift = [0.1, 0.3]
        treatment = [0, 1]
        assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 1.)

        y_true = [1, 1]
        uplift = [0.1, 0.3]
        treatment = [1, 0]
        assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), -1.)

        y_true = [0, 1, 2]
        uplift = [0.1, 0.3, 0.9]
        treatment = [0, 1, 0]
        assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), -1.333333)

        y_true = [0, 1, 2]
        uplift = [0.1, 0.3, 0.9]
        treatment = [1, 0, 1]
        assert_array_almost_equal(uplift_auc_score(y_true, uplift, treatment), 1.333333)
142 |
143 |
@pytest.mark.parametrize(
    "binary, test_x_actual, test_y_actual",
    [
        (True, np.array([0, 3]), np.array([0, 1., ])),
        (False, np.array([0, 2, 3]), np.array([0., 3, 3.]))
    ]
)
def test_qini_curve(binary, test_x_actual, test_y_actual):
    """qini_curve yields the reference points for binary targets and raises
    for non-binary targets."""
    targets, scores, treat = make_predictions(binary)

    if not binary:
        with pytest.raises(Exception):
            qini_curve(targets, scores, treat)
    else:
        xs, ys = qini_curve(targets, scores, treat)
        assert_array_almost_equal(xs, test_x_actual)
        assert_array_almost_equal(ys, test_y_actual)
        assert xs.shape == ys.shape
162 |
163 |
def test_qini_curve_hard():
    """Degenerate targets (all zeros, then all ones) fed to qini_curve.

    NOTE(review): the whole body is inside one ``pytest.raises`` block, so
    execution stops at the first raising statement and later lines never
    run.  Only "some statement below raises an Exception" is checked --
    presumably the first ``qini_curve`` call on a constant target; confirm
    against the metric's input validation.
    """
    with pytest.raises(Exception):
        y_true, uplift, treatment = make_predictions(binary=True)
        y_true = np.zeros(y_true.shape)

        x_actual, y_actual = qini_curve(y_true, uplift, treatment)

        assert_array_almost_equal(x_actual, np.array([0, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0]))

        y_true = np.ones(y_true.shape)

        x_actual, y_actual = qini_curve(y_true, uplift, treatment)

        assert_array_almost_equal(x_actual, np.array([0, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0]))
180 |
181 |
@pytest.mark.parametrize(
    "binary, negative_effect, test_x_actual, test_y_actual",
    [
        (True, True, np.array([0, 1, 3]), np.array([0., 1., 1.])),
        (True, False, np.array([0., 1., 3.]), np.array([0., 1., 1.])),
    ]
)
def test_perfect_qini_curve(binary, negative_effect, test_x_actual, test_y_actual):
    """perfect_qini_curve matches the reference points for both settings of
    ``negative_effect`` on binary targets."""
    targets, _, treat = make_predictions(binary)

    xs, ys = perfect_qini_curve(targets, treat, negative_effect=negative_effect)

    assert_array_almost_equal(xs, test_x_actual)
    assert_array_almost_equal(ys, test_y_actual)
    assert xs.shape == ys.shape
197 |
198 |
def test_perfect_qini_curve_hard():
    """Degenerate targets (all zeros, then all ones) for perfect_qini_curve,
    with both settings of ``negative_effect``.

    NOTE(review): the whole body is wrapped in a single ``pytest.raises``
    block, so execution stops at the first raising statement and later lines
    never run.  Only "some statement below raises an Exception" is checked --
    presumably the first curve call on a constant target; confirm against
    the metric's input validation.
    """
    with pytest.raises(Exception):
        y_true, uplift, treatment = make_predictions(binary=True)
        y_true = np.zeros(y_true.shape)

        x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=True)

        assert_array_almost_equal(x_actual, np.array([0, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0]))

        x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=False)

        assert_array_almost_equal(x_actual, np.array([0., 0., 3.]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0]))

        y_true = np.ones(y_true.shape)

        x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=True)

        assert_array_almost_equal(x_actual, np.array([0, 2, 3]))
        assert_array_almost_equal(y_actual, np.array([0.0, 2.0, 0.0]))

        x_actual, y_actual = perfect_qini_curve(y_true, treatment, negative_effect=False)

        assert_array_almost_equal(x_actual, np.array([0., 0., 3.]))
        assert_array_almost_equal(y_actual, np.array([0.0, 0.0, 0.0]))
225 |
def test_perfect_qini_curve_error():
    """A non-boolean ``negative_effect`` argument is rejected with TypeError."""
    targets, _, treat = make_predictions(binary=True)
    with pytest.raises(TypeError):
        perfect_qini_curve(targets, treat, negative_effect=5)
230 |
231 |
232 |
def test_qini_auc_score():
    """qini_auc_score on tiny hand-checkable inputs.

    NOTE(review): the ``pytest.raises`` block below wraps four separate
    stanzas; execution stops at the first statement that raises, so the later
    checks inside it never run.  Only "some wrapped statement raises" is
    verified -- presumably the first scored call; confirm against the
    metric's input validation.
    """
    y_true = [0, 1]
    uplift = [0.1, 0.3]
    treatment = [1, 0]
    assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 1.)

    y_true = [1, 0]
    uplift = [0.1, 0.3]
    treatment = [0, 1]
    assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 1.)

    with pytest.raises(Exception):
        y_true = [1, 1]
        uplift = [0.1, 0.3]
        treatment = [0, 1]
        assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 1.)

        y_true = [1, 1]
        uplift = [0.1, 0.3]
        treatment = [1, 0]
        assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 0.)

        y_true = [0, 1, 2]
        uplift = [0.1, 0.3, 0.9]
        treatment = [0, 1, 0]
        assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), -0.5)

        y_true = [0, 1, 2]
        uplift = [0.1, 0.3, 0.9]
        treatment = [1, 0, 1]
        assert_array_almost_equal(qini_auc_score(y_true, uplift, treatment), 0.75)
264 |
def test_qini_auc_score_error():
    """A non-boolean ``negative_effect`` argument is rejected with TypeError."""
    targets = [1, 0]
    scores = [0.1, 0.3]
    treat = [0, 1]
    with pytest.raises(TypeError):
        qini_auc_score(targets, scores, treat, negative_effect=5)
271 |
272 |
def test_uplift_at_k():
    """uplift_at_k on the toy fixture: top-1 by-group uplift is zero."""
    y_true, uplift, treatment = make_predictions(binary=True)

    assert_array_almost_equal(uplift_at_k(y_true, uplift, treatment, strategy='by_group', k=1), np.array([0.]))
    # NOTE(review): disabled check below -- reason unknown; confirm the
    # expected value for the 'overall' strategy before re-enabling.
    #assert_array_almost_equal(uplift_at_k(y_true, uplift, treatment, strategy='overall', k=2), np.array([0.]))
278 |
@pytest.mark.parametrize(
    "strategy, k",
    [
        ('new_strategy', 1),
        ('by_group', -0.5),
        ('by_group', '1'),
        ('by_group', 2)
    ]
)
def test_uplift_at_k_errors(strategy, k):
    """Unknown strategies and out-of-range or wrongly-typed ``k`` values are
    rejected with ValueError."""
    targets, scores, treat = make_predictions(binary=True)
    with pytest.raises(ValueError):
        uplift_at_k(targets, scores, treat, strategy, k)
292 |
293 |
@pytest.mark.parametrize(
    "strategy, group, response_rate",
    [
        ('overall', 'treatment', np.array([[0.5], [0.125], [2.]])),
        ('by_group', 'treatment', np.array([[0.5], [0.125], [2.]])),
        ('overall', 'control', np.array([[0.], [0.], [1.]])),
        ('by_group', 'control', np.array([[0.], [0.], [1.]]))
    ]
)
def test_response_rate_by_percentile(strategy, group, response_rate):
    """Response rates over a single bin match the reference values for each
    strategy/group combination."""
    targets, scores, treat = make_predictions(binary=True)

    actual = response_rate_by_percentile(targets, scores, treat, group, strategy, bins=1)
    assert_array_almost_equal(actual, response_rate)
308 |
@pytest.mark.parametrize(
    "strategy, group, bins",
    [
        ('new_strategy', 'control', 1),
        ('by_group', 'ctrl', 1),
        ('by_group', 'control', 0.5),
        ('by_group', 'control', 9999)
    ]
)
def test_response_rate_by_percentile_errors(strategy, group, bins):
    """Unknown strategies/groups and invalid bin counts raise ValueError."""
    targets, scores, treat = make_predictions(binary=True)
    with pytest.raises(ValueError):
        response_rate_by_percentile(targets, scores, treat, group=group, strategy=strategy, bins=bins)
322 |
@pytest.mark.parametrize(
    "strategy, weighted_average",
    [
        ('overall', 0.5),
        ('by_group', 0.5)
    ]
)
def test_weighted_average_uplift(strategy, weighted_average):
    """Weighted average uplift over one bin equals the reference value for
    both strategies."""
    targets, scores, treat = make_predictions(binary=True)

    result = weighted_average_uplift(targets, scores, treat, strategy, bins=1)
    assert_array_almost_equal(result, weighted_average)
334 |
335 |
@pytest.mark.parametrize(
    "strategy, bins",
    [
        ('new_strategy', 1),
        ('by_group', 0.5),
        ('by_group', 9999)
    ]
)
def test_weighted_average_uplift_errors(strategy, bins):
    """Unknown strategies and invalid bin counts raise ValueError."""
    targets, scores, treat = make_predictions(binary=True)
    with pytest.raises(ValueError):
        weighted_average_uplift(targets, scores, treat, strategy=strategy, bins=bins)
348 |
349 |
@pytest.mark.parametrize(
    "strategy, bins, std, total, string_percentiles, data",
    [
        ('overall', 1, False, False, False, np.array([[2., 1., 0.5, 0., 0.5]])),
        ('overall', 1, True, True, True, np.array([[2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553],
                                                   [2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553]])),
        ('by_group', 1, False, False, False, np.array([[2., 1., 0.5, 0., 0.5]])),
        ('by_group', 1, True, True, True, np.array([[2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553],
                                                    [2., 1., 0.5, 0., 0.5, 0.353553, 0., 0.353553]]))
    ]
)
def test_uplift_by_percentile(strategy, bins, std, total, string_percentiles, data):
    """The uplift-by-percentile table matches the reference for each option
    combination (with/without std and totals)."""
    targets, scores, treat = make_predictions(binary=True)

    table = uplift_by_percentile(targets, scores, treat, strategy, bins, std, total, string_percentiles)
    assert_array_almost_equal(table, data)
366 |
@pytest.mark.parametrize(
    "strategy, bins, std, total, string_percentiles",
    [
        ('new_strategy', 1, True, True, True),
        ('by_group', 0.5, True, True, True),
        ('by_group', 9999, True, True, True),
        ('by_group', 1, 2, True, True),
        ('by_group', 1, True, True, 2),
        ('by_group', 1, True, 2, True)
    ]
)
def test_uplift_by_percentile_errors(strategy, bins, std, total, string_percentiles):
    """Invalid strategies, bin counts, or non-boolean flags raise ValueError."""
    targets, scores, treat = make_predictions(binary=True)
    with pytest.raises(ValueError):
        uplift_by_percentile(targets, scores, treat, strategy, bins, std, total, string_percentiles)
382 |
383 |
def test_treatment_balance_curve():
    """Treatment balance with a window of 2 gives the expected two points."""
    _, scores, treat = make_predictions(binary=True)

    positions, balance = treatment_balance_curve(scores, treat, winsize=2)
    assert_array_almost_equal(positions, np.array([1., 100.]))
    assert_array_almost_equal(balance, np.array([1., 0.5]))
390 |
@pytest.mark.parametrize(
    "strategy",
    [
        ('overall'),
        ('by_group')
    ]
)
def test_average_squared_deviation(strategy):
    """Deviation of a prediction set against itself is exactly zero."""
    targets, scores, treat = make_predictions(binary=True)
    deviation = average_squared_deviation(targets, scores, treat, targets, scores, treat, strategy, bins=1)
    assert deviation == 0
401 |
@pytest.mark.parametrize(
    "strategy, bins",
    [
        ('new_strategy', 1),
        ('by_group', 0.5),
        ('by_group', 9999)
    ]
)
def test_average_squared_deviation_errors(strategy, bins):
    """Unknown strategies and invalid bin counts raise ValueError."""
    targets, scores, treat = make_predictions(binary=True)
    with pytest.raises(ValueError):
        average_squared_deviation(targets, scores, treat, targets, scores, treat, strategy=strategy, bins=bins)
414 |
def test_metric_name_error():
    """make_uplift_scorer rejects unknown metric names with ValueError."""
    with pytest.raises(ValueError):
        make_uplift_scorer('new_scorer', [0, 1])
418 |
def test_make_scorer_error():
    """make_uplift_scorer rejects an empty treatment vector with TypeError."""
    with pytest.raises(TypeError):
        make_uplift_scorer('qini_auc_score', [])
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
--------------------------------------------------------------------------------