├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.MD
├── docs
    ├── .readthedocs.yml
    ├── Makefile
    ├── README.MD
    ├── base_model.rst
    ├── conf.py
    ├── contribute.rst
    ├── datasets
    │   ├── download_criteo_uplift_prediction.rst
    │   ├── download_hillstrom_email_marketing.rst
    │   ├── download_lalonde_nsw.rst
    │   ├── index.rst
    │   ├── load_criteo_uplift_prediction.rst
    │   ├── load_hillstrom_email_marketing.rst
    │   ├── load_lalonde_nsw.rst
    │   └── make_linear_regression.rst
    ├── examples.rst
    ├── index.rst
    ├── installation.rst
    ├── make.bat
    ├── metrics
    │   ├── get_average_effect.rst
    │   └── index.rst
    ├── model_selection
    │   ├── index.rst
    │   ├── train_test_split.rst
    │   └── treatment_cross_val_score.rst
    ├── requirements.txt
    ├── transformation
    │   ├── index.rst
    │   ├── jaskowski.rst
    │   ├── kane.rst
    │   ├── lai.rst
    │   ├── pessimistic.rst
    │   ├── reflective.rst
    │   └── transformation_base_model.rst
    ├── utils
    │   ├── download_file.rst
    │   ├── index.rst
    │   └── retrieve_from_gz.rst
    └── variable_selection
    │   ├── cadit.rst
    │   ├── dummy.rst
    │   ├── econometric.rst
    │   ├── index.rst
    │   └── two_model.rst
├── examples
    └── README.MD
├── pyuplift
    ├── __init__.py
    ├── base.py
    ├── datasets
    │   ├── __init__.py
    │   ├── generators
    │   │   ├── __init__.py
    │   │   └── linear.py
    │   └── loaders
    │   │   ├── __init__.py
    │   │   ├── criteo_uplift_prediction.py
    │   │   ├── hillstrom_email_marketing.py
    │   │   └── lalonde_nsw.py
    ├── metrics
    │   ├── __init__.py
    │   └── average_effect.py
    ├── model_selection
    │   ├── __init__.py
    │   ├── model_validation
    │   │   ├── __init__.py
    │   │   └── treatment_cross_validation.py
    │   └── splitters
    │   │   ├── __init__.py
    │   │   └── train_test_split.py
    ├── transformation
    │   ├── __init__.py
    │   ├── base.py
    │   ├── jaskowski.py
    │   ├── kane.py
    │   ├── lai.py
    │   ├── pessimistic.py
    │   └── reflective.py
    ├── utils
    │   ├── __init__.py
    │   ├── downloader.py
    │   └── retriever.py
    └── variable_selection
    │   ├── __init__.py
    │   ├── cadit.py
    │   ├── dummy.py
    │   ├── econometric.py
    │   └── two_model.py
├── resources
    ├── logo.psd
    └── pyuplift-logo.png
├── setup.cfg
├── setup.py
├── tests
    ├── README.MD
    ├── __init__.py
    ├── datasets
    │   ├── __init__.py
    │   ├── generators
    │   │   ├── __init__.py
    │   │   └── test_linear.py
    │   └── loaders
    │   │   ├── __init__.py
    │   │   ├── test_criteo_uplift_prediction.py
    │   │   ├── test_hillstrom_email_marketing.py
    │   │   └── test_lalonde_nsw.py
    ├── metrics
    │   ├── __init__.py
    │   └── test_average_effect.py
    ├── model_selection
    │   ├── __init__.py
    │   ├── model_validation
    │   │   ├── __init__.py
    │   │   └── test_treatment_cross_validation.py
    │   └── splitters
    │   │   ├── __init__.py
    │   │   └── test_train_test_split.py
    ├── transformation
    │   ├── __init__.py
    │   ├── base.py
    │   ├── test_jaskowski.py
    │   ├── test_kane.py
    │   ├── test_lai.py
    │   ├── test_pessimistic.py
    │   └── test_reflective.py
    ├── utils
    │   ├── __init__.py
    │   ├── data
    │   │   └── test.test.gz
    │   ├── test_downloader.py
    │   └── test_retriever.py
    └── variable_selection
    │   ├── __init__.py
    │   ├── base.py
    │   ├── test_cadit.py
    │   ├── test_dummy.py
    │   ├── test_econometric.py
    │   └── test_two_model.py
└── tutorials
    ├── EDA Hillstrom Email Marketing.ipynb
    ├── EDA Lalonde NSW.ipynb
    ├── Getting started.ipynb
    └── README.MD


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | .idea/
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .coverage
 43 | .coverage.*
 44 | .cache
 45 | nosetests.xml
 46 | coverage.xml
 47 | *.cover
 48 | .hypothesis/
 49 | .pytest_cache/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | db.sqlite3
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # Environments
 86 | .env
 87 | .venv
 88 | env/
 89 | venv/
 90 | ENV/
 91 | env.bak/
 92 | venv.bak/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: xenial
 2 | language: python
 3 | python:
 4 |   - "3.5"
 5 |   - "3.6"
 6 |   - "3.7"
 7 | install:
 8 |   - python setup.py install
 9 | script:
10 |   - pytest
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Artem Kuchumov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.MD
2 | 


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
 1 | ![](https://github.com/duketemon/pyuplift/raw/master/resources/pyuplift-logo.png)
 2 | 
 3 | [![Documentation Status](https://readthedocs.org/projects/pyuplift/badge/?version=latest)](https://pyuplift.readthedocs.io/en/latest/?badge=latest)
 4 | [![Build Status](https://travis-ci.org/duketemon/pyuplift.svg?branch=master)](https://travis-ci.org/duketemon/pyuplift)
 5 | [![PyPI - Python Version](https://img.shields.io/badge/python-3.5%20%7C%203.6%20%7C%203.7-blue.svg)](https://github.com/duketemon/pyuplift)
 6 | [![GitHub](https://img.shields.io/github/license/duketemon/pyuplift.svg)](https://github.com/duketemon/pyuplift/blob/master/LICENSE)
 7 | 
 8 | [Documentation](https://pyuplift.readthedocs.io) •
 9 | [License](https://github.com/duketemon/pyuplift/blob/master/LICENSE) •
10 | [How to contribute](#how-to-contribute) •
11 | [Uplift datasets](#uplift-datasets) •
12 | [Inspiration](#inspiration)
13 | 
14 | ## Installation
15 | ### Install from PyPI
16 | ```bash
17 | pip install pyuplift
18 | ```
19 | ### Install from source code
20 | ```bash
21 | git clone https://github.com/duketemon/pyuplift.git
22 | cd pyuplift
23 | python setup.py install
24 | ```
25 | 
26 | ## How to contribute
27 | Contributions are always welcomed. There is a lot of ways how you can help to the project.
28 | * Contribute to the [tests](https://github.com/duketemon/pyuplift/tree/master/tests) to make it more reliable.
29 | * Contribute to the [documentation](https://github.com/duketemon/pyuplift/tree/master/docs) to make it clearer for everyone.
30 | * Contribute to the [tutorials](https://github.com/duketemon/pyuplift/tree/master/tutorials) to share your experience with other users.
31 | * Look for [issues with tag "help wanted"](https://github.com/duketemon/pyuplift/issues?q=is%3Aissue+is%3Aopen+label%3A"help+wanted") and submit pull requests to address them.
32 | * [Open an issue](https://github.com/duketemon/pyuplift/issues) to report problems or recommend new features.
33 | 
34 | ## Uplift datasets
35 | * [Criteo Uplift Prediction](http://ailab.criteo.com/criteo-uplift-prediction-dataset)
36 | * [Hillstrom Email Marketing](https://blog.minethatdata.com/2008/05/best-answer-e-mail-analytics-challenge.html)
37 | * [Lalonde NSW](https://users.nber.org/~rdehejia/nswdata.html)
38 | 
39 | ## Compatible with
40 | * [NumPy](https://github.com/numpy/numpy)
41 | * [Scikit-learn](https://github.com/scikit-learn/scikit-learn)
42 | 
43 | ## Inspiration
44 | * [Identifying Individuals Who Are Truly Impacted by Treatment](https://www.researchgate.net/profile/Victor_Lo3/publication/270217235_Identifying_Individuals_Who_Are_Truly_Impacted_by_Treatment_Introduction_to_Recent_Advances_in_Uplift_Modeling/links/54a2dbbf0cf257a63604da2a/Identifying-Individuals-Who-Are-Truly-Impacted-by-Treatment-Introduction-to-Recent-Advances-in-Uplift-Modeling.pdf)
45 | * [Pinpointing the Persuadables: Convincing the Right Voters to Support Barack Obama](https://www.predictiveanalyticsworld.com/patimes/video-dan-porter-clip/2957)
46 | * [Revenue Uplift Modeling](https://www.researchgate.net/publication/321729653_Revenue_Uplift_Modeling)
47 | 
48 | ## References
49 | * Devriendt F, Moldovan D, Verbeke W. A literature survey and experimental evaluation of the state-of-the-art in uplift modeling: A stepping stone toward the development of prescriptive analytics. Big data. 2018 Mar 1;6(1):13-41.
50 | * Weisberg HI, Pontes VP. Post hoc subgroups in clinical trials: Anathema or analytics?. Clinical trials. 2015 Aug;12(4):357-64.
51 | * Lo VS. The true lift model: a novel data mining approach to response modeling in database marketing. ACM SIGKDD Explorations Newsletter. 2002 Dec 1;4(2):78-86.
52 | * Guelman L, Guillén M, Pérez-Marín AM. A decision support framework to implement optimal personalized marketing interventions. Decision Support Systems. 2015 Apr 1;72:24-32.
53 | * Tian L, Alizadeh AA, Gentles AJ, Tibshirani R. A simple method for estimating interactions between a treatment and a large number of covariates. Journal of the American Statistical Association. 2014 Oct 2;109(508):1517-32.
54 | 
55 | ## Notes
56 | The library was prepared within the framework of the Academic Fund Program at the National Research University Higher School of Economics (HSE) in 2019-2019 (grant № 19-04-048) and by the Russian Academic Excellence Project "5-100"
57 | 


--------------------------------------------------------------------------------
/docs/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | build:
2 |   image: latest
3 | python:
4 |   version: 3.7
5 | requirements_file: docs/requirements.txt


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = source
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/README.MD:
--------------------------------------------------------------------------------
1 | ## Documentation
2 | This directory contains the full manual and web site as displayed at https://pyuplift.readthedocs.io. Documentation for pyuplift is generated using [Sphinx](http://www.sphinx-doc.org/en/master/).
3 | 


--------------------------------------------------------------------------------
/docs/base_model.rst:
--------------------------------------------------------------------------------
1 | ##########
2 | Base Model
3 | ##########
4 | 
5 | The base class for all uplift estimators.
6 | 
7 | .. note::
8 |    This class should not be used directly. Use derived classes instead.
9 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import re
 4 | 
 5 | from sphinx.locale import _
 6 | from sphinx_rtd_theme import __version__
 7 | 
 8 | project = u'pyuplift'
 9 | slug = re.sub(r'\W+', '-', project.lower())
10 | version = __version__
11 | release = __version__
12 | author = u'Artem Kuchumov & contributors'
13 | copyright = author
14 | language = 'en'
15 | 
16 | extensions = [
17 |     'sphinx.ext.intersphinx',
18 |     'sphinx.ext.autodoc',
19 |     'sphinx.ext.mathjax',
20 |     'sphinx.ext.viewcode',
21 |     'sphinxcontrib.httpdomain',
22 | ]
23 | 
24 | templates_path = ['_templates']
25 | source_suffix = '.rst'
26 | exclude_patterns = []
27 | 
28 | master_doc = 'index'
29 | suppress_warnings = ['image.nonlocal_uri']
30 | pygments_style = 'default'
31 | 
32 | intersphinx_mapping = {
33 |     'rtd': ('https://docs.readthedocs.io/en/latest/', None),
34 |     'sphinx': ('http://www.sphinx-doc.org/en/stable/', None),
35 | }
36 | 
37 | html_theme = 'sphinx_rtd_theme'
38 | html_theme_options = {
39 |     'logo_only': True
40 | }
41 | html_theme_path = ["../.."]
42 | html_show_sourcelink = True
43 | htmlhelp_basename = slug
44 | 
45 | latex_documents = [
46 |     ('index', '{0}.tex'.format(slug), project, author, 'manual'),
47 | ]
48 | 
49 | man_pages = [
50 |     ('index', slug, project, [author], 1)
51 | ]
52 | 
53 | texinfo_documents = [
54 |     ('index', slug, project, author, slug, project, 'Miscellaneous'),
55 | ]
56 | 
57 | 
58 | # Extensions to theme docs
59 | def setup(app):
60 |     from sphinx.domains.python import PyField
61 |     from sphinx.util.docfields import Field
62 | 
63 |     app.add_object_type(
64 |         'confval',
65 |         'confval',
66 |         objname='configuration value',
67 |         indextemplate='pair: %s; configuration value',
68 |         doc_field_types=[
69 |             PyField(
70 |                 'type',
71 |                 label=_('Type'),
72 |                 has_arg=False,
73 |                 names=('type',),
74 |                 bodyrolename='class'
75 |             ),
76 |             Field(
77 |                 'default',
78 |                 label=_('Default'),
79 |                 has_arg=False,
80 |                 names=('default',),
81 |             ),
82 |         ]
83 |     )
84 | 


--------------------------------------------------------------------------------
/docs/contribute.rst:
--------------------------------------------------------------------------------
  1 | ######################
  2 | Contribute to pyuplift
  3 | ######################
  4 | Everyone is more than welcome to contribute. It is a way to make the project better and more accessible to more users.
  5 | 
  6 | **Guidelines**
  7 | 
  8 | * `Submit Pull Request`_
  9 | * `Git Workflow Howtos`_
 10 | 
 11 |   - `How to resolve conflict with master`_
 12 |   - `How to combine multiple commits into one`_
 13 |   - `What is the consequence of force push`_
 14 | 
 15 | * `Documents`_
 16 | 
 17 | *******************
 18 | Submit Pull Request
 19 | *******************
 20 | 
 21 | * Before submit, please rebase your code on the most recent version of master, you can do it by
 22 | 
 23 |   .. code-block:: bash
 24 | 
 25 |     git remote add upstream https://github.com/duketemon/pyuplift
 26 |     git fetch upstream
 27 |     git rebase upstream/master
 28 | 
 29 | * If you have multiple small commits,
 30 |   it might be good to merge them together(use git rebase then squash) into more meaningful groups.
 31 | * Send the pull request!
 32 | 
 33 |   - Fix the problems reported by automatic checks
 34 |   - If you are contributing a new module, consider add a testcase
 35 | 
 36 | *******************
 37 | Git Workflow Howtos
 38 | *******************
 39 | 
 40 | How to resolve conflict with master
 41 | ===================================
 42 | - First rebase to most recent master
 43 | 
 44 |   .. code-block:: bash
 45 | 
 46 |     # The first two steps can be skipped after you do it once.
 47 |     git remote add upstream https://github.com/duketemon/pyuplift
 48 |     git fetch upstream
 49 |     git rebase upstream/master
 50 | 
 51 | - The git may show some conflicts it cannot merge, say ``conflicted.py``.
 52 | 
 53 |   - Manually modify the file to resolve the conflict.
 54 |   - After you resolved the conflict, mark it as resolved by
 55 | 
 56 |     .. code-block:: bash
 57 | 
 58 |       git add conflicted.py
 59 | 
 60 | - Then you can continue rebase by
 61 | 
 62 |   .. code-block:: bash
 63 | 
 64 |     git rebase --continue
 65 | 
 66 | - Finally push to your fork, you may need to force push here.
 67 | 
 68 |   .. code-block:: bash
 69 | 
 70 |     git push --force
 71 | 
 72 | How to combine multiple commits into one
 73 | ========================================
 74 | Sometimes we want to combine multiple commits, especially when later commits are only fixes to previous ones,
 75 | to create a PR with set of meaningful commits. You can do it by following steps.
 76 | 
 77 | - Before doing so, configure the default editor of git if you haven't done so before.
 78 | 
 79 |   .. code-block:: bash
 80 | 
 81 |     git config core.editor the-editor-you-like
 82 | 
 83 | - Assume we want to merge last 3 commits, type the following commands
 84 | 
 85 |   .. code-block:: bash
 86 | 
 87 |     git rebase -i HEAD~3
 88 | 
 89 | - It will pop up an text editor. Set the first commit as ``pick``, and change later ones to ``squash``.
 90 | - After you saved the file, it will pop up another text editor to ask you modify the combined commit message.
 91 | - Push the changes to your fork, you need to force push.
 92 | 
 93 |   .. code-block:: bash
 94 | 
 95 |     git push --force
 96 | 
 97 | What is the consequence of force push
 98 | =====================================
 99 | The previous two tips requires force push, this is because we altered the path of the commits.
100 | It is fine to force push to your own fork, as long as the commits changed are only yours.
101 | 
102 | *********
103 | Documents
104 | *********
105 | * Documentation is built using sphinx.
106 | * Each document is written in `reStructuredText <http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_.
107 | * You can build document locally to see the effect.
108 | 
109 | 


--------------------------------------------------------------------------------
/docs/datasets/download_criteo_uplift_prediction.rst:
--------------------------------------------------------------------------------
 1 | #################################
 2 | download_criteo_uplift_prediction
 3 | #################################
 4 | 
 5 | Downloading the Criteo Uplift Prediction dataset.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | This dataset is constructed by assembling data resulting from several incrementality tests, a particular randomized trial procedure where a random part of the population is prevented from being targeted by advertising.
11 | It consists of 25M rows, each one representing a user with 11 features, a treatment indicator and 2 labels (visits and conversions).
12 | 
13 | *******
14 | Privacy
15 | *******
16 | For privacy reasons the data has been sub-sampled non-uniformly so that the original incrementality level cannot be deduced from the dataset while preserving a realistic, challenging benchmark.
17 | Feature names have been anonymized and their values randomly projected so as to keep predictive power while making it practically impossible to recover the original features or user context.
18 | 
19 | +--------------------------+------------+
20 | | Features                 |         11 |
21 | +--------------------------+------------+
22 | | Treatment                |          2 |
23 | +--------------------------+------------+ 
24 | | Samples total            | 25,309,483 |
25 | +--------------------------+------------+ 
26 | | Average visit rate       |    0.04132 |
27 | +--------------------------+------------+ 
28 | | Average conversion rate  |    0.00229 |
29 | +--------------------------+------------+
30 | 
31 | More information about dataset you can find in
32 | the `official dataset description <http://ailab.criteo.com/criteo-uplift-prediction-dataset>`_.
33 | 
34 | +-----------------+---------------------------------------------------------------------------------------------------------------------+
35 | | **Parameters:** | | **data_home**: str, default=None                                                                                  |
36 | |                 | |   The URL to file with data.                                                                                      |
37 | |                 | | **url**: str, default=https://s3.us-east-2.amazonaws.com/criteo-uplift-dataset/criteo-uplift.csv.gz               |
38 | |                 | |   The URL to file with data.                                                                                      |
39 | +-----------------+---------------------------------------------------------------------------------------------------------------------+
40 | | **Returns:**    | | **dataset**: dict                                                                                                 |
41 | |                 | |   Dictionary object with the following attributes:                                                                |
42 | |                 | | **dataset.description** : str                                                                                     |
43 | |                 | |   Description of the Criteo Uplift Prediction dataset.                                                            |
44 | |                 | | **dataset.data**: numpy ndarray of shape (25309483, 11)                                                           |
45 | |                 | |   Each row corresponding to the 11 feature values in order.                                                       |
46 | |                 | | **dataset.feature_names**: list, size 11                                                                          |
47 | |                 | |   List of feature names.                                                                                          |
48 | |                 | | **dataset.treatment**: numpy ndarray, shape (25309483,)                                                           |
49 | |                 | |   Each value corresponds to the treatment.                                                                        |
50 | |                 | | **dataset.target**: numpy array of shape (25309483,)                                                              |
51 | |                 | |   Each value corresponds to one of the outcomes. By default, it's `visit` outcome (look at `target_visit` below). |
52 | |                 | | **dataset.target_visit**: numpy array of shape (25309483,)                                                        |
53 | |                 | |   Each value corresponds to whether a visit occurred for this user (binary, label).                               |
54 | |                 | | **dataset.target_exposure**: numpy array of shape (25309483,)                                                     |
55 | |                 | |   Each value corresponds to treatment effect, whether the user has been effectively exposed (binary).             |
56 | |                 | | **dataset.target_conversion**: numpy array of shape (25309483,)                                                   |
57 | |                 | |   Each value corresponds to whether a conversion occurred for this user (binary, label).                          |
58 | +-----------------+---------------------------------------------------------------------------------------------------------------------+
59 | 
60 | ********
61 | Examples
62 | ********
63 | 
64 | .. code-block:: python3
65 | 
66 |    from pyuplift.datasets import download_criteo_uplift_prediction
67 |    download_criteo_uplift_prediction()
68 | 


--------------------------------------------------------------------------------
/docs/datasets/download_hillstrom_email_marketing.rst:
--------------------------------------------------------------------------------
 1 | ##################################
 2 | download_hillstrom_email_marketing
 3 | ##################################
 4 | 
 5 | Downloading the Hillstrom Email Marketing dataset.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | This dataset contains 64,000 customers who last purchased within twelve months. The customers were involved in an e-mail test.
11 | 
12 |  * 1/3 were randomly chosen to receive an e-mail campaign featuring Mens merchandise.
13 |  * 1/3 were randomly chosen to receive an e-mail campaign featuring Womens merchandise.
14 |  * 1/3 were randomly chosen to not receive an e-mail campaign.
15 | 
16 | During a period of two weeks following the e-mail campaign, results were tracked. Your job is to tell the world if the Mens or Womens e-mail campaign was successful.
17 | 
18 | +--------------------------+------------+
19 | | Features                 |          8 |
20 | +--------------------------+------------+
21 | | Treatment                |          3 |
22 | +--------------------------+------------+
23 | | Samples total            |     64,000 |
24 | +--------------------------+------------+
25 | | Average spend rate       |    1.05091 |
26 | +--------------------------+------------+
27 | | Average visit rate       |    0.14678 |
28 | +--------------------------+------------+
29 | | Average conversion rate  |    0.00903 |
30 | +--------------------------+------------+
31 | 
32 | More information about dataset you can find in the `official paper <http://minethatdata.com/Stochastic_Solutions_E-Mail_Challenge_2008.04.30.pdf>`_.
33 | 
34 | +-----------------+----------------------------------------------------------------------------------+
35 | | **Parameters**  | | **data_home: str**                                                             |
36 | |                 | |   Specify another download and cache folder for the dataset.                   |
37 | |                 | |   By default the dataset will be stored in the data folder in the same folder. |
38 | |                 | | **url: str**                                                                   |
39 | |                 | |   The URL to file with data.                                                   |
40 | +-----------------+----------------------------------------------------------------------------------+
41 | | **Returns**     | **None**                                                                         |
42 | +-----------------+----------------------------------------------------------------------------------+
43 | 
44 | ********
45 | Examples
46 | ********
47 | 
48 | .. code-block:: python3
49 | 
50 |    from pyuplift.datasets import download_hillstrom_email_marketing
51 |    download_hillstrom_email_marketing()
52 | 


--------------------------------------------------------------------------------
/docs/datasets/download_lalonde_nsw.rst:
--------------------------------------------------------------------------------
 1 | ####################
 2 | download_lalonde_nsw
 3 | ####################
 4 | 
 5 | Downloading the Lalonde NSW dataset.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | The dataset contains the treated and control units from the male sub-sample from the National Supported Work Demonstration as used by Lalonde in his paper.
11 | 
12 | +--------------------------+------------+
13 | | Features                 |          7 |
14 | +--------------------------+------------+
15 | | Treatment                |          2 |
16 | +--------------------------+------------+
17 | | Samples total            |        722 |
18 | +--------------------------+------------+
19 | 
20 | ********************
21 | Features description
22 | ********************
23 | * **treat** - an indicator variable for treatment status.
24 | * **age** - age in years.
25 | * **educ** - years of schooling.
26 | * **black** - indicator variable for blacks.
27 | * **hisp** - indicator variable for Hispanics.
28 | * **married** - indicator variable for martial status.
29 | * **nodegr** - indicator variable for high school diploma.
30 | * **re75** - real earnings in 1975.
31 | * **re78** - real earnings in 1978.
32 | 
33 | More information about dataset you can find `here <https://users.nber.org/~rdehejia/nswdata.html>`_.
34 | 
35 | +-----------------+----------------------------------------------------------------------------------+
36 | | **Parameters**  | | **data_home: str**                                                             |
37 | |                 | |   Specify another download and cache folder for the dataset.                   |
38 | |                 | |   By default the dataset will be stored in the data folder in the same folder. |
39 | |                 | | **control_data_url: str**                                                      |
40 | |                 | |   The URL to file with data of the control group.                              |
41 | |                 | | **treated_data_url: str**                                                      |
42 | |                 | |   The URL to file with data of the treated group.                              |
43 | |                 | | **separator: str**                                                             |
44 | |                 | |   The separator which used in the data files.                                  |
45 | |                 | | **column_names: list**                                                         |
46 | |                 | |   List of column names of the dataset.                                         |
47 | |                 | | **column_types: dict**                                                         |
48 | |                 | |   List of types for columns of the dataset.                                    |
49 | |                 | | **random_state: int**                                                          |
50 | |                 | |   The random seed.                                                             |
51 | +-----------------+----------------------------------------------------------------------------------+
52 | | **Returns**     | **None**                                                                         |
53 | +-----------------+----------------------------------------------------------------------------------+
54 | 
55 | 
56 | ********
57 | Examples
58 | ********
59 | 
60 | .. code-block:: python3
61 | 
62 |    from pyuplift.datasets import download_lalonde_nsw
63 |    download_lalonde_nsw()
64 | 


--------------------------------------------------------------------------------
/docs/datasets/index.rst:
--------------------------------------------------------------------------------
 1 | ########
 2 | Datasets
 3 | ########
 4 | 
 5 | .. toctree::
 6 |   :hidden:
 7 |   
 8 |   load_criteo_uplift_prediction
 9 |   download_criteo_uplift_prediction
10 |   load_hillstrom_email_marketing
11 |   download_hillstrom_email_marketing
12 |   load_lalonde_nsw
13 |   download_lalonde_nsw
14 |   make_linear_regression
15 | 
16 | The pyuplift.datasets module includes utilities to load datasets, including methods to download and return popular datasets. It also features some artificial data generators.
17 | 
18 | *******
19 | Loaders
20 | *******
21 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
22 | | `datasets.download_criteo_uplift_prediction([data_home, url]) <download_criteo_uplift_prediction.html>`_                                                           | Downloading the Criteo Uplift Prediction dataset.                  |
23 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
24 | | `datasets.load_criteo_uplift_prediction([data_home, download_if_missing]) <load_criteo_uplift_prediction.html>`_                                                   | Loading the Criteo Uplift Prediction dataset from the local file.  |
25 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
26 | | `datasets.download_hillstrom_email_marketing([data_home, url]) <download_hillstrom_email_marketing.html>`_                                                         | Downloading the Hillstrom Email Marketing dataset.                 |
27 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
28 | | `datasets.load_hillstrom_email_marketing([data_home, load_raw_data, download_if_missing]) <load_hillstrom_email_marketing.html>`_                                  | Loading the Hillstrom Email Marketing dataset from the local file. |
29 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
30 | | `datasets.download_lalonde_nsw([data_home, control_data_url, treated_data_url, separator, column_names, column_types, random_state]) <download_lalonde_nsw.html>`_ | Downloading the Lalonde NSW dataset.                               |
31 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
32 | | `datasets.load_lalonde_nsw([data_home, load_raw_data, download_if_missing]) <load_lalonde_nsw.html>`_                                                              | Loading the Lalonde NSW dataset from the local file.               |
33 | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------+
34 | 
35 | **********
36 | Generators
37 | **********
38 | +----------------------------------------------------------------------------------------------+--------------------------------------------+
39 | | `datasets.make_linear_regression(size, [x1_params, x2_params, x3_params, t_params, e_params, | | Generate data by formula: Y' = X1+X2*T+E |
40 | | eps, seed]) <make_linear_regression.html>`_                                                  | | Y = Y', if Y' - int(Y') > eps,           |
41 | |                                                                                              | | Y = 0,  otherwise.                       |
42 | +----------------------------------------------------------------------------------------------+--------------------------------------------+
43 | 


--------------------------------------------------------------------------------
/docs/datasets/load_criteo_uplift_prediction.rst:
--------------------------------------------------------------------------------
 1 | #############################
 2 | load_criteo_uplift_prediction
 3 | #############################
 4 | 
 5 | Loading the Criteo Uplift Prediction dataset from the local file.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | This dataset is constructed by assembling data resulting from several incrementality tests, a particular randomized trial procedure where a random part of the population is prevented from being targeted by advertising.
11 | It consists of 25M rows, each one representing a user with 11 features, a treatment indicator and 2 labels (visits and conversions).
12 | 
13 | *******
14 | Privacy
15 | *******
16 | For privacy reasons the data has been sub-sampled non-uniformly so that the original incrementality level cannot be deduced from the dataset while preserving a realistic, challenging benchmark.
17 | Feature names have been anonymized and their values randomly projected so as to keep predictive power while making it practically impossible to recover the original features or user context.
18 | 
19 | +--------------------------+------------+
20 | | Features                 |         11 |
21 | +--------------------------+------------+
22 | | Treatment                |          2 |
23 | +--------------------------+------------+ 
24 | | Samples total            | 25,309,483 |
25 | +--------------------------+------------+ 
26 | | Average visit rate       |    0.04132 |
27 | +--------------------------+------------+ 
28 | | Average conversion rate  |    0.00229 |
29 | +--------------------------+------------+
30 | 
31 | More information about dataset you can find in
32 | the `official dataset description <http://ailab.criteo.com/criteo-uplift-prediction-dataset>`_.
33 | 
34 | +-----------------+---------------------------------------------------------------------------------------------------------------------+
35 | | **Parameters**  | | **data_home: str**                                                                                                |
36 | |                 | |   Specify another download and cache folder for the dataset.                                                      |
37 | |                 | |   By default the dataset will be stored in the data folder in the same folder.                                    |
38 | |                 | | **download_if_missing: bool, default=True**                                                                       |
39 | |                 | |   Download the dataset if it is not downloaded.                                                                   |
40 | +-----------------+---------------------------------------------------------------------------------------------------------------------+
41 | | **Returns:**    | | **dataset**: dict                                                                                                 |
42 | |                 | |   Dictionary object with the following attributes:                                                                |
43 | |                 | | **dataset.description** : str                                                                                     |
44 | |                 | |   Description of the Criteo Uplift Prediction dataset.                                                            |
45 | |                 | | **dataset.data**: numpy ndarray of shape (25309483, 11)                                                           |
46 | |                 | |   Each row corresponding to the 11 feature values in order.                                                       |
47 | |                 | | **dataset.feature_names**: list, size 11                                                                          |
48 | |                 | |   List of feature names.                                                                                          |
49 | |                 | | **dataset.treatment**: numpy ndarray, shape (25309483,)                                                           |
50 | |                 | |   Each value corresponds to the treatment.                                                                        |
51 | |                 | | **dataset.target**: numpy array of shape (25309483,)                                                              |
52 | |                 | |   Each value corresponds to one of the outcomes. By default, it's `visit` outcome (look at `target_visit` below). |
53 | |                 | | **dataset.target_visit**: numpy array of shape (25309483,)                                                        |
54 | |                 | |   Each value corresponds to whether a visit occurred for this user (binary, label).                               |
55 | |                 | | **dataset.target_exposure**: numpy array of shape (25309483,)                                                     |
56 | |                 | |   Each value corresponds to treatment effect, whether the user has been effectively exposed (binary).             |
57 | |                 | | **dataset.target_conversion**: numpy array of shape (25309483,)                                                   |
58 | |                 | |   Each value corresponds to whether a conversion occurred for this user (binary, label).                          |
59 | +-----------------+---------------------------------------------------------------------------------------------------------------------+
60 | 
61 | ********
62 | Examples
63 | ********
64 | 
65 | .. code-block:: python3
66 | 
67 |    from pyuplift.datasets import load_criteo_uplift_prediction
68 |    df = load_criteo_uplift_prediction()
69 |    print(df)
70 | 


--------------------------------------------------------------------------------
/docs/datasets/load_hillstrom_email_marketing.rst:
--------------------------------------------------------------------------------
 1 | ##############################
 2 | load_hillstrom_email_marketing
 3 | ##############################
 4 | 
 5 | Loading the Hillstrom Email Marketing dataset from the local file.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | This dataset contains 64,000 customers who last purchased within twelve months. The customers were involved in an e-mail test.
11 | 
12 |  * 1/3 were randomly chosen to receive an e-mail campaign featuring Mens merchandise.
13 |  * 1/3 were randomly chosen to receive an e-mail campaign featuring Womens merchandise.
14 |  * 1/3 were randomly chosen to not receive an e-mail campaign.
15 | 
16 | During a period of two weeks following the e-mail campaign, results were tracked. Your job is to tell the world if the Mens or Womens e-mail campaign was successful.
17 | 
18 | +--------------------------+------------+
19 | | Features                 |          8 |
20 | +--------------------------+------------+
21 | | Treatment                |          3 |
22 | +--------------------------+------------+
23 | | Samples total            |     64,000 |
24 | +--------------------------+------------+
25 | | Average spend rate       |    1.05091 |
26 | +--------------------------+------------+
27 | | Average visit rate       |    0.14678 |
28 | +--------------------------+------------+
29 | | Average conversion rate  |    0.00903 |
30 | +--------------------------+------------+
31 | 
32 | More information about dataset you can find in the `official paper <http://minethatdata.com/Stochastic_Solutions_E-Mail_Challenge_2008.04.30.pdf>`_.
33 | 
34 | +-----------------+----------------------------------------------------------------------------------------------------------------------------------------+
35 | | **Parameters:** | | **data_home**: str, default=None                                                                                                     |
36 | |                 | |   Specify another download and cache folder for the dataset.                                                                         |
37 | |                 | |   By default the dataset will be stored in the data folder in the same folder.                                                       |
38 | |                 | | **load_raw_data**: bool, default=False                                                                                               |
39 | |                 | |   The loading of raw or preprocessed data?                                                                                           |
40 | |                 | | **download_if_missing**: bool, default=True                                                                                          |
41 | |                 | |   Download the dataset if it is not downloaded.                                                                                      |
42 | +-----------------+----------------------------------------------------------------------------------------------------------------------------------------+
43 | | **Returns:**    | | **dataset**: dict                                                                                                                    |
44 | |                 | |   Dictionary object with the following attributes:                                                                                   |
45 | |                 | | **dataset.description** : str                                                                                                        |
46 | |                 | |   Description of the Hillstrom email marketing dataset.                                                                              |
47 | |                 | | **dataset.data**: numpy ndarray of shape (64000, 8)                                                                                  |
48 | |                 | |   Each row corresponding to the 8 feature values in order.                                                                           |
49 | |                 | | **dataset.feature_names**: list, size 8                                                                                              |
50 | |                 | |   List of feature names.                                                                                                             |
51 | |                 | | **dataset.treatment**: numpy ndarray, shape (64000,)                                                                                 |
52 | |                 | |   Each value corresponds to the treatment.                                                                                           |
53 | |                 | | **dataset.target**: numpy array of shape (64000,)                                                                                    |
54 | |                 | |   Each value corresponds to one of the outcomes. By default, it's `spend` outcome (look at `target_spend` below).                    |
55 | |                 | | **dataset.target_spend**: numpy array of shape (64000,)                                                                              |
56 | |                 | |   Each value corresponds to how much customers spent during a two-week outcome period.                                               |
57 | |                 | | **dataset.target_visit**: numpy array of shape (64000,)                                                                              |
58 | |                 | |   Each value corresponds to whether people visited the site during a two-week outcome period.                                        |
59 | |                 | | **dataset.target_conversion**: numpy array of shape (64000,)                                                                         |
60 | |                 | |   Each value corresponds to whether they purchased at the site (“conversion”) during a two-week outcome period.                      |
61 | +-----------------+----------------------------------------------------------------------------------------------------------------------------------------+
62 | 
63 | ********
64 | Examples
65 | ********
66 | 
67 | .. code-block:: python3
68 | 
69 |    from pyuplift.datasets import load_hillstrom_email_marketing
70 |    df = load_hillstrom_email_marketing()
71 |    print(df)
72 | 


--------------------------------------------------------------------------------
/docs/datasets/load_lalonde_nsw.rst:
--------------------------------------------------------------------------------
 1 | ################
 2 | load_lalonde_nsw
 3 | ################
 4 | 
 5 | Loading the Lalonde NSW dataset from the local file.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | The dataset contains the treated and control units from the male sub-sample from the National Supported Work Demonstration as used by Lalonde in his paper.
11 | 
12 | +--------------------------+------------+
13 | | Features                 |          7 |
14 | +--------------------------+------------+
15 | | Treatment                |          2 |
16 | +--------------------------+------------+
17 | | Samples total            |        722 |
18 | +--------------------------+------------+
19 | 
20 | ********************
21 | Features description
22 | ********************
23 | * **treat** - an indicator variable for treatment status.
24 | * **age** - age in years.
25 | * **educ** - years of schooling.
26 | * **black** - indicator variable for blacks.
27 | * **hisp** - indicator variable for Hispanics.
28 | * **married** - indicator variable for martial status.
29 | * **nodegr** - indicator variable for high school diploma.
30 | * **re75** - real earnings in 1975.
31 | * **re78** - real earnings in 1978.
32 | 
33 | More information about dataset you can find `here <https://users.nber.org/~rdehejia/nswdata.html>`_.
34 | 
35 | +-----------------+----------------------------------------------------------------------------------------------------------------------------------------+
36 | | **Parameters:** | | **data_home**: str, default=None                                                                                                     |
37 | |                 | |   Specify another download and cache folder for the dataset.                                                                         |
38 | |                 | |   By default the dataset will be stored in the data folder in the same folder.                                                       |
39 | |                 | | **download_if_missing**: bool, default=True                                                                                          |
40 | |                 | |   Download the dataset if it is not downloaded.                                                                                      |
41 | +-----------------+----------------------------------------------------------------------------------------------------------------------------------------+
42 | | **Returns:**    | | **dataset**: dict                                                                                                                    |
43 | |                 | |   Dictionary object with the following attributes:                                                                                   |
44 | |                 | | **dataset.description** : str                                                                                                        |
45 | |                 | |   Description of the Hillstrom email marketing dataset.                                                                              |
46 | |                 | | **dataset.data**: numpy ndarray of shape (722, 7)                                                                                    |
47 | |                 | |   Each row corresponding to the 7 feature values in order.                                                                           |
48 | |                 | | **dataset.feature_names**: list, size 7                                                                                              |
49 | |                 | |   List of feature names.                                                                                                             |
50 | |                 | | **dataset.treatment**: numpy ndarray, shape (722,)                                                                                   |
51 | |                 | |   Each value corresponds to the treatment.                                                                                           |
52 | |                 | | **dataset.target**: numpy array of shape (722,)                                                                                      |
53 | |                 | |   Each value corresponds to one of the outcomes. By default, it's `re78` outcome.                                                    |
54 | +-----------------+----------------------------------------------------------------------------------------------------------------------------------------+
55 | 
56 | ********
57 | Examples
58 | ********
59 | 
60 | .. code-block:: python3
61 | 
62 |    from pyuplift.datasets import load_lalonde_nsw
63 |    df = load_lalonde_nsw()
64 |    print(df)
65 | 


--------------------------------------------------------------------------------
/docs/datasets/make_linear_regression.rst:
--------------------------------------------------------------------------------
 1 | ######################
 2 | make_linear_regression
 3 | ######################
 4 | 
 5 | Generate data by formula.
 6 | 
 7 | ****************
 8 | Data description
 9 | ****************
10 | Synthetic data generated by Generate data by formula:
11 | 
12 |  | ``Y' = X1 + X2 * T + E``
13 |  | ``Y = Y', if Y' - int(Y') > eps,``
14 |  | ``Y = 0,  otherwise.``
15 | 
16 | Statistics for default parameters and size equals 100,000:
17 | 
18 | +--------------------------+-------------+
19 | |Features                  |           3 |
20 | +--------------------------+-------------+
21 | |Treatment                 |           2 |
22 | +--------------------------+-------------+ 
23 | |Samples total             |      `size` |
24 | +--------------------------+-------------+ 
25 | |Y not equals 0            |     0.49438 |
26 | +--------------------------+-------------+ 
27 | |Y values                  | 0 to 555.93 |
28 | +--------------------------+-------------+
29 | 
30 | 
31 | +-----------------+-----------------------------------------------------------------------------+
32 | | **Parameters:** | | **size**: integer                                                         |
33 | |                 | |   The number of observations.                                             |
34 | |                 | | **x1_params** : tuple(mu, sigma), default: (0, 1)                         |
35 | |                 | |   The feature with gaussian distribution and mean=mu, sd=sigma.           |
36 | |                 | |   X1 ~ N(mu, sigma)                                                       |
37 | |                 | | **x2_params** : tuple(mu, sigma), default: (0, 0.1)                       |
38 | |                 | |   The feature with gaussian distribution and mean=mu, sd=sigma.           |
39 | |                 | |   X2 ~ N(mu, sigma)                                                       |
40 | |                 | | **x3_params** : tuple(mu, sigma), default: (0, 1)                         |
41 | |                 | |   The feature with gaussian distribution and mean=mu, sd=sigma.           |
42 | |                 | |   X3 ~ N(mu, sigma)                                                       |
43 | |                 | | **t_params** : tuple(mu, sigma), default: (0, 1)                          |
44 | |                 | |   The treatment with uniform distribution. Min value=min, Max value=max-1 |
45 | |                 | |   T ~ R(min, max)                                                         |
46 | |                 | | **e_params** : tuple(mu, sigma), default: (0, 1)                          |
47 | |                 | |   The error with gaussian distribution and mean=mu, sd=sigma.             |
48 | |                 | |   E ~ N(mu, sigma)                                                        |
49 | |                 | | **eps** : tuple(mu, sigma), default: (0, 1)                               |
50 | |                 | |   The border value.                                                       |
51 | |                 | | **random_state** : integer, default=777                                   |
52 | |                 | |   random_state is the seed used by the random number generator.           |
53 | +-----------------+-----------------------------------------------------------------------------+
54 | | **Returns:**    | | **dataset**: pandas DataFrame                                             |
55 | |                 | |   Generated data.                                                         |
56 | +-----------------+-----------------------------------------------------------------------------+
57 | 
58 | ********
59 | Examples
60 | ********
61 | 
62 | .. code-block:: python3
63 | 
64 |    from pyuplift.datasets import make_linear_regression
65 |    df = make_linear_regression(10000)
66 |    print(df)
67 | 


--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
 1 | #################
 2 | Examples of Usage
 3 | #################
 4 | 
 5 | This section contains official examples of usage pyuplift package.
 6 | 
 7 | ********
 8 | Contents
 9 | ********
10 | - `Hillstrom Email Marketing dataset <https://github.com/duketemon/pyuplift/blob/master/examples/Hillstrom_Email_Marketing-usage.ipynb>`_
11 | - `Synthetic dataset <https://github.com/duketemon/pyuplift/blob/master/examples/Synthetic_data-usage.ipynb>`_
12 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | ######################
 2 | pyuplift documentation
 3 | ######################
 4 | 
 5 | **pyuplift** is a scientific uplift modeling library. It implements variable selection and transformation approaches. pyuplift provides API for work with such an uplift datasets as `Hillstrom Email Marketing <https://blog.minethatdata.com/2008/05/best-answer-e-mail-analytics-challenge.html>`_ and `Criteo Uplift Prediction <http://ailab.criteo.com/criteo-uplift-prediction-dataset>`_.
 6 | 
 7 | ********
 8 | Contents
 9 | ********
10 | 
11 | .. toctree::
12 |   :maxdepth: 2
13 |   :titlesonly:
14 | 
15 |   installation
16 |   examples
17 |   contribute
18 |   
19 | .. toctree::
20 |   :maxdepth: 2
21 |   :titlesonly:
22 |   :caption: API
23 | 
24 |   base_model
25 |   variable_selection/index
26 |   transformation/index
27 |   datasets/index
28 |   model_selection/index
29 |   metrics/index
30 |   utils/index
31 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
 1 | ##################
 2 | Installation Guide
 3 | ##################
 4 | 
 5 | *****************
 6 | Install from PyPI
 7 | *****************
 8 | 
 9 |   .. code-block:: bash
10 | 
11 |     pip install pyuplift
12 | 	
13 | 
14 | ************************
15 | Install from source code
16 | ************************
17 | 
18 |   .. code-block:: bash
19 | 
20 |     python setup.py install
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/metrics/get_average_effect.rst:
--------------------------------------------------------------------------------
 1 | ##################
 2 | get_average_effect
 3 | ##################
 4 | 
 5 | Estimating an average effect of the test set.
 6 | 
 7 | +-----------------+----------------------------------------------------------------------------------+
 8 | | **Parameters:** | | **y_test**: numpy array                                                        |
 9 | |                 | |   Actual y values.                                                             |
10 | |                 | | **t_test**: numpy array                                                        |
11 | |                 | |   Actual treatment values.                                                     |
12 | |                 | | **y_pred**: numpy array                                                        |
13 | |                 | |   Predicted y values by uplift model.                                          |
14 | |                 | | **test_share**: float                                                          |
15 | |                 | |   Share of the test data which will be taken for estimating an average effect. |
16 | +-----------------+----------------------------------------------------------------------------------+
17 | | **Returns:**    | | **average effect**: float                                                      |
18 | |                 | |   Average effect on the test set.                                              |
19 | +-----------------+----------------------------------------------------------------------------------+
20 | 
21 | ********
22 | Examples
23 | ********
24 | 
25 | .. code-block:: python3
26 | 
27 |    from pyuplift.metrics import get_average_effect
28 |    ...
29 |    model.fit(X_train, y_train, t_train)
30 |    y_pred = model.predict(X_test)
31 |    effect = get_average_effect(y_test, t_test, y_pred, test_share)
32 |    print(effect)
33 | 


--------------------------------------------------------------------------------
/docs/metrics/index.rst:
--------------------------------------------------------------------------------
 1 | #######
 2 | Metrics
 3 | #######
 4 | 
 5 | .. toctree::
 6 |   :hidden:
 7 |   
 8 |   get_average_effect
 9 | 
10 | The pyuplift.metrics module includes score functions, performance metrics and pairwise metrics and distance computations.
11 | 
12 | +-----------------------------------------------------------------------------------------------+-----------------------------------------------+
13 | | `metrics.get_average_effect(y_test, t_test, y_pred, [test_share]) <get_average_effect.html>`_ | Estimating an average effect of the test set. |
14 | +-----------------------------------------------------------------------------------------------+-----------------------------------------------+
15 | 


--------------------------------------------------------------------------------
/docs/model_selection/index.rst:
--------------------------------------------------------------------------------
 1 | ###############
 2 | Model Selection
 3 | ###############
 4 | 
 5 | .. toctree::
 6 |   :hidden:
 7 |   
 8 |   train_test_split
 9 |   treatment_cross_val_score
10 | 
11 | The pyuplift.model_selection module includes model validation and splitter functions.
12 | 
13 | ******************
14 | Splitter Functions
15 | ******************
16 | 
17 | +--------------------------------------------------------------------------------------------------------+---------------------------------------------------+
18 | | `model_selection.train_test_split(X, y, t, [train_share, random_state]) <train_test_split.html>`_      | Split X, y, t into random train and test subsets. |
19 | +--------------------------------------------------------------------------------------------------------+---------------------------------------------------+
20 | 
21 | 
22 | ****************
23 | Model validation
24 | ****************
25 | +-------------------------------------------------------------------------------------------------------------------------+----------------------------------------+
26 | | `model_selection.treatment_cross_val_score(X, y, t, model, [cv, train_share, seeds]) <treatment_cross_val_score.html>`_ | Evaluate a scores by cross-validation. |
27 | +-------------------------------------------------------------------------------------------------------------------------+----------------------------------------+
28 | 


--------------------------------------------------------------------------------
/docs/model_selection/train_test_split.rst:
--------------------------------------------------------------------------------
 1 | ################
 2 | train_test_split
 3 | ################
 4 | 
 5 | Split X, y, t into random train and test subsets.
 6 | 
 7 | +------------------+-----------------------------------------------------------------------------------------+
 8 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                             |
 9 | |                  | |   Matrix of features.                                                                 |
10 | |                  | | **y: numpy array with shape = [n_samples,]**                                          |
11 | |                  | |   Array of target of feature.                                                         |
12 | |                  | | **t: numpy array with shape = [n_samples,]**                                          |
13 | |                  | |   Array of treatments.                                                                |
14 | |                  | | **train_share: float, optional (default=0.7)**                                        |
15 | |                  | |   train_share represents the proportion of the dataset to include in the train split. |
16 | |                  | | **random_state: int, optional (default=None)**                                        |
17 | |                  | |   random_state is the seed used by the random number generator.                       |
18 | +------------------+-----------------------------------------------------------------------------------------+
19 | | **Return**       | | **X_train: numpy ndarray**                                                            |
20 | |                  | |   Train matrix of features.                                                           |
21 | |                  | | **X_test: numpy ndarray**                                                             |
22 | |                  | |   Test matrix of features.                                                            |
23 | |                  | | **y_train: numpy array**                                                              |
24 | |                  | |   Train array of target of feature.                                                   |
25 | |                  | | **y_test: numpy array**                                                               |
26 | |                  | |   Test array of target of feature.                                                    |
27 | |                  | | **t_train: numpy array**                                                              |
28 | |                  | |   Train array of treatments.                                                          |
29 | |                  | | **t_test: numpy array**                                                               |
30 | |                  | |   Test array of treatments.                                                           |
31 | +------------------+-----------------------------------------------------------------------------------------+
32 | 
33 | ********
34 | Examples
35 | ********
36 | 
37 | .. code-block:: python3
38 | 
39 |    from pyuplift.model_selection import train_test_split
40 |    ...
41 |    for seed in seeds:
42 |        X_train, X_test, y_train, y_test, t_train, t_test = train_test_split(X, y, t, train_share, seed)
43 |        model.fit(X_train, y_train, t_train)
44 |        score = get_average_effect(y_test, t_test, model.predict(X_test))
45 |        scores.append(score)
46 | 


--------------------------------------------------------------------------------
/docs/model_selection/treatment_cross_val_score.rst:
--------------------------------------------------------------------------------
 1 | #########################
 2 | treatment_cross_val_score
 3 | #########################
 4 | 
 5 | Evaluate a scores by cross-validation.
 6 | 
 7 | +------------------+-----------------------------------------------------------------------------------------+
 8 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                             |
 9 | |                  | |   Matrix of features.                                                                 |
10 | |                  | | **y: numpy array with shape = [n_samples,]**                                          |
11 | |                  | |   Array of target of feature.                                                         |
12 | |                  | | **t: numpy array with shape = [n_samples,]**                                          |
13 | |                  | |   Array of treatments.                                                                |
14 | |                  | | **train_share: float, optional (default=0.7)**                                        |
15 | |                  | |   train_share represents the proportion of the dataset to include in the train split. |
16 | |                  | | **random_state: int, optional (default=777)**                                         |
17 | |                  | |   random_state is the seed used by the random number generator.                       |
18 | +------------------+-----------------------------------------------------------------------------------------+
19 | | **Return**       | | **scores: numpy array of floats**                                                     |
20 | |                  | |   Array of scores of the estimator for each run of the cross validation.              |
21 | +------------------+-----------------------------------------------------------------------------------------+
22 | 
23 | ********
24 | Examples
25 | ********
26 | 
27 | .. code-block:: python3
28 | 
29 |    from pyuplift.model_selection import treatment_cross_val_score
30 |    ...
31 |    for model_name in models:
32 |        scores = treatment_cross_val_score(X, y, t, models[model_name], cv, seeds=seeds)
33 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinxcontrib-httpdomain
2 | sphinx
3 | 


--------------------------------------------------------------------------------
/docs/transformation/index.rst:
--------------------------------------------------------------------------------
 1 | ##############
 2 | Transformation
 3 | ##############
 4 | 
 5 | The pyuplift.transformation module includes classes which belongs to a transformation group of approaches.
 6 | 
 7 | .. toctree::
 8 |   :hidden:
 9 |   
10 |   transformation_base_model
11 |   lai
12 |   kane
13 |   jaskowski
14 |   pessimistic
15 |   reflective
16 | 
17 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
18 | | `transformation.TransformationBaseModel() <transformation_base_model.html>`_ | A base model of all classes which implements a transformation approaches. |
19 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
20 | | `transformation.Lai([model, use_weights]) <lai.html>`_                       | A Lai's approach.                                                         |
21 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
22 | | `transformation.Kane([model, use_weights]) <kane.html>`_                     | A Kane's approach.                                                        |
23 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
24 | | `transformation.Jaskowski([model]) <jaskowski.html>`_                        | A Jaskowski's approach.                                                   |
25 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
26 | | `transformation.Pessimistic([model]) <pessimistic.html>`_                    | A pessimistic approach.                                                   |
27 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
28 | | `transformation.Reflective([model]) <reflective.html>`_                      | A reflective approach.                                                    |
29 | +------------------------------------------------------------------------------+---------------------------------------------------------------------------+
30 | 


--------------------------------------------------------------------------------
/docs/transformation/jaskowski.rst:
--------------------------------------------------------------------------------
 1 | #########
 2 | Jaskowski
 3 | #########
 4 | 
 5 | The class which implements the Jaskowski's approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 9 | |                | |   The classification model which will be used for predict uplift.               |
10 | +----------------+-----------------------------------------------------------------------------------+
11 | 
12 | 
13 | *******
14 | Methods
15 | *******
16 | +-----------------------------------------------+----------------------------------------------------+
17 | | :ref:`fit(self, X, y, t) <jask_fit>`          | Build the model from the training set (X, y, t).   |
18 | +-----------------------------------------------+----------------------------------------------------+
19 | | :ref:`predict(self, X, t=None) <jask_predict>`| Predict an uplift for X.                           |
20 | +-----------------------------------------------+----------------------------------------------------+
21 | 
22 | .. _jask_fit:
23 | 
24 | fit(self, X, y, t)
25 | ------------------
26 | Build the model from the training set (X, y, t).
27 | 
28 | +------------------+---------------------------------------------------------------------------------+
29 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
30 | |                  | |   Matrix of features.                                                         |
31 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
32 | |                  | |   Array of target of feature.                                                 |
33 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of treatments.                                                        |
35 | +------------------+---------------------------------------------------------------------------------+
36 | | **Returns**      | **self : object**                                                               |
37 | +------------------+---------------------------------------------------------------------------------+
38 | 
39 | .. _jask_predict:
40 | 
41 | predict(self, X, t=None)
42 | ------------------------
43 | Predict an uplift for X. 
44 | 
45 | +------------------+---------------------------------------------------------------------------------+
46 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
47 | |                  | |   Matrix of features.                                                         |
48 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
49 | |                  | |   Array of treatments.                                                        |
50 | +------------------+---------------------------------------------------------------------------------+
51 | | **Returns**      | | **self : object**                                                             |
52 | |                  | |   The predicted values.                                                       |
53 | +------------------+---------------------------------------------------------------------------------+
54 | 
55 | **********
56 | References
57 | **********
58 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
59 | 
60 | 
61 | .. code-block:: python3
62 | 
63 |    from pyuplift.transformation import Jaskowski
64 |    ...
65 |    model = Jaskowski()
66 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
67 |    uplift = model.predict(X[test_indexes, :])
68 |    print(uplift)
69 |  


--------------------------------------------------------------------------------
/docs/transformation/kane.rst:
--------------------------------------------------------------------------------
 1 | ####
 2 | Kane
 3 | ####
 4 | 
 5 | The class which implements the Kane's approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 9 | |                | |   The classification model which will be used for predict uplift.               |
10 | |                | | **use_weights : boolean, optional (default=False)**                             |
11 | |                | |   Use or not weights?                                                           |
12 | +----------------+-----------------------------------------------------------------------------------+
13 | 
14 | 
15 | *******
16 | Methods
17 | *******
18 | +-----------------------------------------------+----------------------------------------------------+
19 | | :ref:`fit(self, X, y, t) <kane_fit>`          | Build the model from the training set (X, y, t).   |
20 | +-----------------------------------------------+----------------------------------------------------+
21 | | :ref:`predict(self, X, t=None) <kane_predict>`| Predict an uplift for X.                           |
22 | +-----------------------------------------------+----------------------------------------------------+
23 | 
24 | .. _kane_fit:
25 | 
26 | fit(self, X, y, t)
27 | ------------------
28 | Build the model from the training set (X, y, t).
29 | 
30 | +------------------+---------------------------------------------------------------------------------+
31 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
32 | |                  | |   Matrix of features.                                                         |
33 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of target of feature.                                                 |
35 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
36 | |                  | |   Array of treatments.                                                        |
37 | +------------------+---------------------------------------------------------------------------------+
38 | | **Returns**      | **self : object**                                                               |
39 | +------------------+---------------------------------------------------------------------------------+
40 | 
41 | .. _kane_predict:
42 | 
43 | predict(self, X, t=None)
44 | ------------------------
45 | Predict an uplift for X. 
46 | 
47 | +------------------+---------------------------------------------------------------------------------+
48 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
49 | |                  | |   Matrix of features.                                                         |
50 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
51 | |                  | |   Array of treatments.                                                        |
52 | +------------------+---------------------------------------------------------------------------------+
53 | | **Returns**      | | **self : object**                                                             |
54 | |                  | |   The predicted values.                                                       |
55 | +------------------+---------------------------------------------------------------------------------+
56 | 
57 | **********
58 | References
59 | **********
60 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
61 | 
62 | 
63 | .. code-block:: python3
64 | 
65 |    from pyuplift.transformation import Kane
66 |    ...
67 |    model = Kane()
68 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
69 |    uplift = model.predict(X[test_indexes, :])
70 |    print(uplift)
71 |  


--------------------------------------------------------------------------------
/docs/transformation/lai.rst:
--------------------------------------------------------------------------------
 1 | ###
 2 | Lai
 3 | ###
 4 | 
 5 | The class which implements the Lai's approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 9 | |                | |   The classification model which will be used for predict uplift.               |
10 | |                | | **use_weights : boolean, optional (default=False)**                             |
11 | |                | |   Use or not weights?                                                           |
12 | +----------------+-----------------------------------------------------------------------------------+
13 | 
14 | 
15 | *******
16 | Methods
17 | *******
18 | +-----------------------------------------------+----------------------------------------------------+
19 | | :ref:`fit(self, X, y, t) <lai_fit>`           | Build a the model from the training set (X, y, t). |
20 | +-----------------------------------------------+----------------------------------------------------+
21 | | :ref:`predict(self, X, t=None) <lai_predict>` | Predict an uplift for X.                           |
22 | +-----------------------------------------------+----------------------------------------------------+
23 | 
24 | .. _lai_fit:
25 | 
26 | fit(self, X, y, t)
27 | ------------------
28 | Build a the model from the training set (X, y, t).
29 | 
30 | +------------------+---------------------------------------------------------------------------------+
31 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
32 | |                  | |   Matrix of features.                                                         |
33 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of target of feature.                                                 |
35 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
36 | |                  | |   Array of treatments.                                                        |
37 | +------------------+---------------------------------------------------------------------------------+
38 | | **Returns**      | **self : object**                                                               |
39 | +------------------+---------------------------------------------------------------------------------+
40 | 
41 | .. _lai_predict:
42 | 
43 | predict(self, X, t=None)
44 | ------------------------
45 | Predict an uplift for X. 
46 | 
47 | +------------------+---------------------------------------------------------------------------------+
48 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
49 | |                  | |   Matrix of features.                                                         |
50 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
51 | |                  | |   Array of treatments.                                                        |
52 | +------------------+---------------------------------------------------------------------------------+
53 | | **Returns**      | | **self : object**                                                             |
54 | |                  | |   The predicted values.                                                       |
55 | +------------------+---------------------------------------------------------------------------------+
56 | 
57 | **********
58 | References
59 | **********
60 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
61 | 
62 | 
63 | .. code-block:: python3
64 | 
65 |    from pyuplift.transformation import Lai
66 |    ...
67 |    model = Lai()
68 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
69 |    uplift = model.predict(X[test_indexes, :])
70 |    print(uplift)
71 |  


--------------------------------------------------------------------------------
/docs/transformation/pessimistic.rst:
--------------------------------------------------------------------------------
 1 | ###########
 2 | Pessimistic
 3 | ###########
 4 | 
 5 | The class which implements the pessimistic approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 9 | |                | |   The classification model which will be used for predict uplift.               |
10 | +----------------+-----------------------------------------------------------------------------------+
11 | 
12 | 
13 | *******
14 | Methods
15 | *******
16 | +-----------------------------------------------+----------------------------------------------------+
17 | | :ref:`fit(self, X, y, t) <pes_fit>`           | Build the model from the training set (X, y, t).   |
18 | +-----------------------------------------------+----------------------------------------------------+
19 | | :ref:`predict(self, X, t=None) <pes_predict>` | Predict an uplift for X.                           |
20 | +-----------------------------------------------+----------------------------------------------------+
21 | 
22 | .. _pes_fit:
23 | 
24 | fit(self, X, y, t)
25 | ------------------
26 | Build the model from the training set (X, y, t).
27 | 
28 | +------------------+---------------------------------------------------------------------------------+
29 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
30 | |                  | |   Matrix of features.                                                         |
31 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
32 | |                  | |   Array of target of feature.                                                 |
33 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of treatments.                                                        |
35 | +------------------+---------------------------------------------------------------------------------+
36 | | **Returns**      | **self : object**                                                               |
37 | +------------------+---------------------------------------------------------------------------------+
38 | 
39 | .. _pes_predict:
40 | 
41 | predict(self, X, t=None)
42 | ------------------------
43 | Predict an uplift for X. 
44 | 
45 | +------------------+---------------------------------------------------------------------------------+
46 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
47 | |                  | |   Matrix of features.                                                         |
48 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
49 | |                  | |   Array of treatments.                                                        |
50 | +------------------+---------------------------------------------------------------------------------+
51 | | **Returns**      | | **self : object**                                                             |
52 | |                  | |   The predicted values.                                                       |
53 | +------------------+---------------------------------------------------------------------------------+
54 | 
55 | **********
56 | References
57 | **********
58 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
59 | 
60 | 
61 | .. code-block:: python3
62 | 
63 |    from pyuplift.transformation import Pessimistic
64 |    ...
65 |    model = Pessimistic()
66 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
67 |    uplift = model.predict(X[test_indexes, :])
68 |    print(uplift)
69 |  


--------------------------------------------------------------------------------
/docs/transformation/reflective.rst:
--------------------------------------------------------------------------------
 1 | ##########
 2 | Reflective
 3 | ##########
 4 | 
 5 | The class which implements the reflective approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 9 | |                | |   The classification model which will be used for predict uplift.               |
10 | +----------------+-----------------------------------------------------------------------------------+
11 | 
12 | 
13 | *******
14 | Methods
15 | *******
16 | +-----------------------------------------------+----------------------------------------------------+
17 | | :ref:`fit(self, X, y, t) <ref_fit>`           | Build the model from the training set (X, y, t).   |
18 | +-----------------------------------------------+----------------------------------------------------+
19 | | :ref:`predict(self, X, t=None) <ref_predict>` | Predict an uplift for X.                           |
20 | +-----------------------------------------------+----------------------------------------------------+
21 | 
22 | .. _ref_fit:
23 | 
24 | fit(self, X, y, t)
25 | ------------------
26 | Build the model from the training set (X, y, t).
27 | 
28 | +------------------+---------------------------------------------------------------------------------+
29 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
30 | |                  | |   Matrix of features.                                                         |
31 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
32 | |                  | |   Array of target of feature.                                                 |
33 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of treatments.                                                        |
35 | +------------------+---------------------------------------------------------------------------------+
36 | | **Returns**      | **self : object**                                                               |
37 | +------------------+---------------------------------------------------------------------------------+
38 | 
39 | .. _ref_predict:
40 | 
41 | predict(self, X, t=None)
42 | ------------------------
43 | Predict an uplift for X. 
44 | 
45 | +------------------+---------------------------------------------------------------------------------+
46 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
47 | |                  | |   Matrix of features.                                                         |
48 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
49 | |                  | |   Array of treatments.                                                        |
50 | +------------------+---------------------------------------------------------------------------------+
51 | | **Returns**      | | **self : object**                                                             |
52 | |                  | |   The predicted values.                                                       |
53 | +------------------+---------------------------------------------------------------------------------+
54 | 
55 | **********
56 | References
57 | **********
58 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
59 | 
60 | 
61 | .. code-block:: python3
62 | 
63 |    from pyuplift.transformation import Reflective
64 |    ...
65 |    model = Reflective()
66 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
67 |    uplift = model.predict(X[test_indexes, :])
68 |    print(uplift)
69 |  


--------------------------------------------------------------------------------
/docs/transformation/transformation_base_model.rst:
--------------------------------------------------------------------------------
1 | #########################
2 | Transformation Base Model
3 | #########################
4 | 
5 | The base class for a transformation uplift estimators.
6 | 
7 | .. note::
8 |    This class should not be used directly. Use derived classes instead.
9 |  


--------------------------------------------------------------------------------
/docs/utils/download_file.rst:
--------------------------------------------------------------------------------
 1 | #############
 2 | download_file
 3 | #############
 4 | 
 5 | Download file from `url` to `output_path`.
 6 | 
 7 | +-----------------+--------------------------------------+
 8 | | **Parameters**  | | **url: string**                    |
 9 | |                 | |   Data's URL.                      |
10 | |                 | | **output_path: string**            |
11 | |                 | |   Path where file will be saved.   |
12 | +-----------------+--------------------------------------+
13 | | **Returns**     | **None**                             |
14 | +-----------------+--------------------------------------+
15 | 
16 | ********
17 | Examples
18 | ********
19 | 
20 | .. code-block:: python3
21 | 
22 |    from pyuplift.utils import download_file
23 |    ...
24 |     if not os.path.exists(data_path):
25 |         if not os.path.exists(archive_path):
26 |             download_file(url, archive_path)
27 | 


--------------------------------------------------------------------------------
/docs/utils/index.rst:
--------------------------------------------------------------------------------
 1 | #########
 2 | Utilities
 3 | #########
 4 | 
 5 | .. toctree::
 6 |   :hidden:
 7 |   
 8 |   download_file
 9 |   retrieve_from_gz
10 | 
11 | The pyuplift.utils module includes various utilities.
12 | 
13 | +------------------------------------------------------------------------------+----------------------------------------------------------------------+
14 | | `utils.download_file(url, output_path) <download_file.html>`_                | Download file from `url` to `output_path`.                           |
15 | +------------------------------------------------------------------------------+----------------------------------------------------------------------+
16 | | `utils.retrieve_from_gz(archive_path, output_path) <retrieve_from_gz.html>`_ | The retrieving gz-archived data from `archive_path` to `output_path` |
17 | +------------------------------------------------------------------------------+----------------------------------------------------------------------+
18 | 


--------------------------------------------------------------------------------
/docs/utils/retrieve_from_gz.rst:
--------------------------------------------------------------------------------
 1 | ################
 2 | retrieve_from_gz
 3 | ################
 4 | 
 5 | The retrieving gz-archived data from `archive_path` to `output_path`.
 6 | 
 7 | +-----------------+--------------------------------------+
 8 | | **Parameters**  | | **archive_path: string**           |
 9 | |                 | |   The archive path.                |
10 | |                 | | **output_path: string**            |
11 | |                 | |   The retrieved data path.         |
12 | +-----------------+--------------------------------------+
13 | | **Returns**     | **None**                             |
14 | +-----------------+--------------------------------------+
15 | 
16 | ********
17 | Examples
18 | ********
19 | 
20 | .. code-block:: python3
21 | 
22 |    from pyuplift.utils import retrieve_from_gz
23 |    ...
24 |     if not os.path.exists(data_path):
25 |         if not os.path.exists(archive_path):
26 |             download_file(url, archive_path)
27 | 		retrieve_from_gz(archive_path, data_path)
28 | 


--------------------------------------------------------------------------------
/docs/variable_selection/cadit.rst:
--------------------------------------------------------------------------------
 1 | #####
 2 | Cadit
 3 | #####
 4 | 
 5 | The class which implements the cadit approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LinearRegression)**    |
 9 | |                | |   The regression model which will be used for predict uplift.                   |
10 | +----------------+-----------------------------------------------------------------------------------+
11 | 
12 | 
13 | *******
14 | Methods
15 | *******
16 | +-------------------------------------------------+--------------------------------------------------+
17 | | :ref:`fit(self, X, y, t) <cadit_fit>`           | Build a model from the training set (X, y, t).   |
18 | +-------------------------------------------------+--------------------------------------------------+
19 | | :ref:`predict(self, X, t=None) <cadit_predict>` | Predict an uplift for X.                         |
20 | +-------------------------------------------------+--------------------------------------------------+
21 | 
22 | .. _cadit_fit:
23 | 
24 | fit(self, X, y, t)
25 | ------------------
26 | Build a model from the training set (X, y, t).
27 | 
28 | +------------------+---------------------------------------------------------------------------------+
29 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
30 | |                  | |   Matrix of features.                                                         |
31 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
32 | |                  | |   Array of target of feature.                                                 |
33 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of treatments.                                                        |
35 | +------------------+---------------------------------------------------------------------------------+
36 | | **Returns**      | **self : object**                                                               |
37 | +------------------+---------------------------------------------------------------------------------+
38 | 
39 | .. _cadit_predict:
40 | 
41 | predict(self, X, t=None)
42 | ------------------------
43 | Predict an uplift for X. 
44 | 
45 | +------------------+---------------------------------------------------------------------------------+
46 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
47 | |                  | |   Matrix of features.                                                         |
48 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
49 | |                  | |   Array of treatments.                                                        |
50 | +------------------+---------------------------------------------------------------------------------+
51 | | **Returns**      | | **self : object**                                                             |
52 | |                  | |   The predicted values.                                                       |
53 | +------------------+---------------------------------------------------------------------------------+
54 | 
55 | **********
56 | References
57 | **********
58 | 1. Weisberg HI, Pontes VP. Post hoc subgroups in clinical trials: Anathema or analytics? // Clinical trials. 2015 Aug;12(4):357-64.
59 | 
60 | .. code-block:: python3
61 | 
62 |    from pyuplift.variable_selection import Cadit
63 |    ...
64 |    model = Cadit()
65 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
66 |    uplift = model.predict(X[test_indexes, :])
67 |    print(uplift)
68 |  


--------------------------------------------------------------------------------
/docs/variable_selection/dummy.rst:
--------------------------------------------------------------------------------
 1 | #####
 2 | Dummy
 3 | #####
 4 | 
 5 | The class which implements the dummy approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LinearRegression)**    |
 9 | |                | |   The regression model which will be used for predict uplift.                   |
10 | +----------------+-----------------------------------------------------------------------------------+
11 | 
12 | 
13 | *******
14 | Methods
15 | *******
16 | +-------------------------------------------------+-----------------------------------------------------+
17 | | :ref:`fit(self, X, y, t) <dummy_fit>`           | Build a dummy model from the training set (X, y, t).|
18 | +-------------------------------------------------+-----------------------------------------------------+
19 | | :ref:`predict(self, X, t=None) <dummy_predict>` | Predict an uplift for X.                            |
20 | +-------------------------------------------------+-----------------------------------------------------+
21 | 
22 | .. _dummy_fit:
23 | 
24 | fit(self, X, y, t)
25 | ------------------
26 | Build a dummy model from the training set (X, y, t).
27 | 
28 | +------------------+---------------------------------------------------------------------------------+
29 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
30 | |                  | |   Matrix of features.                                                         |
31 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
32 | |                  | |   Array of target of feature.                                                 |
33 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of treatments.                                                        |
35 | +------------------+---------------------------------------------------------------------------------+
36 | | **Returns**      | **self : object**                                                               |
37 | +------------------+---------------------------------------------------------------------------------+
38 | 
39 | .. _dummy_predict:
40 | 
41 | predict(self, X, t=None)
42 | ------------------------
43 | Predict an uplift for X. 
44 | 
45 | +------------------+---------------------------------------------------------------------------------+
46 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
47 | |                  | |   Matrix of features.                                                         |
48 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
49 | |                  | |   Array of treatments.                                                        |
50 | +------------------+---------------------------------------------------------------------------------+
51 | | **Returns**      | | **self : object**                                                             |
52 | |                  | |   The predicted values.                                                       |
53 | +------------------+---------------------------------------------------------------------------------+
54 | 
55 | **********
56 | References
57 | **********
58 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
59 | 
60 | 
61 | .. code-block:: python3
62 | 
63 |    from pyuplift.variable_selection import Dummy
64 |    ...
65 |    model = Dummy()
66 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
67 |    uplift = model.predict(X[test_indexes, :])
68 |    print(uplift)
69 |  


--------------------------------------------------------------------------------
/docs/variable_selection/econometric.rst:
--------------------------------------------------------------------------------
 1 | ###########
 2 | Econometric
 3 | ###########
 4 | 
 5 | The class which implements the econometric approach [1].
 6 | 
 7 | +----------------+-----------------------------------------------------------------------------------+
 8 | | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LinearRegression)**    |
 9 | |                | |   The regression model which will be used for predict uplift.                   |
10 | +----------------+-----------------------------------------------------------------------------------+
11 | 
12 | 
13 | *******
14 | Methods
15 | *******
16 | +-----------------------------------------------+------------------------------------------------------------+
17 | | :ref:`fit(self, X, y, t) <eco_fit>`           | Build an econometric model from the training set (X, y, t).|
18 | +-----------------------------------------------+------------------------------------------------------------+
19 | | :ref:`predict(self, X, t=None) <eco_predict>` | Predict an uplift for X.                                   |
20 | +-----------------------------------------------+------------------------------------------------------------+
21 | 
22 | .. _eco_fit:
23 | 
24 | fit(self, X, y, t)
25 | ------------------
26 | Build an econometric model from the training set (X, y, t).
27 | 
28 | +------------------+---------------------------------------------------------------------------------+
29 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
30 | |                  | |   Matrix of features.                                                         |
31 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
32 | |                  | |   Array of target of feature.                                                 |
33 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
34 | |                  | |   Array of treatments.                                                        |
35 | +------------------+---------------------------------------------------------------------------------+
36 | | **Returns**      | **self : object**                                                               |
37 | +------------------+---------------------------------------------------------------------------------+
38 | 
39 | .. _eco_predict:
40 | 
41 | predict(self, X, t=None)
42 | ------------------------
43 | Predict an uplift for X. 
44 | 
45 | +------------------+---------------------------------------------------------------------------------+
46 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
47 | |                  | |   Matrix of features.                                                         |
48 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
49 | |                  | |   Array of treatments.                                                        |
50 | +------------------+---------------------------------------------------------------------------------+
51 | | **Returns**      | | **self : object**                                                             |
52 | |                  | |   The predicted values.                                                       |
53 | +------------------+---------------------------------------------------------------------------------+
54 | 
55 | **********
56 | References
57 | **********
58 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
59 | 
60 | 
61 | .. code-block:: python3
62 | 
63 |    from pyuplift.variable_selection import Econometric
64 |    ...
65 |    model = Econometric()
66 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
67 |    uplift = model.predict(X[test_indexes, :])
68 |    print(uplift)
69 |  


--------------------------------------------------------------------------------
/docs/variable_selection/index.rst:
--------------------------------------------------------------------------------
 1 | ##################
 2 | Variable Selection
 3 | ##################
 4 | 
 5 | The pyuplift.variable_selection module includes classes which belongs to variable selection group of approaches.
 6 | 
 7 | .. toctree::
 8 |   :hidden:
 9 |   
10 |   two_model
11 |   econometric
12 |   dummy
13 |   cadit
14 |   
15 | +--------------------------------------------------------------------------------------------+--------------------------+
16 | | `variable_selection.TwoModel([no_treatment_model, has_treatment_model]) <two_model.html>`_ | A two model approach.    |
17 | +--------------------------------------------------------------------------------------------+--------------------------+
18 | | `variable_selection.Econometric([model]) <econometric.html>`_                              | An econometric approach. |
19 | +--------------------------------------------------------------------------------------------+--------------------------+
20 | | `variable_selection.Dummy([model]) <dummy.html>`_                                          | A dummy approach.        |
21 | +--------------------------------------------------------------------------------------------+--------------------------+
22 | | `variable_selection.Cadit([model]) <cadit.html>`_                                          | A cadit approach.        |
23 | +--------------------------------------------------------------------------------------------+--------------------------+
24 | 


--------------------------------------------------------------------------------
/docs/variable_selection/two_model.rst:
--------------------------------------------------------------------------------
 1 | #########
 2 | Two Model
 3 | #########
 4 | 
 5 | The class which implements the two model approach [1].
 6 | 
 7 | +----------------+---------------------------------------------------------------------------------------------+
 8 | | **Parameters** | | **no_treatment_model : object, optional (default=sklearn.linear_model.LinearRegression)** |
 9 | |                | |   The regression model which will be used for predict uplift.                             |
10 | |                | | **has_treatment_model : object, optional (default=sklearn.linear_model.LinearRegression)**|
11 | |                | |   The regression model which will be used for predict uplift.                             |
12 | +----------------+---------------------------------------------------------------------------------------------+
13 | 
14 | *******
15 | Methods
16 | *******
17 | +-----------------------------------------------+--------------------------------------------------------------+
18 | | :ref:`fit(self, X, y, t) <two_fit>`           | Build a two model model from the training set (X, y, t).     |
19 | +-----------------------------------------------+--------------------------------------------------------------+
20 | | :ref:`predict(self, X, t=None) <two_predict>` | Predict an uplift for X.                                     |
21 | +-----------------------------------------------+--------------------------------------------------------------+
22 | 
23 | .. _two_fit:
24 | 
25 | fit(self, X, y, t)
26 | ------------------
27 | Build a model model model from the training set (X, y, t).
28 | 
29 | +------------------+---------------------------------------------------------------------------------+
30 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
31 | |                  | |   Matrix of features.                                                         |
32 | |                  | | **y: numpy array with shape = [n_samples,]**                                  |
33 | |                  | |   Array of target of feature.                                                 |
34 | |                  | | **t: numpy array with shape = [n_samples,]**                                  |
35 | |                  | |   Array of treatments.                                                        |
36 | +------------------+---------------------------------------------------------------------------------+
37 | | **Returns**      | **self : object**                                                               |
38 | +------------------+---------------------------------------------------------------------------------+
39 | 
40 | .. _two_predict:
41 | 
42 | predict(self, X, t=None)
43 | ------------------------
44 | Predict an uplift for X. 
45 | 
46 | +------------------+---------------------------------------------------------------------------------+
47 | | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
48 | |                  | |   Matrix of features.                                                         |
49 | |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
50 | |                  | |   Array of treatments.                                                        |
51 | +------------------+---------------------------------------------------------------------------------+
52 | | **Returns**      | | **self : object**                                                             |
53 | |                  | |   The predicted values.                                                       |
54 | +------------------+---------------------------------------------------------------------------------+
55 | 
56 | **********
57 | References
58 | **********
59 | 1. A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics by Floris Devriendt, Darie Moldovan, and Wouter Verbeke
60 | 
61 | 
62 | .. code-block:: python3
63 | 
64 |    from pyuplift.variable_selection import TwoModel
65 |    ...
66 |    model = TwoModel()
67 |    model.fit(X[train_indexes, :], y[train_indexes], t[train_indexes])
68 |    uplift = model.predict(X[test_indexes, :])
69 |    print(uplift)
70 |  


--------------------------------------------------------------------------------
/examples/README.MD:
--------------------------------------------------------------------------------
1 | ## Examples
2 | This directory contains examples of code which related to the pyuplift library.
3 | 


--------------------------------------------------------------------------------
/pyuplift/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseModel
2 | 


--------------------------------------------------------------------------------
/pyuplift/base.py:
--------------------------------------------------------------------------------
 1 | class BaseModel:
 2 |     """Base class for uplift models.
 3 | 
 4 |     Note: This class should not be used directly. Use derived classes instead.
 5 |     """
 6 | 
 7 |     def fit(self, X, y, t):
 8 |         """Build a TwoModel approach from the training set (x, y, t).
 9 | 
10 |         Parameters
11 |         ----------
12 |         X : numpy array of shape = [n_samples, n_features]
13 |             The training input samples.
14 |         y : numpy array of shape = [n_samples] or [n_samples, n_outputs]
15 |             The target values (class labels in classification, real numbers in regression).
16 |         t : numpy array of shape = [n_samples] or [n_samples, n_outputs]
17 |             The treatments.
18 |         Returns
19 |         -------
20 |         self : object
21 |         """
22 |         return self
23 | 
24 |     def predict(self, X, t):
25 |         """Predict treatment effect for x.
26 | 
27 |         Parameters
28 |         ----------
29 |         X : numpy array of shape = [n_samples, n_features]
30 |             The input samples.
31 |         t : numpy array of shape = [n_samples, n_features]
32 |             The treatments.
33 |         Returns
34 |         -------
35 |         y : array of shape = [n_samples] or [n_samples, n_outputs]
36 |             The predicted treatment effects.
37 |         """
38 |         pass
39 | 


--------------------------------------------------------------------------------
/pyuplift/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .loaders.criteo_uplift_prediction import download_criteo_uplift_prediction, load_criteo_uplift_prediction
2 | from .loaders.hillstrom_email_marketing import download_hillstrom_email_marketing, load_hillstrom_email_marketing
3 | from .loaders.lalonde_nsw import download_lalonde_nsw, load_lalonde_nsw
4 | 
5 | from .generators.linear import make_linear_regression
6 | 


--------------------------------------------------------------------------------
/pyuplift/datasets/generators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/pyuplift/datasets/generators/__init__.py


--------------------------------------------------------------------------------
/pyuplift/datasets/generators/linear.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def make_linear_regression(
 6 |         size: int,
 7 |         x1_params=(0, 100),
 8 |         x2_params=(0, 10),
 9 |         x3_params=(0, 100),
10 |         t_params=(0, 2),
11 |         e_params=(0, 100),
12 |         eps=0.01,
13 |         random_state=777
14 | ):
15 |     """The data generates by formula:
16 | 
17 |     ****************
18 |     Data description
19 |     ****************
20 |     Synthetic data generated by Generate data by formula:
21 | 
22 |      | ``Y' = X1 + X2 * T + E``
23 |      | ``Y = Y', if Y' - int(Y') > eps,``
24 |      | ``Y = 0,  otherwise.``
25 | 
26 |     Statistics for default parameters and size equals 100,000:
27 | 
28 |     +--------------------------+-------------+
29 |     |Features                  |           3 |
30 |     +--------------------------+-------------+
31 |     |Treatment                 |           2 |
32 |     +--------------------------+-------------+
33 |     |Samples total             |      `size` |
34 |     +--------------------------+-------------+
35 |     |Y not equals 0            |     0.49438 |
36 |     +--------------------------+-------------+
37 |     |Y values                  | 0 to 555.93 |
38 |     +--------------------------+-------------+
39 | 
40 |     Parameters
41 |     ----------
42 |     size : int
43 |          The number of observations.
44 |     x1_params : tuple(mu, sigma), default: (0, 100)
45 |          The feature with gaussian distribution and mean=mu, sd=sigma.
46 |          X1 ~ N(mu, sigma)
47 |     x2_params : tuple(mu, sigma), default: (0, 10)
48 |          The feature with gaussian distribution and mean=mu, sd=sigma.
49 |          X2 ~ N(mu, sigma)
50 |     x3_params : tuple(mu, sigma), default: (0, 100)
51 |          The feature with gaussian distribution and mean=mu, sd=sigma.
52 |          X3 ~ N(mu, sigma)
53 |     t_params : tuple(min, max), default: (0, 2)
54 |          The treatment with uniform distribution. Min value=min, Max value=max-1
55 |          T ~ R(min, max)
56 |     e_params : tuple(mu, sigma), default: (0, 100)
57 |          The error with gaussian distribution and mean=mu, sd=sigma.
58 |          E ~ N(mu, sigma)
59 |     eps : float, default: 0.01
60 |          The border value.
61 |     random_state : int, default: 777
62 |          The random seed.
63 |     Returns
64 |     -------
65 |     dataset : pandas DataFrame
66 |     """
67 | 
68 |     if size <= 0:
69 |         raise ValueError('Size of the dataset should be non negative.')
70 | 
71 |     np.random.seed(random_state)
72 |     x1 = np.random.normal(*x1_params, size)
73 |     x2 = np.random.normal(*x2_params, size)
74 |     x3 = np.random.normal(*x3_params, size)
75 |     t = np.random.randint(*t_params, size)
76 |     e = np.random.normal(*e_params, size)
77 |     y_ = x1 + x2 * t + e
78 |     y = []
79 |     for value in y_:
80 |         if value - int(value) > eps:
81 |             y.append(value)
82 |         else:
83 |             y.append(0)
84 |     y = np.array(y)
85 |     return pd.DataFrame(data={
86 |         'x1': x1,
87 |         'x2': x2,
88 |         'x3': x3,
89 |         't': t,
90 |         'y': y
91 |     })
92 | 


--------------------------------------------------------------------------------
/pyuplift/datasets/loaders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/pyuplift/datasets/loaders/__init__.py


--------------------------------------------------------------------------------
/pyuplift/datasets/loaders/criteo_uplift_prediction.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | from pyuplift.utils import download_file, retrieve_from_gz
  5 | 
  6 | 
  7 | def download_criteo_uplift_prediction(
  8 |     data_home=None,
  9 |     url='https://s3.us-east-2.amazonaws.com/criteo-uplift-dataset/criteo-uplift.csv.gz'
 10 | ):
 11 |     """Downloading the Criteo Uplift Prediction dataset.
 12 | 
 13 |     ****************
 14 |     Data description
 15 |     ****************
 16 |     This dataset is constructed by assembling data resulting from several incrementality tests,
 17 |     a particular randomized trial procedure
 18 |     where a random part of the population is prevented from being targeted by advertising.
 19 |     It consists of 25M rows, each one representing a user with 11 features,
 20 |     a treatment indicator and 2 labels (visits and conversions).
 21 | 
 22 |     *******
 23 |     Privacy
 24 |     *******
 25 |     For privacy reasons the data has been sub-sampled non-uniformly so that the original incrementality level
 26 |     cannot be deduced from the dataset while preserving a realistic, challenging benchmark.
 27 |     Feature names have been anonymized and their values randomly projected so as to keep predictive power
 28 |     while making it practically impossible to recover the original features or user context.
 29 | 
 30 |     +--------------------------+------------+
 31 |     |Features                  |         11 |
 32 |     +--------------------------+------------+
 33 |     |Treatment                 |          2 |
 34 |     +--------------------------+------------+
 35 |     |Samples total             | 25,309,483 |
 36 |     +--------------------------+------------+
 37 |     |Average visit rate        |    0.04132 |
 38 |     +--------------------------+------------+
 39 |     |Average conversion rate   |    0.00229 |
 40 |     +--------------------------+------------+
 41 | 
 42 |     More information about dataset you can find in
 43 |     the `official dataset description <http://ailab.criteo.com/criteo-uplift-prediction-dataset>`_.
 44 | 
 45 |     +-----------------+----------------------------------------------------------------------------------+
 46 |     | **Parameters**  | | **data_home: string**                                                          |
 47 |     |                 | |   Specify another download and cache folder for the dataset.                   |
 48 |     |                 | |   By default the dataset will be stored in the data folder in the same folder. |
 49 |     |                 | | **url: string**                                                                |
 50 |     |                 | |   The URL to file with data.                                                   |
 51 |     +-----------------+----------------------------------------------------------------------------------+
 52 |     | **Returns**     | **None**                                                                         |
 53 |     +-----------------+----------------------------------------------------------------------------------+
 54 |     """
 55 | 
 56 |     data_home, dataset_path = __get_data_home_dataset_file_paths(data_home)
 57 |     if not os.path.isdir(data_home):
 58 |         os.makedirs(data_home)
 59 | 
 60 |     archive_path = dataset_path.replace('.csv', '.gz')
 61 |     if not os.path.exists(dataset_path):
 62 |         if not os.path.exists(archive_path):
 63 |             download_file(url, archive_path)
 64 |         retrieve_from_gz(archive_path, dataset_path)
 65 | 
 66 | 
 67 | def load_criteo_uplift_prediction(
 68 |     data_home=None,
 69 |     download_if_missing=True
 70 | ):
 71 |     """Loading the Criteo Uplift Prediction dataset from the local file.
 72 | 
 73 |     ****************
 74 |     Data description
 75 |     ****************
 76 |     This dataset is constructed by assembling data resulting from several incrementality tests,
 77 |     a particular randomized trial procedure
 78 |     where a random part of the population is prevented from being targeted by advertising.
 79 |     It consists of 25M rows, each one representing a user with 11 features,
 80 |     a treatment indicator and 2 labels (visits and conversions).
 81 | 
 82 |     *******
 83 |     Privacy
 84 |     *******
 85 |     For privacy reasons the data has been sub-sampled non-uniformly so that the original incrementality level
 86 |     cannot be deduced from the dataset while preserving a realistic, challenging benchmark.
 87 |     Feature names have been anonymized and their values randomly projected so as to keep predictive power
 88 |     while making it practically impossible to recover the original features or user context.
 89 | 
 90 |     +--------------------------+------------+
 91 |     |Features                  |         11 |
 92 |     +--------------------------+------------+
 93 |     |Treatment                 |          2 |
 94 |     +--------------------------+------------+
 95 |     |Samples total             | 25,309,483 |
 96 |     +--------------------------+------------+
 97 |     |Average visit rate        |    0.04132 |
 98 |     +--------------------------+------------+
 99 |     |Average conversion rate   |    0.00229 |
100 |     +--------------------------+------------+
101 | 
102 |     More information about dataset you can find in
103 |     the `official dataset description <http://ailab.criteo.com/criteo-uplift-prediction-dataset>`_.
104 | 
105 |     Parameters
106 |     ----------
107 |     data_home : str, optional (default=None)
108 |         Specify another download and cache folder for the dataset.
109 |         By default the dataset will be stored in the data folder in the same folder.
110 |     download_if_missing : bool, optional (default=True)
111 |         Download the dataset if it is not downloaded.
112 | 
113 |     Returns
114 |     -------
115 |     dataset : dict object with the following attributes:
116 | 
117 |     dataset.description : str
118 |         Description of the Criteo Uplift Prediction dataset.
119 | 
120 |     dataset.data : ndarray, shape (25309483, 11)
121 |         Each row corresponding to the 11 feature values in order.
122 | 
123 |     dataset.feature_names : list, size 11
124 |         List of feature names.
125 | 
126 |     dataset.treatment : ndarray, shape (25309483,)
127 |         Each value corresponds to the treatment.
128 | 
129 |     dataset.target : numpy array of shape (25309483,)
130 |         Each value corresponds to one of the outcomes. By default, it's `visit` outcome (look at `target_visit` below).
131 | 
132 |     dataset.target_visit : numpy array of shape (25309483,)
133 |         Each value corresponds to whether a visit occurred for this user (binary, label).
134 | 
135 |     dataset.target_exposure : numpy array of shape (25309483,)
136 |         Each value corresponds to treatment effect, whether the user has been effectively exposed (binary).
137 | 
138 |     dataset.target_conversion : numpy array of shape (25309483,)
139 |         Each value corresponds to whether a conversion occurred for this user (binary, label).
140 |     """
141 | 
142 |     data_home, dataset_path = __get_data_home_dataset_file_paths(data_home)
143 |     if not os.path.exists(dataset_path):
144 |         if download_if_missing:
145 |             download_criteo_uplift_prediction(data_home)
146 |         else:
147 |             raise FileNotFoundError(
148 |                 'The dataset does not exist. '
149 |                 'Use `download_criteo_uplift_prediction` function to download the dataset.'
150 |             )
151 | 
152 |     df = pd.read_csv(dataset_path)
153 |     description = 'This dataset is constructed by assembling data resulting from several incrementality tests, ' \
154 |                   'a particular randomized trial procedure where a random part of the population' \
155 |                   'is prevented from being targeted by advertising. It consists of 25M rows, ' \
156 |                   'each one representing a user with 11 features, a treatment indicator and ' \
157 |                   '2 labels (visits and conversions).'
158 | 
159 |     drop_names = ['exposure', 'visit', 'conversion', 'treatment']
160 |     dataset = {
161 |         'description': description,
162 |         'data': df.drop(drop_names, axis=1).values,
163 |         'feature_names': np.array([name for name in df.columns if name not in drop_names]),
164 |         'treatment': df['treatment'].values,
165 |         'target': df['visit'].values,
166 |         'target_visit': df['visit'].values,
167 |         'target_exposure': df['exposure'].values,
168 |         'target_conversion': df['conversion'].values,
169 |     }
170 |     return dataset
171 | 
172 | 
173 | def __get_data_home_dataset_file_paths(data_home_path):
174 |     if data_home_path is None:
175 |         data_home_path = os.path.join(os.sep.join(__file__.split(os.sep)[:-1]), 'data')
176 |     dataset_path = os.path.join(data_home_path, 'criteo_uplift_prediction.csv')
177 |     return data_home_path, dataset_path
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/pyuplift/datasets/loaders/lalonde_nsw.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | from sklearn.utils import shuffle
  5 | 
  6 | 
  7 | column_names = ['treat', 'age', 'educ', 'black', 'hisp', 'married', 'nodegr', 're75', 're78']
  8 | column_types = {
  9 |     'treat': 'int32',
 10 |     'age': 'int32',
 11 |     'educ': 'int32',
 12 |     'black': 'int32',
 13 |     'hisp': 'int32',
 14 |     'married': 'int32',
 15 |     'nodegr': 'int32',
 16 | }
 17 | 
 18 | 
 19 | def download_lalonde_nsw(
 20 |     data_home=None,
 21 |     control_data_url='https://users.nber.org/~rdehejia/data/nsw_control.txt',
 22 |     treated_data_url='https://users.nber.org/~rdehejia/data/nsw_treated.txt',
 23 |     separator=r'\s+',
 24 |     column_names=column_names,
 25 |     column_types=column_types,
 26 |     random_state=123
 27 | ):
 28 |     """Downloading the Lalonde NSW dataset.
 29 | 
 30 |     ****************
 31 |     Data description
 32 |     ****************
 33 |     The dataset contains the treated and control units from the male sub-sample
 34 |     from the National Supported Work Demonstration as used by Lalonde in his paper.
 35 | 
 36 |     +--------------------------+------------+
 37 |     | Features                 |          7 |
 38 |     +--------------------------+------------+
 39 |     | Treatment                |          2 |
 40 |     +--------------------------+------------+
 41 |     | Samples total            |        722 |
 42 |     +--------------------------+------------+
 43 | 
 44 |     More information about dataset you can find `here <https://users.nber.org/~rdehejia/nswdata.html>`_.
 45 | 
 46 |     +-----------------+----------------------------------------------------------------------------------+
 47 |     | **Parameters**  | | **data_home: str**                                                             |
 48 |     |                 | |   Specify another download and cache folder for the dataset.                   |
 49 |     |                 | |   By default the dataset will be stored in the data folder in the same folder. |
 50 |     |                 | | **control_data_url: str**                                                      |
 51 |     |                 | |   The URL to file with data of the control group.                              |
 52 |     |                 | | **treated_data_url: str**                                                      |
 53 |     |                 | |   The URL to file with data of the treated group.                              |
 54 |     |                 | | **separator: str**                                                             |
 55 |     |                 | |   The separator which used in the data files.                                  |
 56 |     |                 | | **column_names: list**                                                         |
 57 |     |                 | |   List of column names of the dataset.                                         |
 58 |     |                 | | **column_types: dict**                                                         |
 59 |     |                 | |   List of types for columns of the dataset.                                    |
 60 |     |                 | | **random_state: int**                                                          |
 61 |     |                 | |   The random seed.                                                             |
 62 |     +-----------------+----------------------------------------------------------------------------------+
 63 |     | **Returns**     | **None**                                                                         |
 64 |     +-----------------+----------------------------------------------------------------------------------+
 65 |     """
 66 | 
 67 |     data_home, dataset_path = __get_data_home_dataset_file_paths(data_home)
 68 |     if not os.path.isdir(data_home):
 69 |         os.makedirs(data_home)
 70 | 
 71 |     if not os.path.exists(dataset_path):
 72 |         try:
 73 |             control_df = pd.read_csv(
 74 |                 control_data_url,
 75 |                 sep=separator,
 76 |                 header=None,
 77 |                 names=column_names,
 78 |                 dtype=column_types
 79 |             )
 80 |         except:
 81 |             raise Exception(
 82 |                 'The file with data of the control group not found. '
 83 |                 'Check `control_data_url` value.'
 84 |             )
 85 | 
 86 |         try:
 87 |             treated_df = pd.read_csv(
 88 |                 treated_data_url,
 89 |                 sep=separator,
 90 |                 header=None,
 91 |                 names=column_names,
 92 |                 dtype=column_types
 93 |             )
 94 |         except:
 95 |             raise Exception(
 96 |                 'The file with data of the treated group not found. '
 97 |                 'Check `treated_data_url` value.'
 98 |             )
 99 | 
100 |         df = control_df.append(treated_df, ignore_index=True)
101 |         df = shuffle(df, random_state=random_state)
102 |         df.to_csv(dataset_path, index=False)
103 | 
104 | 
105 | def load_lalonde_nsw(
106 |     data_home=None,
107 |     download_if_missing=True
108 | ):
109 |     """Loading the Lalonde NSW dataset from the local file.
110 | 
111 |     ****************
112 |     Data description
113 |     ****************
114 |     The dataset contains the treated and control units from the male sub-sample
115 |     from the National Supported Work Demonstration as used by Lalonde in his paper.
116 | 
117 |     +--------------------------+------------+
118 |     | Features                 |          7 |
119 |     +--------------------------+------------+
120 |     | Treatment                |          2 |
121 |     +--------------------------+------------+
122 |     | Samples total            |        722 |
123 |     +--------------------------+------------+
124 | 
125 |     More information about dataset you can find `here <https://users.nber.org/~rdehejia/nswdata.html>`_.
126 | 
127 |     Parameters
128 |     ----------
129 |     data_home : str, optional (default=None)
130 |         Specify another download and cache folder for the dataset.
131 |         By default the dataset will be stored in the data folder in the same folder.
132 |     download_if_missing : bool, optional (default=True)
133 |         Download the dataset if it is not downloaded.
134 | 
135 |     Returns
136 |     -------
137 |     dataset : dict object with the following attributes:
138 | 
139 |     dataset.description : str
140 |         Description of the dataset.
141 | 
142 |     dataset.data : ndarray, shape (722, 7)
143 |         Each row corresponding to the 7 feature values in order.
144 | 
145 |     dataset.feature_names : list, size 7
146 |         List of feature names.
147 | 
148 |     dataset.treatment : ndarray, shape (722,)
149 |         Each value corresponds to the treatment.
150 | 
151 |     dataset.target : numpy array of shape (722,)
152 |         Each value corresponds to one of the outcomes. By default, it's `re78` outcome.
153 |     """
154 | 
155 |     data_home, dataset_path = __get_data_home_dataset_file_paths(data_home)
156 |     if not os.path.exists(dataset_path):
157 |         if download_if_missing:
158 |             download_lalonde_nsw(data_home)
159 |         else:
160 |             raise FileNotFoundError(
161 |                 'The dataset does not exist. '
162 |                 'Use `download_lalonde_nsw` function to download the dataset.'
163 |             )
164 | 
165 |     df = pd.read_csv(dataset_path)
166 |     description = 'The dataset contains the treated and control units from the male sub-sample ' \
167 |                   'from the National Supported Work Demonstration as used by Lalonde in his paper.'
168 | 
169 |     drop_names = ['treat', 're78']
170 |     dataset = {
171 |         'description': description,
172 |         'data': df.drop(drop_names, axis=1).values,
173 |         'feature_names': np.array([name for name in df.columns if name not in drop_names]),
174 |         'treatment': df['treat'].values,
175 |         'target': df['re78'].values,
176 |     }
177 |     return dataset
178 | 
179 | 
180 | def __get_data_home_dataset_file_paths(data_home_path):
181 |     if data_home_path is None:
182 |         data_home_path = os.path.join(os.sep.join(__file__.split(os.sep)[:-1]), 'data')
183 |     dataset_path = os.path.join(data_home_path, 'lalonde_nsw.csv')
184 |     return data_home_path, dataset_path
185 | 


--------------------------------------------------------------------------------
/pyuplift/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .average_effect import get_average_effect
2 | 


--------------------------------------------------------------------------------
/pyuplift/metrics/average_effect.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from statistics import mean
 3 | 
 4 | 
 5 | def get_average_effect(y_test, t_test, y_pred, test_share=0.3):
 6 |     """Estimating an average effect of the test set.
 7 | 
 8 |     +-----------------+----------------------------------------------------------------------------------+
 9 |     | **Parameters:** | | **y_test**: numpy array                                                        |
10 |     |                 | |   Actual y values.                                                             |
11 |     |                 | | **t_test**: numpy array                                                        |
12 |     |                 | |   Actual treatment values.                                                     |
13 |     |                 | | **y_pred**: numpy array                                                        |
14 |     |                 | |   Predicted y values by uplift model.                                          |
15 |     |                 | | **test_share**: float                                                          |
16 |     |                 | |   Share of the test data which will be taken for estimating an average effect. |
17 |     +-----------------+----------------------------------------------------------------------------------+
18 |     | **Returns:**    | | **average effect**: float                                                      |
19 |     |                 | |   Average effect on the test set.                                              |
20 |     +-----------------+----------------------------------------------------------------------------------+
21 |     """
22 | 
23 |     df = pd.DataFrame(data={
24 |         'effect': y_pred,
25 |         'y': y_test,
26 |         't': t_test
27 |     })
28 |     df = df.sort_values(by='effect', ascending=False)
29 |     test_size = int(test_share * df.shape[0])
30 |     idx, s1, s0 = 0, [], []
31 |     for _, row in df.iterrows():
32 |         idx += 1
33 |         if idx > test_size:
34 |             break
35 |         if row['t'] == 1:
36 |             s1.append(row['y'])
37 |         else:
38 |             s0.append(row['y'])
39 |     if len(s0) == 0:
40 |         s0.append(0)
41 |     if len(s1) == 0:
42 |         s1.append(0)
43 |     return mean(s1) - mean(s0)
44 | 


--------------------------------------------------------------------------------
/pyuplift/model_selection/__init__.py:
--------------------------------------------------------------------------------
1 | from .splitters.train_test_split import train_test_split
2 | 
3 | from .model_validation.treatment_cross_validation import treatment_cross_val_score
4 | 


--------------------------------------------------------------------------------
/pyuplift/model_selection/model_validation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/pyuplift/model_selection/model_validation/__init__.py


--------------------------------------------------------------------------------
/pyuplift/model_selection/model_validation/treatment_cross_validation.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from pyuplift.metrics import get_average_effect
 4 | from pyuplift.model_selection import train_test_split
 5 | 
 6 | 
 7 | def treatment_cross_val_score(X, y, t, model, cv=5, train_share=0.7, seeds=None):
 8 |     """Evaluate a scores by cross-validation.
 9 | 
10 |     +------------------+-----------------------------------------------------------------------------------------+
11 |     | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                             |
12 |     |                  | |   Matrix of features.                                                                 |
13 |     |                  | | **y: numpy array with shape = [n_samples,]**                                          |
14 |     |                  | |   Array of target of feature.                                                         |
15 |     |                  | | **t: numpy array with shape = [n_samples,]**                                          |
16 |     |                  | |   Array of treatments.                                                                |
17 |     |                  | | **train_share: float, optional (default=0.7)**                                        |
18 |     |                  | |   train_share represents the proportion of the dataset to include in the train split. |
19 |     |                  | | **random_state: int, optional (default=777)**                                         |
20 |     |                  | |   random_state is the seed used by the random number generator.                       |
21 |     +------------------+-----------------------------------------------------------------------------------------+
22 |     | **Return**       | | **scores: numpy array of floats**                                                     |
23 |     |                  | |   Array of scores of the estimator for each run of the cross validation.              |
24 |     +------------------+-----------------------------------------------------------------------------------------+
25 |     """
26 | 
27 |     if seeds is None:
28 |         seeds = [None for _ in range(cv)]
29 | 
30 |     if cv < 1:
31 |         raise ValueError('Count of validations should be positive integer number.')
32 |     elif cv != len(seeds):
33 |         raise ValueError("The length of seed's array should be equals to cv.")
34 |     elif not (0 < train_share <= 1):
35 |         raise ValueError('Train share should be float number between 0 and 1.')
36 | 
37 |     scores = []
38 |     for seed in seeds:
39 |         X_train, X_test, y_train, y_test, t_train, t_test = train_test_split(X, y, t, train_share, seed)
40 |         model.fit(X_train, y_train, t_train)
41 |         score = get_average_effect(y_test, t_test, model.predict(X_test))
42 |         scores.append(score)
43 |     return np.array(scores)
44 | 


--------------------------------------------------------------------------------
/pyuplift/model_selection/splitters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/pyuplift/model_selection/splitters/__init__.py


--------------------------------------------------------------------------------
/pyuplift/model_selection/splitters/train_test_split.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | 
 4 | def train_test_split(X, y, t, train_share=0.7, random_state=None):
 5 |     """Split X, y, t into random train and test subsets.
 6 | 
 7 |     +------------------+-----------------------------------------------------------------------------------------+
 8 |     | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                             |
 9 |     |                  | |   Matrix of features.                                                                 |
10 |     |                  | | **y: numpy array with shape = [n_samples,]**                                          |
11 |     |                  | |   Array of target of feature.                                                         |
12 |     |                  | | **t: numpy array with shape = [n_samples,]**                                          |
13 |     |                  | |   Array of treatments.                                                                |
14 |     |                  | | **train_share: float, optional (default=0.7)**                                        |
15 |     |                  | |   train_share represents the proportion of the dataset to include in the train split. |
16 |     |                  | | **random_state: int, optional (default=None)**                                        |
17 |     |                  | |   random_state is the seed used by the random number generator.                       |
18 |     +------------------+-----------------------------------------------------------------------------------------+
19 |     | **Return**       | | **X_train: numpy ndarray**                                                            |
20 |     |                  | |   Train matrix of features.                                                           |
21 |     |                  | | **X_test: numpy ndarray**                                                             |
22 |     |                  | |   Test matrix of features.                                                            |
23 |     |                  | | **y_train: numpy array**                                                              |
24 |     |                  | |   Train array of target of feature.                                                   |
25 |     |                  | | **y_test: numpy array**                                                               |
26 |     |                  | |   Test array of target of feature.                                                    |
27 |     |                  | | **t_train: numpy array**                                                              |
28 |     |                  | |   Train array of treatments.                                                          |
29 |     |                  | | **t_test: numpy array**                                                               |
30 |     |                  | |   Test array of treatments.                                                           |
31 |     +------------------+-----------------------------------------------------------------------------------------+
32 |     """
33 | 
34 |     if not (0 < train_share <= 1):
35 |         raise ValueError('Train share should be float number between 0 and 1.')
36 | 
37 |     random.seed(random_state)
38 |     size = len(y)
39 |     train_part_size = int(train_share * size)
40 |     train_index = random.sample([i for i in range(size)], train_part_size)
41 |     test_index = [i for i in range(size) if i not in train_index]
42 | 
43 |     X_train = X[train_index, :]
44 |     X_test = X[test_index, :]
45 | 
46 |     y_train = y[train_index]
47 |     y_test = y[test_index]
48 | 
49 |     t_train = t[train_index]
50 |     t_test = t[test_index]
51 |     return X_train, X_test, y_train, y_test, t_train, t_test
52 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/__init__.py:
--------------------------------------------------------------------------------
1 | from .lai import Lai
2 | from .kane import Kane
3 | from .pessimistic import Pessimistic
4 | from .reflective import Reflective
5 | from .jaskowski import Jaskowski
6 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/base.py:
--------------------------------------------------------------------------------
 1 | from pyuplift import BaseModel
 2 | 
 3 | 
 4 | class TransformationBaseModel(BaseModel):
 5 |     """Base class for a transformation uplift models.
 6 | 
 7 |     Note: This class should not be used directly. Use derived classes instead.
 8 |     """
 9 | 
10 |     def is_tr(self, y, t):
11 |         """Is pair (y,t) is TR?
12 |         Treatment responders (TR) are customers who were treated and responded
13 | 
14 |         Parameters
15 |         ----------
16 |         y : float
17 |             The target value.
18 |         t : float
19 |             The treatment value.
20 |         Returns
21 |         -------
22 |         is_tr : bool
23 |         """
24 |         return t != 0 and y != 0
25 | 
26 |     def is_cn(self, y, t):
27 |         """Is pair (y,t) is CN?
28 |         Control nonresponders (CN) are the customers who did not receive a treatment and did not respond.
29 | 
30 |         Parameters
31 |         ----------
32 |         y : float
33 |             The target value.
34 |         t : float
35 |             The treatment value.
36 |         Returns
37 |         -------
38 |         is_tr : bool
39 |         """
40 |         return t == 0 and y == 0
41 | 
42 |     def is_tn(self, y, t):
43 |         """Is pair (y,t) is TN?
44 |         Treatment nonresponders (TN) are customers who received a treatment but did not respond.
45 | 
46 |         Parameters
47 |         ----------
48 |         y : float
49 |             The target value.
50 |         t : float
51 |             The treatment value.
52 |         Returns
53 |         -------
54 |         is_tr : bool
55 |         """
56 |         return t != 0 and y == 0
57 | 
58 |     def is_cr(self, y, t):
59 |         """Is pair (y,t) is CR?
60 |         Control responders (CR) are the customers who responded without having received a treatment.
61 | 
62 |         Parameters
63 |         ----------
64 |         y : float
65 |             The target value.
66 |         t : float
67 |             The treatment value.
68 |         Returns
69 |         -------
70 |         is_tr : bool
71 |         """
72 |         return t == 0 and y != 0
73 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/jaskowski.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import LogisticRegression
 3 | 
 4 | from .base import TransformationBaseModel
 5 | 
 6 | 
 7 | class Jaskowski(TransformationBaseModel):
 8 |     """The class which implements the Jaskowski's approach.
 9 | 
10 |     +----------------+-----------------------------------------------------------------------------------+
11 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
12 |     |                | |   The classification model which will be used for predict uplift.               |
13 |     +----------------+-----------------------------------------------------------------------------------+
14 | 
15 | 
16 |     *******
17 |     Methods
18 |     *******
19 |     +-----------------------------------------------+----------------------------------------------------+
20 |     | :ref:`fit(self, X, y, t) <jask_fit>`          | Build the model from the training set (X, y, t).   |
21 |     +-----------------------------------------------+----------------------------------------------------+
22 |     | :ref:`predict(self, X, t=None) <jask_predict>`| Predict an uplift for X.                           |
23 |     +-----------------------------------------------+----------------------------------------------------+
24 |     """
25 | 
26 |     def __init__(self, model=LogisticRegression(n_jobs=-1)):
27 |         try:
28 |             model.__getattribute__('fit')
29 |             model.__getattribute__('predict')
30 |         except AttributeError:
31 |             raise ValueError('Model should contains two methods: fit and predict.')
32 |         self.model = model
33 | 
34 |     def fit(self, X, y, t):
35 |         """Build the model from the training set (X, y, t).
36 | 
37 |         +------------------+---------------------------------------------------------------------------------+
38 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
39 |         |                  | |   Matrix of features.                                                         |
40 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
41 |         |                  | |   Array of target of feature.                                                 |
42 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
43 |         |                  | |   Array of treatments.                                                        |
44 |         +------------------+---------------------------------------------------------------------------------+
45 |         | **Returns**      | **self : object**                                                               |
46 |         +------------------+---------------------------------------------------------------------------------+
47 |         """
48 | 
49 |         y_encoded = self.__encode_data(y, t)
50 |         self.model.fit(X, y_encoded)
51 |         return self
52 | 
53 |     def predict(self, X, t=None):
54 |         """Predict an uplift for X.
55 | 
56 |         +------------------+---------------------------------------------------------------------------------+
57 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
58 |         |                  | |   Matrix of features.                                                         |
59 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
60 |         |                  | |   Array of treatments.                                                        |
61 |         +------------------+---------------------------------------------------------------------------------+
62 |         | **Returns**      | | **self : object**                                                             |
63 |         |                  | |   The predicted values.                                                       |
64 |         +------------------+---------------------------------------------------------------------------------+
65 |         """
66 | 
67 |         p = self.model.predict_proba(X)[:, 1]
68 |         return 2 * p - 1
69 | 
70 |     def __encode_data(self, y, t):
71 |         y_values = []
72 |         for i in range(y.shape[0]):
73 |             if self.is_tr(y[i], t[i]) or self.is_cn(y[i], t[i]):
74 |                 y_values.append(1)
75 |             else:
76 |                 y_values.append(0)
77 |         return np.array(y_values)
78 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/kane.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn.linear_model import LogisticRegression
  3 | 
  4 | from .base import TransformationBaseModel
  5 | 
  6 | 
  7 | class Kane(TransformationBaseModel):
  8 |     """The class which implements the Kane's approach.
  9 | 
 10 |     +----------------+-----------------------------------------------------------------------------------+
 11 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 12 |     |                | |   The classification model which will be used for predict uplift.               |
 13 |     |                | | **use_weights : boolean, optional (default=False)**                             |
 14 |     |                | |   Use or not weights?                                                           |
 15 |     +----------------+-----------------------------------------------------------------------------------+
 16 | 
 17 | 
 18 |     *******
 19 |     Methods
 20 |     *******
 21 |     +-----------------------------------------------+----------------------------------------------------+
 22 |     | :ref:`fit(self, X, y, t) <lai_fit>`           | Build the model from the training set (X, y, t).   |
 23 |     +-----------------------------------------------+----------------------------------------------------+
 24 |     | :ref:`predict(self, X, t=None) <lai_predict>` | Predict an uplift for X.                           |
 25 |     +-----------------------------------------------+----------------------------------------------------+
 26 |     """
 27 | 
 28 |     def __init__(self, model=LogisticRegression(n_jobs=-1), use_weights=False):
 29 |         try:
 30 |             model.__getattribute__('fit')
 31 |             model.__getattribute__('predict')
 32 |         except AttributeError:
 33 |             raise ValueError('Model should contains two methods: fit and predict.')
 34 |         self.model = model
 35 |         self.use_weights = use_weights
 36 | 
 37 |     def fit(self, X, y, t):
 38 |         """Build the model from the training set (X, y, t).
 39 | 
 40 |         +------------------+---------------------------------------------------------------------------------+
 41 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
 42 |         |                  | |   Matrix of features.                                                         |
 43 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
 44 |         |                  | |   Array of target of feature.                                                 |
 45 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
 46 |         |                  | |   Array of treatments.                                                        |
 47 |         +------------------+---------------------------------------------------------------------------------+
 48 |         | **Returns**      | **self : object**                                                               |
 49 |         +------------------+---------------------------------------------------------------------------------+
 50 |         """
 51 | 
 52 |         y_encoded = self.__encode_data(y, t)
 53 |         self.model.fit(X, y_encoded)
 54 |         if self.use_weights:
 55 |             self.__init_weights(t)
 56 |         return self
 57 | 
 58 |     def predict(self, X, t=None):
 59 |         """Predict an uplift for X.
 60 | 
 61 |         +------------------+---------------------------------------------------------------------------------+
 62 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
 63 |         |                  | |   Matrix of features.                                                         |
 64 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
 65 |         |                  | |   Array of treatments.                                                        |
 66 |         +------------------+---------------------------------------------------------------------------------+
 67 |         | **Returns**      | | **self : object**                                                             |
 68 |         |                  | |   The predicted values.                                                       |
 69 |         +------------------+---------------------------------------------------------------------------------+
 70 |         """
 71 | 
 72 |         p_tr = self.model.predict_proba(X)[:, 0]
 73 |         p_cn = self.model.predict_proba(X)[:, 1]
 74 |         p_tn = self.model.predict_proba(X)[:, 2]
 75 |         p_cr = self.model.predict_proba(X)[:, 3]
 76 |         if self.use_weights:
 77 |             return (p_tr / self.treatment_count + p_cn / self.control_count) - \
 78 |                    (p_tn / self.treatment_count + p_cr / self.control_count)
 79 |         else:
 80 |             return (p_tr + p_cn) - (p_tn + p_cr)
 81 | 
 82 |     def __encode_data(self, y, t):
 83 |         y_values = []
 84 |         for i in range(y.shape[0]):
 85 |             if self.is_tr(y[i], t[i]):
 86 |                 y_values.append(0)
 87 |             elif self.is_cn(y[i], t[i]):
 88 |                 y_values.append(1)
 89 |             elif self.is_tn(y[i], t[i]):
 90 |                 y_values.append(2)
 91 |             elif self.is_cr(y[i], t[i]):
 92 |                 y_values.append(3)
 93 |         return np.array(y_values)
 94 | 
 95 |     def __init_weights(self, t):
 96 |         control_count, treatment_count = 0, 0
 97 |         for el in t:
 98 |             if el == 0.0:
 99 |                 control_count += 1
100 |             else:
101 |                 treatment_count += 1
102 |         self.control_count = control_count
103 |         self.treatment_count = treatment_count
104 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/lai.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import LogisticRegression
 3 | 
 4 | from .base import TransformationBaseModel
 5 | 
 6 | 
 7 | class Lai(TransformationBaseModel):
 8 |     """The class which implements the Lai's approach.
 9 | 
10 |     +----------------+-----------------------------------------------------------------------------------+
11 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
12 |     |                | |   The classification model which will be used for predict uplift.               |
13 |     |                | | **use_weights : boolean, optional (default=False)**                             |
14 |     |                | |   Use or not weights?                                                           |
15 |     +----------------+-----------------------------------------------------------------------------------+
16 | 
17 | 
18 |     *******
19 |     Methods
20 |     *******
21 |     +-----------------------------------------------+----------------------------------------------------+
22 |     | :ref:`fit(self, X, y, t) <lai_fit>`           | Build a Lai model from the training set (X, y, t). |
23 |     +-----------------------------------------------+----------------------------------------------------+
24 |     | :ref:`predict(self, X, t=None) <lai_predict>` | Predict an uplift for X.                           |
25 |     +-----------------------------------------------+----------------------------------------------------+
26 |     """
27 | 
28 |     def __init__(self, model=LogisticRegression(n_jobs=-1), use_weights=False):
29 |         try:
30 |             model.__getattribute__('fit')
31 |             model.__getattribute__('predict')
32 |         except AttributeError:
33 |             raise ValueError('Model should contains two methods: fit and predict.')
34 |         self.model = model
35 |         self.use_weights = use_weights
36 | 
37 |     def fit(self, X, y, t):
38 |         """Build a Lai model from the training set (X, y, t).
39 | 
40 |         +------------------+---------------------------------------------------------------------------------+
41 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
42 |         |                  | |   Matrix of features.                                                         |
43 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
44 |         |                  | |   Array of target of feature.                                                 |
45 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
46 |         |                  | |   Array of treatments.                                                        |
47 |         +------------------+---------------------------------------------------------------------------------+
48 |         | **Returns**      | **self : object**                                                               |
49 |         +------------------+---------------------------------------------------------------------------------+
50 |         """
51 | 
52 |         y_encoded = self.__encode_data(y, t)
53 |         if self.use_weights:
54 |             self.__init_weights(y, t)
55 |         self.model.fit(X, y_encoded)
56 |         return self
57 | 
58 |     def predict(self, X, t=None):
59 |         """Predict an uplift for X.
60 | 
61 |         +------------------+---------------------------------------------------------------------------------+
62 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
63 |         |                  | |   Matrix of features.                                                         |
64 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
65 |         |                  | |   Array of treatments.                                                        |
66 |         +------------------+---------------------------------------------------------------------------------+
67 |         | **Returns**      | | **self : object**                                                             |
68 |         |                  | |   The predicted values.                                                       |
69 |         +------------------+---------------------------------------------------------------------------------+
70 |         """
71 | 
72 |         p_tr_cn = self.model.predict_proba(X)[:, 1]
73 |         if self.use_weights:
74 |             p_tn_cr = self.model.predict_proba(X)[:, 0]
75 |             return p_tr_cn * self.p_tr_or_cn - p_tn_cr * self.p_tn_or_cr
76 |         else:
77 |             return 2 * p_tr_cn - 1
78 | 
79 |     def __encode_data(self, y, t):
80 |         y_values = []
81 |         for i in range(y.shape[0]):
82 |             if self.is_tr(y[i], t[i]) or self.is_cn(y[i], t[i]):
83 |                 y_values.append(1)
84 |             elif self.is_tn(y[i], t[i]) or self.is_cr(y[i], t[i]):
85 |                 y_values.append(0)
86 |         return np.array(y_values)
87 | 
88 |     def __init_weights(self, y, t):
89 |         pos_count, neg_count = 0, 0
90 |         for i in range(y.shape[0]):
91 |             if self.is_tr(y[i], t[i]) or self.is_cn(y[i], t[i]):
92 |                 pos_count += 1
93 |             elif self.is_tn(y[i], t[i]) or self.is_cr(y[i], t[i]):
94 |                 neg_count += 1
95 | 
96 |         self.p_tr_or_cn = pos_count / (pos_count + neg_count)
97 |         self.p_tn_or_cr = neg_count / (pos_count + neg_count)
98 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/pessimistic.py:
--------------------------------------------------------------------------------
 1 | from sklearn.linear_model import LogisticRegression
 2 | 
 3 | from .base import TransformationBaseModel
 4 | from .lai import Lai
 5 | from .reflective import Reflective
 6 | 
 7 | 
 8 | class Pessimistic(TransformationBaseModel):
 9 |     """The class which implements the pessimistic approach.
10 | 
11 |     +----------------+-----------------------------------------------------------------------------------+
12 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
13 |     |                | |   The classification model which will be used for predict uplift.               |
14 |     +----------------+-----------------------------------------------------------------------------------+
15 | 
16 | 
17 |     *******
18 |     Methods
19 |     *******
20 |     +-----------------------------------------------+----------------------------------------------------+
21 |     | :ref:`fit(self, X, y, t) <pes_fit>`           | Build the model from the training set (X, y, t).   |
22 |     +-----------------------------------------------+----------------------------------------------------+
23 |     | :ref:`predict(self, X, t=None) <pes_predict>` | Predict an uplift for X.                           |
24 |     +-----------------------------------------------+----------------------------------------------------+
25 |     """
26 | 
27 |     def __init__(self, model=LogisticRegression(n_jobs=-1)):
28 |         try:
29 |             model.__getattribute__('fit')
30 |             model.__getattribute__('predict')
31 |         except AttributeError:
32 |             raise ValueError('Model should contains two methods: fit and predict.')
33 |         self.w_lai_model = Lai(model, use_weights=True)
34 |         self.reflective_model = Reflective(model)
35 | 
36 |     def fit(self, X, y, t):
37 |         """Build the model from the training set (X, y, t).
38 | 
39 |         +------------------+---------------------------------------------------------------------------------+
40 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
41 |         |                  | |   Matrix of features.                                                         |
42 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
43 |         |                  | |   Array of target of feature.                                                 |
44 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
45 |         |                  | |   Array of treatments.                                                        |
46 |         +------------------+---------------------------------------------------------------------------------+
47 |         | **Returns**      | **self : object**                                                               |
48 |         +------------------+---------------------------------------------------------------------------------+
49 |         """
50 | 
51 |         self.w_lai_model.fit(X, y, t)
52 |         self.reflective_model.fit(X, y, t)
53 |         return self
54 | 
55 |     def predict(self, X, t=None):
56 |         """Predict an uplift for X.
57 | 
58 |         +------------------+---------------------------------------------------------------------------------+
59 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
60 |         |                  | |   Matrix of features.                                                         |
61 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
62 |         |                  | |   Array of treatments.                                                        |
63 |         +------------------+---------------------------------------------------------------------------------+
64 |         | **Returns**      | | **self : object**                                                             |
65 |         |                  | |   The predicted values.                                                       |
66 |         +------------------+---------------------------------------------------------------------------------+
67 |         """
68 | 
69 |         w_lai_uplift = self.w_lai_model.predict(X)
70 |         reflective_uplift = self.reflective_model.predict(X)
71 |         return (w_lai_uplift + reflective_uplift) / 2
72 | 


--------------------------------------------------------------------------------
/pyuplift/transformation/reflective.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn.linear_model import LogisticRegression
  3 | 
  4 | from .base import TransformationBaseModel
  5 | 
  6 | 
  7 | class Reflective(TransformationBaseModel):
  8 |     """The class which implements the reflective approach.
  9 | 
 10 |     +----------------+-----------------------------------------------------------------------------------+
 11 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LogisticRegression)**  |
 12 |     |                | |   The classification model which will be used for predict uplift.               |
 13 |     +----------------+-----------------------------------------------------------------------------------+
 14 | 
 15 | 
 16 |     *******
 17 |     Methods
 18 |     *******
 19 |     +-----------------------------------------------+----------------------------------------------------+
 20 |     | :ref:`fit(self, X, y, t) <ref_fit>`           | Build the model from the training set (X, y, t).   |
 21 |     +-----------------------------------------------+----------------------------------------------------+
 22 |     | :ref:`predict(self, X, t=None) <ref_predict>` | Predict an uplift for X.                           |
 23 |     +-----------------------------------------------+----------------------------------------------------+
 24 |     """
 25 | 
 26 |     def __init__(self, model=LogisticRegression(n_jobs=-1)):
 27 |         try:
 28 |             model.__getattribute__('fit')
 29 |             model.__getattribute__('predict')
 30 |         except AttributeError:
 31 |             raise ValueError('Model should contains two methods: fit and predict.')
 32 |         self.model = model
 33 | 
 34 |     def fit(self, X, y, t):
 35 |         """Build the model from the training set (X, y, t).
 36 | 
 37 |         +------------------+---------------------------------------------------------------------------------+
 38 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
 39 |         |                  | |   Matrix of features.                                                         |
 40 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
 41 |         |                  | |   Array of target of feature.                                                 |
 42 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
 43 |         |                  | |   Array of treatments.                                                        |
 44 |         +------------------+---------------------------------------------------------------------------------+
 45 |         | **Returns**      | **self : object**                                                               |
 46 |         +------------------+---------------------------------------------------------------------------------+
 47 |         """
 48 | 
 49 |         y_encoded = self.__encode_data(y, t)
 50 |         self.model.fit(X, y_encoded)
 51 |         self.__init_weights(y, t)
 52 |         return self
 53 | 
 54 |     def predict(self, X, t=None):
 55 |         """Predict an uplift for X.
 56 | 
 57 |         +------------------+---------------------------------------------------------------------------------+
 58 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
 59 |         |                  | |   Matrix of features.                                                         |
 60 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
 61 |         |                  | |   Array of treatments.                                                        |
 62 |         +------------------+---------------------------------------------------------------------------------+
 63 |         | **Returns**      | | **self : object**                                                             |
 64 |         |                  | |   The predicted values.                                                       |
 65 |         +------------------+---------------------------------------------------------------------------------+
 66 |         """
 67 | 
 68 |         p_tr = self.model.predict_proba(X)[:, 0]
 69 |         p_cn = self.model.predict_proba(X)[:, 1]
 70 |         p_tn = self.model.predict_proba(X)[:, 2]
 71 |         p_cr = self.model.predict_proba(X)[:, 3]
 72 | 
 73 |         p_pos = self.p_tlr * p_tr + self.p_cln * p_cn
 74 |         p_neg = self.p_tln * p_tn + self.p_clr * p_cr
 75 |         return p_pos - p_neg
 76 | 
 77 |     def __encode_data(self, y, t):
 78 |         y_values = []
 79 |         for i in range(y.shape[0]):
 80 |             if self.is_tr(y[i], t[i]):
 81 |                 y_values.append(0)
 82 |             elif self.is_cn(y[i], t[i]):
 83 |                 y_values.append(1)
 84 |             elif self.is_tn(y[i], t[i]):
 85 |                 y_values.append(2)
 86 |             elif self.is_cr(y[i], t[i]):
 87 |                 y_values.append(3)
 88 |         return np.array(y_values)
 89 | 
 90 |     def __init_weights(self, y, t):
 91 |         t_r, c_r, t_n, c_n = 0, 0, 0, 0
 92 |         r_count, n_count = 0, 0
 93 |         size = y.shape[0]
 94 |         for i in range(size):
 95 |             if y[i] != 0:
 96 |                 r_count += 1
 97 |                 if t[i] != 0:
 98 |                     # T|R
 99 |                     t_r += 1
100 |                 else:
101 |                     # C|R
102 |                     c_r += 1
103 |             else:
104 |                 n_count += 1
105 |                 if t[i] != 0:
106 |                     # T|N
107 |                     t_n += 1
108 |                 else:
109 |                     # C|N
110 |                     c_n += 1
111 | 
112 |         self.p_tlr = t_r / r_count
113 |         self.p_clr = c_r / r_count
114 |         self.p_cln = c_n / n_count
115 |         self.p_tln = t_n / n_count
116 | 


--------------------------------------------------------------------------------
/pyuplift/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .downloader import download_file
2 | from .retriever import retrieve_from_gz
3 | 


--------------------------------------------------------------------------------
/pyuplift/utils/downloader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | 
 4 | 
 5 | def download_file(url: str, output_path: str):
 6 |     """Download file from `url` to `output_path`.
 7 | 
 8 |     +-----------------+--------------------------------------+
 9 |     | **Parameters**  | | **url: string**                    |
10 |     |                 | |   Data's URL.                      |
11 |     |                 | | **output_path: string**            |
12 |     |                 | |   Path where file will be saved.   |
13 |     +-----------------+--------------------------------------+
14 |     | **Returns**     | **None**                             |
15 |     +-----------------+--------------------------------------+
16 |     """
17 | 
18 |     if os.path.isfile(output_path):
19 |         os.remove(output_path)
20 | 
21 |     print("Downloading file to '{}'...".format(output_path))
22 |     response = requests.get(url)
23 |     # Check if the response is ok (200)
24 |     status_code = int(response.status_code)
25 |     if status_code == 200:
26 |         with open(output_path, 'wb') as file:
27 |             # A chunk of 128 bytes
28 |             for chunk in response:
29 |                 file.write(chunk)
30 |     elif status_code == 404:
31 |         raise Exception('Wrong URL (' + url + ').')
32 | 


--------------------------------------------------------------------------------
/pyuplift/utils/retriever.py:
--------------------------------------------------------------------------------
 1 | import gzip
 2 | import shutil
 3 | 
 4 | 
 5 | def retrieve_from_gz(archive_path: str, output_path: str):
 6 |     """The retrieving gz-archived data from `archive_path` to `output_path`.
 7 | 
 8 |     +-----------------+--------------------------------------+
 9 |     | **Parameters**  | | **archive_path: string**           |
10 |     |                 | |   The archive path.                |
11 |     |                 | | **output_path: string**            |
12 |     |                 | |   The retrieved data path.         |
13 |     +-----------------+--------------------------------------+
14 |     | **Returns**     | **None**                             |
15 |     +-----------------+--------------------------------------+
16 |     """
17 | 
18 |     with gzip.open(archive_path, 'rb') as f_in:
19 |         with open(output_path, 'wb') as f_out:
20 |             shutil.copyfileobj(f_in, f_out)
21 | 


--------------------------------------------------------------------------------
/pyuplift/variable_selection/__init__.py:
--------------------------------------------------------------------------------
1 | from .cadit import Cadit
2 | from .dummy import Dummy
3 | from .two_model import TwoModel
4 | from .econometric import Econometric
5 | 


--------------------------------------------------------------------------------
/pyuplift/variable_selection/cadit.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import LinearRegression
 3 | from pyuplift import BaseModel
 4 | 
 5 | 
 6 | class Cadit(BaseModel):
 7 |     """The class which implements the cadit approach [1].
 8 | 
 9 |     +----------------+-----------------------------------------------------------------------------------+
10 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LinearRegression)**    |
11 |     |                | |   The regression model which will be used for predict uplift.                   |
12 |     +----------------+-----------------------------------------------------------------------------------+
13 | 
14 | 
15 |     *******
16 |     Methods
17 |     *******
18 |     +-------------------------------------------------+--------------------------------------------------+
19 |     | :ref:`fit(self, X, y, t) <cadit_fit>`           | Build a model from the training set (X, y, t).   |
20 |     +-------------------------------------------------+--------------------------------------------------+
21 |     | :ref:`predict(self, X, t=None) <cadit_predict>` | Predict an uplift for X.                         |
22 |     +-------------------------------------------------+--------------------------------------------------+
23 |     """
24 | 
25 |     def __init__(self, model=LinearRegression()):
26 |         try:
27 |             model.__getattribute__('fit')
28 |             model.__getattribute__('predict')
29 |         except AttributeError:
30 |             raise ValueError('Model should contains two methods: fit and predict.')
31 |         self.model = model
32 | 
33 |     def fit(self, X, y, t):
34 |         """Build a model from the training set (X, y, t).
35 | 
36 |         +------------------+---------------------------------------------------------------------------------+
37 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
38 |         |                  | |   Matrix of features.                                                         |
39 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
40 |         |                  | |   Array of target of feature.                                                 |
41 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
42 |         |                  | |   Array of treatments.                                                        |
43 |         +------------------+---------------------------------------------------------------------------------+
44 |         | **Returns**      | **self : object**                                                               |
45 |         +------------------+---------------------------------------------------------------------------------+
46 |         """
47 | 
48 |         z = self.__get_z_values(y, t)
49 |         self.model.fit(X, z)
50 |         return self
51 | 
52 |     def predict(self, X, t=None):
53 |         """Predict an uplift for X.
54 | 
55 |         +------------------+---------------------------------------------------------------------------------+
56 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
57 |         |                  | |   Matrix of features.                                                         |
58 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
59 |         |                  | |   Array of treatments.                                                        |
60 |         +------------------+---------------------------------------------------------------------------------+
61 |         | **Returns**      | | **self : object**                                                             |
62 |         |                  | |   The predicted values.                                                       |
63 |         +------------------+---------------------------------------------------------------------------------+
64 |         """
65 | 
66 |         return self.model.predict(X)
67 | 
68 |     def __get_z_values(self, y, t):
69 |         p_t0 = t[t == 0].shape[0] / t.shape[0]
70 |         p_t1 = 1 - p_t0
71 |         y_mean = y.mean()
72 |         z = []
73 |         for i in range(y.shape[0]):
74 |             if t[i] == 0:
75 |                 val = (1/p_t1) * (y[i] - y_mean)
76 |             else:
77 |                 val = - (1/p_t0) * (y[i] - y_mean)
78 |             z.append(val)
79 |         return np.array(z)
80 | 


--------------------------------------------------------------------------------
/pyuplift/variable_selection/dummy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import LinearRegression
 3 | from pyuplift import BaseModel
 4 | 
 5 | 
 6 | class Dummy(BaseModel):
 7 |     """The class which implements the dummy approach.
 8 | 
 9 |     +----------------+-----------------------------------------------------------------------------------+
10 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LinearRegression)**    |
11 |     |                | |   The regression model which will be used for predict uplift.                   |
12 |     +----------------+-----------------------------------------------------------------------------------+
13 | 
14 | 
15 |     *******
16 |     Methods
17 |     *******
18 |     +-------------------------------------------------+-----------------------------------------------------+
19 |     | :ref:`fit(self, X, y, t) <dummy_fit>`           | Build a dummy model from the training set (X, y, t).|
20 |     +-------------------------------------------------+-----------------------------------------------------+
21 |     | :ref:`predict(self, X, t=None) <dummy_predict>` | Predict an uplift for X.                            |
22 |     +-------------------------------------------------+-----------------------------------------------------+
23 |     """
24 | 
25 |     def __init__(self, model=LinearRegression()):
26 |         try:
27 |             model.__getattribute__('fit')
28 |             model.__getattribute__('predict')
29 |         except AttributeError:
30 |             raise ValueError('Model should contains two methods: fit and predict.')
31 |         self.model = model
32 | 
33 |     def fit(self, X, y, t):
34 |         """Build a dummy model from the training set (X, y, t).
35 | 
36 |         +------------------+---------------------------------------------------------------------------------+
37 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
38 |         |                  | |   Matrix of features.                                                         |
39 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
40 |         |                  | |   Array of target of feature.                                                 |
41 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
42 |         |                  | |   Array of treatments.                                                        |
43 |         +------------------+---------------------------------------------------------------------------------+
44 |         | **Returns**      | **self : object**                                                               |
45 |         +------------------+---------------------------------------------------------------------------------+
46 |         """
47 | 
48 |         x_train = np.append(X, t.reshape((-1, 1)), axis=1)
49 |         self.model.fit(x_train, y)
50 |         return self
51 | 
52 |     def predict(self, X, t=None):
53 |         """Predict an uplift for X.
54 | 
55 |         +------------------+---------------------------------------------------------------------------------+
56 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
57 |         |                  | |   Matrix of features.                                                         |
58 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
59 |         |                  | |   Array of treatments.                                                        |
60 |         +------------------+---------------------------------------------------------------------------------+
61 |         | **Returns**      | | **self : object**                                                             |
62 |         |                  | |   The predicted values.                                                       |
63 |         +------------------+---------------------------------------------------------------------------------+
64 |         """
65 | 
66 |         col = np.array(X.shape[0] * [0])
67 |         x_test = np.append(X, col.reshape((-1, 1)), axis=1)
68 |         # All treatment values == 0
69 |         s0 = self.model.predict(x_test)
70 |         x_test[:, -1] = 1
71 |         # All treatment values == 1
72 |         s1 = self.model.predict(x_test)
73 |         return s1 - s0
74 | 


--------------------------------------------------------------------------------
/pyuplift/variable_selection/econometric.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import LinearRegression
 3 | from pyuplift import BaseModel
 4 | 
 5 | 
 6 | class Econometric(BaseModel):
 7 |     """The class which implements the econometric approach.
 8 | 
 9 |     +----------------+-----------------------------------------------------------------------------------+
10 |     | **Parameters** | | **model : object, optional (default=sklearn.linear_model.LinearRegression)**    |
11 |     |                | |   The regression model which will be used for predict uplift.                   |
12 |     +----------------+-----------------------------------------------------------------------------------+
13 | 
14 | 
15 |     *******
16 |     Methods
17 |     *******
18 |     +-----------------------------------------------+------------------------------------------------------------+
19 |     | :ref:`fit(self, X, y, t) <eco_fit>`           | Build an econometric model from the training set (X, y, t).|
20 |     +-----------------------------------------------+------------------------------------------------------------+
21 |     | :ref:`predict(self, X, t=None) <eco_predict>` | Predict an uplift for X.                                   |
22 |     +-----------------------------------------------+------------------------------------------------------------+
23 |     """
24 | 
25 |     def __init__(self, model=LinearRegression()):
26 |         try:
27 |             model.__getattribute__('fit')
28 |             model.__getattribute__('predict')
29 |         except AttributeError:
30 |             raise ValueError('Model should contains two methods: fit and predict.')
31 |         self.model = model
32 | 
33 |     def fit(self, X, y, t):
34 |         """Build an econometric model from the training set (X, y, t).
35 | 
36 |         +------------------+---------------------------------------------------------------------------------+
37 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
38 |         |                  | |   Matrix of features.                                                         |
39 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
40 |         |                  | |   Array of target of feature.                                                 |
41 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
42 |         |                  | |   Array of treatments.                                                        |
43 |         +------------------+---------------------------------------------------------------------------------+
44 |         | **Returns**      | **self : object**                                                               |
45 |         +------------------+---------------------------------------------------------------------------------+
46 |         """
47 | 
48 |         x_train = self.__get_matrix(X, t)
49 |         self.model.fit(x_train, y)
50 |         return self
51 | 
52 |     def predict(self, X, t=None):
53 |         """Predict an uplift for X.
54 | 
55 |         +------------------+---------------------------------------------------------------------------------+
56 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
57 |         |                  | |   Matrix of features.                                                         |
58 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
59 |         |                  | |   Array of treatments.                                                        |
60 |         +------------------+---------------------------------------------------------------------------------+
61 |         | **Returns**      | | **self : object**                                                             |
62 |         |                  | |   The predicted values.                                                       |
63 |         +------------------+---------------------------------------------------------------------------------+
64 |         """
65 | 
66 |         x_test = self.__get_matrix(X, np.array(X.shape[0] * [0]))
67 |         v0 = self.model.predict(x_test)
68 |         x_test = self.__get_matrix(X, np.array(X.shape[0] * [1]))
69 |         v1 = self.model.predict(x_test)
70 |         return v1 - v0
71 | 
72 |     def __get_matrix(self, X, t):
73 |         """Create X|T|X*T matrix"""
74 | 
75 |         x_t = np.append(X, t.reshape((-1, 1)), axis=1)
76 |         xt = X * t.reshape((-1, 1))
77 |         return np.append(x_t, xt, axis=1)
78 | 


--------------------------------------------------------------------------------
/pyuplift/variable_selection/two_model.py:
--------------------------------------------------------------------------------
 1 | from sklearn.linear_model import LinearRegression
 2 | from pyuplift import BaseModel
 3 | 
 4 | 
 5 | class TwoModel(BaseModel):
 6 |     """The class which implements the two model approach.
 7 | 
 8 |     +----------------+---------------------------------------------------------------------------------------------+
 9 |     | **Parameters** | | **no_treatment_model : object, optional (default=sklearn.linear_model.LinearRegression)** |
10 |     |                | |   The regression model which will be used for predict uplift.                             |
11 |     |                | | **has_treatment_model : object, optional (default=sklearn.linear_model.LinearRegression)**|
12 |     |                | |   The regression model which will be used for predict uplift.                             |
13 |     +----------------+---------------------------------------------------------------------------------------------+
14 | 
15 |     *******
16 |     Methods
17 |     *******
18 |     +-----------------------------------------------+--------------------------------------------------------------+
19 |     | :ref:`fit(self, X, y, t) <two_fit>`           | Build a two model model from the training set (X, y, t).     |
20 |     +-----------------------------------------------+--------------------------------------------------------------+
21 |     | :ref:`predict(self, X, t=None) <two_predict>` | Predict an uplift for X.                                     |
22 |     +-----------------------------------------------+--------------------------------------------------------------+
23 |     """
24 | 
25 |     def __init__(self, no_treatment_model=LinearRegression(), has_treatment_model=LinearRegression()):
26 |         try:
27 |             no_treatment_model.__getattribute__('fit')
28 |             no_treatment_model.__getattribute__('predict')
29 |         except AttributeError:
30 |             raise ValueError('No treatment model should contains two methods: fit and predict.')
31 | 
32 |         try:
33 |             has_treatment_model.__getattribute__('fit')
34 |             has_treatment_model.__getattribute__('predict')
35 |         except AttributeError:
36 |             raise ValueError('Has treatment model should contains two methods: fit and predict.')
37 | 
38 |         self.no_treatment_model = no_treatment_model
39 |         self.has_treatment_model = has_treatment_model
40 | 
41 |     def fit(self, X, y, t):
42 |         """Build a model model model from the training set (X, y, t).
43 | 
44 |         +------------------+---------------------------------------------------------------------------------+
45 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
46 |         |                  | |   Matrix of features.                                                         |
47 |         |                  | | **y: numpy array with shape = [n_samples,]**                                  |
48 |         |                  | |   Array of target of feature.                                                 |
49 |         |                  | | **t: numpy array with shape = [n_samples,]**                                  |
50 |         |                  | |   Array of treatments.                                                        |
51 |         +------------------+---------------------------------------------------------------------------------+
52 |         | **Returns**      | **self : object**                                                               |
53 |         +------------------+---------------------------------------------------------------------------------+
54 |         """
55 | 
56 |         no_treatment_x, no_treatment_y = [], []
57 |         has_treatment_x, has_treatment_y = [], []
58 |         for idx, el in enumerate(t):
59 |             if el:
60 |                 has_treatment_x.append(X[idx])
61 |                 has_treatment_y.append(y[idx])
62 |             else:
63 |                 no_treatment_x.append(X[idx])
64 |                 no_treatment_y.append(y[idx])
65 |         self.no_treatment_model.fit(no_treatment_x, no_treatment_y)
66 |         self.has_treatment_model.fit(has_treatment_x, has_treatment_y)
67 |         return self
68 | 
69 |     def predict(self, X, t=None):
70 |         """Predict an uplift for X.
71 | 
72 |         +------------------+---------------------------------------------------------------------------------+
73 |         | **Parameters**   | | **X: numpy ndarray with shape = [n_samples, n_features]**                     |
74 |         |                  | |   Matrix of features.                                                         |
75 |         |                  | | **t: numpy array with shape = [n_samples,] or None**                          |
76 |         |                  | |   Array of treatments.                                                        |
77 |         +------------------+---------------------------------------------------------------------------------+
78 |         | **Returns**      | | **self : object**                                                             |
79 |         |                  | |   The predicted values.                                                       |
80 |         +------------------+---------------------------------------------------------------------------------+
81 |         """
82 | 
83 |         s1 = self.has_treatment_model.predict(X)
84 |         s0 = self.no_treatment_model.predict(X)
85 |         return s1 - s0
86 | 


--------------------------------------------------------------------------------
/resources/logo.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/resources/logo.psd


--------------------------------------------------------------------------------
/resources/pyuplift-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/resources/pyuplift-logo.png


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import codecs
 3 | from setuptools import setup, find_packages
 4 | 
 5 | path = os.path.abspath(os.path.dirname(__file__))
 6 | 
 7 | 
 8 | def read(filename):
 9 |     return codecs.open(os.path.join(path, filename), 'r').read()
10 | 
11 | 
12 | setup(
13 |     name='pyuplift',
14 |     version='0.0.4.0',
15 |     license='MIT License',
16 |     author='Artem Kuchumov',
17 |     author_email='kuchumov7@gmail.com',
18 |     url='https://github.com/duketemon/pyuplift',
19 |     description='Uplift modeling implementation',
20 |     long_description=read('README.MD'),
21 |     long_description_content_type='text/markdown',
22 |     include_package_data=True,
23 |     zip_safe=False,
24 |     packages=find_packages(),
25 |     keywords=['uplift modeling', 'machine learning', 'true response modeling', 'incremental value marketing'],
26 |     install_requires=[
27 |         'pandas>=0.23.4',
28 |         'scikit-learn>=0.20.0',
29 |         'requests>=2.19.1',
30 |     ],
31 |     extras_require={
32 |         'tests': [
33 |             'pytest>=4.5.0'
34 |         ]
35 |     },
36 |     classifiers=[
37 |         'Intended Audience :: Science/Research',
38 |         'Intended Audience :: Education',
39 |         'Programming Language :: Python :: 3',
40 |         'Programming Language :: Python :: 3.5',
41 |         'Programming Language :: Python :: 3.6',
42 |         'Programming Language :: Python :: 3.7',
43 |         'License :: OSI Approved :: MIT License',
44 |         'Operating System :: OS Independent',
45 |         'Topic :: Software Development :: Libraries',
46 |         'Topic :: Software Development :: Libraries :: Python Modules'
47 |     ]
48 | )
49 | 


--------------------------------------------------------------------------------
/tests/README.MD:
--------------------------------------------------------------------------------
1 | ## Tests
2 | This directory contains tests of the pyuplift library.
3 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/__init__.py


--------------------------------------------------------------------------------
/tests/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/datasets/__init__.py


--------------------------------------------------------------------------------
/tests/datasets/generators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/datasets/generators/__init__.py


--------------------------------------------------------------------------------
/tests/datasets/generators/test_linear.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from pyuplift.datasets import make_linear_regression
 4 | 
 5 | 
 6 | def test_make_linear_regression__repeated_random_state():
 7 |     random_state, size = 101, 1000
 8 |     df1 = make_linear_regression(size, random_state=random_state)
 9 |     df2 = make_linear_regression(size, random_state=random_state)
10 | 
11 |     assert np.array_equal(df1['x1'].values, df2['x1'].values)
12 |     assert np.array_equal(df1['x2'].values, df2['x2'].values)
13 |     assert np.array_equal(df1['x3'].values, df2['x3'].values)
14 |     assert np.array_equal(df1['t'].values, df2['t'].values)
15 |     assert np.array_equal(df1['y'].values, df2['y'].values)
16 | 
17 | 
18 | def test_make_linear_regression__none_random_state():
19 |     size = 1000
20 |     df1 = make_linear_regression(size, random_state=None)
21 |     df2 = make_linear_regression(size, random_state=None)
22 | 
23 |     assert not np.array_equal(df1['x1'].values, df2['x1'].values)
24 | 
25 | 
26 | def test_make_linear_regression__zero_size():
27 |     with pytest.raises(ValueError):
28 |         make_linear_regression(0)
29 | 
30 | 
31 | def test_make_linear_regression__negative_size():
32 |     with pytest.raises(ValueError):
33 |         make_linear_regression(-10)
34 | 


--------------------------------------------------------------------------------
/tests/datasets/loaders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/datasets/loaders/__init__.py


--------------------------------------------------------------------------------
/tests/datasets/loaders/test_criteo_uplift_prediction.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import pytest
 4 | from pyuplift.datasets import load_criteo_uplift_prediction
 5 | from pyuplift.datasets import download_criteo_uplift_prediction
 6 | 
 7 | 
 8 | data_home = os.path.join(os.sep.join(__file__.split(os.sep)[:-1]), 'data')
 9 | 
10 | 
11 | def test_load_criteo_uplift_prediction__do_not_download_if_missing():
12 |     with pytest.raises(FileNotFoundError):
13 |         load_criteo_uplift_prediction(data_home=data_home, download_if_missing=False)
14 | 
15 | 
16 | def test_download_criteo_uplift_prediction__wrong_url():
17 |     with pytest.raises(Exception):
18 |         download_criteo_uplift_prediction(url='https://s3.us-east-2.amazonaws.com/criteo-uplift/criteo-uplift.csv.gz')
19 | 
20 | 
21 | def test_download_criteo_uplift_prediction():
22 |     download_criteo_uplift_prediction(data_home=data_home)
23 |     # shutil.rmtree(data_home)
24 | 
25 | 
26 | def test_load_criteo_uplift_prediction():
27 |     df = load_criteo_uplift_prediction(data_home=data_home)
28 |     assert len(df['feature_names']) != 11
29 |     shutil.rmtree(data_home)
30 | 


--------------------------------------------------------------------------------
/tests/datasets/loaders/test_hillstrom_email_marketing.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import pytest
 4 | from pyuplift.datasets import download_hillstrom_email_marketing
 5 | from pyuplift.datasets import load_hillstrom_email_marketing
 6 | 
 7 | 
 8 | data_home = os.path.join(os.sep.join(__file__.split(os.sep)[:-1]), 'data')
 9 | 
10 | 
11 | def test_download_hillstrom_email_marketing():
12 |     download_hillstrom_email_marketing(data_home=data_home)
13 |     shutil.rmtree(data_home)
14 | 
15 | 
16 | def test_download_hillstrom_email_marketing__twice():
17 |     download_hillstrom_email_marketing(data_home=data_home)
18 |     download_hillstrom_email_marketing(data_home=data_home)
19 |     shutil.rmtree(data_home)
20 | 
21 | 
22 | def test_download_hillstrom_email_marketing__wrong_url():
23 |     with pytest.raises(Exception):
24 |         download_hillstrom_email_marketing(url='http://www.minethatdata.com/Kevin_Hillstrom_Min')
25 | 
26 | 
27 | def test_load_hillstrom_email_marketing__using_encoded_data():
28 |     df = load_hillstrom_email_marketing(data_home=data_home, load_raw_data=False)
29 |     assert len(df['feature_names']) == 18
30 |     shutil.rmtree(data_home)
31 | 
32 | 
33 | def test_load_hillstrom_email_marketing__using_raw_data():
34 |     df = load_hillstrom_email_marketing(data_home=data_home, load_raw_data=True)
35 |     assert len(df['feature_names']) != 18
36 |     shutil.rmtree(data_home)
37 | 
38 | 
39 | def test_load_hillstrom_email_marketing__do_not_download_if_missing():
40 |     with pytest.raises(FileNotFoundError):
41 |         load_hillstrom_email_marketing(data_home=data_home, download_if_missing=False)
42 | 


--------------------------------------------------------------------------------
/tests/datasets/loaders/test_lalonde_nsw.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import pytest
 4 | from pyuplift.datasets import download_lalonde_nsw, load_lalonde_nsw
 5 | 
 6 | 
 7 | data_home = os.path.join(os.sep.join(__file__.split(os.sep)[:-1]), 'data')
 8 | 
 9 | 
10 | def test_download_lalonde_nsw():
11 |     download_lalonde_nsw(data_home=data_home)
12 |     shutil.rmtree(data_home)
13 | 
14 | 
15 | def test_download_lalonde_nsw__twice():
16 |     download_lalonde_nsw(data_home=data_home)
17 |     download_lalonde_nsw(data_home=data_home)
18 |     shutil.rmtree(data_home)
19 | 
20 | 
21 | def test_download_lalonde_nsw__wrong_control_data_url():
22 |     with pytest.raises(Exception, match=r'.*control_data_url.*'):
23 |         download_lalonde_nsw(control_data_url='https://users.nber.org/~rdehejia/data/nsw_control_fake.txt')
24 | 
25 | 
26 | def test_download_lalonde_nsw__wrong_treated_data_url():
27 |     with pytest.raises(Exception, match=r'.*treated_data_url.*'):
28 |         download_lalonde_nsw(treated_data_url='https://users.nber.org/~rdehejia/data/nsw_control_fake.txt')
29 | 
30 | 
31 | def test_load_lalonde_nsw__do_not_download_if_missing():
32 |     with pytest.raises(FileNotFoundError):
33 |         load_lalonde_nsw(data_home=data_home, download_if_missing=False)
34 | 
35 | 
36 | def test_load_lalonde_nsw():
37 |     df = load_lalonde_nsw(data_home=data_home)
38 |     assert len(df['feature_names']) == 7
39 |     shutil.rmtree(data_home)
40 | 


--------------------------------------------------------------------------------
/tests/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/metrics/__init__.py


--------------------------------------------------------------------------------
/tests/metrics/test_average_effect.py:
--------------------------------------------------------------------------------
 1 | from pyuplift.variable_selection import Dummy
 2 | from pyuplift.model_selection import train_test_split
 3 | from pyuplift.datasets import make_linear_regression
 4 | from pyuplift.metrics import get_average_effect
 5 | 
 6 | 
 7 | random_state = 123
 8 | df = make_linear_regression(10000)
 9 | X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
10 | X_train, X_test, y_train, y_test, t_train, t_test = train_test_split(X, y, t, random_state=random_state)
11 | 
12 | model = Dummy()
13 | model.fit(X_train, y_train, t_train)
14 | 
15 | 
16 | def test_get_average_effect__zero_test_share():
17 |     y_pred = model.predict(X_test)
18 |     test_share = 0
19 |     effect = get_average_effect(y_test, t_test, y_pred, test_share)
20 |     assert effect == 0
21 | 


--------------------------------------------------------------------------------
/tests/model_selection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/model_selection/__init__.py


--------------------------------------------------------------------------------
/tests/model_selection/model_validation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/model_selection/model_validation/__init__.py


--------------------------------------------------------------------------------
/tests/model_selection/model_validation/test_treatment_cross_validation.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pyuplift.variable_selection import Dummy
 3 | from pyuplift.datasets import make_linear_regression
 4 | from pyuplift.model_selection import treatment_cross_val_score
 5 | 
 6 | 
 7 | model = Dummy()
 8 | random_state = 101
 9 | size = 1000
10 | train_share = 0.7
11 | df = make_linear_regression(size, random_state=random_state)
12 | X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
13 | 
14 | 
15 | def test_treatment_cross_val_score__seeds_are_none():
16 |     cv, seeds = 5, None
17 |     scores = treatment_cross_val_score(X, y, t, model, cv, train_share, seeds)
18 |     assert len(scores) == cv
19 | 
20 | 
21 | def test_treatment_cross_val_score__cv_not_equals_len_of_seeds():
22 |     cv, seeds = 5, list(range(3))
23 |     with pytest.raises(ValueError):
24 |         treatment_cross_val_score(X, y, t, model, cv, train_share, seeds)
25 | 
26 | 
27 | def test_treatment_cross_val_score__negative_cv():
28 |     cv, seeds = -5, list(range(3))
29 |     with pytest.raises(ValueError):
30 |         treatment_cross_val_score(X, y, t, model, cv, train_share, seeds)
31 | 
32 | 
33 | def test_treatment_cross_val_score__zero_cv():
34 |     cv, seeds = 0, list(range(3))
35 |     with pytest.raises(ValueError):
36 |         treatment_cross_val_score(X, y, t, model, cv, train_share, seeds)
37 | 
38 | 
39 | def test_treatment_cross_val_score__negative_train_share():
40 |     train_share = -0.7
41 |     cv, seeds = 3, list(range(3))
42 |     with pytest.raises(ValueError):
43 |         treatment_cross_val_score(X, y, t, model, cv, train_share, seeds)
44 | 
45 | 
46 | def test_treatment_cross_val_score__zero_train_share():
47 |     train_share = 0
48 |     cv, seeds = 3, list(range(3))
49 |     with pytest.raises(ValueError):
50 |         treatment_cross_val_score(X, y, t, model, cv, train_share, seeds)
51 | 


--------------------------------------------------------------------------------
/tests/model_selection/splitters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/model_selection/splitters/__init__.py


--------------------------------------------------------------------------------
/tests/model_selection/splitters/test_train_test_split.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from pyuplift.datasets import make_linear_regression
 4 | from pyuplift.model_selection import train_test_split
 5 | 
 6 | 
 7 | def test_train_test_split__default():
 8 |     size = 1000
 9 |     df = make_linear_regression(size)
10 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
11 |     X_train, X_test, y_train, y_test, t_train, t_test = train_test_split(X, y, t, random_state=10)
12 | 
13 |     assert X_train.shape[0] == 700
14 |     assert X_test.shape[0] == 300
15 |     assert y_train.shape[0] == 700
16 |     assert y_test.shape[0] == 300
17 |     assert t_train.shape[0] == 700
18 |     assert t_test.shape[0] == 300
19 | 
20 | 
21 | def test_train_test_split__repeated_random_state():
22 |     random_state, size = 101, 1000
23 | 
24 |     df = make_linear_regression(size, random_state=random_state)
25 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
26 |     X_train1, X_test1, y_train1, y_test1, t_train1, t_test1 = train_test_split(X, y, t, random_state=random_state)
27 | 
28 |     df = make_linear_regression(size, random_state=random_state)
29 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
30 |     X_train2, X_test2, y_train2, y_test2, t_train2, t_test2 = train_test_split(X, y, t, random_state=random_state)
31 | 
32 |     assert np.array_equal(X_train1, X_train2)
33 |     assert np.array_equal(X_test1, X_test2)
34 |     assert np.array_equal(y_train1, y_train2)
35 |     assert np.array_equal(y_test1, y_test2)
36 |     assert np.array_equal(t_train1, t_train2)
37 |     assert np.array_equal(t_test1, t_test2)
38 | 
39 | 
40 | def test_train_test_split__none_random_state():
41 |     random_state, size = 101, 1000
42 | 
43 |     df = make_linear_regression(size, random_state=random_state)
44 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
45 |     X_train1, X_test1, y_train1, y_test1, t_train1, t_test1 = train_test_split(X, y, t, random_state=None)
46 | 
47 |     df = make_linear_regression(size, random_state=random_state)
48 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
49 |     X_train2, X_test2, y_train2, y_test2, t_train2, t_test2 = train_test_split(X, y, t, random_state=None)
50 | 
51 |     assert not np.array_equal(X_train1, X_train2)
52 | 
53 | 
54 | def test_train_test_split__negative_train_share():
55 |     random_state, size = 101, 1000
56 |     df = make_linear_regression(size, random_state=random_state)
57 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
58 | 
59 |     with pytest.raises(ValueError):
60 |         train_test_split(X, y, t, train_share=-0.5)
61 | 
62 | 
63 | def test_train_test_split__zero_train_share():
64 |     random_state, size = 101, 1000
65 |     df = make_linear_regression(size, random_state=random_state)
66 |     X, y, t = df.drop(['y', 't'], axis=1).values, df['y'].values, df['t'].values
67 | 
68 |     with pytest.raises(ValueError):
69 |         train_test_split(X, y, t, train_share=0)
70 | 


--------------------------------------------------------------------------------
/tests/transformation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/transformation/__init__.py


--------------------------------------------------------------------------------
/tests/transformation/base.py:
--------------------------------------------------------------------------------
 1 | class EmptyClass:
 2 |     pass
 3 | 
 4 | 
 5 | class NoFitClass:
 6 |     def predict(self):
 7 |         pass
 8 | 
 9 | 
10 | class NoPredictClass:
11 |     def fit(self):
12 |         pass
13 | 


--------------------------------------------------------------------------------
/tests/transformation/test_jaskowski.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestClassifier
 3 | from pyuplift.transformation import Jaskowski
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_jaskowski__right_class():
 8 |     model = RandomForestClassifier()
 9 |     Jaskowski(model)
10 | 
11 | 
12 | def test_jaskowski__empty_class():
13 |     model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Jaskowski(model)
16 | 
17 | 
18 | def test_jaskowski__non_fit_class():
19 |     model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Jaskowski(model)
22 | 
23 | 
24 | def test_jaskowski__non_predict_class():
25 |     model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Jaskowski(model)
28 | 


--------------------------------------------------------------------------------
/tests/transformation/test_kane.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestClassifier
 3 | from pyuplift.transformation import Kane
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_kane__right_class():
 8 |     model = RandomForestClassifier()
 9 |     Kane(model)
10 | 
11 | 
12 | def test_kane__empty_class():
13 |     model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Kane(model)
16 | 
17 | 
18 | def test_kane__non_fit_class():
19 |     model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Kane(model)
22 | 
23 | 
24 | def test_kane__non_predict_class():
25 |     model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Kane(model)
28 | 


--------------------------------------------------------------------------------
/tests/transformation/test_lai.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestClassifier
 3 | from pyuplift.transformation import Lai
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_lai__right_class():
 8 |     model = RandomForestClassifier()
 9 |     Lai(model)
10 | 
11 | 
12 | def test_lai__empty_class():
13 |     model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Lai(model)
16 | 
17 | 
18 | def test_lai__non_fit_class():
19 |     model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Lai(model)
22 | 
23 | 
24 | def test_lai__non_predict_class():
25 |     model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Lai(model)
28 | 


--------------------------------------------------------------------------------
/tests/transformation/test_pessimistic.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestClassifier
 3 | from pyuplift.transformation import Pessimistic
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_pessimistic__right_class():
 8 |     model = RandomForestClassifier()
 9 |     Pessimistic(model)
10 | 
11 | 
12 | def test_pessimistic__empty_class():
13 |     model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Pessimistic(model)
16 | 
17 | 
18 | def test_pessimistic__non_fit_class():
19 |     model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Pessimistic(model)
22 | 
23 | 
24 | def test_pessimistic__non_predict_class():
25 |     model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Pessimistic(model)
28 | 


--------------------------------------------------------------------------------
/tests/transformation/test_reflective.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestClassifier
 3 | from pyuplift.transformation import Reflective
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_reflective__right_class():
 8 |     model = RandomForestClassifier()
 9 |     Reflective(model)
10 | 
11 | 
12 | def test_reflective__empty_class():
13 |     model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Reflective(model)
16 | 
17 | 
18 | def test_reflective__non_fit_class():
19 |     model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Reflective(model)
22 | 
23 | 
24 | def test_reflective__non_predict_class():
25 |     model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Reflective(model)
28 | 


--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/utils/__init__.py


--------------------------------------------------------------------------------
/tests/utils/data/test.test.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/utils/data/test.test.gz


--------------------------------------------------------------------------------
/tests/utils/test_downloader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | from pyuplift.utils import download_file
 4 | 
 5 | 
 6 | def test_download_file__success():
 7 |     url = 'https://github.com/duketemon/pyuplift/blob/master/LICENSE'
 8 |     output = 'LICENSE'
 9 |     download_file(url, output)
10 |     os.remove(output)
11 | 
12 | 
13 | def test_download_file__exist_file():
14 |     output = 'exist_file_test.test'
15 |     with open(output, 'w') as f:
16 |         f.write('test')
17 |     url = 'https://github.com/duketemon/pyuplift/blob/master/LICENSE'
18 |     download_file(url, output)
19 |     os.remove(output)
20 | 
21 | 
22 | def test_download_file__wrong_url():
23 |     output = 'LICENSE12'
24 |     url = 'https://githu404b.com/duketemon/pyuplift/blob/master/LICENSE'
25 |     with pytest.raises(Exception):
26 |         download_file(url, output)
27 | 
28 | 
29 | def test_download_file__wrong_output_path():
30 |     output = '/data23/LICENSE'
31 |     url = 'https://github.com/duketemon/pyuplift/blob/master/LICENSE'
32 |     with pytest.raises(FileNotFoundError):
33 |         download_file(url, output)
34 | 


--------------------------------------------------------------------------------
/tests/utils/test_retriever.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import pytest
 4 | from pyuplift.utils import retrieve_from_gz
 5 | 
 6 | 
 7 | data_home = os.path.join(os.sep.join(__file__.split(os.sep)[:-1]), 'data')
 8 | 
 9 | 
10 | def test_retrieve_from_gz():
11 |     output_path = os.path.join(data_home, 'test.test')
12 |     archive_path = output_path + '.gz'
13 |     retrieve_from_gz(archive_path, output_path)
14 |     with open(output_path, 'r') as f:
15 |         text = f.read()
16 |     os.remove(output_path)
17 |     assert text == 'good'
18 | 


--------------------------------------------------------------------------------
/tests/variable_selection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duketemon/pyuplift/33daa0768ff333387cb8223ebfaedaffa57de335/tests/variable_selection/__init__.py


--------------------------------------------------------------------------------
/tests/variable_selection/base.py:
--------------------------------------------------------------------------------
 1 | class EmptyClass:
 2 |     pass
 3 | 
 4 | 
 5 | class NoFitClass:
 6 |     def predict(self):
 7 |         pass
 8 | 
 9 | 
10 | class NoPredictClass:
11 |     def fit(self):
12 |         pass
13 | 


--------------------------------------------------------------------------------
/tests/variable_selection/test_cadit.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestRegressor
 3 | from pyuplift.variable_selection import Cadit
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_dummy__right_class():
 8 |     reg_model = RandomForestRegressor()
 9 |     Cadit(reg_model)
10 | 
11 | 
12 | def test_dummy__empty_class():
13 |     reg_model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Cadit(reg_model)
16 | 
17 | 
18 | def test_dummy__non_fit_class():
19 |     reg_model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Cadit(reg_model)
22 | 
23 | 
24 | def test_dummy__non_predict_class():
25 |     reg_model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Cadit(reg_model)
28 | 


--------------------------------------------------------------------------------
/tests/variable_selection/test_dummy.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestRegressor
 3 | from pyuplift.variable_selection import Dummy
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_dummy__right_class():
 8 |     reg_model = RandomForestRegressor()
 9 |     Dummy(reg_model)
10 | 
11 | 
12 | def test_dummy__empty_class():
13 |     reg_model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Dummy(reg_model)
16 | 
17 | 
18 | def test_dummy__non_fit_class():
19 |     reg_model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Dummy(reg_model)
22 | 
23 | 
24 | def test_dummy__non_predict_class():
25 |     reg_model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Dummy(reg_model)
28 | 


--------------------------------------------------------------------------------
/tests/variable_selection/test_econometric.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestRegressor
 3 | from pyuplift.variable_selection import Econometric
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_econometric__right_class():
 8 |     reg_model = RandomForestRegressor()
 9 |     Econometric(reg_model)
10 | 
11 | 
12 | def test_econometric__empty_class():
13 |     reg_model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         Econometric(reg_model)
16 | 
17 | 
18 | def test_econometric__non_fit_class():
19 |     reg_model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         Econometric(reg_model)
22 | 
23 | 
24 | def test_econometric__non_predict_class():
25 |     reg_model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         Econometric(reg_model)
28 | 


--------------------------------------------------------------------------------
/tests/variable_selection/test_two_model.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.ensemble import RandomForestRegressor
 3 | from pyuplift.variable_selection import TwoModel
 4 | from .base import *
 5 | 
 6 | 
 7 | def test_two_model__right_class():
 8 |     reg_model = RandomForestRegressor()
 9 |     TwoModel(reg_model, reg_model)
10 | 
11 | 
12 | def test_two_model__empty_class():
13 |     reg_model = EmptyClass()
14 |     with pytest.raises(ValueError):
15 |         TwoModel(reg_model, reg_model)
16 | 
17 | 
18 | def test_two_model__non_fit_class():
19 |     reg_model = NoFitClass()
20 |     with pytest.raises(ValueError):
21 |         TwoModel(reg_model, reg_model)
22 | 
23 | 
24 | def test_two_model__non_predict_class():
25 |     reg_model = NoPredictClass()
26 |     with pytest.raises(ValueError):
27 |         TwoModel(reg_model, reg_model)
28 | 


--------------------------------------------------------------------------------
/tutorials/Getting started.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Getting started tutorial"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### ! This tutorial uses the Hillstrom Email Marketing dataset. More information about the dataset you can find on the [official site](http://minethatdata.com/Stochastic_Solutions_E-Mail_Challenge_2008.04.30.pdf)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": []
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from pyuplift.variable_selection import Econometric\n",
 31 |     "from pyuplift.datasets import load_hillstrom_email_marketing\n",
 32 |     "from pyuplift.model_selection import train_test_split"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": null,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": []
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "### Load data from the Hillstrom Email Marketing dataset\n",
 47 |     "Parameter `load_raw_data` allowed you to load raw data (original dataset) or preprocessed data (ready to go)."
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 2,
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "data = load_hillstrom_email_marketing(load_raw_data=False)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 3,
 62 |    "metadata": {},
 63 |    "outputs": [
 64 |     {
 65 |      "data": {
 66 |       "text/plain": [
 67 |        "{'description': 'This dataset contains 64,000 customers who last purchased within twelve months. The customers were involved in an e-mail test. 1/3 were randomly chosen to receive an e-mail campaign featuring Mens merchandise. 1/3 were randomly chosen to receive an e-mail campaign featuring Womens merchandise. 1/3 were randomly chosen to not receive an e-mail campaign. During a period of two weeks following the e-mail campaign, results were tracked. Your job is to tell the world if the Mens or Womens e-mail campaign was successful.',\n",
 68 |        " 'data': array([[ 10.  , 142.44,   1.  , ...,   0.  ,   1.  ,   0.  ],\n",
 69 |        "        [  6.  , 329.08,   1.  , ...,   0.  ,   0.  ,   1.  ],\n",
 70 |        "        [  7.  , 180.65,   0.  , ...,   0.  ,   0.  ,   1.  ],\n",
 71 |        "        ...,\n",
 72 |        "        [  6.  ,  29.99,   1.  , ...,   0.  ,   1.  ,   0.  ],\n",
 73 |        "        [  1.  , 552.94,   1.  , ...,   1.  ,   0.  ,   0.  ],\n",
 74 |        "        [  1.  , 472.82,   0.  , ...,   0.  ,   0.  ,   1.  ]]),\n",
 75 |        " 'feature_names': array(['recency', 'history', 'mens', 'womens', 'newbie', 'zip_code_Rural',\n",
 76 |        "        'zip_code_Surburban', 'zip_code_Urban',\n",
 77 |        "        'history_segment_$0 - $100', 'history_segment_$1,000 +',\n",
 78 |        "        'history_segment_$100 - $200', 'history_segment_$200 - $350',\n",
 79 |        "        'history_segment_$350 - $500', 'history_segment_$500 - $750',\n",
 80 |        "        'history_segment_$750 - $1,000', 'channel_Multichannel',\n",
 81 |        "        'channel_Phone', 'channel_Web'], dtype='<U29'),\n",
 82 |        " 'treatment': array([2, 0, 2, ..., 1, 2, 1], dtype=int64),\n",
 83 |        " 'target': array([0., 0., 0., ..., 0., 0., 0.]),\n",
 84 |        " 'target_spend': array([0., 0., 0., ..., 0., 0., 0.]),\n",
 85 |        " 'target_visit': array([0, 0, 0, ..., 0, 0, 0], dtype=int64),\n",
 86 |        " 'target_conversion': array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}"
 87 |       ]
 88 |      },
 89 |      "execution_count": 3,
 90 |      "metadata": {},
 91 |      "output_type": "execute_result"
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "data"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": []
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "metadata": {},
108 |    "source": [
109 |     "### Hillstrom dataset has 3 type of the treatment:\n",
110 |     "* 0, No E-Mail\n",
111 |     "* 1, Mens E-Mail\n",
112 |     "* 2, Womens E-Mail"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "### Let's take two of them: \n",
120 |     "* No E-mail (no treatment) \n",
121 |     "* Mens E-mail (treatment)"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 4,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "ex_womens_indexes = data['treatment'] != 2"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 5,
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "X = data['data'][ex_womens_indexes]\n",
140 |     "y = data['target'][ex_womens_indexes]\n",
141 |     "t = data['treatment'][ex_womens_indexes]"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": []
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "### Random split data on train (70%) and test (30%)"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 6,
161 |    "metadata": {},
162 |    "outputs": [],
163 |    "source": [
164 |     "X_train, X_test, y_train, y_test, t_train, t_test = train_test_split(X, y, t, train_share=0.7, random_state=123)"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "### Create the uplift model with default parameters"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 7,
177 |    "metadata": {},
178 |    "outputs": [],
179 |    "source": [
180 |     "model = Econometric()"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "metadata": {},
186 |    "source": [
187 |     "### Fit the model"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 8,
193 |    "metadata": {},
194 |    "outputs": [
195 |     {
196 |      "data": {
197 |       "text/plain": [
198 |        "<pyuplift.variable_selection.econometric.Econometric at 0x214f0be0>"
199 |       ]
200 |      },
201 |      "execution_count": 8,
202 |      "metadata": {},
203 |      "output_type": "execute_result"
204 |     }
205 |    ],
206 |    "source": [
207 |     "model.fit(X_train, y_train, t_train)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "### Predict uplift for the test dataset"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 9,
220 |    "metadata": {},
221 |    "outputs": [],
222 |    "source": [
223 |     "uplift = model.predict(X_test)"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 10,
229 |    "metadata": {},
230 |    "outputs": [
231 |     {
232 |      "data": {
233 |       "text/plain": [
234 |        "array([1.0615553 , 0.41391224, 0.26028002, 2.09681851, 0.42625385,\n",
235 |        "       1.94064929, 2.50369232, 0.52225684, 0.17712341, 0.91999936,\n",
236 |        "       0.54780214, 0.27353447, 0.74778451, 0.77815588, 0.89413281,\n",
237 |        "       0.50344916, 0.5541491 , 1.19713328, 1.62508446, 2.72094539])"
238 |       ]
239 |      },
240 |      "execution_count": 10,
241 |      "metadata": {},
242 |      "output_type": "execute_result"
243 |     }
244 |    ],
245 |    "source": [
246 |     "uplift[:20]"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": []
255 |   }
256 |  ],
257 |  "metadata": {
258 |   "kernelspec": {
259 |    "display_name": "Python 3",
260 |    "language": "python",
261 |    "name": "python3"
262 |   },
263 |   "language_info": {
264 |    "codemirror_mode": {
265 |     "name": "ipython",
266 |     "version": 3
267 |    },
268 |    "file_extension": ".py",
269 |    "mimetype": "text/x-python",
270 |    "name": "python",
271 |    "nbconvert_exporter": "python",
272 |    "pygments_lexer": "ipython3",
273 |    "version": "3.6.4"
274 |   }
275 |  },
276 |  "nbformat": 4,
277 |  "nbformat_minor": 2
278 | }
279 | 


--------------------------------------------------------------------------------
/tutorials/README.MD:
--------------------------------------------------------------------------------
1 | ## Tutorials
2 | This directory contains tutorials which related to the pyuplift library.
3 | 
4 | * [Getting started](https://github.com/duketemon/pyuplift/blob/master/tutorials/Getting%20started.ipynb)
5 | * [EDA of the Lalonde NSW dataset](https://github.com/duketemon/pyuplift/blob/master/tutorials/EDA%20Lalonde%20NSW.ipynb)
6 | * [EDA of the Hillstrom Email Marketing dataset](https://github.com/duketemon/pyuplift/blob/master/tutorials/EDA%20Hillstrom%20Email%20Marketing.ipynb)
7 | 


--------------------------------------------------------------------------------