├── .python-version ├── docs ├── style.css ├── methods │ ├── ratio.md │ ├── funnel.md │ ├── total.md │ ├── ratio.ipynb │ ├── total.ipynb │ └── funnel.ipynb ├── installation.md ├── index.md ├── index.ipynb ├── theme │ ├── README.md │ ├── LICENSE │ └── main.html └── examples │ ├── ibis.md │ ├── simple-revenue-funnel.md │ ├── fashion-brand-co2e.md │ ├── simple-revenue-funnel.ipynb │ └── iowa-whiskey-sales.md ├── .gitattributes ├── icanexplain ├── datasets │ ├── iowa_whiskey_sales.csv.zip │ ├── product_footprints.csv.gz │ └── us_general_election_popular_vote.csv.zip ├── datasets.py ├── test_sum.py ├── test_mean.py └── __init__.py ├── .gitignore ├── CONTRIBUTING.md ├── .github ├── workflows │ ├── code-quality.yml │ └── unit-tests.yml └── actions │ └── install-env │ └── action.yml ├── Makefile ├── pre-commit-hooks └── check_pinned_actions.sh ├── mkdocs.yml ├── pyproject.toml ├── .pre-commit-config.yaml ├── README.md └── LICENSE /.python-version: -------------------------------------------------------------------------------- 1 | 3.11.8 2 | -------------------------------------------------------------------------------- /docs/style.css: -------------------------------------------------------------------------------- 1 | h1 { 2 | color: red; 3 | } 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /docs/methods/ratio.md: -------------------------------------------------------------------------------- 1 | # Ratio decomposition 2 | 3 | 4 | ```python 5 | 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ```sh 4 | pip 
install icanexplain 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/methods/funnel.md: -------------------------------------------------------------------------------- 1 | # Funnel decomposition 2 | 3 | 4 | ```python 5 | # Funnel 6 | ``` 7 | -------------------------------------------------------------------------------- /docs/methods/total.md: -------------------------------------------------------------------------------- 1 | # Total decomposition 2 | 3 | $$ 4 | \sum_{i=1}^n \frac{1}{2} 5 | $$ 6 | 7 | 8 | ```python 9 | 10 | ``` 11 | -------------------------------------------------------------------------------- /icanexplain/datasets/iowa_whiskey_sales.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonfact/icanexplain/HEAD/icanexplain/datasets/iowa_whiskey_sales.csv.zip -------------------------------------------------------------------------------- /icanexplain/datasets/product_footprints.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonfact/icanexplain/HEAD/icanexplain/datasets/product_footprints.csv.gz -------------------------------------------------------------------------------- /icanexplain/datasets/us_general_election_popular_vote.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonfact/icanexplain/HEAD/icanexplain/datasets/us_general_election_popular_vote.csv.zip -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/public 2 | docs/resources 3 | docs/.hugo_build.lock 4 | *.pyc 5 | /*.ipynb 6 | .ipynb_checkpoints 7 | .DS_Store 8 | .pytest_cache 9 | *.ddb 10 | *.ddb.wal 11 | dist/ 12 | site/ 13 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ```sh 4 | # Prepare virtual environment 5 | git clone https://github.com/carbonfact/icanexplain 6 | cd icanexplain 7 | poetry install 8 | poetry shell 9 | 10 | # Install pre-commit hooks 11 | pre-commit install --hook-type pre-push 12 | pre-commit run --all-files 13 | 14 | # Run tests 15 | pytest 16 | 17 | # Serve docs locally 18 | make docs 19 | ``` 20 | -------------------------------------------------------------------------------- /.github/workflows/code-quality.yml: -------------------------------------------------------------------------------- 1 | name: Code quality 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - "*" 7 | push: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | run: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 16 | - uses: ./.github/actions/install-env 17 | - name: Run pre-commit on all files 18 | run: poetry run pre-commit run --all-files 19 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - "*" 7 | push: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | run: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ["3.10", "3.11", "3.12"] 17 | steps: 18 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 19 | - uses: ./.github/actions/install-env 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - run: poetry run pytest 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 
execute-notebooks: 2 | poetry run jupyter nbconvert --execute --to notebook --inplace docs/*.ipynb 3 | poetry run jupyter nbconvert --execute --to notebook --inplace docs/examples/*.ipynb 4 | poetry run jupyter nbconvert --execute --to notebook --inplace docs/methods/*.ipynb 5 | 6 | render-notebooks: 7 | poetry run jupyter nbconvert --to markdown docs/*.ipynb 8 | poetry run jupyter nbconvert --to markdown docs/examples/*.ipynb 9 | poetry run jupyter nbconvert --to markdown docs/methods/*.ipynb 10 | 11 | docs: execute-notebooks render-notebooks 12 | poetry run mkdocs serve 13 | 14 | publish-docs: execute-notebooks render-notebooks 15 | poetry run mkdocs gh-deploy 16 | -------------------------------------------------------------------------------- /docs/methods/ratio.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ratio decomposition" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | } 17 | ], 18 | "metadata": { 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.11.4" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 2 34 | } 35 | -------------------------------------------------------------------------------- /pre-commit-hooks/check_pinned_actions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Only run if .github/ files are staged 4 | staged_github_files=$(git diff --cached --name-only --diff-filter=ACM | grep '^\.github/') 5 | if [ -z "$staged_github_files" ]; then 6 | exit 0 7 | fi 8 | 9 | # Check for unpinned external GitHub Actions (not using 
commit SHA) 10 | offenders=$(echo "$staged_github_files" | grep -E '\.github/(workflows|actions)/' | 11 | xargs grep -E "uses:[[:space:]]*[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+@" | 12 | grep -v "\.github/actions" | 13 | grep -v -E "@[0-9a-f]{40}($|[^0-9a-f])") 14 | 15 | if [ -n "$offenders" ]; then 16 | echo "❌ Error: Detected external GitHub Actions that are not pinned to a commit SHA." >&2 17 | echo "Please update your workflows accordingly to prevent supply chain attacks!" >&2 18 | echo "Offending lines:" >&2 19 | echo "$offenders" >&2 20 | exit 1 21 | fi -------------------------------------------------------------------------------- /docs/methods/total.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Total decomposition" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "$$\n", 15 | "\\sum_{i=1}^n \\frac{1}{2}\n", 16 | "$$" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [] 25 | } 26 | ], 27 | "metadata": { 28 | "language_info": { 29 | "codemirror_mode": { 30 | "name": "ipython", 31 | "version": 3 32 | }, 33 | "file_extension": ".py", 34 | "mimetype": "text/x-python", 35 | "name": "python", 36 | "nbconvert_exporter": "python", 37 | "pygments_lexer": "ipython3", 38 | "version": "3.11.4" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 2 43 | } 44 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: icanexplain 2 | repo_name: carbonfact/icanexplain 3 | repo_url: https://github.com/carbonfact/icanexplain 4 | 5 | nav: 6 | - Introduction: 7 | - index.md 8 | - installation.md 9 | - Examples: 10 | - examples/iowa-whiskey-sales.md # total 11 | - 
examples/fashion-brand-co2e.md # rate 12 | - examples/simple-revenue-funnel.md # funnel 13 | - examples/ibis.md 14 | theme: 15 | name: material 16 | font: 17 | text: Noto Sans Mono 18 | features: 19 | - navigation.tabs 20 | - tables 21 | - content.code.copy 22 | palette: 23 | primary: black 24 | icon: 25 | logo: material/chart-tree 26 | 27 | markdown_extensions: 28 | - pymdownx.highlight: 29 | anchor_linenums: true 30 | line_spans: __span 31 | pygments_lang_class: true 32 | - pymdownx.inlinehilite 33 | - pymdownx.snippets 34 | - pymdownx.superfences 35 | - toc: 36 | permalink: true 37 | permalink_title: null 38 | - pymdownx.arithmatex: 39 | generic: true 40 | -------------------------------------------------------------------------------- /docs/methods/funnel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Funnel decomposition" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "execution": { 15 | "iopub.execute_input": "2024-09-25T08:40:25.341828Z", 16 | "iopub.status.busy": "2024-09-25T08:40:25.341418Z", 17 | "iopub.status.idle": "2024-09-25T08:40:25.360667Z", 18 | "shell.execute_reply": "2024-09-25T08:40:25.360217Z" 19 | } 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Funnel" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "language_info": { 29 | "codemirror_mode": { 30 | "name": "ipython", 31 | "version": 3 32 | }, 33 | "file_extension": ".py", 34 | "mimetype": "text/x-python", 35 | "name": "python", 36 | "nbconvert_exporter": "python", 37 | "pygments_lexer": "ipython3", 38 | "version": "3.11.4" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 2 43 | } 44 | -------------------------------------------------------------------------------- /icanexplain/datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ 
import annotations 2 | 3 | import pathlib 4 | 5 | import pandas as pd 6 | 7 | DATASETS_DIR = pathlib.Path(__file__).parent / "datasets" 8 | 9 | 10 | def load_product_footprints(): 11 | return pd.read_csv(DATASETS_DIR / "product_footprints.csv.gz") 12 | 13 | 14 | def load_us_general_election_popular_vote(): 15 | return pd.read_csv(DATASETS_DIR / "us_general_election_popular_vote.csv.zip") 16 | 17 | 18 | def load_world_demography(): 19 | return pd.read_csv(DATASETS_DIR / "world_demography.csv") 20 | 21 | 22 | def load_iowa_whiskey_sales(): 23 | """Iowa whiskey sales. 24 | 25 | This dataset contains the sales of whiskey in the state of Iowa, USA. The data comes from 26 | Iowa's Open Data Portal. 27 | 28 | For the sake of example, the data is limited to 2012, 2016, and 2020. The data is also limited 29 | to a sample of 50,000 sales records. 30 | 31 | References 32 | ---------- 33 | [1] https://data.iowa.gov/Sales-Distribution/Iowa-Liquor-Sales/m3tr-qhgy/about_data 34 | 35 | """ 36 | return pd.read_csv(DATASETS_DIR / "iowa_whiskey_sales.csv.zip") 37 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "icanexplain" 3 | version = "0.3.0" 4 | description = "Explain why metrics change by unpacking them" 5 | authors = ["Max Halford "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.10" 10 | ibis-framework = "^9.5.0" 11 | altair = "^5.3.0" 12 | 13 | [tool.poetry.group.dev.dependencies] 14 | ruff = "^0.3.2" 15 | jupyter = "^1.0.0" 16 | pandas = "^2.2.1" 17 | pytest = "^8.2.1" 18 | names = "^0.3.0" 19 | ibis-framework = {extras = ["duckdb", "pandas"], version = "^9.5.0"} 20 | mkdocs = "^1.6.0" 21 | pygments = "^2.18.0" 22 | vega-datasets = "^0.9.0" 23 | mkdocs-material = "^9.5.26" 24 | polars = "^1.0.0" 25 | pre-commit = "^3.8.0" 26 | mypy = "^1.11.2" 27 | 28 | [tool.pytest.ini_options] 29 | 
addopts = [ 30 | "--doctest-modules", 31 | "--doctest-glob=README.md", 32 | "--doctest-glob=docs/api/*.md", 33 | "--verbose", 34 | "--color=yes", 35 | "--strict-markers", 36 | ] 37 | doctest_optionflags = "NORMALIZE_WHITESPACE NUMBER ELLIPSIS" 38 | 39 | [build-system] 40 | requires = ["poetry-core"] 41 | build-backend = "poetry.core.masonry.api" 42 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome 2 | 3 | Well met, fellow data analyst! 4 | 5 | If you're like me, then you're used to pesky stakeholders, who ask you why a metric changed. These kind of questions are tricky to answer confidently. It usually ends with you sharing a few other related metrics, giving some context, and providing a weak explanation. All the while hoping the stakeholder will be satisfied (or fed up) and go away 😮‍💨 6 | 7 | This isn't a good situation to be in. But what if you could tell *exactly* why a metric changed? Wouldn't that be great? 🤩 8 | 9 | `icanexplain` is a Python package. It provides a framework to break a metric down into drivers. It attributes the change in a metric to its drivers. Instead of just measuring the evolution of each driver, we can exactly quantify how much of the metric's evolution is due to each driver. 10 | 11 | The best way to understand how `icanexplain` works is to see it in action, by checking out the [examples](examples/iowa-whiskey-sales/). 12 | 13 | `icanexplain` works with [pandas](https://pandas.pydata.org/) and [Polars](https://pola.rs/) out of the box. Additionally, it can run against other backends (e.g. SQL) because it is implemented with [Ibis](https://ibis-project.org/). Check out [this example](examples/ibis/) for more information. 14 | 15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | files: icanexplain 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.4.0 5 | hooks: 6 | - id: check-json 7 | - id: check-yaml 8 | 9 | - repo: https://github.com/astral-sh/ruff-pre-commit 10 | # Ruff version. 11 | rev: v0.5.7 12 | hooks: 13 | # Run the linter. 14 | - id: ruff 15 | types_or: [python, pyi, jupyter] 16 | args: [--fix] 17 | # Run the formatter. 18 | - id: ruff-format 19 | types_or: [python, pyi, jupyter] 20 | 21 | - repo: https://github.com/pre-commit/mirrors-mypy 22 | rev: "v1.1.1" 23 | hooks: 24 | - id: mypy 25 | args: 26 | - "--config-file=pyproject.toml" 27 | - "--python-version=3.11" 28 | additional_dependencies: 29 | - pandera[mypy] 30 | - types-python-slugify 31 | - types-paramiko 32 | - types-requests 33 | 34 | # strip output from jupyter notebooks 35 | - repo: https://github.com/kynan/nbstripout 36 | rev: 0.7.1 37 | hooks: 38 | - id: nbstripout 39 | 40 | - repo: local 41 | hooks: 42 | - id: check-external-actions-pinned 43 | name: Check GitHub Actions are pinned 44 | entry: pre-commit-hooks/check_pinned_actions.sh 45 | language: script 46 | pass_filenames: false 47 | -------------------------------------------------------------------------------- /.github/actions/install-env/action.yml: -------------------------------------------------------------------------------- 1 | name: Install Python env 2 | 3 | inputs: 4 | python-version: 5 | required: true 6 | description: "Python version" 7 | 8 | runs: 9 | using: "composite" 10 | steps: 11 | - name: Check out repository 12 | uses: actions/checkout@v3 13 | 14 | - name: Set up python 15 | id: setup-python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ inputs.python-version }} 19 | 20 | - name: Load cached venv 21 | id: cached-poetry-dependencies 
22 | uses: actions/cache@v4 23 | with: 24 | path: .venv 25 | key: venv-${{ runner.os }}-${{ hashFiles('poetry.lock') }}-${{ hashFiles('.github/actions/install-env/action.yml') }}-${{ steps.setup-python.outputs.python-version }} 26 | 27 | - name: Load cached .local 28 | id: cached-dotlocal 29 | uses: actions/cache@v4 30 | with: 31 | path: ~/.local 32 | key: dotlocal-${{ runner.os }}-${{ hashFiles('.github/actions/install-env/action.yml') }}-${{ steps.setup-python.outputs.python-version }} 33 | 34 | - name: Install Python poetry 35 | uses: snok/install-poetry@v1 36 | with: 37 | virtualenvs-create: true 38 | virtualenvs-in-project: true 39 | installer-parallel: true 40 | virtualenvs-path: .venv 41 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 42 | 43 | - name: Install dependencies 44 | shell: bash 45 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 46 | run: poetry install --no-interaction 47 | 48 | - name: Activate environment 49 | shell: bash 50 | run: source .venv/bin/activate 51 | -------------------------------------------------------------------------------- /docs/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Welcome" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Well met, fellow data analyst!\n", 15 | "\n", 16 | "If you're like me, then you're used to pesky stakeholders, who ask you why a metric changed. These kind of questions are tricky to answer confidently. It usually ends with you sharing a few other related metrics, giving some context, and providing a weak explanation. All the while hoping the stakeholder will be satisfied (or fed up) and go away 😮‍💨\n", 17 | "\n", 18 | "This isn't a good situation to be in. But what if you could tell *exactly* why a metric changed? Wouldn't that be great? 
🤩\n", 19 | "\n", 20 | "`icanexplain` is a Python package. It provides a framework to break a metric down into drivers. It attributes the change in a metric to its drivers. Instead of just measuring the evolution of each driver, we can exactly quantify how much of the metric's evolution is due to each driver.\n", 21 | "\n", 22 | "The best way to understand how `icanexplain` works is to see it in action, by checking out the [examples](examples/iowa-whiskey-sales/).\n", 23 | "\n", 24 | "`icanexplain` works with [pandas](https://pandas.pydata.org/) and [Polars](https://pola.rs/) out of the box. Additionally, it can run against other backends (e.g. SQL) because it is implemented with [Ibis](https://ibis-project.org/). Check out [this example](examples/ibis/) for more information.\n", 25 | "\n", 26 | "
" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [] 33 | } 34 | ], 35 | "metadata": { 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.11.4" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 2 51 | } 52 | -------------------------------------------------------------------------------- /docs/theme/README.md: -------------------------------------------------------------------------------- 1 | # Kilsbergen 2 | 3 | A clean [MkDocs][mkdocs] theme. 4 | 5 | This theme is designed for [Tako][tako], [Pris][pris], and [Noblit][noblit]. 6 | It is not flexible on purpose: it supports everything I need, and nothing more. 7 | 8 | ## Demos 9 | 10 | * [Musium documentation][musium-docs] 11 | * [Noblit documentation][noblit-docs] 12 | * [Pris documentation][pris-docs] 13 | * [RCL documentation][rcl-docs] 14 | * [Squiller documentation][squiller-docs] 15 | * [Tako documentation][tako-docs] 16 | 17 | ## Features 18 | 19 | * Responsive design 20 | * Zero javascript 21 | 22 | ## Usage 23 | 24 | One easy way to use this theme, is to add it as a Git submodule to your `docs` 25 | directory, e.g. at `docs/theme`. Then add the following in your `mkdocs.yml`: 26 | 27 | ```yaml 28 | theme: 29 | name: null 30 | custom_dir: docs/theme 31 | ``` 32 | 33 | This theme requires MkDocs 1.1 or later. For earlier versions, delete this 34 | `README.md` to work around [this bug][readmebug]. 
35 | 36 | To enable anchors next to section headings, add the following to your 37 | `mkdocs.yml`: 38 | 39 | ```yaml 40 | markdown_extensions: 41 | - toc: 42 | permalink: true 43 | permalink_title: null 44 | ``` 45 | 46 | To enable syntax highlighting, ensure that `pygmentize` is available, and add 47 | the following to your `mkdocs.yml`: 48 | 49 | ```yaml 50 | markdown_extensions: 51 | - codehilite 52 | ``` 53 | 54 | See also [the python-markdown list of extensions][exts]. 55 | 56 | [readmebug]: https://github.com/mkdocs/mkdocs/issues/1766 57 | [exts]: https://python-markdown.github.io/extensions/ 58 | 59 | ## License 60 | 61 | Kilsbergen is licensed under the [Apache 2.0][apache2] license. In the generated 62 | documentation, it is fine to just link to this readme from a comment. 63 | 64 | [apache2]: https://www.apache.org/licenses/LICENSE-2.0 65 | [mkdocs]: https://www.mkdocs.org/ 66 | [musium-docs]: https://docs.ruuda.nl/musium/ 67 | [noblit-docs]: https://docs.ruuda.nl/noblit/ 68 | [noblit]: https://github.com/ruuda/noblit 69 | [pris-docs]: https://docs.ruuda.nl/pris/ 70 | [pris]: https://github.com/ruuda/pris 71 | [rcl-docs]: https://docs.ruuda.nl/rcl/ 72 | [squiller-docs]: https://docs.ruuda.nl/squiller/ 73 | [tako-docs]: https://docs.ruuda.nl/tako/ 74 | [tako]: https://github.com/ruuda/tako 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # icanexplain 2 | 3 |

4 | 5 | 6 | tests 7 | 8 | 9 | 10 | 11 | code_quality 12 | 13 | 14 | 15 | 16 | documentation 17 | 18 | 19 | 20 | 21 | pypi 22 | 23 | 24 | 25 | 26 | license 27 | 28 |

29 | 30 | _Explain why metrics change by unpacking them_ 31 | 32 | This library is here to help with the difficult task of explaining why a metric changes. It's particularly useful for analysts, data scientists, analytics engineers, and business intelligence professionals who need to understand the drivers of a metric's change. 33 | 34 | This README provides a small introduction. For more information, please refer to the [documentation](https://carbonfact.github.io/icanexplain). 35 | 36 | Check out [this blog post](https://maxhalford.github.io/blog/kpi-evolution-decomposition/) for some in-depth explanation. 37 | 38 | ## Quickstart 39 | 40 | Let's say you're an analyst at an Airbnb-like company. You're tasked with analyzing year-over-year revenue growth. You have obtained the following dataset: 41 | 42 | ```py 43 | >>> import pandas as pd 44 | >>> fmt_currency = lambda x: '' if pd.isna(x) else '${:,.0f}'.format(x) 45 | 46 | >>> revenue = pd.DataFrame.from_dict([ 47 | ... {'year': 2019, 'bookings': 1_000, 'revenue_per_booking': 200}, 48 | ... {'year': 2020, 'bookings': 1_000, 'revenue_per_booking': 220}, 49 | ... {'year': 2021, 'bookings': 1_500, 'revenue_per_booking': 220}, 50 | ... {'year': 2022, 'bookings': 1_700, 'revenue_per_booking': 225}, 51 | ... ]) 52 | >>> ( 53 | ... revenue 54 | ... .assign(bookings=revenue.bookings.apply('{:,d}'.format)) 55 | ... .assign(revenue_per_booking=revenue.revenue_per_booking.apply(fmt_currency)) 56 | ... .set_index('year') 57 | ... ) 58 | bookings revenue_per_booking 59 | year 60 | 2019 1,000 $200 61 | 2020 1,000 $220 62 | 2021 1,500 $220 63 | 2022 1,700 $225 64 | 65 | ``` 66 | 67 | It's quite straightforward to calculate the revenue for each year, and then to measure the year-over-year growth: 68 | 69 | ```py 70 | >>> ( 71 | ... revenue 72 | ... .assign(revenue=revenue.eval('bookings * revenue_per_booking')) 73 | ... .assign(growth=lambda x: x.revenue.diff()) 74 | ... 
.assign(bookings=revenue.bookings.apply('{:,d}'.format)) 75 | ... .assign(revenue_per_booking=revenue.revenue_per_booking.apply(fmt_currency)) 76 | ... .assign(revenue=lambda x: x.revenue.apply(fmt_currency)) 77 | ... .assign(growth=lambda x: x.growth.apply(fmt_currency)) 78 | ... .set_index('year') 79 | ... ) 80 | bookings revenue_per_booking revenue growth 81 | year 82 | 2019 1,000 $200 $200,000 83 | 2020 1,000 $220 $220,000 $20,000 84 | 2021 1,500 $220 $330,000 $110,000 85 | 2022 1,700 $225 $382,500 $52,500 86 | 87 | ``` 88 | 89 | Growth can be due to two factors: an increase in the number of bookings, or an increase in the revenue per booking. The icanexplain library can decompose the growth into these two factors. First, let's install the package: 90 | 91 | ```sh 92 | pip install icanexplain 93 | ``` 94 | 95 | Then, we can use the `SumExplainer` to decompose the growth: 96 | 97 | ```py 98 | >>> import icanexplain as ice 99 | >>> explainer = ice.SumExplainer( 100 | ... fact='revenue_per_booking', 101 | ... period='year', 102 | ... count='bookings' 103 | ... ) 104 | >>> explanation = explainer(revenue) 105 | >>> explanation.map(fmt_currency) 106 | inner mix 107 | year 108 | 2020 $20,000 $0 109 | 2021 $0 $110,000 110 | 2022 $7,500 $45,000 111 | 112 | ``` 113 | 114 | Here's how to interpret this explanation: 115 | 116 | - From 2019 to 2020, the revenue growth was entirely due to an increase in the revenue per booking. The number of bookings was exactly the same. Therefore, the $20,000 is entirely due to the inner effect (increase in revenue per booking). 117 | - From 2020 to 2021, the revenue growth was entirely due to an increase in the number of bookings. The revenue per booking was exactly the same. Therefore, the $110,000 is entirely due to the mix effect (increase in bookings). 118 | - From 2021 to 2022, there was a $52,500 revenue growth. The revenue per booking only went up by $5, so most of the increase is due to the higher number of bookings.
The inner effect is $7,500 while the mix effect is $45,000. 119 | 120 | Here's a visual representation of this last interpretation: 121 | 122 |

123 | example 124 |

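For the curious, the inner/mix figures from the quickstart can be re-derived by hand. The sketch below assumes the standard sum-decomposition convention — inner effect = previous period's bookings × change in revenue per booking, mix effect = change in bookings × current revenue per booking — which reproduces the `SumExplainer` output above; it is an illustration of the arithmetic, not necessarily the library's internal implementation:

```python
# Hand-check of the inner/mix decomposition from the quickstart.
# Assumed convention: inner = old_bookings * (new_rate - old_rate)
#                     mix   = (new_bookings - old_bookings) * new_rate
# so that inner + mix equals the year-over-year revenue growth exactly.
rows = [
    (2019, 1_000, 200),
    (2020, 1_000, 220),
    (2021, 1_500, 220),
    (2022, 1_700, 225),
]

for (year0, n0, r0), (year1, n1, r1) in zip(rows, rows[1:]):
    inner = n0 * (r1 - r0)          # effect of the rate changing
    mix = (n1 - n0) * r1            # effect of the volume changing
    growth = n1 * r1 - n0 * r0      # actual year-over-year growth
    assert inner + mix == growth    # the split is exact, by construction
    print(year1, inner, mix)
# 2020 20000 0
# 2021 0 110000
# 2022 7500 45000
```

The assertion is the whole point of the decomposition: the two effects always add up exactly to the observed growth, with no unexplained remainder.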
125 | 126 | ## Contributing 127 | 128 | Feel free to reach out to [max@carbonfact.com](mailto:max@carbonfact.com) if you want to know more and/or contribute 🤗 129 | 130 | Check out the [contribution guidelines](CONTRIBUTING.md) to get started. 131 | 132 | ## License 133 | 134 | icanexplain is free and open-source software licensed under the Apache License, Version 2.0. 135 | -------------------------------------------------------------------------------- /icanexplain/test_sum.py: -------------------------------------------------------------------------------- 1 | import random 2 | import pandas as pd 3 | 4 | 5 | def make_claims() -> pd.DataFrame: 6 | random.seed(42) 7 | 8 | # Function to generate a random cost based on the claim type and year 9 | def generate_claim_cost(claim_type, year): 10 | if claim_type == "Dentist": 11 | base_cost = 100 12 | elif claim_type == "Psychiatrist": 13 | base_cost = 150 14 | elif claim_type == "General Physician": 15 | base_cost = 80 16 | elif claim_type == "Physiotherapy": 17 | base_cost = 120 18 | else: 19 | base_cost = 50 20 | 21 | # Adjust cost based on year 22 | if year == 2021: 23 | base_cost *= 1.2 24 | elif year == 2023: 25 | base_cost *= 1.5 26 | 27 | # Add some random variation 28 | cost = random.uniform(base_cost - 20, base_cost + 20) 29 | return round(cost, 2) 30 | 31 | # Generating sample data 32 | claim_types = ["Dentist", "Psychiatrist", "General Physician", "Physiotherapy"] 33 | years = [2021, 2022, 2023] 34 | people = [ 35 | "John", 36 | "Jane", 37 | "Michael", 38 | "Emily", 39 | "William", 40 | "Emma", 41 | "Daniel", 42 | "Olivia", 43 | "Lucas", 44 | "Ava", 45 | ] 46 | 47 | data = [] 48 | for year in years: 49 | for person in people: 50 | num_claims = random.randint( 51 | 1, 5 52 | ) # Random number of claims per person per year 53 | for _ in range(num_claims): 54 | claim_type = random.choice(claim_types) 55 | cost = generate_claim_cost(claim_type, year) 56 | date = pd.to_datetime( 57 | f"{random.randint(1, 
12)}/{random.randint(1, 28)}/{year}", 58 | format="%m/%d/%Y", 59 | ) 60 | data.append([person, claim_type, date, year, cost]) 61 | 62 | # Create the DataFrame 63 | columns = ["person", "claim_type", "date", "year", "amount"] 64 | claims = pd.DataFrame(data, columns=columns) 65 | 66 | return claims 67 | 68 | 69 | def test_claims(): 70 | """ 71 | 72 | >>> import icanexplain as ice 73 | 74 | >>> claims = make_claims() 75 | >>> claims.head() 76 | person claim_type date year amount 77 | 0 John Dentist 2021-04-08 2021 129.66 78 | 1 Jane Dentist 2021-09-03 2021 127.07 79 | 2 Jane Physiotherapy 2021-02-07 2021 125.27 80 | 3 Michael Dentist 2021-12-21 2021 122.45 81 | 4 Michael Physiotherapy 2021-10-09 2021 132.82 82 | 83 | The goal is to explain the evolution of total claims amount over time. Let's take a look at the 84 | yearly evolution. 85 | 86 | >>> ( 87 | ... claims 88 | ... .groupby('year') 89 | ... .agg({'amount': 'sum'}) 90 | ... .assign(diff=lambda x: x.amount.diff()) 91 | ... .reset_index() 92 | ... ) 93 | year amount diff 94 | 0 2021 3814.54 NaN 95 | 1 2022 2890.29 -924.25 96 | 2 2023 4178.03 1287.74 97 | 98 | The theory is that the figures we find should add up to the same yearly total, however we 99 | explanation the metric. 100 | 101 | >>> explainer = ice.SumExplainer( 102 | ... fact='amount', 103 | ... period='year', 104 | ... group='claim_type' 105 | ... ) 106 | >>> explanation = explainer(claims) 107 | >>> explanation 108 | inner mix 109 | year claim_type 110 | 2022 Dentist -170.700000 -311.240000 111 | General Physician -95.053333 249.693333 112 | Physiotherapy -122.880000 -339.450000 113 | Psychiatrist -282.030000 147.410000 114 | 2023 Dentist 338.180000 480.330000 115 | General Physician 313.151429 -236.051429 116 | Physiotherapy 185.125000 524.575000 117 | Psychiatrist 544.140000 -861.710000 118 | 119 | Let's check that the sum of the inner and mix columns add up as expected. 120 | 121 | >>> ( 122 | ... explanation 123 | ... .groupby('year') 124 | ... 
.apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 125 | ... .rename('diff') 126 | ... .reset_index() 127 | ... ) 128 | year diff 129 | 0 2022 -924.25 130 | 1 2023 1287.74 131 | 132 | """ 133 | 134 | 135 | def test_claims_with_gaps(): 136 | """ 137 | 138 | In practice, dimension values don't always appear for each period of time. It's good to check 139 | that the implementation can handle such cases. 140 | 141 | >>> import icanexplain as ice 142 | 143 | >>> claims = make_claims() 144 | >>> claims = claims.drop(index=claims.query('year == 2021 and claim_type == "Dentist"').index) 145 | >>> claims = claims.drop(index=claims.query('year == 2022 and claim_type == "Physiotherapy"').index) 146 | 147 | >>> ( 148 | ... claims 149 | ... .groupby('year') 150 | ... .agg({'amount': 'sum'}) 151 | ... .assign(diff=lambda x: x.amount.diff()) 152 | ... .reset_index() 153 | ... ) 154 | year amount diff 155 | 0 2021 2710.12 NaN 156 | 1 2022 2550.84 -159.28 157 | 2 2023 4178.03 1627.19 158 | 159 | >>> explainer = ice.SumExplainer( 160 | ... fact='amount', 161 | ... period='year', 162 | ... group='claim_type' 163 | ... ) 164 | >>> explanation = explainer(claims) 165 | >>> explanation 166 | inner mix 167 | year claim_type 168 | 2022 Dentist 0.000000 622.480000 169 | General Physician -95.053333 249.693333 170 | Physiotherapy -801.780000 -0.000000 171 | Psychiatrist -282.030000 147.410000 172 | 2023 Dentist 338.180000 480.330000 173 | General Physician 313.151429 -236.051429 174 | Physiotherapy 0.000000 1049.150000 175 | Psychiatrist 544.140000 -861.710000 176 | 177 | >>> ( 178 | ... explanation 179 | ... .groupby('year') 180 | ... .apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 181 | ... .rename('diff') 182 | ... .reset_index() 183 | ... 
) 184 | year diff 185 | 0 2022 -159.28 186 | 1 2023 1627.19 187 | 188 | """ 189 | 190 | 191 | def test_agg_vs_samples(): 192 | """ 193 | 194 | We want to check that explaining with a sample-by-sample approach gives the same results as 195 | explaining with an aggregated table. 196 | 197 | >>> import icanexplain as ice 198 | 199 | >>> claims = make_claims() 200 | >>> ( 201 | ... claims 202 | ... .groupby('year') 203 | ... .agg({'amount': 'sum'}) 204 | ... .assign(diff=lambda x: x.amount.diff()) 205 | ... .reset_index() 206 | ... ) 207 | year amount diff 208 | 0 2021 3814.54 NaN 209 | 1 2022 2890.29 -924.25 210 | 2 2023 4178.03 1287.74 211 | 212 | Sample by sample. 213 | 214 | >>> explainer = ice.SumExplainer( 215 | ... fact='amount', 216 | ... period='year', 217 | ... group='claim_type' 218 | ... ) 219 | >>> explanation = explainer(claims) 220 | >>> explanation 221 | inner mix 222 | year claim_type 223 | 2022 Dentist -170.700000 -311.240000 224 | General Physician -95.053333 249.693333 225 | Physiotherapy -122.880000 -339.450000 226 | Psychiatrist -282.030000 147.410000 227 | 2023 Dentist 338.180000 480.330000 228 | General Physician 313.151429 -236.051429 229 | Physiotherapy 185.125000 524.575000 230 | Psychiatrist 544.140000 -861.710000 231 | 232 | >>> ( 233 | ... explanation 234 | ... .groupby('year') 235 | ... .apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 236 | ... .rename('diff') 237 | ... .reset_index() 238 | ... ) 239 | year diff 240 | 0 2022 -924.25 241 | 1 2023 1287.74 242 | 243 | Aggregate. 244 | 245 | >>> claims_agg = ( 246 | ... claims 247 | ... .groupby(['year', 'claim_type']) 248 | ... ['amount'].agg(['mean', 'count']) 249 | ... .reset_index() 250 | ... 
) 251 | >>> claims_agg 252 | year claim_type mean count 253 | 0 2021 Dentist 122.713333 9 254 | 1 2021 General Physician 99.073333 6 255 | 2 2021 Physiotherapy 133.630000 6 256 | 3 2021 Psychiatrist 187.700000 7 257 | 4 2022 Dentist 103.746667 6 258 | 5 2022 General Physician 83.231111 9 259 | 6 2022 Physiotherapy 113.150000 3 260 | 7 2022 Psychiatrist 147.410000 8 261 | 8 2023 Dentist 160.110000 9 262 | 9 2023 General Physician 118.025714 7 263 | 10 2023 Physiotherapy 174.858333 6 264 | 11 2023 Psychiatrist 215.427500 4 265 | 266 | >>> explainer = ice.SumExplainer( 267 | ... fact='mean', 268 | ... period='year', 269 | ... group='claim_type', 270 | ... count='count' 271 | ... ) 272 | >>> explanation = explainer(claims_agg) 273 | >>> explanation 274 | inner mix 275 | year claim_type 276 | 2022 Dentist -170.700000 -311.240000 277 | General Physician -95.053333 249.693333 278 | Physiotherapy -122.880000 -339.450000 279 | Psychiatrist -282.030000 147.410000 280 | 2023 Dentist 338.180000 480.330000 281 | General Physician 313.151429 -236.051429 282 | Physiotherapy 185.125000 524.575000 283 | Psychiatrist 544.140000 -861.710000 284 | 285 | >>> ( 286 | ... explanation 287 | ... .groupby('year') 288 | ... .apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 289 | ... .rename('diff') 290 | ... .reset_index() 291 | ... ) 292 | year diff 293 | 0 2022 -924.25 294 | 1 2023 1287.74 295 | 296 | """ 297 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/theme/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /icanexplain/test_mean.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import random 3 | import names # type: ignore[import] 4 | import pandas as pd 5 | 6 | 7 | def make_claims() -> pd.DataFrame: 8 | random.seed(42) 9 | 10 | # Function to generate a random cost based on the claim type and year 11 | def generate_claim_cost(claim_type, year): 12 | if claim_type == "Dentist": 13 | base_cost = 100 14 | elif claim_type == "Psychiatrist": 15 | base_cost = 150 16 | 17 | # Adjust cost based on year 18 | if year == 2021: 19 | base_cost *= 1.2 20 | elif year == 2023: 21 | base_cost *= 1.5 22 | 23 | # Add some random variation 24 | cost = random.uniform(base_cost - 20, base_cost + 20) 25 | return round(cost, 2) 26 | 27 | # Generating sample data 28 | claim_types = ["Dentist", "Psychiatrist"] 29 | years = [2021, 2022, 2023, 2024] 30 | people = ["John", "Jane", "Michael", "Emily", "William"] 31 | 32 | data = [] 33 | for year in years: 34 | new_people = ( 35 | [names.get_first_name() for _ in 
range(random.randint(1, 3))] 36 | if year > 2021 37 | else [] 38 | ) 39 | existing_people = [person for person in people if random.random() > 0.3] 40 | people_this_year = existing_people + new_people 41 | people.extend(new_people) 42 | 43 | for person in people_this_year: 44 | num_claims = random.randint( 45 | 1, 5 46 | ) # Random number of claims per existing customer per year 47 | for _ in range(num_claims): 48 | claim_type = random.choice(claim_types) 49 | cost = generate_claim_cost(claim_type, year) 50 | date = pd.to_datetime( 51 | f"{random.randint(1, 12)}/{random.randint(1, 28)}/{year}", 52 | format="%m/%d/%Y", 53 | ) 54 | data.append([person, claim_type, date, year, cost]) 55 | 56 | # Create the DataFrame 57 | columns = ["person", "claim_type", "date", "year", "amount"] 58 | claims = pd.DataFrame(data, columns=columns) 59 | 60 | # Indicate whether people are existing, new, or returning 61 | years_seen = collections.defaultdict(set) 62 | statuses = [] 63 | for claim in claims.to_dict(orient="records"): 64 | years_seen[claim["person"]].add(claim["year"]) 65 | if claim["year"] - 1 in years_seen[claim["person"]]: 66 | statuses.append("EXISTING") 67 | elif any(year < claim["year"] for year in years_seen[claim["person"]]): 68 | statuses.append("RETURNING") 69 | elif not { 70 | year for year in years_seen[claim["person"]] if year != claim["year"] 71 | }: 72 | statuses.append("NEW") 73 | 74 | claims["status"] = statuses 75 | 76 | return claims 77 | 78 | 79 | def test_claims(): 80 | """ 81 | 82 | >>> import icanexplain as ice 83 | 84 | >>> claims = make_claims() 85 | >>> claims.head() 86 | person claim_type date year amount status 87 | 0 John Dentist 2021-01-01 2021 123.62 NEW 88 | 1 John Dentist 2021-09-20 2021 108.75 NEW 89 | 2 John Dentist 2021-12-21 2021 122.45 NEW 90 | 3 John Psychiatrist 2021-10-09 2021 168.82 NEW 91 | 4 John Dentist 2021-03-23 2021 130.35 NEW 92 | 93 | The goal is to explain how the mean evolved over time. Let's take a look at it. 
94 | 95 | >>> ( 96 | ... claims 97 | ... .groupby('year') 98 | ... .agg({'amount': 'mean'}) 99 | ... .assign(diff=lambda x: x.amount.diff()) 100 | ... .reset_index() 101 | ... ) 102 | year amount diff 103 | 0 2021 145.808889 NaN 104 | 1 2022 112.676667 -33.132222 105 | 2 2023 173.043667 60.367000 106 | 3 2024 122.920625 -50.123042 107 | 108 | Here's the breakdown by claim type: 109 | 110 | >>> ( 111 | ... claims 112 | ... .groupby(['year', 'claim_type']) 113 | ... ['amount'].agg(['mean', 'count']) 114 | ... .reset_index() 115 | ... ) 116 | year claim_type mean count 117 | 0 2021 Dentist 122.87200 5 118 | 1 2021 Psychiatrist 174.48000 4 119 | 2 2022 Dentist 98.37500 4 120 | 3 2022 Psychiatrist 141.28000 2 121 | 4 2023 Dentist 148.36500 20 122 | 5 2023 Psychiatrist 222.40100 10 123 | 6 2024 Dentist 97.68250 8 124 | 7 2024 Psychiatrist 148.15875 8 125 | 126 | The theory is that no matter how we break down the metric, the figures we find should add up to the same 127 | yearly total. 128 | 129 | >>> explainer = ice.MeanExplainer( 130 | ... fact='amount', 131 | ... period='year', 132 | ... group='claim_type' 133 | ... ) 134 | >>> explanation = explainer(claims) 135 | >>> explanation 136 | inner mix 137 | year claim_type 138 | 2022 Dentist -16.331333 1.132815 139 | Psychiatrist -11.066667 -6.867037 140 | 2023 Dentist 33.326667 -0.000000 141 | Psychiatrist 27.040333 -0.000000 142 | 2024 Dentist -25.341250 -4.240729 143 | Psychiatrist -37.121125 16.580063 144 | 145 | Let's check that the inner and mix columns add up as expected. 146 | 147 | >>> ( 148 | ... explanation 149 | ... .groupby('year') 150 | ... .apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 151 | ... .rename('diff') 152 | ... .reset_index() 153 | ... 
) 154 | year diff 155 | 0 2022 -33.132222 156 | 1 2023 60.367000 157 | 2 2024 -50.123042 158 | 159 | """ 160 | 161 | 162 | def test_claims_with_gaps(): 163 | """ 164 | 165 | >>> import icanexplain as ice 166 | 167 | >>> claims = make_claims() 168 | >>> claims = claims.drop(index=claims.query('year == 2021 and claim_type == "Dentist"').index) 169 | >>> claims = claims.drop(index=claims.query('year == 2022 and claim_type == "Psychiatrist"').index) 170 | >>> claims = claims.drop(index=claims.query('year == 2023 and claim_type == "Psychiatrist"').index) 171 | 172 | >>> ( 173 | ... claims 174 | ... .groupby('year') 175 | ... .agg({'amount': 'mean'}) 176 | ... .assign(diff=lambda x: x.amount.diff()) 177 | ... .reset_index() 178 | ... ) 179 | year amount diff 180 | 0 2021 174.480000 NaN 181 | 1 2022 98.375000 -76.105000 182 | 2 2023 148.365000 49.990000 183 | 3 2024 122.920625 -25.444375 184 | 185 | >>> explainer = ice.MeanExplainer( 186 | ... fact='amount', 187 | ... period='year', 188 | ... group='claim_type' 189 | ... ) 190 | >>> explainer(claims) 191 | inner mix 192 | year claim_type 193 | 2022 Dentist 98.375000 -98.375000 194 | Psychiatrist -0.000000 -76.105000 195 | 2023 Dentist 49.990000 -0.000000 196 | Psychiatrist 0.000000 -0.000000 197 | 2024 Dentist -25.341250 -12.722187 198 | Psychiatrist 74.079375 -61.460313 199 | 200 | """ 201 | 202 | 203 | def test_clicks(): 204 | """ 205 | 206 | >>> import pandas as pd 207 | >>> traffic_agg = pd.DataFrame([ 208 | ... {'timestamp': '2018-01-01', 'dim': 'A', 'clicks': 150, 'impressions': 1000}, 209 | ... {'timestamp': '2018-01-01', 'dim': 'B', 'clicks': 150, 'impressions': 2000}, 210 | ... {'timestamp': '2018-02-01', 'dim': 'A', 'clicks': 200, 'impressions': 1000}, 211 | ... {'timestamp': '2018-02-01', 'dim': 'B', 'clicks': 300, 'impressions': 2000}, 212 | ... {'timestamp': '2019-01-01', 'dim': 'A', 'clicks': 120, 'impressions': 1100}, 213 | ... 
{'timestamp': '2019-01-01', 'dim': 'B', 'clicks': 200, 'impressions': 2150}, 214 | ... {'timestamp': '2019-02-01', 'dim': 'A', 'clicks': 242, 'impressions': 1100}, 215 | ... {'timestamp': '2019-02-01', 'dim': 'B', 'clicks': 323, 'impressions': 2150}, 216 | ... ]) 217 | >>> traffic_agg['timestamp'] = pd.to_datetime(traffic_agg['timestamp']) 218 | 219 | The figures are aggregated, which isn't the usual expected format. A first solution is to 220 | expand the data into individual samples. 221 | 222 | >>> import itertools 223 | >>> traffic = pd.DataFrame(itertools.chain(*[ 224 | ... [{'timestamp': r['timestamp'], 'dim': r['dim'], 'click': True} for _ in range(r['clicks'])] + 225 | ... [{'timestamp': r['timestamp'], 'dim': r['dim'], 'click': False} for _ in range(r['impressions'] - r['clicks'])] 226 | ... for r in traffic_agg.to_dict(orient='records') 227 | ... ])) 228 | >>> traffic.head() 229 | timestamp dim click 230 | 0 2018-01-01 A True 231 | 1 2018-01-01 A True 232 | 2 2018-01-01 A True 233 | 3 2018-01-01 A True 234 | 4 2018-01-01 A True 235 | 236 | >>> traffic = traffic.assign( 237 | ... year=traffic.timestamp.dt.year, 238 | ... month=traffic.timestamp.dt.month 239 | ... ) 240 | >>> traffic.groupby(['timestamp', 'dim'])['click'].agg(['sum', 'size']) 241 | sum size 242 | timestamp dim 243 | 2018-01-01 A 150 1000 244 | B 150 2000 245 | 2018-02-01 A 200 1000 246 | B 300 2000 247 | 2019-01-01 A 120 1100 248 | B 200 2150 249 | 2019-02-01 A 242 1100 250 | B 323 2150 251 | 252 | >>> import icanexplain as ice 253 | 254 | >>> explainer = ice.MeanExplainer( 255 | ... fact='click', 256 | ... period=['year', 'month'], 257 | ... group='dim', 258 | ... ) 259 | >>> explainer(traffic) 260 | inner mix 261 | year month dim 262 | 2019 1 A -0.013846 0.000264 263 | B 0.011923 0.000120 264 | 2 A 0.006769 0.000134 265 | B 0.000154 0.000122 266 | 267 | We can also make this work with the aggregate data. Here's how: 268 | 269 | >>> traffic_agg = traffic_agg.assign( 270 | ... 
click_rate=lambda x: x['clicks'] / x['impressions'], 271 | ... year=traffic_agg.timestamp.dt.year, 272 | ... month=traffic_agg.timestamp.dt.month 273 | ... ) 274 | >>> explainer = ice.MeanExplainer( 275 | ... fact='click_rate', 276 | ... count='impressions', 277 | ... period=['year', 'month'], 278 | ... group='dim', 279 | ... ) 280 | >>> explainer(traffic_agg) 281 | inner mix 282 | year month dim 283 | 2019 1 A -0.013846 0.000264 284 | B 0.011923 0.000120 285 | 2 A 0.006769 0.000134 286 | B 0.000154 0.000122 287 | 288 | """ 289 | 290 | 291 | def test_agg_vs_samples(): 292 | """ 293 | 294 | We want to check that explaining with a sample by sample approach gives the same results as 295 | explaining with an aggregated table. 296 | 297 | >>> import icanexplain as ice 298 | 299 | >>> claims = make_claims() 300 | >>> ( 301 | ... claims 302 | ... .groupby('year') 303 | ... .agg({'amount': 'mean'}) 304 | ... .assign(diff=lambda x: x.amount.diff()) 305 | ... .reset_index() 306 | ... ) 307 | year amount diff 308 | 0 2021 145.808889 NaN 309 | 1 2022 112.676667 -33.132222 310 | 2 2023 173.043667 60.367000 311 | 3 2024 122.920625 -50.123042 312 | 313 | Sample by sample. 314 | 315 | >>> explainer = ice.MeanExplainer( 316 | ... fact='amount', 317 | ... period='year', 318 | ... group='claim_type' 319 | ... ) 320 | >>> explanation = explainer(claims) 321 | >>> explanation 322 | inner mix 323 | year claim_type 324 | 2022 Dentist -16.331333 1.132815 325 | Psychiatrist -11.066667 -6.867037 326 | 2023 Dentist 33.326667 -0.000000 327 | Psychiatrist 27.040333 -0.000000 328 | 2024 Dentist -25.341250 -4.240729 329 | Psychiatrist -37.121125 16.580063 330 | 331 | >>> ( 332 | ... explanation 333 | ... .groupby('year') 334 | ... .apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 335 | ... .rename('diff') 336 | ... .reset_index() 337 | ... ) 338 | year diff 339 | 0 2022 -33.132222 340 | 1 2023 60.367000 341 | 2 2024 -50.123042 342 | 343 | Aggregate.
344 | 345 | >>> claims_agg = ( 346 | ... claims 347 | ... .groupby(['year', 'claim_type']) 348 | ... ['amount'].agg(['mean', 'count']) 349 | ... .reset_index() 350 | ... ) 351 | >>> claims_agg 352 | year claim_type mean count 353 | 0 2021 Dentist 122.87200 5 354 | 1 2021 Psychiatrist 174.48000 4 355 | 2 2022 Dentist 98.37500 4 356 | 3 2022 Psychiatrist 141.28000 2 357 | 4 2023 Dentist 148.36500 20 358 | 5 2023 Psychiatrist 222.40100 10 359 | 6 2024 Dentist 97.68250 8 360 | 7 2024 Psychiatrist 148.15875 8 361 | 362 | >>> explainer = ice.MeanExplainer( 363 | ... fact='mean', 364 | ... period='year', 365 | ... group='claim_type', 366 | ... count='count' 367 | ... ) 368 | >>> explanation = explainer(claims_agg) 369 | >>> explanation 370 | inner mix 371 | year claim_type 372 | 2022 Dentist -16.331333 1.132815 373 | Psychiatrist -11.066667 -6.867037 374 | 2023 Dentist 33.326667 -0.000000 375 | Psychiatrist 27.040333 -0.000000 376 | 2024 Dentist -25.341250 -4.240729 377 | Psychiatrist -37.121125 16.580063 378 | 379 | >>> ( 380 | ... explanation 381 | ... .groupby('year') 382 | ... .apply(lambda x: (x.inner + x.mix).sum(), include_groups=False) 383 | ... .rename('diff') 384 | ... .reset_index() 385 | ... ) 386 | year diff 387 | 0 2022 -33.132222 388 | 1 2023 60.367000 389 | 2 2024 -50.123042 390 | 391 | """ 392 | -------------------------------------------------------------------------------- /docs/theme/main.html: -------------------------------------------------------------------------------- 1 | {# 2 | Kilsbergen -- A clean MkDocs theme. 3 | Copyright 2019 Ruud van Asseldonk. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | A copy of the License has been included in the root of the repository. 8 | #} 9 | 10 | 11 | 12 | 17 | 18 | 19 | {% if page.title %}{{ page.title }} — {% endif %}{{ config.site_name }} 20 | 23 | 24 | 25 | 26 | 430 | 431 | 432 |
433 |
434 | 440 |
441 | {{ page.content }} 442 |
443 | {% if page.next_page or page.previous_page -%} 444 | 452 | {% endif -%} 453 |
454 | 487 |
488 | 489 | 490 | -------------------------------------------------------------------------------- /docs/examples/ibis.md: -------------------------------------------------------------------------------- 1 | # Different backend support with Ibis 🐦 2 | 3 | icanexplain is implemented with [Ibis](https://github.com/ibis-project/ibis). This means that it is framework agnostic, and can work with different backends. This example shows how to use it with [DuckDB](https://duckdb.org/). 4 | 5 | 6 | ```python 7 | import ibis 8 | import icanexplain as ice 9 | 10 | products_df = ice.datasets.load_product_footprints() 11 | con = ibis.connect("duckdb://example.ddb") 12 | con.create_table( 13 | "products", products_df, overwrite=True 14 | ) 15 | ``` 16 | 17 | 18 | 19 | 20 |
DatabaseTable: example.main.products
 21 |   year       int64
 22 |   category   string
 23 |   product_id string
 24 |   footprint  float64
 25 |   units      int64
 26 | 
27 | 28 | 29 | 30 | 31 | 32 | ```python 33 | con = ibis.connect("duckdb://example.ddb") 34 | con.list_tables() 35 | ``` 36 | 37 | 38 | 39 | 40 | ['products'] 41 | 42 | 43 | 44 | 45 | ```python 46 | ibis.options.interactive = True 47 | products = con.table("products") 48 | products.head() 49 | ``` 50 | 51 | 52 | 53 | 54 |
┏━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓
 55 | ┃ year   category  product_id  footprint  units ┃
 56 | ┡━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩
 57 | │ int64stringstringfloat64int64 │
 58 | ├───────┼──────────┼────────────┼───────────┼───────┤
 59 | │  2021DRESS   848be709  96.04803 │
 60 | │  2021DRESS   658f92b3  58.153367 │
 61 | │  2021DRESS   3a26f323  82.94240 │
 62 | │  2021DRESS   6221dca6  85.94432 │
 63 | │  2021DRESS   46864ac5  84.99816 │
 64 | └───────┴──────────┴────────────┴───────────┴───────┘
 65 | 
66 | 67 | 68 | 69 | 70 | 71 | ```python 72 | explainer = ice.SumExplainer( 73 | fact='footprint', 74 | count='units', 75 | group='category', 76 | period='year' 77 | ) 78 | explanation = explainer(products) 79 | explanation 80 | ``` 81 | 82 | 83 | 84 | 85 |
┏━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
 86 | ┃ year   category  inner          mix           ┃
 87 | ┡━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
 88 | │ int64stringfloat64float64       │
 89 | ├───────┼──────────┼───────────────┼───────────────┤
 90 | │  2022DRESS   3.931932e+06-1.881370e+07 │
 91 | │  2022JACKET  -1.510008e+07-9.238617e+07 │
 92 | │  2022PANTS   4.002506e+075.295190e+07 │
 93 | │  2022SHIRT   -1.484809e+06-5.791456e+06 │
 94 | │  2022SWEATER -2.676209e+071.181504e+07 │
 95 | │  2022TSHIRT  6.650940e+06-2.311836e+07 │
 96 | │  2023DRESS   -4.078094e+06-1.240339e+07 │
 97 | │  2023JACKET  -6.793317e+06-4.924036e+07 │
 98 | │  2023PANTS   -1.636299e+07-2.295608e+08 │
 99 | │  2023SHIRT   8.920908e+05-4.019144e+06 │
100 | │      │
101 | └───────┴──────────┴───────────────┴───────────────┘
102 | 
103 | 104 | 105 | 106 | 107 | 108 | ```python 109 | type(explanation) 110 | ``` 111 | 112 | 113 | 114 | 115 | ibis.expr.types.relations.Table 116 | 117 | 118 | 119 | 120 | ```python 121 | explanation.execute() 122 | ``` 123 | 124 | 125 | 126 | 127 |
128 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 |
yearcategoryinnermix
02022DRESS3.931932e+06-1.881370e+07
12022JACKET-1.510008e+07-9.238617e+07
22022PANTS4.002506e+075.295190e+07
32022SHIRT-1.484809e+06-5.791456e+06
42022SWEATER-2.676209e+071.181504e+07
52022TSHIRT6.650940e+06-2.311836e+07
62023DRESS-4.078094e+06-1.240339e+07
72023JACKET-6.793317e+06-4.924036e+07
82023PANTS-1.636299e+07-2.295608e+08
92023SHIRT8.920908e+05-4.019144e+06
102023SWEATER-5.701391e+06-1.130507e+08
112023TSHIRT-1.150391e+07-8.391323e+07
238 |
239 | 240 | 241 | 242 | 243 | ```python 244 | ibis.to_sql(explanation) 245 | ``` 246 | 247 | 248 | 249 | 250 | ```sql 251 | SELECT 252 | * 253 | FROM ( 254 | SELECT 255 | "t9"."year", 256 | "t9"."category", 257 | "t9"."count_lag" * ( 258 | "t9"."mean" - "t9"."mean_lag" 259 | ) AS "inner", 260 | ( 261 | "t9"."count" - "t9"."count_lag" 262 | ) * "t9"."mean" AS "mix" 263 | FROM ( 264 | SELECT 265 | "t8"."category", 266 | "t8"."year", 267 | "t8"."mean", 268 | "t8"."count", 269 | LAG("t8"."mean", 1) OVER (PARTITION BY "t8"."category" ORDER BY "t8"."year" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "mean_lag", 270 | LAG("t8"."count", 1) OVER (PARTITION BY "t8"."category" ORDER BY "t8"."year" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "count_lag" 271 | FROM ( 272 | SELECT 273 | "t7"."category", 274 | "t7"."year", 275 | COALESCE("t7"."mean", 0) AS "mean", 276 | COALESCE("t7"."count", 0) AS "count" 277 | FROM ( 278 | SELECT 279 | "t3"."category", 280 | "t4"."year", 281 | "t6"."mean", 282 | "t6"."count" 283 | FROM ( 284 | SELECT DISTINCT 285 | "t0"."category" 286 | FROM "products" AS "t0" 287 | ) AS "t3" 288 | CROSS JOIN ( 289 | SELECT DISTINCT 290 | "t0"."year" 291 | FROM "products" AS "t0" 292 | ) AS "t4" 293 | LEFT OUTER JOIN ( 294 | SELECT 295 | "t0"."category", 296 | "t0"."year", 297 | SUM("t0"."footprint" * "t0"."units") / SUM("t0"."units") AS "mean", 298 | SUM("t0"."units") AS "count" 299 | FROM "products" AS "t0" 300 | GROUP BY 301 | 1, 302 | 2 303 | ) AS "t6" 304 | ON "t3"."category" = "t6"."category" AND "t4"."year" = "t6"."year" 305 | ) AS "t7" 306 | ) AS "t8" 307 | ) AS "t9" 308 | ORDER BY 309 | "t9"."year" ASC, 310 | "t9"."category" ASC 311 | ) AS "t10" 312 | WHERE 313 | "t10"."year" IS NOT NULL 314 | AND "t10"."category" IS NOT NULL 315 | AND "t10"."inner" IS NOT NULL 316 | AND "t10"."mix" IS NOT NULL 317 | ``` 318 | 319 | 320 | -------------------------------------------------------------------------------- 
/icanexplain/__init__.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import functools 3 | import operator 4 | 5 | import ibis # type: ignore[import] 6 | 7 | from . import datasets 8 | 9 | __all__ = ["FunnelExplainer", "MeanExplainer", "SumExplainer", "datasets"] 10 | 11 | 12 | def cartesian_product(table, columns): 13 | return functools.reduce( 14 | lambda x, y: x.cross_join(y), [table[[d]].distinct() for d in columns] 15 | ) 16 | 17 | 18 | def is_pandas_dataframe(obj): 19 | try: 20 | import pandas as pd 21 | 22 | return isinstance(obj, pd.DataFrame) 23 | except ImportError: 24 | # pandas is not installed 25 | return False 26 | 27 | 28 | def is_polars_dataframe(obj): 29 | try: 30 | import polars as pl # type: ignore[import] 31 | 32 | return isinstance(obj, pl.DataFrame) 33 | except ImportError: 34 | # polars is not installed 35 | return False 36 | 37 | 38 | def coerce_table(method): 39 | @functools.wraps(method) 40 | def _impl(self, table): 41 | if is_pandas_dataframe(table): 42 | return method(self, ibis.memtable(table[self._necessary_columns])) 43 | if is_polars_dataframe(table): 44 | return method(self, ibis.memtable(table[self._necessary_columns])) 45 | return method(self, table) 46 | 47 | return _impl 48 | 49 | 50 | class Unpacker(abc.ABC): 51 | def __init__( 52 | self, 53 | fact: str, 54 | period: str | list[str], 55 | group: str | list[str] | None = None, 56 | count: str | None = None, 57 | ): 58 | self.fact = fact 59 | self.period = [period] if isinstance(period, str) else period 60 | self.group = [group] if isinstance(group, str) else (group if group else []) 61 | self.count = count 62 | 63 | @abc.abstractproperty 64 | def _necessary_columns(self): 65 | pass 66 | 67 | @abc.abstractmethod 68 | def _explanation(self, table: ibis.Table): 69 | pass 70 | 71 | @abc.abstractmethod 72 | def _format(self, explanation: ibis.Table): 73 | pass 74 | 75 | def __call__(self, table): 76 | explanation = 
self._explanation(table) 77 | explanation_fmt = self._format(explanation) 78 | if is_pandas_dataframe(table): 79 | return explanation_fmt.execute().set_index([*self.period, *self.group]) 80 | if is_polars_dataframe(table): 81 | return explanation_fmt.execute() 82 | return explanation_fmt 83 | 84 | def plot(self, table): 85 | import altair as alt # type: ignore[import] 86 | import pandas as pd 87 | 88 | explanation = self._explanation(table) 89 | if not isinstance(explanation, pd.DataFrame): 90 | explanation = explanation.execute() 91 | 92 | charts = [] 93 | total = pd.DataFrame({"label": [], "total": []}) 94 | 95 | for i, (period, period_explanation) in enumerate( 96 | explanation.sort_values(self.period).groupby(self.period) 97 | ): 98 | if i > 0: 99 | contributions = pd.concat( 100 | [ 101 | ( 102 | period_explanation[[*self.period, *self.group, "inner"]] 103 | .rename(columns={"inner": "impact"}) 104 | .assign(kind="inner") 105 | ), 106 | ( 107 | period_explanation[[*self.period, *self.group, "mix"]] 108 | .rename(columns={"mix": "impact"}) 109 | .assign(kind="mix") 110 | ), 111 | ] 112 | ) 113 | prev_total_value = total["total"].iloc[-1] 114 | contributions = contributions.sort_values("impact", ascending=False) 115 | contributions["end"] = ( 116 | prev_total_value + contributions["impact"].cumsum() 117 | ) 118 | contributions["start"] = ( 119 | contributions["end"].shift(1).fillna(prev_total_value) 120 | ) 121 | label_cols = [*self.period, *self.group, "kind"] 122 | contributions["label"] = contributions[label_cols].agg( 123 | lambda x: " • ".join(x.astype(str)), axis="columns" 124 | ) 125 | contributions["is_positive"] = contributions["impact"] > 0 126 | 127 | chart = ( 128 | alt.Chart(contributions) 129 | .mark_bar() 130 | .encode( 131 | y=alt.Y("label:O", sort=None, axis=alt.Axis(title=None)), 132 | x=alt.X("start:Q", axis=alt.Axis(title=self.fact)), 133 | x2="end:Q", 134 | color=alt.Color( 135 | "is_positive:N", 136 | scale=alt.Scale( 137 | domain=[True, 
False], range=["green", "red"] 138 | ), 139 | legend=None, 140 | ), 141 | tooltip=[*label_cols, "impact"], 142 | ) 143 | ) 144 | charts.append(chart) 145 | 146 | total = pd.DataFrame( 147 | { 148 | "label": [period], 149 | "total": ( 150 | [ 151 | ( 152 | period_explanation["count"] 153 | * period_explanation["ratio"] 154 | ).sum() 155 | / period_explanation["count"].sum() 156 | ] 157 | if isinstance(self, MeanExplainer) 158 | else [(period_explanation["total"]).sum()] 159 | ), 160 | } 161 | ) 162 | 163 | chart = ( 164 | alt.Chart(total) 165 | .mark_bar() 166 | .encode(x="total:Q", y=alt.X("label:O", sort=None), tooltip=["total"]) 167 | ) 168 | charts.append(chart) 169 | 170 | return alt.layer(*charts).interactive() 171 | 172 | 173 | class SumExplainer(Unpacker): 174 | @property 175 | def _necessary_columns(self): 176 | return [ 177 | self.fact, 178 | *self.period, 179 | *self.group, 180 | *([self.count] if self.count else []), 181 | ] 182 | 183 | @coerce_table 184 | def _explanation(self, table): 185 | explanation = table.aggregate( 186 | by=[*self.group, *self.period], 187 | mean=( 188 | (table[self.fact] * table[self.count]).sum() / table[self.count].sum() 189 | if self.count 190 | else table[self.fact].mean() 191 | ), 192 | count=(table[self.count].sum() if self.count else table[self.fact].count()), 193 | ) 194 | 195 | # Artificially add rows with 0s when there are no data points for a given group at a given 196 | # period. For instance, there might not be any dentist claims in 2022, but if there are some in 197 | # 2021, then we want to have a 0 recorded so that we can measure the difference. 198 | cart = cartesian_product(table, [*self.group, *self.period]) 199 | explanation = cart.left_join(explanation, cart.columns)[explanation.columns] 200 | explanation = explanation.mutate( 201 | mean=explanation["mean"].fill_null(0), 202 | count=explanation["count"].fill_null(0), 203 | ) 204 | 205 | # Calculate lag values 206 | # Usually one or more group keys are provided.
But if they aren't, there is no need to 207 | # aggregate the data by the group keys. In this case, we can just calculate the lag values 208 | # along the period column. 209 | # TODO: it would be nice to have a more elegant way to handle this. 210 | if lag_key := [*self.group, *self.period[1:]]: 211 | explanation = ( 212 | explanation.group_by(lag_key) 213 | .order_by(self.period) 214 | .mutate( 215 | mean_lag=explanation["mean"].lag(1), 216 | count_lag=explanation["count"].lag(1), 217 | ) 218 | ) 219 | else: 220 | explanation = explanation.order_by(self.period).mutate( 221 | mean_lag=explanation["mean"].lag(1), 222 | count_lag=explanation["count"].lag(1), 223 | ) 224 | 225 | # Calculate the inner and mix effects 226 | return explanation.mutate( 227 | total=explanation["count"] * explanation["mean"], 228 | inner=explanation["count_lag"] 229 | * (explanation["mean"] - explanation["mean_lag"]), 230 | mix=(explanation["count"] - explanation["count_lag"]) * explanation["mean"], 231 | ) 232 | 233 | def _format(self, explanation): 234 | return ( 235 | explanation.order_by([*self.period, *self.group]) 236 | .select([*self.period, *self.group, "inner", "mix"]) 237 | .drop_null(how="any") 238 | ) 239 | 240 | 241 | class MeanExplainer(Unpacker): 242 | @property 243 | def _necessary_columns(self): 244 | return [ 245 | self.fact, 246 | *self.period, 247 | *self.group, 248 | *([self.count] if self.count else []), 249 | ] 250 | 251 | @coerce_table 252 | def _explanation(self, table): 253 | explanation = table.aggregate( 254 | by=[*self.group, *self.period], 255 | sum=( 256 | (table[self.fact] * table[self.count]).sum() 257 | if self.count 258 | else table[self.fact].sum() 259 | ), 260 | count=(table[self.count].sum() if self.count else table[self.fact].count()), 261 | ) 262 | 263 | # Artificially add rows with 0s when there are no data points for a given group at a given 264 | # period. 
For instance, there might not be any dentist claims in 2022, but if there are some in 265 | # 2021, then we want to have a 0 recorded so that we can measure the difference. 266 | cart = cartesian_product(table, [*self.group, *self.period]) 267 | explanation = cart.left_join(explanation, cart.columns)[explanation.columns] 268 | explanation = explanation.mutate( 269 | sum=explanation["sum"].fill_null(0), count=explanation["count"].fill_null(0) 270 | ) 271 | explanation = explanation.mutate( 272 | # https://ibis-project.org/reference/expression-generic#ibis.expr.types.generic.Value.nullif 273 | ratio=(explanation["sum"] / explanation["count"].nullif(0)).fill_null(0) 274 | ) 275 | explanation = explanation.mutate(ratio=explanation["ratio"]) 276 | 277 | yearly_figures = explanation.group_by(self.period).aggregate( 278 | sum_sum=explanation["sum"].sum(), count_sum=explanation["count"].sum() 279 | ) 280 | explanation = explanation.left_join(yearly_figures, self.period) 281 | explanation = explanation.mutate( 282 | share=explanation["count"] / explanation["count_sum"], 283 | global_ratio=explanation["sum_sum"] / explanation["count_sum"], 284 | ) 285 | 286 | # Calculate lag values 287 | # 🐲 It's a bit tricky: when more than one period column is provided, it 288 | # affects the lag calculation. For instance, if we have year and month, then we want to 289 | # calculate the lag for the same month in the previous year. This only applies to the case 290 | # where the period is a list of columns.
291 | if by := [*self.group, *self.period[1:]]: 292 | explanation = ( 293 | explanation.group_by(by) 294 | .order_by(self.period) 295 | .mutate( 296 | ratio_lag=explanation["ratio"].lag(1), 297 | share_lag=explanation["share"].lag(1), 298 | global_ratio_lag=explanation["global_ratio"].lag(1), 299 | ) 300 | ) 301 | else: 302 | explanation = explanation.order_by(self.period).mutate( 303 | ratio_lag=explanation["ratio"].lag(1), 304 | share_lag=explanation["share"].lag(1), 305 | global_ratio_lag=explanation["global_ratio"].lag(1), 306 | ) 307 | 308 | # Calculate the inner and mix effects 309 | return explanation.mutate( 310 | inner=explanation["share"] 311 | * (explanation["ratio"] - explanation["ratio_lag"]), 312 | mix=(explanation["share"] - explanation["share_lag"]) 313 | * (explanation["ratio_lag"] - explanation["global_ratio"]), 314 | ) 315 | 316 | def _format(self, explanation): 317 | return ( 318 | explanation.order_by([*self.period, *self.group]) 319 | .select([*self.period, *self.group, "inner", "mix"]) 320 | .drop_null(how="any") 321 | ) 322 | 323 | 324 | class FunnelExplainer(Unpacker): 325 | def __init__( 326 | self, funnel: list[str], period: str | list[str], group: str | list[str] 327 | ): 328 | self.funnel = funnel 329 | self.period = [period] if isinstance(period, str) else period 330 | self.group = [group] if isinstance(group, str) else group 331 | 332 | @property 333 | def _necessary_columns(self): 334 | return [*self.funnel, *self.period, *self.group] 335 | 336 | @coerce_table 337 | def _explanation(self, table): 338 | # Sum events by period and dimensions 339 | explanation = table.group_by([*self.period, *self.group]).aggregate( 340 | **{step: table[step].sum() for step in self.funnel} 341 | ) 342 | ratios = { 343 | (f"{num}_over_{den}" if den else num): (num, den) 344 | for den, num in [(None, self.funnel[0]), *zip(self.funnel, self.funnel[1:])] 345 | } 346 | ratio_names = list(ratios) 347 | 348 | explanation = explanation.mutate( 349 | **{ 350 | 
ratio_name: explanation[num] / explanation[den] 351 | for ratio_name, (num, den) in ratios.items() 352 | if den 353 | } 354 | ) 355 | 356 | explanation = ( 357 | explanation.group_by([*self.group, *self.period[1:]]) 358 | .order_by(self.period) 359 | .mutate( 360 | **{ 361 | f"{ratio_name}_lag": explanation[ratio_name].lag(1) 362 | for ratio_name in ratios 363 | } 364 | ) 365 | ) 366 | 367 | explanation = explanation.mutate( 368 | **{ 369 | f"{ratio_name}_contribution": functools.reduce( 370 | operator.mul, 371 | [ 372 | *[explanation[ratio_name] for ratio_name in ratio_names[:i]], 373 | explanation[ratio_names[i]] 374 | - explanation[f"{ratio_names[i]}_lag"], 375 | *[ 376 | explanation[f"{ratio_name}_lag"] 377 | for ratio_name in ratio_names[i + 1 :] 378 | ], 379 | ], 380 | ) 381 | for i, ratio_name in enumerate(ratio_names) 382 | } 383 | ) 384 | 385 | return explanation 386 | 387 | def _format(self, explanation): 388 | return ( 389 | explanation.order_by([*self.period, *self.group]) 390 | .select( 391 | [ 392 | *self.period, 393 | *self.group, 394 | *[ 395 | col 396 | for col in explanation.schema() 397 | if col.endswith("_contribution") 398 | ], 399 | ] 400 | ) 401 | .drop_null(how="any") 402 | ) 403 | -------------------------------------------------------------------------------- /docs/examples/simple-revenue-funnel.md: -------------------------------------------------------------------------------- 1 | # Simple revenue funnel 🛒 2 | 3 | We look at a toy website funnel in this example. Imagine a fictitious website that sells stuff. Users go to the website, are presented with items, can add them to their cart, and then can buy them. 
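A funnel like this obeys a simple multiplicative identity, which is what the decomposition later in this example exploits: revenue equals the number of impressions multiplied by the click rate, the conversion rate, and the average spend per conversion. As a quick sanity check, here is that identity verified on the January 2018 figures for group A (a standalone sketch, not part of the library):

```python
# Funnel identity: revenue = impressions
#                            * (clicks / impressions)
#                            * (conversions / clicks)
#                            * (revenue / conversions)
impressions, clicks, conversions, revenue = 1000, 150, 120, 8600.0

click_rate = clicks / impressions       # 0.15
conversion_rate = conversions / clicks  # 0.8
average_spend = revenue / conversions   # dollars per conversion

# The intermediate terms telescope, so the product recovers total revenue.
reconstructed = impressions * click_rate * conversion_rate * average_spend
assert abs(reconstructed - revenue) < 1e-6
```

Because this identity holds in every period, attributing a change in revenue between two periods to each factor amounts to varying one factor at a time while holding the others fixed — which is what `FunnelExplainer` automates.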
4 | 5 | 6 | ```python 7 | import pandas as pd 8 | import locale 9 | 10 | locale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8') 11 | def fmt_currency(x): 12 | return locale.currency(x, grouping=True) 13 | 14 | traffic = pd.DataFrame({ 15 | 'date': ['2018-01-01', '2018-01-01', '2018-01-01', '2019-01-01', '2019-01-01', '2019-01-01', '2018-02-01', '2018-02-01', '2018-02-01', '2019-02-01', '2019-02-01', '2019-02-01'], 16 | 'group': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'], 17 | 'impressions': [1000, 2000, 2500, 1000, 2150, 2000, 50, 2000, 2500, 2500, 2150, 2000], 18 | 'clicks': [150, 150, 250, 120, 200, 400, 20, 300, 250, 1000, 323, 320], 19 | 'conversions': [120, 150, 125, 160, 145, 166, 10, 150, 125, 500, 145, 166], 20 | 'revenue': ['$8,600', '$9,400', '$10,750', '$9,055', '$8,739', '$10,147', '$500', '$11,400', '$8,750', '$50,000', '$10,739', '$12,147'], 21 | }) 22 | traffic['date'] = pd.to_datetime(traffic['date']) 23 | traffic['revenue'] = traffic['revenue'].str.replace('$', '', regex=False).str.replace(',', '', regex=False).astype(float) 24 | traffic.style.format({'revenue': fmt_currency, 'date': lambda x: x.strftime('%Y-%m-%d')}, na_rep='N/A') 25 | ``` 26 | 27 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 |
 dategroupimpressionsclicksconversionsrevenue
02018-01-01A1000150120$8,600.00
12018-01-01B2000150150$9,400.00
22018-01-01C2500250125$10,750.00
32019-01-01A1000120160$9,055.00
42019-01-01B2150200145$8,739.00
52019-01-01C2000400166$10,147.00
62018-02-01A502010$500.00
72018-02-01B2000300150$11,400.00
82018-02-01C2500250125$8,750.00
92019-02-01A25001000500$50,000.00
102019-02-01B2150323145$10,739.00
112019-02-01C2000320166$12,147.00
155 | 156 | 157 | 158 | 159 | The users are bucketed into 3 groups: A, B, C. We've also bucketed impressions/clicks/conversions/revenue figures by month of the year. 160 | 161 | We're interested in understanding how the metrics evolve over time. The basic method is to calculate each metric separately. To keep things simple, we can do this for each year. 162 | 163 | 164 | ```python 165 | pd.DataFrame({ 166 | 'impressions': ( 167 | traffic 168 | .assign(year=traffic.date.dt.year) 169 | .groupby('year') 170 | .impressions.sum() 171 | ), 172 | 'click_rate': ( 173 | traffic 174 | .assign(year=traffic.date.dt.year) 175 | .groupby('year') 176 | .apply(lambda x: x.clicks.sum() / x.impressions.sum(), include_groups=False) 177 | ), 178 | 'conversion_rate': ( 179 | traffic 180 | .assign(year=traffic.date.dt.year) 181 | .groupby('year') 182 | .apply(lambda x: x.conversions.sum() / x.clicks.sum(), include_groups=False) 183 | ), 184 | 'average_spend': ( 185 | traffic 186 | .assign(year=traffic.date.dt.year) 187 | .groupby('year') 188 | .apply(lambda x: x.revenue.sum() / x.conversions.sum(), include_groups=False) 189 | ), 190 | 'revenue': ( 191 | traffic 192 | .assign(year=traffic.date.dt.year) 193 | .groupby('year') 194 | .revenue.sum() 195 | ) 196 | }).style.format({'average_spend': fmt_currency, 'revenue': fmt_currency}, na_rep='') 197 | ``` 198 | 199 | 200 | 201 | 202 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 |
 impressionsclick_rateconversion_rateaverage_spendrevenue
year     
2018100500.1114430.607143$72.65$49,400.00
2019118000.2002540.542531$78.65$100,827.00
242 | 243 | 244 | 245 | 246 | In and of itself, this is already quite interesting. However, what we really want to know is how the change of each metric contributes to the change in revenue. This is where icanexplain comes in. 247 | 248 | 249 | ```python 250 | import icanexplain as ice 251 | 252 | explainer = ice.FunnelExplainer( 253 | funnel=['impressions', 'clicks', 'conversions', 'revenue'], 254 | period='year', 255 | group=['month', 'group'] 256 | ) 257 | traffic = traffic.assign( 258 | month=traffic.date.dt.month, 259 | year=traffic.date.dt.year 260 | ) 261 | explanation = explainer(traffic) 262 | explanation.style.format(fmt_currency).set_properties(**{'text-align': 'right'}) 263 | ``` 264 | 265 | 266 | 267 | 268 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 |
   impressions_contributionclicks_over_impressions_contributionconversions_over_clicks_contributionrevenue_over_conversions_contribution
yearmonthgroup    
20191A$0.00-$1,720.00$4,586.67-$2,411.67
B$705.00$2,428.33-$3,446.67-$347.67
C-$2,150.00$8,600.00-$2,924.00-$4,129.00
2A$24,500.00$0.00$0.00$25,000.00
B$855.00$19.00-$1,254.00-$281.00
C-$1,750.00$4,200.00$420.00$527.00
342 | 343 | 344 | 345 | 346 | This is powerful, because it allows us to understand the drivers of revenue growth. For example, between January 2018 and January 2019, revenue went up by $8,600 due to an increase in clicks for group C. This is more insightful than just saying that their click rate went up. 347 | 348 | One thing to keep in mind is that contributions sum up to the overall difference between two periods. This means that it's easy to unit test that the contributions are correct: 349 | 350 | 351 | ```python 352 | ( 353 | explanation 354 | .groupby('year').sum().sum(axis=1) 355 | .to_frame('sum') 356 | .style.format(fmt_currency) 357 | ) 358 | ``` 359 | 360 | 361 | 362 | 363 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 |
 sum
year 
2019$51,427.00
383 | 384 | 385 | 386 | 387 | Of course, it would be more interesting to apply this methodology to some real data. One example is the [Google Analytics dataset sample](https://developers.google.com/analytics/bigquery/web-ecommerce-demo-dataset), which is publicly available in BigQuery. 388 | -------------------------------------------------------------------------------- /docs/examples/fashion-brand-co2e.md: -------------------------------------------------------------------------------- 1 | # Fashion brand CO2e emissions 👟 2 | 3 | Fashion brands increasingly have to be aware of, and report on, their environmental footprint. 4 | 5 | The following dataset comes from a real fashion brand, and has been anonymized. Each row represents a product manufactured in a given year. 6 | 7 | 8 | ```python 9 | import icanexplain as ice 10 | 11 | def fmt_CO2e(kg): 12 | if abs(kg) < 1e3: 13 | return f'{kg:,.2f}kgCO2e' 14 | return f'{kg / 1e6:,.1f}ktCO2e' 15 | 16 | products = ice.datasets.load_product_footprints() 17 | products.sample(5).style.format({'footprint': fmt_CO2e, 'units': '{:,d}'}) 18 | ``` 19 | 20 | 21 | 22 | 23 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 |
 yearcategoryproduct_idfootprintunits
905122022TSHIRTcea264427.62kgCO2e1,486
460752022JACKETd17ec41538.43kgCO2e2,254
518492022PANTSd5531c9b41.55kgCO2e8
128182021PANTS335f31e313.53kgCO2e4
648702022PANTSe5562fe829.16kgCO2e576
79 | 80 | 81 | 82 | 83 | The `footprint` column indicates the product's carbon footprint in kgCO2e. The `units` column corresponds to the number of units produced. 84 | 85 | Companies usually report their emissions on a yearly basis. A useful yearly summary is the average footprint per unit: multiply each product's footprint by its number of units, sum the results, and divide by the total number of units produced. 86 | 87 | 88 | ```python 89 | ( 90 | products 91 | .groupby('year') 92 | .apply(lambda g: (g['footprint'] * g['units']).sum() / g['units'].sum(), include_groups=False) 93 | .to_frame('average') 94 | .assign(diff=lambda x: x.average.diff()) 95 | .style.format(fmt_CO2e, na_rep='') 96 | ) 97 | ``` 98 | 99 | 100 | 101 | 102 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 |
 averagediff
year  
202121.95kgCO2e
202221.71kgCO2e-0.24kgCO2e
202322.74kgCO2e1.03kgCO2e
135 | 136 | 137 | 138 | 139 | The average footprint went down between 2021 and 2022. It then went back up in 2023. Of course, we want to understand why. When they see this, fashion brands all ask the same question: why? 140 | 141 | The overall average footprint can change for two reasons: 142 | 143 | 1. The average footprint per product category evolved. 144 | 2. The mix of product categories evolved. 145 | 146 | The second reason is called the *mix effect*. For instance, let's say t-shirts have a lower footprint than jackets. If the share of jackets produced in 2023 is higher than in 2022, the average footprint will go up. 147 | 148 | The jackets in 2023 aren't necessarily the same as those of 2022. They could be more sustainable, and have a lower footprint. This is the tricky part: we need to disentangle the mix effect from the evolution of the footprint of each product category. That is the value proposition of this package. 149 | 150 | 151 | ```python 152 | explainer = ice.MeanExplainer( 153 | fact='footprint', 154 | count='units', 155 | period='year', 156 | group='category', 157 | ) 158 | explanation = explainer(products) 159 | explanation.style.format({'inner': fmt_CO2e, 'mix': fmt_CO2e}, na_rep='') 160 | ``` 161 | 162 | 163 | 164 | 165 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 |
  innermix
yearcategory  
2022DRESS0.05kgCO2e-0.14kgCO2e
JACKET-0.17kgCO2e-0.69kgCO2e
PANTS0.61kgCO2e0.20kgCO2e
SHIRT-0.02kgCO2e0.00kgCO2e
SWEATER-0.39kgCO2e-0.09kgCO2e
TSHIRT0.08kgCO2e0.30kgCO2e
2023DRESS-0.08kgCO2e0.51kgCO2e
JACKET-0.13kgCO2e0.97kgCO2e
PANTS-0.22kgCO2e-0.09kgCO2e
SHIRT0.02kgCO2e-0.03kgCO2e
SWEATER-0.06kgCO2e0.36kgCO2e
TSHIRT-0.16kgCO2e-0.06kgCO2e
247 | 248 | 249 | 250 | 251 | Here's the meaning of each column: 252 | 253 | - `inner` is the contribution of the change in each category's average footprint per unit. A negative inner value means the category's footprint per unit went down, which pulled the overall average down. For instance, the inner contributions are negative or near zero for every category in 2023: each category improved, even though the overall average went up. 254 | - `mix` is the contribution of the change in each category's share of the units produced. A negative mix value means production shifted away from that category in a way that pulled the overall average down. 255 | 256 | A convenient way to read these values is to use a waterfall chart. 257 | 258 | 259 | ```python 260 | explainer.plot(products) 261 | ``` 262 | 263 | 264 | 265 | 266 | 267 | 278 |
279 | 332 | 333 | 334 | 335 | This is better than reporting the average footprint and the number of units produced separately. It's more informative to quantify how each effect contributed to the change in the average footprint. The waterfall confirms that the 2022 decrease is mostly a mix effect: production shifted towards lower-footprint categories. In 2023, the mix shifted back towards higher-footprint categories, which more than offset the fact that the average footprint went down within each category. Importantly, each one of these effects is calculated, and not just assumed. 336 | 337 | It's natural to want to deepen the analysis. For instance: 338 | 339 | 1. Why is there a significant inner contribution for pants in 2022? Is it because the materials are less sustainable? Or because the pants got heavier? 340 | 2. The 2023 increase is mainly a mix effect. Can it be broken down into marketing segments? For instance, is it mainly driven by online or in-person sales? How does it break down by country? 341 | 342 | These questions hint at the interactive aspect of this kind of analysis. Once you break down a metric's evolution along a dimension, the next steps are to break down the metric (question 1) and/or include another dimension (question 2). 343 | 344 |
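The guarantee that inner and mix contributions sum to the change in the overall average can be checked by hand. The sketch below is library-free and uses made-up numbers; the attribution convention shown (current-period shares for the inner effect, baseline averages for the mix effect) is one standard choice and may differ in detail from what icanexplain implements.

```python
# Sketch of a mean decomposition into "inner" and "mix" effects.
# The per-category data is hypothetical: (units produced, average
# footprint per unit in kgCO2e).

year_0 = {'TSHIRT': (1000, 5.0), 'JACKET': (400, 40.0)}
year_1 = {'TSHIRT': (800, 4.5), 'JACKET': (600, 38.0)}

def overall_mean(data):
    total_units = sum(n for n, _ in data.values())
    return sum(n * mu for n, mu in data.values()) / total_units

def decompose(before, after):
    n0 = sum(n for n, _ in before.values())
    n1 = sum(n for n, _ in after.values())
    effects = {}
    for cat in before:
        w0, mu0 = before[cat][0] / n0, before[cat][1]
        w1, mu1 = after[cat][0] / n1, after[cat][1]
        inner = w1 * (mu1 - mu0)  # the category's own average changed
        mix = (w1 - w0) * mu0     # the category's share of units changed
        effects[cat] = (inner, mix)
    return effects

effects = decompose(year_0, year_1)
diff = overall_mean(year_1) - overall_mean(year_0)

# The contributions telescope: summing inner + mix over all categories
# reproduces the change in the overall average exactly.
assert abs(sum(i + m for i, m in effects.values()) - diff) < 1e-9
```

Because the two terms telescope when summed over categories, the identity holds for any data, which is exactly what makes this kind of decomposition easy to unit test.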
345 | -------------------------------------------------------------------------------- /docs/examples/simple-revenue-funnel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Simple revenue funnel 🛒" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "We look at a toy website funnel in this example. Imagine a fictitious website that sells stuff. Users go to the website, are presented with items, can add them to their cart, and then can buy them." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "execution": { 22 | "iopub.execute_input": "2024-09-25T08:40:22.306476Z", 23 | "iopub.status.busy": "2024-09-25T08:40:22.305631Z", 24 | "iopub.status.idle": "2024-09-25T08:40:22.376497Z", 25 | "shell.execute_reply": "2024-09-25T08:40:22.376158Z" 26 | } 27 | }, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | 
" \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | "
 dategroupimpressionsclicksconversionsrevenue
02018-01-01A1000150120$8,600.00
12018-01-01B2000150150$9,400.00
22018-01-01C2500250125$10,750.00
32019-01-01A1000120160$9,055.00
42019-01-01B2150200145$8,739.00
52019-01-01C2000400166$10,147.00
62018-02-01A502010$500.00
72018-02-01B2000300150$11,400.00
82018-02-01C2500250125$8,750.00
92019-02-01A25001000500$50,000.00
102019-02-01B2150323145$10,739.00
112019-02-01C2000320166$12,147.00
\n" 157 | ], 158 | "text/plain": [ 159 | "" 160 | ] 161 | }, 162 | "execution_count": 1, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "import pandas as pd\n", 169 | "import locale\n", 170 | "\n", 171 | "locale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8')\n", 172 | "def fmt_currency(x):\n", 173 | " return locale.currency(x, grouping=True)\n", 174 | "\n", 175 | "traffic = pd.DataFrame({\n", 176 | " 'date': ['2018-01-01', '2018-01-01', '2018-01-01', '2019-01-01', '2019-01-01', '2019-01-01', '2018-02-01', '2018-02-01', '2018-02-01', '2019-02-01', '2019-02-01', '2019-02-01'],\n", 177 | " 'group': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],\n", 178 | " 'impressions': [1000, 2000, 2500, 1000, 2150, 2000, 50, 2000, 2500, 2500, 2150, 2000],\n", 179 | " 'clicks': [150, 150, 250, 120, 200, 400, 20, 300, 250, 1000, 323, 320],\n", 180 | " 'conversions': [120, 150, 125, 160, 145, 166, 10, 150, 125, 500, 145, 166],\n", 181 | " 'revenue': ['$8,600', '$9,400', '$10,750', '$9,055', '$8,739', '$10,147', '$500', '$11,400', '$8,750', '$50,000', '$10,739', '$12,147'],\n", 182 | "})\n", 183 | "traffic['date'] = pd.to_datetime(traffic['date'])\n", 184 | "traffic['revenue'] = traffic['revenue'].str.replace('$', '', regex=False).str.replace(',', '', regex=False).astype(float)\n", 185 | "traffic.style.format({'revenue': fmt_currency, 'date': lambda x: x.strftime('%Y-%m-%d')}, na_rep='N/A')" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "The users are bucketed into 3 groups: A, B, C. We've also bucketed impressions/clicks/conversions/revenue figures by month of the year.\n", 193 | "\n", 194 | "We're interested in understanding how the metrics evolve over time. The basic method is to calculate each metric separately. To keep things simple, we can do this for each year." 
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 2, 200 | "metadata": { 201 | "execution": { 202 | "iopub.execute_input": "2024-09-25T08:40:22.378509Z", 203 | "iopub.status.busy": "2024-09-25T08:40:22.378303Z", 204 | "iopub.status.idle": "2024-09-25T08:40:22.398464Z", 205 | "shell.execute_reply": "2024-09-25T08:40:22.398209Z" 206 | } 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "\n", 214 | "\n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | "
 impressionsclick_rateconversion_rateaverage_spendrevenue
year     
2018100500.1114430.607143$72.65$49,400.00
2019118000.2002540.542531$78.65$100,827.00
\n" 252 | ], 253 | "text/plain": [ 254 | "" 255 | ] 256 | }, 257 | "execution_count": 2, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "pd.DataFrame({\n", 264 | " 'impressions': (\n", 265 | " traffic\n", 266 | " .assign(year=traffic.date.dt.year)\n", 267 | " .groupby('year')\n", 268 | " .impressions.sum()\n", 269 | " ),\n", 270 | " 'click_rate': (\n", 271 | " traffic\n", 272 | " .assign(year=traffic.date.dt.year)\n", 273 | " .groupby('year')\n", 274 | " .apply(lambda x: x.clicks.sum() / x.impressions.sum(), include_groups=False)\n", 275 | " ),\n", 276 | " 'conversion_rate': (\n", 277 | " traffic\n", 278 | " .assign(year=traffic.date.dt.year)\n", 279 | " .groupby('year')\n", 280 | " .apply(lambda x: x.conversions.sum() / x.clicks.sum(), include_groups=False)\n", 281 | " ),\n", 282 | " 'average_spend': (\n", 283 | " traffic\n", 284 | " .assign(year=traffic.date.dt.year)\n", 285 | " .groupby('year')\n", 286 | " .apply(lambda x: x.revenue.sum() / x.conversions.sum(), include_groups=False)\n", 287 | " ),\n", 288 | " 'revenue': (\n", 289 | " traffic\n", 290 | " .assign(year=traffic.date.dt.year)\n", 291 | " .groupby('year')\n", 292 | " .revenue.sum()\n", 293 | " )\n", 294 | "}).style.format({'average_spend': fmt_currency, 'revenue': fmt_currency}, na_rep='')" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "In and of itself, this is already quite interesting. However, what we really want to know is how the change of each metric contributes to the change in revenue. This is where icanexplain comes in." 
302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 3, 307 | "metadata": { 308 | "execution": { 309 | "iopub.execute_input": "2024-09-25T08:40:22.399934Z", 310 | "iopub.status.busy": "2024-09-25T08:40:22.399828Z", 311 | "iopub.status.idle": "2024-09-25T08:40:22.728790Z", 312 | "shell.execute_reply": "2024-09-25T08:40:22.728507Z" 313 | } 314 | }, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/html": [ 319 | "\n", 324 | "\n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | "
   impressions_contributionclicks_over_impressions_contributionconversions_over_clicks_contributionrevenue_over_conversions_contribution
yearmonthgroup    
20191A$0.00-$1,720.00$4,586.67-$2,411.67
B$705.00$2,428.33-$3,446.67-$347.67
C-$2,150.00$8,600.00-$2,924.00-$4,129.00
2A$24,500.00$0.00$0.00$25,000.00
B$855.00$19.00-$1,254.00-$281.00
C-$1,750.00$4,200.00$420.00$527.00
\n" 393 | ], 394 | "text/plain": [ 395 | "" 396 | ] 397 | }, 398 | "execution_count": 3, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "import icanexplain as ice\n", 405 | "\n", 406 | "explainer = ice.FunnelExplainer(\n", 407 | "    funnel=['impressions', 'clicks', 'conversions', 'revenue'],\n", 408 | "    period='year',\n", 409 | "    group=['month', 'group']\n", 410 | ")\n", 411 | "traffic = traffic.assign(\n", 412 | "    month=traffic.date.dt.month,\n", 413 | "    year=traffic.date.dt.year\n", 414 | ")\n", 415 | "explanation = explainer(traffic)\n", 416 | "explanation.style.format(fmt_currency).set_properties(**{'text-align': 'right'})" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "This is powerful, because it allows us to understand the drivers of revenue growth. For example, between January 2018 and January 2019, revenue went up by $8,600 due to an increase in clicks for group C. This is more insightful than just saying that their click rate went up." 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "One thing to keep in mind is that contributions sum up to the overall difference between two periods. 
This means that it's easy to unit test that the contributions are correct:" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 4, 436 | "metadata": { 437 | "execution": { 438 | "iopub.execute_input": "2024-09-25T08:40:22.730483Z", 439 | "iopub.status.busy": "2024-09-25T08:40:22.730381Z", 440 | "iopub.status.idle": "2024-09-25T08:40:22.742096Z", 441 | "shell.execute_reply": "2024-09-25T08:40:22.741750Z" 442 | } 443 | }, 444 | "outputs": [ 445 | { 446 | "data": { 447 | "text/html": [ 448 | "\n", 450 | "\n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | "
 sum
year 
2019$51,427.00
\n" 468 | ], 469 | "text/plain": [ 470 | "" 471 | ] 472 | }, 473 | "execution_count": 4, 474 | "metadata": {}, 475 | "output_type": "execute_result" 476 | } 477 | ], 478 | "source": [ 479 | "(\n", 480 | " explanation\n", 481 | " .groupby('year').sum().sum(axis=1)\n", 482 | " .to_frame('sum')\n", 483 | " .style.format(fmt_currency)\n", 484 | ")" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "Of course, it would be more interesting to apply this methodology to some real data. One example is the [Google Analytics dataset sample](https://developers.google.com/analytics/bigquery/web-ecommerce-demo-dataset) which is publicly available in BigQuery. " 492 | ] 493 | } 494 | ], 495 | "metadata": { 496 | "kernelspec": { 497 | "display_name": "Python 3", 498 | "language": "python", 499 | "name": "python3" 500 | }, 501 | "language_info": { 502 | "codemirror_mode": { 503 | "name": "ipython", 504 | "version": 3 505 | }, 506 | "file_extension": ".py", 507 | "mimetype": "text/x-python", 508 | "name": "python", 509 | "nbconvert_exporter": "python", 510 | "pygments_lexer": "ipython3", 511 | "version": "3.11.4" 512 | } 513 | }, 514 | "nbformat": 4, 515 | "nbformat_minor": 2 516 | } 517 | -------------------------------------------------------------------------------- /docs/examples/iowa-whiskey-sales.md: -------------------------------------------------------------------------------- 1 | # Iowa whiskey sales 🥃 2 | 3 | Let's look at whiskey sales in Iowa. This is a subset of the data from the [Iowa Liquor Sales dataset](https://data.iowa.gov/Sales-Distribution/Iowa-Liquor-Sales/m3tr-qhgy). 
4 | 5 | 6 | ```python 7 | import icanexplain as ice 8 | 9 | sales = ice.datasets.load_iowa_whiskey_sales() 10 | sales.head().style.format() 11 | ``` 12 | 13 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 |
 datecategoryvendorsales_amountprice_per_bottlebottles_soldbottle_volume_mlyear
02012-06-04CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.670000617502012
12016-01-05STRAIGHT BOURBON WHISKIESCAMPARI(SKYY)18.7600009.38000023752016
22016-05-25CANADIAN WHISKIESDIAGEO AMERICAS11.03000011.03000013002016
32016-01-20CANADIAN WHISKIESPHILLIPS BEVERAGE COMPANY33.84000011.28000037502016
42012-03-19CANADIAN WHISKIESCONSTELLATION WINE COMPANY, INC.94.02000015.670000617502012
90 | 91 | 92 | 93 | 94 | The `sales_amount` column represents the bill a customer paid for a given transaction. We can sum it and group by year to see how the total sales amount evolves over time. 95 | 96 | 97 | ```python 98 | import locale 99 | 100 | locale.setlocale(locale.LC_MONETARY, 'en_US.UTF-8') 101 | def fmt_currency(x): 102 | return locale.currency(x, grouping=True) 103 | 104 | ( 105 | sales.groupby('year')['sales_amount'] 106 | .sum() 107 | .to_frame() 108 | .assign(diff=lambda x: x.sales_amount.diff()) 109 | .style.format(lambda x: fmt_currency(x) if x > 0 else '') 110 | ) 111 | ``` 112 | 113 | 114 | 115 | 116 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 |
 sales_amountdiff
year  
2012$1,842,098.86
2016$2,298,505.88$456,407.02
2020$3,378,164.43$1,079,658.55
149 | 150 | 151 | 152 | 153 | Ok, but why? Well, we can use icanexplain to break down the evolution into two effects: 154 | 155 | 1. The inner effect: how much the average transaction value changed. 156 | 2. The mix effect: how much the number of transactions changed. 157 | 158 | 159 | ```python 160 | import icanexplain as ice 161 | 162 | explainer = ice.SumExplainer( 163 | fact='sales_amount', 164 | period='year', 165 | group='category' 166 | ) 167 | explanation = explainer(sales) 168 | ( 169 | explanation.style 170 | .format(lambda x: fmt_currency(x) if x > 0 else '$0') 171 | .set_properties(**{'text-align': 'right'}) 172 | ) 173 | ``` 174 | 175 | 176 | 177 | 178 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 |
  innermix
yearcategory  
2016BLENDED WHISKIES$17,854.43$7,356.77
CANADIAN WHISKIES$0$225,902.66
CORN WHISKIES$0$4,113.90
IRISH WHISKIES$22,144.48$75,122.83
SCOTCH WHISKIES$19,591.97$0
SINGLE BARREL BOURBON WHISKIES$1,852.03$6,375.43
STRAIGHT BOURBON WHISKIES$107,144.93$97,934.50
STRAIGHT RYE WHISKIES$0$0
2020BLENDED WHISKIES$83,342.60$59,768.58
CANADIAN WHISKIES$224,022.62$149,363.35
CORN WHISKIES$1,517.48$1,453.26
IRISH WHISKIES$0$67,344.41
SCOTCH WHISKIES$19,840.48$0
SINGLE BARREL BOURBON WHISKIES$11,958.32$3,819.27
STRAIGHT BOURBON WHISKIES$167,864.46$268,064.74
STRAIGHT RYE WHISKIES$0$64,056.43
283 | 284 | 285 | 286 | 287 | For instance, we see that the average transaction amount for blended whiskies contributed a $17,854 increase in sales from 2012 to 2016. This is the inner effect. The mix effect for blended whiskies, on the other hand, contributed a $7,356 increase in sales. 288 | 289 | Here's another example: the mix effect for Canadian whiskies is $225,902. This value represents the increase due to the extra Canadian whiskey transactions. The inner effect, on the other hand, is $0. This means that the average transaction value for Canadian whiskies did not change between 2012 and 2016, and therefore didn't contribute to the increase in sales. 290 | 291 | A visual way to interpret the above table is to use a waterfall chart. The idea is that the contributions sum to the difference between two periods. In this case, the difference in sales from 2012 to 2016 is $456,407. The waterfall chart shows how the inner and mix effects contributed to this difference. 292 | 293 | 294 | ```python 295 | explainer.plot(sales) 296 | ``` 297 | 298 | 299 | 300 | 301 | 302 | 313 |
314 | 367 | 368 | 369 | --------------------------------------------------------------------------------
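The same kind of accounting identity underpins the waterfall above: total sales are transaction count times average transaction value, summed over categories. Here is a minimal, library-free sketch with made-up figures (the category names echo the table, but the numbers are hypothetical, and icanexplain's exact attribution convention may differ): the inner effect varies the average value at the new transaction counts, while the mix effect varies the counts at the old average value.

```python
# Sketch of decomposing a change in total sales into an "inner" effect
# (average transaction value) and a "mix" effect (number of transactions).
# Hypothetical data: (transaction count, average transaction value in $).

before = {'BLENDED': (200, 15.0), 'CANADIAN': (500, 20.0)}
after = {'BLENDED': (220, 16.5), 'CANADIAN': (650, 20.0)}

def total(data):
    return sum(n * avg for n, avg in data.values())

def decompose(before, after):
    effects = {}
    for cat in before:
        n0, a0 = before[cat]
        n1, a1 = after[cat]
        inner = n1 * (a1 - a0)  # same counts, different average value
        mix = (n1 - n0) * a0    # more (or fewer) transactions, at the old average
        effects[cat] = (inner, mix)
    return effects

effects = decompose(before, after)

# Canadian whiskies: the average value didn't move, so the inner effect is
# zero and the whole change comes from extra transactions (the mix effect).
assert effects['CANADIAN'] == (0.0, 3000.0)

# As always, the per-category contributions sum to the change in the total.
assert abs(sum(i + m for i, m in effects.values()) - (total(after) - total(before))) < 1e-9
```

This summing-to-the-total property is what makes the waterfall chart read cleanly: the bars stack up to exactly the year-over-year difference.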