├── .github
│   ├── dependabot.yml
│   └── workflows
│       └── main.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.rst
├── LICENSES
│   ├── LICENSE
│   └── LICENSE_ASTROML.rst
├── README.md
├── docs
│   ├── Makefile
│   ├── api
│   │   ├── hepstats.hypotests.calculators.asymptotic_calculator.rst
│   │   ├── hepstats.hypotests.calculators.basecalculator.rst
│   │   ├── hepstats.hypotests.calculators.frequentist_calculator.rst
│   │   ├── hepstats.hypotests.calculators.rst
│   │   ├── hepstats.hypotests.core.basetest.rst
│   │   ├── hepstats.hypotests.core.confidence_interval.rst
│   │   ├── hepstats.hypotests.core.discovery.rst
│   │   ├── hepstats.hypotests.core.rst
│   │   ├── hepstats.hypotests.core.upperlimit.rst
│   │   ├── hepstats.hypotests.exceptions.rst
│   │   ├── hepstats.hypotests.hypotests_object.rst
│   │   ├── hepstats.hypotests.parameters.rst
│   │   ├── hepstats.hypotests.rst
│   │   ├── hepstats.hypotests.toyutils.rst
│   │   ├── hepstats.modeling.bayesian_blocks.rst
│   │   ├── hepstats.modeling.rst
│   │   ├── hepstats.rst
│   │   ├── hepstats.splot.exceptions.rst
│   │   ├── hepstats.splot.rst
│   │   ├── hepstats.splot.sweights.rst
│   │   ├── hepstats.splot.warnings.rst
│   │   ├── hepstats.utils.fit.api_check.rst
│   │   ├── hepstats.utils.fit.diverse.rst
│   │   ├── hepstats.utils.fit.rst
│   │   ├── hepstats.utils.fit.sampling.rst
│   │   ├── hepstats.utils.rst
│   │   ├── hepstats.version.rst
│   │   ├── hypotests.rst
│   │   ├── index.rst
│   │   ├── modeling.rst
│   │   ├── modules.rst
│   │   ├── splot.rst
│   │   └── utils.rst
│   ├── bib
│   │   └── references.bib
│   ├── bibliography.rst
│   ├── conf.py
│   ├── getting_started
│   │   ├── hypotests.rst
│   │   ├── index.rst
│   │   ├── modeling.rst
│   │   └── splot.rst
│   ├── images
│   │   ├── logo.pdf
│   │   ├── logo.png
│   │   ├── logo.xcf
│   │   ├── logo_medium.png
│   │   └── logo_small.png
│   ├── index.rst
│   ├── make.bat
│   ├── make_docs.sh
│   └── whats_new.rst
├── environment.yml
├── notebooks
│   ├── README.md
│   ├── hypotests
│   │   ├── FC_interval_asy.ipynb
│   │   ├── FC_interval_freq.ipynb
│   │   ├── Simultaneous_fit_discovery_splot.ipynb
│   │   ├── __init__.py
│   │   ├── asy_ci.png
│   │   ├── asy_ul.png
│   │   ├── confidenceinterval_asy_zfit.ipynb
│   │   ├── confidenceinterval_freq_zfit.ipynb
│   │   ├── counting.ipynb
│   │   ├── discovery_asy_zfit.ipynb
│   │   ├── discovery_freq_zfit.ipynb
│   │   ├── toys
│   │   │   ├── FC_toys_-1.0.yml
│   │   │   ├── FC_toys_-2.0.npz
│   │   │   ├── FC_toys_-2.0.yml
│   │   │   ├── FC_toys_-3.0.yml
│   │   │   ├── FC_toys_-4.0.yml
│   │   │   ├── FC_toys_-5.0.yml
│   │   │   ├── FC_toys_-6.0.yml
│   │   │   ├── FC_toys_0.0.yml
│   │   │   ├── FC_toys_1.0.yml
│   │   │   ├── FC_toys_2.0.yml
│   │   │   ├── FC_toys_3.0.yml
│   │   │   ├── FC_toys_4.0.yml
│   │   │   ├── FC_toys_5.0.yml
│   │   │   ├── FC_toys_6.0.yml
│   │   │   ├── ci_freq_zfit_toys.yml
│   │   │   ├── discovery_freq_zfit_toys.yml
│   │   │   └── upperlimit_freq_zfit_toys.yml
│   │   ├── upperlimit_asy_zfit.ipynb
│   │   ├── upperlimit_freq_zfit.ipynb
│   │   └── utils.py
│   ├── modeling
│   │   ├── bayesian_blocks.ipynb
│   │   ├── bayesian_blocks_example.png
│   │   ├── hists_2LP.png
│   │   ├── hists_MuPT.png
│   │   └── hists_jPT.png
│   └── splots
│       ├── splot_example.ipynb
│       ├── splot_example_2.ipynb
│       └── utils.py
├── pyproject.toml
├── src
│   └── hepstats
│       ├── __init__.py
│       ├── hypotests
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── calculators
│       │   │   ├── __init__.py
│       │   │   ├── asymptotic_calculator.py
│       │   │   ├── basecalculator.py
│       │   │   └── frequentist_calculator.py
│       │   ├── core
│       │   │   ├── __init__.py
│       │   │   ├── basetest.py
│       │   │   ├── confidence_interval.py
│       │   │   ├── discovery.py
│       │   │   └── upperlimit.py
│       │   ├── exceptions.py
│       │   ├── hypotests_object.py
│       │   ├── parameters.py
│       │   └── toyutils.py
│       ├── modeling
│       │   ├── __init__.py
│       │   └── bayesian_blocks.py
│       ├── splot
│       │   ├── __init__.py
│       │   ├── exceptions.py
│       │   ├── sweights.py
│       │   └── warnings.py
│       └── utils
│           ├── __init__.py
│           └── fit
│               ├── __init__.py
│               ├── api_check.py
│               ├── diverse.py
│               └── sampling.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── hypotests
    │   ├── data
    │   │   ├── cls_pvalues.npz
    │   │   └── clsb_pvalues.npz
    │   ├── test_basetest.py
    │   ├── test_calculators.py
    │   ├── test_confidence_intervals.py
    │   ├── test_discovery.py
    │   ├── test_parameters.py
    │   ├── test_toysutils.py
    │   └── test_upperlimit.py
    ├── modeling
    │   ├── __init__.py
    │   ├── data
    │   │   └── answers_bayesian_blocks.npz
    │   └── test_bayesianblocks.py
    └── splots
        └── test_splots.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "github-actions"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "monthly"
7 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | concurrency:
4 |   group: ${{ github.ref }}
5 |   cancel-in-progress: true
6 |
7 | on:
8 |   pull_request:
9 |   push:
10 |     branches: [ "main" ]
11 |   release:
12 |     types:
13 |       - "published"
14 |
15 | jobs:
16 |
17 |   pre-commit:
18 |     name: Format
19 |     runs-on: ubuntu-latest
20 |     steps:
21 |       - uses: actions/checkout@v4
22 |         with:
23 |           fetch-depth: 0
24 |       - uses: actions/setup-python@v5
25 |
26 |   checks:
27 |     runs-on: ${{ matrix.os }}
28 |     strategy:
29 |       fail-fast: false
30 |       matrix:
31 |         os:
32 |           - ubuntu-latest
33 |         python-version:
34 |           - "3.9"
35 |           - "3.12"
36 |           - "3.13"
37 |         include:
38 |           - os: windows-latest
39 |             python-version: "3.9"
40 |           - os: macos-13
41 |             python-version: "3.9"
42 |           - os: macos-latest
43 |             python-version: "3.12" # old versions not supported
44 |     name: Check Python ${{ matrix.python-version }} ${{ matrix.os }}
45 |     steps:
46 |       - uses: actions/checkout@v4
47 |         with:
48 |           fetch-depth: 0
49 |       - name: Setup Python ${{ matrix.python-version }}
50 |         uses: actions/setup-python@v5
51 |         with:
52 |           python-version: ${{ matrix.python-version }}
53 |
54 |       - name: Install package
55 |         run: |
56 |           pip install uv
57 |           uv pip install --system -e .[test] pytest-xdist # for multiprocessing, -e needed for paths etc.
58 |
59 |       - name: Test package
60 |         run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml -n auto
61 |
62 |       - name: Upload coverage to Codecov
63 |         if: matrix.python-version == '3.9' && matrix.os == 'ubuntu-latest'
64 |         uses: codecov/codecov-action@v5
65 |         with:
66 |           token: ${{ secrets.CODECOV_TOKEN }} # technically not needed, but prevents failures: https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954
67 |           file: ./coverage.xml
68 |           flags: unittests
69 |           name: codecov-umbrella
70 |           fail_ci_if_error: false # flaky upload...
71 |
72 |   dist:
73 |     runs-on: ubuntu-latest
74 |     steps:
75 |       - uses: actions/checkout@v4
76 |         with:
77 |           fetch-depth: 0
78 |
79 |       - name: Build
80 |         run: pipx run build
81 |
82 |       - uses: actions/upload-artifact@v4
83 |         with:
84 |           path: dist/*
85 |
86 |       - name: Check metadata
87 |         run: pipx run twine check dist/*
88 |
89 |   docs:
90 |     runs-on: ubuntu-latest
91 |     steps:
92 |       - uses: actions/checkout@v4
93 |         with:
94 |           fetch-depth: 0
95 |
96 |       - name: Setup Python 3.9
97 |         uses: actions/setup-python@v5
98 |         with:
99 |           python-version: 3.9
100 |
101 |       - name: Install dependencies
102 |         run: |
103 |           pip install uv
104 |           uv pip install --system -e .[docs]
105 |       - name: build docs
106 |         run: |
107 |           sphinx-build -b html docs docs/_build/html
108 |           touch docs/_build/html/.nojekyll
109 |
110 |       - name: Deploy docs to GitHub Pages
111 |         if: success() && github.event_name == 'push' && github.ref == 'refs/heads/main'
112 |         uses: peaceiris/actions-gh-pages@v4
113 |         with:
114 |           github_token: ${{ secrets.GITHUB_TOKEN }}
115 |           publish_dir: docs/_build/html
116 |           force_orphan: true
117 |           user_name: 'github-actions[bot]'
118 |           user_email: 'github-actions[bot]@users.noreply.github.com'
119 |           commit_message: Deploy to GitHub pages
120 |
121 |   publish:
122 |     needs: [ dist ]
123 |     environment: pypi
124 |     permissions:
125 |       id-token: write
126 |     runs-on: ubuntu-latest
127 |     if: github.event_name == 'release' && github.event.action == 'published'
128 |     steps:
129 |       - uses: actions/download-artifact@v4
130 |         with:
131 |           name: artifact
132 |           path: dist
133 |
134 |       - uses: pypa/gh-action-pypi-publish@release/v1
135 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *checkpoint*
3 | *egg*
4 | build/
5 | dist/
6 | .tox/
7 | .python-version
8 | .ipynb_checkpoints
9 | .pytest_cache
10 | *.gv*
11 | /src/hepstats/version.py
12 | /.mypy_cache/*
13 | /pip-wheel-metadata
14 | /docs/_build/*
15 | /docs/source/*
16 | /_build/**
17 | /.idea/**
18 | /src/hepstats/_version.py
19 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/.gitmodules
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ci:
2 |   autoupdate_schedule: quarterly
3 |
4 | repos:
5 |   - repo: https://github.com/pre-commit/pre-commit-hooks
6 |     rev: v5.0.0
7 |     hooks:
8 |       - id: check-added-large-files
9 |         args: [ '--maxkb=1000' ]
10 |       - id: mixed-line-ending
11 |         exclude: ^notebooks/
12 |       - id: trailing-whitespace
13 |         exclude: ^notebooks/
14 |       - id: check-merge-conflict
15 |       - id: check-case-conflict
16 |       - id: check-symlinks
17 |       - id: check-yaml
18 |         exclude: ^notebooks/
19 |       - id: requirements-txt-fixer
20 |       - id: debug-statements
21 |       - id: end-of-file-fixer
22 |   # - repo: https://github.com/mgedmin/check-manifest
23 |   #   rev: "0.50"
24 |   #   hooks:
25 |   #     - id: check-manifest
26 |   #       args:
27 |   #         - --update
28 |   #         - --no-build-isolation
29 |   #       additional_dependencies:
30 |   #         - hatchling
31 |   #         - hatch-vcs
32 |
33 |   - repo: https://github.com/pre-commit/mirrors-mypy
34 |     rev: v1.15.0
35 |     hooks:
36 |       - id: mypy
37 |         files: src
38 |
39 |   - repo: https://github.com/roy-ht/pre-commit-jupyter
40 |     rev: v1.2.1
41 |     hooks:
42 |       - id: jupyter-notebook-cleanup
43 |
44 |   - repo: https://github.com/pre-commit/pygrep-hooks
45 |     rev: v1.10.0
46 |     hooks:
47 |       - id: python-use-type-annotations
48 |       - id: python-check-mock-methods
49 |       - id: python-no-eval
50 |       - id: rst-backticks
51 |       - id: rst-directive-colons
52 |
53 |   - repo: https://github.com/asottile/pyupgrade
54 |     rev: v3.19.1
55 |     hooks:
56 |       - id: pyupgrade
57 |         args: [ --py39-plus ]
58 |
59 |   - repo: https://github.com/asottile/setup-cfg-fmt
60 |     rev: v2.8.0
61 |     hooks:
62 |       - id: setup-cfg-fmt
63 |         args: [ --max-py-version=3.13, --include-version-classifiers ]
64 |
65 |   # Notebook formatting
66 |   - repo: https://github.com/nbQA-dev/nbQA
67 |     rev: 1.9.1
68 |     hooks:
69 |
70 |       - id: nbqa-pyupgrade
71 |         additional_dependencies: [ pyupgrade ]
72 |         args: [ --py39-plus ]
73 |
74 |
75 |   - repo: https://github.com/roy-ht/pre-commit-jupyter
76 |     rev: v1.2.1
77 |     hooks:
78 |       - id: jupyter-notebook-cleanup
79 |
80 |   - repo: https://github.com/sondrelg/pep585-upgrade
81 |     rev: 'v1.0'
82 |     hooks:
83 |       - id: upgrade-type-hints
84 |         args: [ '--futures=true' ]
85 |
86 |
87 |   - repo: https://github.com/dannysepler/rm_unneeded_f_str
88 |     rev: v0.2.0
89 |     hooks:
90 |       - id: rm-unneeded-f-str
91 |
92 |   - repo: https://github.com/python-jsonschema/check-jsonschema
93 |     rev: 0.32.1
94 |     hooks:
95 |       - id: check-github-workflows
96 |       - id: check-github-actions
97 |       - id: check-dependabot
98 |       - id: check-readthedocs
99 |
100 |   - repo: https://github.com/MarcoGorelli/auto-walrus
101 |     rev: 0.3.4
102 |     hooks:
103 |       - id: auto-walrus
104 |
105 |   - repo: https://github.com/astral-sh/ruff-pre-commit
106 |     rev: "v0.11.4"
107 |     hooks:
108 |       - id: ruff
109 |         types_or: [ python, pyi, jupyter ]
110 |         args: [ --fix, --unsafe-fixes, --show-fixes, --line-length=120 ]
111 |       # Run the formatter.
112 |       - id: ruff-format
113 |         types_or: [ python, pyi, jupyter ]
114 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | version: 2
5 |
6 | # Build documentation in the docs/ directory with Sphinx
7 | sphinx:
8 |   configuration: docs/conf.py
9 |
10 | build:
11 |   os: ubuntu-22.04
12 |   tools:
13 |     python: "3.11"
14 |
15 | python:
16 |   install:
17 |     - method: pip
18 |       path: .
19 |       extra_requirements:
20 |         - docs
21 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | Changelog
2 | =========
3 |
4 | main
5 | *************
6 |
7 | Version 0.9.2
8 | **************
9 |
10 | * fix wrong import with optional dependencies
11 |
12 | Version 0.9.1
13 | **************
14 |
15 | * fix dumping of fitresult in tests; require ASDF version < 1.6.0 when writing to file
16 | * fix sampling of model in FrequentistCalculator with simultaneous fits
17 |
18 |
19 | Version 0.9.0
20 | **************
21 |
22 | * Add support for Python 3.13
23 |
24 | Version 0.8.1
25 | **************
26 |
27 | * Add support for Python 3.12, drop support for Python 3.8
28 | * Improved support for zfit 0.20+
29 |
30 | Thanks to @MoritzNeuberger for finding and proposing a hypothesis test fix.
31 |
32 | Version 0.7.0
33 | *************
34 |
35 | * Add support for Python 3.11, drop support for Python 3.7
36 |
37 | Version 0.6.1
38 | *************
39 |
40 | * fix toy generation with constraints
41 |
42 | Version 0.6.0
43 | *************
44 |
45 | * Upgrade to Python 3.10 and zfit >= 0.10.0
46 | * Enhanced speed of the toy limit calculation
47 | * Add multidimensional PDF support
48 | * Add support for binned data and models
49 |
50 | Version 0.5.0
51 | *************
52 | * Upgrade to Python 3.9 and drop support for 3.6
53 |
54 | Version 0.4.0
55 | *************
56 | * loss: upgrade API to use ``create_new`` to make sure that the losses are comparable. Compatible with zfit 0.6.4+
57 |
58 | Version 0.3.1
59 | *************
60 | * sPlot: Increase the tolerance of the sanity check from 1e-3 to 5e-2; if the check is above this tolerance, a ModelNotFittedToData
61 |   exception is raised. In addition, if the check is above the 5e-3 tolerance, a warning message is printed.
62 |
63 |
64 | Version 0.3.0
65 | *************
66 | * New documentation style
67 | * **hepstats** can now do hypothesis tests, and compute upper limits and confidence intervals for counting analysis
68 | * Progress bars are used to show the progression of the generation of the toys
69 |
70 | Version 0.2.5
71 | *************
72 | * ConfidenceInterval can compute Feldman and Cousins intervals with boundaries (i.e. ``qtilde=True``)
73 | * **AsymptoticCalculator** Asimov weights are now scaled to the number of entries in the dataset from the loss
74 |   function if the loss is not extended
75 | * **hepstats.hypotests** can now be used even if there are no nuisance parameters. The **pll** function in **utils/fit/diverse.py**
76 |   had to be modified such that if there are no nuisance parameters, it returns the value of the loss function.
77 | * add notebooks demos for FC intervals with the ``FrequentistCalculator`` and ``AsymptoticCalculator``.
78 | * add warnings when multiple roots are found in ``ConfidenceInterval``
79 | * move toys .yml files from notebook to notebook/toys
80 |
81 | Version 0.2.4
82 | *************
83 | * Redesigned packaging system, GHA deployment.
84 | * **expected_poi** removed from **BaseCalculator** and **AsymptoticCalculator**
85 | * add type checks in the **hypotests** submodule
86 |
87 | Version 0.2.3
88 | **************
89 | * **hepstats** is now compatible with zfit > 0.5 api
90 | * expected intervals in upper limit are now calculated from the p-values and no longer from the **expected_poi**
91 |   function.
92 |
93 | Version 0.2.2
94 | **************
95 | * Addition of the **sPlot** algorithm
96 |
97 | Version 0.2.1
98 | **************
99 | * Addition of the **FrequentistCalculator** to perform hypothesis tests, upper limit and interval calculations
100 |   with toys. Toys can be saved to and loaded from YAML files using the methods:
101 |
102 |   * ``to_yaml``
103 |   * ``from_yaml``
104 |
105 | Version 0.2.0
106 | **************
107 | * New version for the new **hepstats** name of the package
108 |
109 | Version 0.1.3
110 | **************
111 | * Package name changed from **scikit-stats** to **hepstats**
112 |
113 | Version 0.1.2
114 | **************
115 | * Additions of classes to compute upper limits and confidence intervals.
116 |
117 | Version 0.1.1
118 | **************
119 | * Release for Zenodo DOI
120 |
121 | Version 0.1.0
122 | **************
123 | * First release of **scikit-stats**
124 | * Addition of the **modeling** submodule with the ``Bayesian Blocks algorithm``
125 | * Addition of the **hypotests** submodule
126 |
--------------------------------------------------------------------------------
/LICENSES/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019-2025, The Scikit-HEP Administrators
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/LICENSES/LICENSE_ASTROML.rst:
--------------------------------------------------------------------------------
1 | https://github.com/astroML/astroML
2 |
3 | Copyright (c) 2012-2013, Jacob Vanderplas
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 |
8 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 |
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # `hepstats` package: statistics tools and utilities
5 |
6 | [![Scikit-HEP][sk-badge]](https://scikit-hep.org/)
7 |
8 | [PyPI](https://pypi.org/project/hepstats/)
9 | [conda-forge](https://anaconda.org/conda-forge/hepstats)
10 | [Supported Python versions](https://pypi.org/project/hepstats/)
11 | [DOI](https://doi.org/10.5281/zenodo.3519200)
12 |
13 | [CI](https://github.com/scikit-hep/hepstats/actions)
14 | [Coverage](https://codecov.io/gh/scikit-hep/hepstats?branch=main)
15 | [Code style: black](https://github.com/psf/black)
16 |
17 | [Binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main)
18 |
19 | hepstats is a library for statistical inference aiming to cover the needs of High Energy Physics.
20 | It is part of the [Scikit-HEP project](https://scikit-hep.org/).
21 |
22 | **Questions**: for usage questions, use [StackOverflow with the hepstats tag](https://stackoverflow.com/questions/ask?tags=hepstats)
23 | **Bugs and odd behavior**: open [an issue with hepstats](https://github.com/scikit-hep/hepstats/issues/new)
24 |
25 | ## Installation
26 |
27 | Install `hepstats` like any other Python package:
28 |
29 | ```
30 | pip install hepstats
31 | ```
32 |
33 | or similar (use e.g. `virtualenv` if you wish).
34 |
35 | ## Changelog
36 | See the [changelog](https://github.com/scikit-hep/hepstats/blob/main/CHANGELOG.rst) for a history of notable changes.
37 |
38 | ## Getting Started
39 |
40 | The `hepstats` module includes the `modeling`, `hypotests` and `splot` submodules. This is a quick user guide to each submodule. The [binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) examples are also a good way to get started.
41 |
42 | ### modeling
43 |
44 | The modeling submodule includes the [Bayesian Blocks algorithm](https://arxiv.org/pdf/1207.5578.pdf) that can be used to improve the binning of histograms. The visual improvement can be dramatic, and more importantly, this algorithm produces histograms that accurately represent the underlying distribution while being robust to statistical fluctuations. Here is a small example of the algorithm applied to data sampled from a Laplace distribution, compared to a histogram of this sample with a fine binning.
45 |
46 | ```python
47 | >>> import numpy as np
48 | >>> import matplotlib.pyplot as plt
49 | >>> from hepstats.modeling import bayesian_blocks
50 |
51 | >>> data = np.random.laplace(size=10000)
52 | >>> blocks = bayesian_blocks(data)
53 |
54 | >>> plt.hist(data, bins=1000, label='Fine Binning', density=True, alpha=0.6)
55 | >>> plt.hist(data, bins=blocks, label='Bayesian Blocks', histtype='step', density=True, linewidth=2)
56 | >>> plt.legend(loc=2)
57 | ```
58 |
59 | ![Bayesian Blocks example](notebooks/modeling/bayesian_blocks_example.png)
60 |
61 | ### hypotests
62 |
63 | This submodule provides tools to do hypothesis tests such as discovery tests and computations of upper limits or confidence intervals. hepstats needs a fitting backend to perform computations such as [zfit](https://github.com/zfit/zfit). Any fitting library can be used if its API is compatible with hepstats (see [api checks](https://github.com/scikit-hep/hepstats/blob/main/src/hepstats/utils/fit/api_check.py)).
64 |
65 | We give here a simple example of an upper limit calculation of the yield of a Gaussian signal with known mean and sigma over an exponential background. The fitting backend used is the [zfit](https://github.com/zfit/zfit) package. An example with a **counting experiment** analysis is also given in the [binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) examples.
66 |
67 | ```python
68 | >>> import zfit
69 | >>> from zfit.loss import ExtendedUnbinnedNLL
70 | >>> from zfit.minimize import Minuit
71 |
72 | >>> bounds = (0.1, 3.0)
73 | >>> obs = zfit.Space('x', limits=bounds)
74 |
75 | >>> bkg = np.random.exponential(0.5, 300)
76 | >>> peak = np.random.normal(1.2, 0.1, 10)
77 | >>> data = np.concatenate((bkg, peak))
78 | >>> data = data[(data > bounds[0]) & (data < bounds[1])]
79 | >>> N = data.size
80 | >>> data = zfit.Data.from_numpy(obs=obs, array=data)
81 |
82 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
83 | >>> Nsig = zfit.Parameter("Nsig", 1., -20., N)
84 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1)
85 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)
86 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
87 | >>> total = zfit.pdf.SumPDF([signal, background])
88 | >>> loss = ExtendedUnbinnedNLL(model=total, data=data)
89 |
90 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator
91 | >>> from hepstats.hypotests import UpperLimit
92 | >>> from hepstats.hypotests.parameters import POI, POIarray
93 |
94 | >>> calculator = AsymptoticCalculator(loss, Minuit(), asimov_bins=100)
95 | >>> poinull = POIarray(Nsig, np.linspace(0.0, 25, 20))
96 | >>> poialt = POI(Nsig, 0)
97 | >>> ul = UpperLimit(calculator, poinull, poialt)
98 | >>> ul.upperlimit(alpha=0.05, CLs=True)
99 |
100 | Observed upper limit: Nsig = 15.725784747406346
101 | Expected upper limit: Nsig = 11.927442041887158
102 | Expected upper limit +1 sigma: Nsig = 16.596396280677116
103 | Expected upper limit -1 sigma: Nsig = 8.592750403611896
104 | Expected upper limit +2 sigma: Nsig = 22.24864429383046
105 | Expected upper limit -2 sigma: Nsig = 6.400549971360598
106 | ```
107 |
108 | ![Upper limit scan](notebooks/hypotests/asy_ul.png)
109 |
110 | ### splots
111 |
112 | A full example using the **sPlot** algorithm can be found [here](https://github.com/scikit-hep/hepstats/tree/main/notebooks/splots/splot_example.ipynb). **sWeights** for different components in a data sample, modeled with a sum of extended probability density functions, are derived using the `compute_sweights` function:
113 |
114 | ```python
115 | >>> from hepstats.splot import compute_sweights
116 |
117 | # using same model as above for illustration
118 | >>> sweights = compute_sweights(zfit.pdf.SumPDF([signal, background]), data)
119 |
120 | >>> bkg_sweights = sweights[Nbkg]
121 | >>> sig_sweights = sweights[Nsig]
122 | ```
123 |
124 | The model needs to be fitted to the data before the **sWeights** are computed; if it is not, an error is raised.
125 |
126 | [sk-badge]: https://img.shields.io/badge/Scikit--HEP-Project-blue?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABoAAAAcCAYAAAB/E6/TAAAACXBIWXMAAAEZAAABGQHyCY1sAAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAAA6dJREFUSImdlktonFUUx/930kQ0nYo2JX5NUqSghSq2oIgvcCFaC0EEH2AXUh9FEV1UUaGIC924qY8igo+FQi26sWp8gDS24qOgCOIiKCVomLlnZpxk7Dh2GjMz389F7tgv4zfNJGdzh/M/5/zuud/ch9MKDFgnaUjSnHOu2kuOmb0h6brMMoVHgceBY8ApSVVJ05JOAnXga+BJ4OK0fO/9PZL2AL91AwwBLwLz9GYLwKvAcLtGLpcbMbM5MyuXy+UoDXI14BNFcsABYBy4DLgojDvDZH5PxJaAG4CM937SzCgUCnemQcaB0yFpDngMGFhmefuBh4E/Qt4/tVrtoJlhZq+nJWwHaiH4F2DL2QAp+SPA9wBxHDM7O5svl8vZzqBzgOkAOQGsXwmkbbVabUOj0Wh/1xIw+J+Yy+UuBJ4O4jywdTUQSSoUCgdKpRJxHC+Ees8mxVKr1WoGYf9qId77m80sNrNvgedDvb+A8yQpMzg4OJHJZPoAVavVQ6uBmNmQc+4dSfVWq7Vb0n5JC5KyknZIUiabzdYlqdFoqF6vTxSLxctXwXpNUuSce3RsbOyEc+6kpKNBG5ekjKRLguMTSUNxHE/m8/ntvRK89w9IukvS4SiK3k5Ix8N4aRu0UZIGBgaOAHdIWpfJZI56769fDlIqlTY7515yzlkcx3s65xDGjW1Qf3A0R0ZGJpxzOyX1Oee+MLMd3SDAmjiOD0paK+nB0dHRuY6QhTD2t0EWHJEkRVF0zDl3k6TTkj5OPUIkmdkzwLWSXomi6POUkNFkZxlJM8GxrR0RRdEPzrkbnXOzwHve+/s7IFc55/ZJmmq1Wvu6NH1FGGfaS3B3YrMuOTJKpdJmM5sO+2OvJBWLxUEz+9XM5vP5/DalWDhpqqHu7rYzmzhI96Ys0aZQmEKh8IKZvRV+P9GlEwEPhXoNYCgpvByE2SXCmc6GzeyncCLjvZ8EUi9N4HygEOq92SmuB/4M4pdAf2eBmZmZC7z3lQDb1AXSB3wW6vwNpF54twOtEPQBsLYzplgsfmhmpHUDnAscCvkxsCttMu3gpzhjPwNLNq2ZHU4DsXgr/5jIfa4rJJF0H0vfCp8C9wLDSRCwAdgFfBQ6gMW3wyPLQhKwK4Gv+L81m80mwKkU7Tvgmp4hHcBbgXcTf5ROqwLvA7cBblWQDmA/sLVSqXxTqVQAbmHxJXTWh0vS1vQS5JxrSJoys3JwHXHOxSuZbE+ghE1J2rJSiCT9CxJT5EBIY81lAAAAAElFTkSuQmCC
127 |
--------------------------------------------------------------------------------
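The README's sPlot note above is worth a concrete illustration: ``compute_sweights`` assumes the model is at its best-fit point. Below is a minimal sketch, assuming the zfit objects (``total``, ``loss``, ``data``, ``Nsig``, ``Nbkg``) defined in the README's hypotests example; it is not part of the repository files.

```python
# Minimal sketch: fit the model before computing sWeights.
# Assumes `total`, `loss` and `data` from the README hypotests example;
# `Nsig` and `Nbkg` are the extended yields of the signal and background pdfs.
from zfit.minimize import Minuit

from hepstats.splot import compute_sweights

minimizer = Minuit()
minimizer.minimize(loss=loss)  # model must be fitted first, otherwise
                               # a ModelNotFittedToData error is raised

sweights = compute_sweights(total, data)  # dict keyed by the yield parameters
sig_sw = sweights[Nsig]  # per-event signal weights
bkg_sw = sweights[Nbkg]  # per-event background weights
```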
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.calculators.asymptotic_calculator.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.calculators.asymptotic\_calculator module
2 | ============================================================
3 |
4 | .. automodule:: hepstats.hypotests.calculators.asymptotic_calculator
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.calculators.basecalculator.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.calculators.basecalculator module
2 | ====================================================
3 |
4 | .. automodule:: hepstats.hypotests.calculators.basecalculator
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.calculators.frequentist_calculator.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.calculators.frequentist\_calculator module
2 | =============================================================
3 |
4 | .. automodule:: hepstats.hypotests.calculators.frequentist_calculator
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.calculators.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.calculators package
2 | ======================================
3 |
4 | .. automodule:: hepstats.hypotests.calculators
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Submodules
11 | ----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.hypotests.calculators.asymptotic_calculator
17 |    hepstats.hypotests.calculators.basecalculator
18 |    hepstats.hypotests.calculators.frequentist_calculator
19 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.core.basetest.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.core.basetest module
2 | =======================================
3 |
4 | .. automodule:: hepstats.hypotests.core.basetest
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.core.confidence_interval.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.core.confidence\_interval module
2 | ===================================================
3 |
4 | .. automodule:: hepstats.hypotests.core.confidence_interval
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.core.discovery.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.core.discovery module
2 | ========================================
3 |
4 | .. automodule:: hepstats.hypotests.core.discovery
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.core.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.core package
2 | ===============================
3 |
4 | .. automodule:: hepstats.hypotests.core
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Submodules
11 | ----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.hypotests.core.basetest
17 |    hepstats.hypotests.core.confidence_interval
18 |    hepstats.hypotests.core.discovery
19 |    hepstats.hypotests.core.upperlimit
20 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.core.upperlimit.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.core.upperlimit module
2 | =========================================
3 |
4 | .. automodule:: hepstats.hypotests.core.upperlimit
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.exceptions.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.exceptions module
2 | ====================================
3 |
4 | .. automodule:: hepstats.hypotests.exceptions
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.hypotests_object.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.hypotests\_object module
2 | ===========================================
3 |
4 | .. automodule:: hepstats.hypotests.hypotests_object
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.parameters.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.parameters module
2 | ====================================
3 |
4 | .. automodule:: hepstats.hypotests.parameters
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests package
2 | ==========================
3 |
4 | .. automodule:: hepstats.hypotests
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Subpackages
11 | -----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.hypotests.calculators
17 |    hepstats.hypotests.core
18 |
19 | Submodules
20 | ----------
21 |
22 | .. toctree::
23 |    :maxdepth: 4
24 |
25 |    hepstats.hypotests.exceptions
26 |    hepstats.hypotests.hypotests_object
27 |    hepstats.hypotests.parameters
28 |    hepstats.hypotests.toyutils
29 |
--------------------------------------------------------------------------------
/docs/api/hepstats.hypotests.toyutils.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests.toyutils module
2 | ==================================
3 |
4 | .. automodule:: hepstats.hypotests.toyutils
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.modeling.bayesian_blocks.rst:
--------------------------------------------------------------------------------
1 | hepstats.modeling.bayesian\_blocks module
2 | =========================================
3 |
4 | .. automodule:: hepstats.modeling.bayesian_blocks
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.modeling.rst:
--------------------------------------------------------------------------------
1 | hepstats.modeling package
2 | =========================
3 |
4 | .. automodule:: hepstats.modeling
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Submodules
11 | ----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.modeling.bayesian_blocks
17 |
--------------------------------------------------------------------------------
/docs/api/hepstats.rst:
--------------------------------------------------------------------------------
1 | hepstats package
2 | ================
3 |
4 | .. automodule:: hepstats
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Subpackages
11 | -----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.hypotests
17 |    hepstats.modeling
18 |    hepstats.splot
19 |    hepstats.utils
20 |
21 | Submodules
22 | ----------
23 |
24 | .. toctree::
25 |    :maxdepth: 4
26 |
27 |    hepstats.version
28 |
--------------------------------------------------------------------------------
/docs/api/hepstats.splot.exceptions.rst:
--------------------------------------------------------------------------------
1 | hepstats.splot.exceptions module
2 | ================================
3 |
4 | .. automodule:: hepstats.splot.exceptions
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.splot.rst:
--------------------------------------------------------------------------------
1 | hepstats.splot package
2 | ======================
3 |
4 | .. automodule:: hepstats.splot
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Submodules
11 | ----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.splot.exceptions
17 |    hepstats.splot.sweights
18 |    hepstats.splot.warnings
19 |
--------------------------------------------------------------------------------
/docs/api/hepstats.splot.sweights.rst:
--------------------------------------------------------------------------------
1 | hepstats.splot.sweights module
2 | ==============================
3 |
4 | .. automodule:: hepstats.splot.sweights
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.splot.warnings.rst:
--------------------------------------------------------------------------------
1 | hepstats.splot.warnings module
2 | ================================
3 |
4 | .. automodule:: hepstats.splot.warnings
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.utils.fit.api_check.rst:
--------------------------------------------------------------------------------
1 | hepstats.utils.fit.api\_check module
2 | ====================================
3 |
4 | .. automodule:: hepstats.utils.fit.api_check
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.utils.fit.diverse.rst:
--------------------------------------------------------------------------------
1 | hepstats.utils.fit.diverse module
2 | =================================
3 |
4 | .. automodule:: hepstats.utils.fit.diverse
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.utils.fit.rst:
--------------------------------------------------------------------------------
1 | hepstats.utils.fit package
2 | ==========================
3 |
4 | .. automodule:: hepstats.utils.fit
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Submodules
11 | ----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.utils.fit.api_check
17 |    hepstats.utils.fit.diverse
18 |    hepstats.utils.fit.sampling
19 |
--------------------------------------------------------------------------------
/docs/api/hepstats.utils.fit.sampling.rst:
--------------------------------------------------------------------------------
1 | hepstats.utils.fit.sampling module
2 | ==================================
3 |
4 | .. automodule:: hepstats.utils.fit.sampling
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hepstats.utils.rst:
--------------------------------------------------------------------------------
1 | hepstats.utils package
2 | ======================
3 |
4 | .. automodule:: hepstats.utils
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
10 | Subpackages
11 | -----------
12 |
13 | .. toctree::
14 |    :maxdepth: 4
15 |
16 |    hepstats.utils.fit
17 |
--------------------------------------------------------------------------------
/docs/api/hepstats.version.rst:
--------------------------------------------------------------------------------
1 | hepstats.version module
2 | =======================
3 |
4 | .. automodule:: hepstats.version
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 |    :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/api/hypotests.rst:
--------------------------------------------------------------------------------
1 | hepstats.hypotests
2 | ------------------
3 |
4 | .. currentmodule:: hepstats.hypotests.core.discovery
5 |
6 | .. autosummary::
7 |
8 |    Discovery
9 |
10 | .. currentmodule:: hepstats.hypotests.core.upperlimit
11 |
12 | .. autosummary::
13 |
14 |    UpperLimit
15 |
16 | .. currentmodule:: hepstats.hypotests.core.confidence_interval
17 |
18 | .. autosummary::
19 |
20 |    ConfidenceInterval
21 |
22 | Parameters
23 | """"""""""
24 |
25 | .. currentmodule:: hepstats.hypotests.parameters
26 |
27 | .. autosummary::
28 |
29 |    POIarray
30 |    POI
31 |
32 | Calculators
33 | """""""""""
34 |
35 | Module defining the base class for the calculators for statistical tests based on the likelihood ratio.
36 |
37 | Acronyms used in the code:
38 | * nll = negative log-likelihood, which is the value of the ``loss`` attribute of a calculator;
39 | * obs = observed, i.e. measured on provided data.
40 |
41 | .. currentmodule:: hepstats.hypotests.calculators.asymptotic_calculator
42 |
43 | .. autosummary::
44 |
45 |    AsymptoticCalculator
46 |
47 | .. currentmodule:: hepstats.hypotests.calculators.frequentist_calculator
48 |
49 | .. autosummary::
50 |
51 |    FrequentistCalculator
52 |
53 | Toys utils
54 | """"""""""
55 |
56 | .. currentmodule:: hepstats.hypotests.toyutils
57 |
58 | .. autosummary::
59 |
60 |    ToyResult
61 |    ToysManager
62 |
--------------------------------------------------------------------------------
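To tie the classes summarized above together, here is a minimal sketch of a discovery test built from a calculator and a POI. It assumes a zfit extended likelihood ``nll`` with a free signal yield parameter ``Nsig``, as constructed in the getting-started guide; it is an illustration, not repository code.

```python
# Minimal sketch of a discovery test; assumes a zfit extended likelihood
# `nll` with a free signal yield `Nsig` (see the getting-started guide).
from zfit.minimize import Minuit

from hepstats.hypotests import Discovery
from hepstats.hypotests.calculators import AsymptoticCalculator
from hepstats.hypotests.parameters import POI

calculator = AsymptoticCalculator(nll, Minuit())  # wraps the loss and minimizer
poinull = POI(Nsig, 0)  # null hypothesis: zero signal yield

discovery = Discovery(calculator, poinull)
pnull, significance = discovery.result()  # p-value of the null and its significance
```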
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | API reference
2 | =============
3 |
4 | .. toctree::
5 |    :maxdepth: 6
6 |
7 |    modeling
8 |    hypotests
9 |    splot
10 |    utils
11 |
--------------------------------------------------------------------------------
/docs/api/modeling.rst:
--------------------------------------------------------------------------------
1 | hepstats.modeling
2 | -----------------
3 |
4 | .. currentmodule:: hepstats.modeling
5 |
6 | .. autosummary::
7 |
8 |    bayesian_blocks
9 |
--------------------------------------------------------------------------------
/docs/api/modules.rst:
--------------------------------------------------------------------------------
1 | hepstats
2 | ========
3 |
4 | .. toctree::
5 |    :maxdepth: 4
6 |
7 |    hepstats
8 |
--------------------------------------------------------------------------------
/docs/api/splot.rst:
--------------------------------------------------------------------------------
1 | hepstats.splot
2 | --------------
3 |
4 | hepstats.splot.sweights
5 | =======================
6 |
7 | .. currentmodule:: hepstats.splot.sweights
8 |
9 | .. autosummary::
10 |
11 |    compute_sweights
12 |    is_sum_of_extended_pdfs
13 |
14 | hepstats.splot.exceptions
15 | =========================
16 |
17 | .. currentmodule:: hepstats.splot.exceptions
18 |
19 | .. autosummary::
20 |
21 |    ModelNotFittedToData
22 |
--------------------------------------------------------------------------------
/docs/api/utils.rst:
--------------------------------------------------------------------------------
1 | hepstats.utils
2 | ==============
3 |
4 | hepstats.utils.fit
5 | ------------------
6 |
7 | hepstats.utils.fit.api_check
8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9 |
10 | Module for testing the validity of a fitting library with hepstats.
11 |
12 | A fitting library should provide six basic objects:
13 |
14 | * model / probability density function
15 | * parameters of the models
16 | * data
17 | * loss / likelihood function
18 | * minimizer
19 | * fitresult (optional)
20 |
21 | A function for each object is defined in this module, all should return ``True`` to work
22 | with hepstats.
23 |
24 | The **zfit** API is currently the standard fitting API in hepstats.
25 |
26 |
27 | .. currentmodule:: hepstats.utils.fit.api_check
28 |
29 | .. autosummary::
30 |
31 |    is_valid_parameter
32 |    is_valid_data
33 |    is_valid_pdf
34 |    is_valid_loss
35 |    is_valid_fitresult
36 |    is_valid_minimizer
37 |
38 | hepstats.utils.fit.diverse
39 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
40 |
41 | .. currentmodule:: hepstats.utils.fit.diverse
42 |
43 | .. autosummary::
44 |
45 |    get_value
46 |    eval_pdf
47 |    pll
48 |    array2dataset
49 |    get_nevents
50 |
51 | hepstats.utils.fit.sampling
52 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
53 |
54 | .. currentmodule:: hepstats.utils.fit.sampling
55 |
56 | .. autosummary::
57 |
58 |    base_sampler
59 |    base_sample
60 |
--------------------------------------------------------------------------------
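As a rough usage sketch of the API checks documented above: each validator takes the corresponding backend object and returns a boolean. The zfit objects named here (``total``, ``data``, ``loss``) are assumed to come from the README example; this snippet is illustrative, not repository code.

```python
# Minimal sketch: validate a fitting backend object by object.
# Assumes zfit objects `total` (pdf), `data` and `loss` as in the README example.
from zfit.minimize import Minuit

from hepstats.utils.fit.api_check import (
    is_valid_data,
    is_valid_loss,
    is_valid_minimizer,
    is_valid_pdf,
)

assert is_valid_pdf(total)  # model exposes the pdf interface hepstats expects
assert is_valid_data(data)
assert is_valid_loss(loss)
assert is_valid_minimizer(Minuit())
```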
/docs/bib/references.bib:
--------------------------------------------------------------------------------
1 | @article{Cowan:2010js,
2 | author = "Cowan, Glen and Cranmer, Kyle and Gross, Eilam and Vitells, Ofer",
3 | title = "{Asymptotic formulae for likelihood-based tests of new physics}",
4 | eprint = "1007.1727",
5 | archivePrefix = "arXiv",
6 | primaryClass = "physics.data-an",
7 | doi = "10.1140/epjc/s10052-011-1554-0",
8 | journal = "Eur. Phys. J. C",
9 | volume = "71",
10 | pages = "1554",
11 | year = "2011",
12 | note = "[Erratum: Eur.Phys.J.C 73, 2501 (2013)]"
13 | }
14 |
15 | @article{Pivk:2004ty,
16 | author = "Pivk, Muriel and Le Diberder, Francois R.",
17 | title = "{SPlot: A Statistical tool to unfold data distributions}",
18 | journal = "Nucl. Instrum. Meth.",
19 | volume = "A555",
20 | year = "2005",
21 | pages = "356-369",
22 | doi = "10.1016/j.nima.2005.08.106",
23 | eprint = "physics/0402083",
24 | archivePrefix = "arXiv",
25 | primaryClass = "physics.data-an",
26 | reportNumber = "LAL-04-07",
27 | SLACcitation = ""
28 | }
29 |
30 | @article{Scargle_2013,
31 | title = {STUDIES IN ASTRONOMICAL TIME SERIES ANALYSIS. VI. BAYESIAN BLOCK REPRESENTATIONS},
32 | volume = {764},
33 | ISSN = {1538-4357},
34 | url = {http://dx.doi.org/10.1088/0004-637X/764/2/167},
35 | DOI = {10.1088/0004-637x/764/2/167},
36 | number = {2},
37 | journal = {The Astrophysical Journal},
38 | publisher = {IOP Publishing},
39 | author = {Scargle, Jeffrey D. and Norris, Jay P. and Jackson, Brad and Chiang, James},
40 | year = {2013},
41 | month = {Feb},
42 | pages = {167}
43 | }
44 |
45 | @misc{BB_jakevdp,
46 | title = {{Dynamic Programming in Python: Bayesian Blocks}},
47 | howpublished = {http://jakevdp.github.io/blog/2012/09/12/dynamic-programming-in-python/},
48 | note = {Accessed: 2020-11-03}
49 | }
50 |
51 | @article{VanderPlas_2012,
52 | title = {Introduction to astroML: Machine learning for astrophysics},
53 | ISBN = {9781467346269},
54 | url = {http://dx.doi.org/10.1109/CIDU.2012.6382200},
55 | DOI = {10.1109/cidu.2012.6382200},
56 | journal = {2012 Conference on Intelligent Data Understanding},
57 | publisher = {IEEE},
58 | author = {VanderPlas, Jacob and Connolly, Andrew J. and Ivezic, Zeljko and Gray, Alex},
59 | year = {2012},
60 | month = {Oct}
61 | }
62 |
63 | @article{Pollack:2017srh,
64 | author = "Pollack, Brian and Bhattacharya, Saptaparna and Schmitt,
65 | Michael",
66 | title = "{Bayesian Block Histogramming for High Energy Physics}",
67 | year = "2017",
68 | eprint = "1708.00810",
69 | archivePrefix = "arXiv",
70 | primaryClass = "physics.data-an",
71 | reportNumber = "nuhep-exp/17-05, NUHEP-EXP-17-05",
72 | SLACcitation = "",
73 | journal = "",
74 | }
75 |
--------------------------------------------------------------------------------
/docs/bibliography.rst:
--------------------------------------------------------------------------------
1 | Bibliography
2 | ============
3 |
4 | .. bibliography:: bib/references.bib
5 |
7 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 | from __future__ import annotations
9 |
10 | from pathlib import Path
11 |
12 | from hepstats import __version__ as version
13 |
14 | project_dir = Path(__file__).parents[1]
15 |
16 |
17 | # -- Project information -----------------------------------------------------
18 |
19 | project = "hepstats"
20 | copyright = "2019-2025, The Scikit-HEP Administrators"
21 | author = "Matthieu Marinangeli"
22 |
23 | # The full version, including alpha/beta/rc tags
24 |
25 | release = version
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | "sphinx.ext.autodoc",
35 | "sphinx.ext.mathjax",
36 | "sphinx.ext.ifconfig",
37 | "sphinx.ext.githubpages",
38 | "sphinx.ext.viewcode",
39 | "sphinx.ext.napoleon",
40 | "sphinx.ext.autosummary",
41 | "sphinx.ext.inheritance_diagram",
42 | # "sphinxcontrib.bibtex",
43 | "matplotlib.sphinxext.plot_directive",
44 | "sphinx_copybutton",
45 | "sphinx_autodoc_typehints",
46 | ]
47 |
48 | bibtex_bibfiles = [
49 |     str(project_dir / "docs" / "bib" / "references.bib")
50 | ] # TODO: currently string, Path doesn't work: https://github.com/mcmtroffaes/sphinxcontrib-bibtex/issues/314
51 | # Add any paths that contain templates here, relative to this directory.
52 | templates_path = ["_templates"]
53 |
54 | # List of patterns, relative to source directory, that match files and
55 | # directories to ignore when looking for source files.
56 | # This pattern also affects html_static_path and html_extra_path.
57 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
58 |
59 | # The name of the Pygments (syntax highlighting) style to use.
60 | pygments_style = "sphinx"
61 |
62 | # -- Options for HTML output -------------------------------------------------
63 |
64 | # The theme to use for HTML and HTML Help pages. See the documentation for
65 | # a list of builtin themes.
66 | #
67 | html_theme = "pydata_sphinx_theme"
68 |
69 | # Add any paths that contain custom static files (such as style sheets) here,
70 | # relative to this directory. They are copied after the builtin static files,
71 | # so a file named "default.css" will overwrite the builtin "default.css".
72 |
73 | copybutton_prompt_text = ">>> "
74 |
75 | # -- autodoc settings ---------------------------------------------
76 |
77 | # also doc __init__ docstrings
78 | autoclass_content = "both"
79 | autodoc_member_order = "bysource"
80 | autodoc_default_options = {
81 | "show-inheritance": True,
82 | }
83 | autodoc_inherit_docstrings = False
84 |
85 | html_static_path = [] # "_static"
86 |
87 |
88 | html_logo = "images/logo.png"
89 |
90 | html_theme_options = {
91 | "github_url": "https://github.com/scikit-hep/hepstats",
92 | "use_edit_page_button": True,
93 | "search_bar_text": "Search hepstats...",
94 | "navigation_with_keys": True,
95 | "search_bar_position": "sidebar",
96 | }
97 |
98 | html_context = {
99 | "github_user": "scikit-hep",
100 | "github_repo": "hepstats",
101 | "github_version": "main",
102 | "doc_path": "docs",
103 | }
104 |
--------------------------------------------------------------------------------
/docs/getting_started/hypotests.rst:
--------------------------------------------------------------------------------
1 |
2 | hypotests
3 | #########
4 |
5 | This submodule provides tools to do statistical inference such as discovery tests and computations of
6 | upper limits or confidence intervals. ``hepstats`` needs a fitting backend to perform computations, such as
7 | `zfit <https://github.com/zfit/zfit>`_. Any fitting library can be used if its API is compatible with hepstats
8 | (see `api checks <https://github.com/scikit-hep/hepstats/blob/main/src/hepstats/utils/fit/api_check.py>`_).
9 |
10 | We give here a simple example of an upper limit calculation of the yield of a Gaussian signal with known mean
11 | and sigma over an exponential background. The fitting backend used is the `zfit <https://github.com/zfit/zfit>`_
12 | package. If you are unfamiliar with zfit you can have a look at the `zfit documentation <https://zfit.readthedocs.io/>`_.
13 |
14 | First we import what's necessary from zfit, such as the **ExtendedUnbinnedNLL** class, as we want to construct
15 | an extended unbinned likelihood. **Minuit** is also imported; it is a zfit wrapper of the minuit minimizer
16 | from `iminuit <https://github.com/scikit-hep/iminuit>`_.
17 |
18 | .. code-block:: pycon
19 |
20 |    >>> import zfit
21 |    >>> from zfit.loss import ExtendedUnbinnedNLL
22 |    >>> from zfit.minimize import Minuit
23 |    >>> import numpy as np
24 |
25 | Then we construct the data sample, which consists of 300 points drawn from an exponential distribution with slope -2,
26 | constituting the background, and 10 points drawn from a Gaussian distribution of mean 1.2 and width 0.1, constituting the signal. The
27 | fit range is defined between 0.1 and 3.0, meaning that some points of the background distribution are filtered
28 | out. The data, which is a numpy array, is then transformed into a zfit **Data** object.
29 |
30 | .. code-block:: pycon
31 |
32 |    >>> bounds = (0.1, 3.0)
33 |    >>> obs = zfit.Space('x', limits=bounds)
34 |    >>> bkg = np.random.exponential(1/2, 300)
35 |    >>> peak = np.random.normal(1.2, 0.1, 10)
36 |    >>> data = np.concatenate((bkg, peak))
37 |    >>> data = data[(data > bounds[0]) & (data < bounds[1])]
38 |    >>> data = zfit.Data.from_numpy(obs=obs, array=data)
39 |
40 | Now we build the model. For the background, an exponential pdf is used with **lambda_**, the slope of the exponential,
41 | as a free parameter. For the signal, a Gaussian pdf is used with mean and width fixed to 1.2 and 0.1 respectively.
42 | The background and signal pdfs are extended using the yield parameters **Nbkg** and **Nsig** respectively, which
43 | are free. The extended negative log-likelihood is then constructed from the sum of the background and signal models
44 | and the data.
45 |
46 | .. code-block:: pycon
47 |
48 |    >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
49 |    >>> Nsig = zfit.Parameter("Nsig", 1., -20., 500)
50 |    >>> Nbkg = zfit.Parameter("Nbkg", 250, 0., 500)
51 |    >>> signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)
52 |    >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
53 |    >>> total = zfit.pdf.SumPDF([signal, background])
54 |    >>> nll = ExtendedUnbinnedNLL(model=total, data=data)
55 |
56 | The background-plus-signal model can then be fitted to the data.
57 |
58 | .. code-block:: pycon
59 |
60 |    >>> # Instantiate a minuit minimizer
61 |    >>> minimizer = Minuit()
62 |    >>> # minimisation of the loss function
63 |    >>> minimum = minimizer.minimize(loss=nll)
64 |    >>> minimum.hesse()
65 |    >>> print(minimum)
66 |
67 |    +---------+-------------+------------------+---------+-------------+
68 |    | valid   | converged   | param at limit   | edm     | min value   |
69 |    +=========+=============+==================+=========+=============+
70 |    | True    | True        | False            | 4.9e-05 | -1077       |
71 |    +---------+-------------+------------------+---------+-------------+
72 |
73 |    Parameters
74 |
75 |    +--------+--------+---------------+-----------+
76 |    | name   | value  | hesse         | at limit  |
77 |    +========+========+===============+===========+
78 |    | Nsig   | 4.518  | +/- 5.8       | False     |
79 |    +--------+--------+---------------+-----------+
80 |    | Nbkg   | 251.6  | +/- 17        | False     |
81 |    +--------+--------+---------------+-----------+
82 |    | lambda | -1.93  | +/- 0.14      | False     |
83 |    +--------+--------+---------------+-----------+
84 |
85 | So the fitted number of signal candidates is 4.518 +/- 5.8, which is consistent with zero. We can then compute an
86 | upper limit on this number, which should be approximately equal to 4.5 + 2 * 5.8 ≈ 16.
87 | First we import from the :py:mod:`~hepstats.hypotests.calculators` submodule of :py:mod:`~hepstats.hypotests`
88 | the :py:class:`~hepstats.hypotests.calculators.asymptotic_calculator.AsymptoticCalculator` which takes as input
89 | the loss function and minimizer.
90 |
91 | .. code-block:: pycon
92 |
93 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator
94 | >>> calculator = AsymptoticCalculator(nll, Minuit(), asimov_bins=100)
95 |
96 | The :py:class:`~hepstats.hypotests.parameters.POI` and :py:class:`~hepstats.hypotests.parameters.POIarray`
97 | classes are also imported; POI stands for parameter of interest. In our case the POI is **Nsig**. To compute
98 | an upper limit you need to explicitly specify the background plus signal hypothesis (null) and the
99 | background-only hypothesis (alternative); in hepstats this is done using :py:class:`~hepstats.hypotests.parameters.POI` / :py:class:`~hepstats.hypotests.parameters.POIarray`:
100 |
101 | .. code-block:: pycon
102 |
103 | >>> from hepstats.hypotests.parameters import POI, POIarray
104 | >>>
105 | >>> # background only
106 | >>> poialt = POI(Nsig, 0)
107 | >>> # background + signal
108 | >>> poinull = POIarray(Nsig, np.linspace(0.0, 25, 20))
109 |
110 | A :py:class:`~hepstats.hypotests.parameters.POI` takes as input the parameter **Nsig** and a single value for a
111 | given hypothesis; for **poialt** it is 0 because this is the background-only hypothesis. Similarly, a :py:class:`~hepstats.hypotests.parameters.POIarray`
112 | takes as input the parameter **Nsig** and an array of values to scan for **Nsig**, from 0 to 25. A range is needed
113 | because the **calculator** instance will compute a *p-value* for each value in **poinull**; the upper limit at
114 | confidence level :math:`1 - \alpha` is defined as the value of **Nsig** for which the *p-value* is equal
115 | to :math:`\alpha` (here :math:`\alpha = 0.05` for a 95% confidence level).
116 |
117 | We can now create an :py:class:`~hepstats.hypotests.core.upperlimit.UpperLimit` instance which takes as input
118 | the **calculator**, **poinull** and **poialt**. The :py:class:`~hepstats.hypotests.core.upperlimit.UpperLimit`
119 | instance will ask the **calculator** to compute the *p-values* for each value in **poinull** and eventually find
120 | the value of the upper limit on **Nsig** (if the upper limit is in the range of the **poinull** values). Below
121 | is an example of how to compute a CLs upper limit at 95% confidence level.
122 |
123 | .. code-block:: pycon
124 |
125 | >>> from hepstats.hypotests import UpperLimit
126 | >>> ul = UpperLimit(calculator, poinull, poialt)
127 | >>> ul.upperlimit(alpha=0.05, CLs=True)
128 |
129 | Observed upper limit: Nsig = 15.725784747406346
130 | Expected upper limit: Nsig = 11.927442041887158
131 | Expected upper limit +1 sigma: Nsig = 16.596396280677116
132 | Expected upper limit -1 sigma: Nsig = 8.592750403611896
133 | Expected upper limit +2 sigma: Nsig = 22.24864429383046
134 | Expected upper limit -2 sigma: Nsig = 6.400549971360598
135 |
136 | The result contains the observed and expected limits. The observed limit is based on the observation
137 | of 4.518 +/- 5.8 signal candidates in the data, while the expected limit is the limit under the background-only hypothesis.
138 | A graphical representation of how the upper limit is computed is shown in the following figure.
139 |
140 | .. image:: https://raw.githubusercontent.com/scikit-hep/hepstats/main/notebooks/hypotests/asy_ul.png
141 |
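 | A similar figure can be reproduced directly from the computed scan. Below is a minimal sketch using
 | matplotlib and the *p-values* returned by **ul**; the full plotting helper, including the
 | :math:`\pm 1 \sigma` and :math:`\pm 2 \sigma` expected bands, is in ``notebooks/hypotests/utils.py``.
 |
 | .. code-block:: pycon
 |
 |     >>> import matplotlib.pyplot as plt
 |     >>> pvalues = ul.pvalues(CLs=True)
 |     >>> plt.plot(poinull.values, pvalues["cls"], "k.-", label="Observed CL$_{s}$")
 |     >>> plt.plot(poinull.values, pvalues["expected"], "k--", label="Expected CL$_{s}$")
 |     >>> plt.axhline(0.05, color="r", label=r"$\alpha = 0.05$")
 |     >>> plt.xlabel("Nsig")
 |     >>> plt.ylabel("p-value")
 |     >>> plt.legend()
 |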
--------------------------------------------------------------------------------
/docs/getting_started/index.rst:
--------------------------------------------------------------------------------
1 | **********
2 | Quickstart
3 | **********
4 |
5 | The ``hepstats`` module includes the modeling, hypotests and splot submodules. This is a quick user
6 | guide to each submodule:
7 |
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 | modeling
13 | hypotests
14 | splot
15 |
16 | The `binder <https://mybinder.org/v2/gh/scikit-hep/hepstats/main>`_ examples are also a good way to get started.
17 |
--------------------------------------------------------------------------------
/docs/getting_started/modeling.rst:
--------------------------------------------------------------------------------
1 | modeling
2 | ########
3 |
4 | The modeling submodule includes the `Bayesian Blocks algorithm <https://arxiv.org/abs/1207.5578>`_ that
5 | can be used to improve the binning of histograms. The visual improvement can be dramatic, and more importantly,
6 | this algorithm produces histograms that accurately represent the underlying distribution while being robust
7 | to statistical fluctuations. Here is a small example of the algorithm applied on Laplacian sampled data,
8 | compared to a histogram of this sample with a fine binning.
9 |
10 | .. code-block:: pycon
11 |
12 | >>> import numpy as np
13 | >>> import matplotlib.pyplot as plt
14 | >>> from hepstats.modeling import bayesian_blocks
15 |
16 | >>> # sample data from a Laplacian distribution
17 | >>> data = np.random.laplace(size=10000)
18 | >>> blocks = bayesian_blocks(data)
19 |
20 | >>> # plot the histograms of the data with 1000 equally spaced bins and the bins from the
21 | >>> # bayesian_blocks function
22 | >>> plt.hist(data, bins=1000, label='Fine Binning', density=True, alpha=0.6)
23 | >>> plt.hist(data, bins=blocks, label='Bayesian Blocks', histtype='step', density=True,
24 | ...          linewidth=2)
25 | >>> plt.legend(loc=2)
26 |
27 | .. image:: https://raw.githubusercontent.com/scikit-hep/hepstats/main/notebooks/modeling/bayesian_blocks_example.png
28 |
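 | The granularity of the binning can be tuned with the ``p0`` argument of
 | :py:func:`~hepstats.modeling.bayesian_blocks`, which sets how strictly new bin edges are accepted:
 | a small ``p0`` is more robust to statistical fluctuations but gives a coarser binning, while a large
 | ``p0`` gives a finer binning at the risk of isolating spurious fluctuations. A short sketch, with
 | values taken from the Bayesian Blocks demo notebook:
 |
 | .. code-block:: pycon
 |
 |     >>> coarse_blocks = bayesian_blocks(data, p0=1e-3)  # robust, coarse binning
 |     >>> fine_blocks = bayesian_blocks(data, p0=0.5)     # finer binning
 |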
--------------------------------------------------------------------------------
/docs/getting_started/splot.rst:
--------------------------------------------------------------------------------
1 | splot
2 | #####
3 |
4 | A full example using the **sPlot** algorithm can be found in the `splot_example notebook <https://github.com/scikit-hep/hepstats/blob/main/notebooks/splots/splot_example.ipynb>`_. **sWeights** for the different components of a data sample, modeled with a sum of extended probability density functions, are derived using the ``compute_sweights`` function:
5 |
6 | .. code-block:: pycon
7 |
8 | >>> from hepstats.splot import compute_sweights
9 | >>> # using the same signal + background model as in the hypotests example, for illustration
10 | >>> sweights = compute_sweights(zfit.pdf.SumPDF([signal, background]), data)
11 | >>> bkg_sweights = sweights[Nbkg]
12 | >>> sig_sweights = sweights[Nsig]
13 |
14 |
15 | The model needs to be fitted to the data before the **sWeights** can be computed; if it is not, an error is raised.
16 |
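 | The returned ``sweights`` object is a dictionary mapping each yield parameter to an array of
 | per-event weights, which can be applied to any control variable to extract the corresponding
 | component. A minimal sketch, assuming a hypothetical array ``t`` (e.g. a lifetime) with one entry
 | per event in ``data``:
 |
 | .. code-block:: pycon
 |
 |     >>> import matplotlib.pyplot as plt
 |     >>> plt.hist(t, bins=50, weights=sig_sweights, histtype='step', label='signal')
 |     >>> plt.hist(t, bins=50, weights=bkg_sweights, histtype='step', label='background')
 |     >>> plt.legend()
 |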
--------------------------------------------------------------------------------
/docs/images/logo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo.pdf
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/images/logo.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo.xcf
--------------------------------------------------------------------------------
/docs/images/logo_medium.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo_medium.png
--------------------------------------------------------------------------------
/docs/images/logo_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo_small.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 |
2 |
3 | |hepstats_logo|
4 |
5 | ==============================
6 | Statistics tools and utilities
7 | ==============================
8 |
9 |
10 | The hepstats package is a Python library providing statistics tools and utilities for particle physics.
11 | In particular hepstats can work with a fitting library, such as `zfit <https://github.com/zfit/zfit>`_, to build
12 | likelihood functions that hepstats will use to perform statistical inferences. hepstats offers a Pythonic
13 | alternative to the RooStats library from the `ROOT <https://root.cern>`_ data analysis package, as well as
14 | other tools.
15 |
16 | You can install hepstats from PyPI_ with pip:
17 |
18 | .. code-block:: console
19 |
20 | $ pip install hepstats
21 |
22 |
23 | .. toctree::
24 | :maxdepth: 2
25 |
26 | getting_started/index
27 | whats_new
28 | api/index
29 | bibliography
30 |
31 |
32 | Indices and tables
33 | ==================
34 |
35 | * :ref:`genindex`
36 | * :ref:`modindex`
37 | * :ref:`search`
38 |
39 | .. |hepstats_logo| image:: images/logo_small.png
40 | :target: https://github.com/scikit-hep/hepstats
41 | :alt: hepstats logo
42 |
43 |
44 | .. _PyPI: https://pypi.org/project/hepstats
45 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/make_docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # script has to be executed inside folder `docs`
3 | # get current directory name
4 | pushd `dirname $0` > /dev/null
5 | MAKE_DOCS_PATH="$( cd "$(dirname "$0")" ; pwd -P )"
6 | #MAKE_DOCS_PATH=$(pwd -P)
7 | popd > /dev/null
8 |
9 | # generate the ReST files
10 | echo "debug"
11 | echo ${MAKE_DOCS_PATH}/../src/hepstats
12 | #ls ${MAKE_DOCS_PATH}
13 | SPHINX_APIDOC_OPTIONS=members,undoc-members,show-inheritance,inherited-members sphinx-apidoc -e -o ${MAKE_DOCS_PATH}/api ${MAKE_DOCS_PATH}/../src/hepstats -fMeT && \
14 | make -C ${MAKE_DOCS_PATH} clean && make -C ${MAKE_DOCS_PATH} html -j8 && \
15 | echo "Documentation successfully built!" || echo "FAILED to build Documentation"
16 |
--------------------------------------------------------------------------------
/docs/whats_new.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | What's new
3 | ==========
4 |
5 | .. include:: ../CHANGELOG.rst
6 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: hepstats-demo
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - numpy
7 | - scipy
8 | - iminuit
9 | - tensorflow-probability
10 | - zfit
11 | - asdf
12 | - matplotlib
13 | - pip:
14 | - .
15 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks
2 |
3 | In this directory are stored all the demo notebooks, which you can either run with [binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) or download as Jupyter notebook `ipynb` files.
4 |
5 | The notebooks are organised by `submodule`:
6 | - `hypotests`:
7 | * discovery_asy_zfit.ipynb: computes the significance of a Gaussian signal over an exponential background, fitted with `zfit`, using the `AsymptoticCalculator`.
8 | * discovery_freq_zfit.ipynb: computes the significance of a Gaussian signal over an exponential background, fitted with `zfit`, using the `FrequentistCalculator`.
9 | * upperlimit_asy_zfit.ipynb: computes the upper limit on the yield of a Gaussian signal over an exponential background, fitted with `zfit`, using the `AsymptoticCalculator`.
10 | * upperlimit_freq_zfit.ipynb: computes the upper limit on the yield of a Gaussian signal over an exponential background, fitted with `zfit`, using the `FrequentistCalculator`.
11 | * confidenceinterval_asy_zfit.ipynb: computes the 68% confidence level interval on the mean of a Gaussian signal over an exponential background, fitted with `zfit`, using the `AsymptoticCalculator`.
12 | * confidenceinterval_freq_zfit.ipynb: computes the 68% confidence level interval on the mean of a Gaussian signal over an exponential background, fitted with `zfit`, using the `FrequentistCalculator`.
13 | * FC_interval_asy.ipynb: computes the 90% confidence level Feldman and Cousins interval on the measured mean 𝑥 of a Gaussian for several values of the true mean μ, using the `AsymptoticCalculator`.
14 | * FC_interval_freq.ipynb: computes the 90% confidence level Feldman and Cousins interval on the measured mean 𝑥 of a Gaussian for several values of the true mean μ, using the `FrequentistCalculator`.
15 | * counting.ipynb: shows examples of inferences with `hepstats` using a counting analysis instead of a shape analysis.
16 |
17 | - `modeling`
18 | * bayesian_blocks.ipynb: presentation of the Bayesian Blocks algorithm and comparison with other binning methods.
19 |
20 | - `splots`
21 | * splot_example.ipynb: example of `sPlot` on fake mass and momentum distributions for some signal and some background. The `sWeights` are derived using a mass fit of a Gaussian signal over an exponential background with `zfit`. The `sWeights` are applied to the momentum distribution to retrieve the signal distribution. This example is a reproduction of the example in [hep_ml](https://github.com/arogozhnikov/hep_ml/blob/main/notebooks/sPlot.ipynb) using `hepstats`.
22 | * splot_example_2.ipynb: example of `sPlot` on fake mass and lifetime distributions for some signal and some background. The `sWeights` are derived using a mass fit of a Gaussian signal over an exponential background with `zfit`. The `sWeights` are applied to the lifetime distribution to retrieve the signal distribution. This example is a reproduction of the example in the [LHCb statistics guidelines](https://gitlab.cern.ch/lhcb/statistics-guidelines/-/blob/add_sweights_item/resources/appendix_f4.ipynb) using `hepstats`.
23 |
--------------------------------------------------------------------------------
/notebooks/hypotests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/__init__.py
--------------------------------------------------------------------------------
/notebooks/hypotests/asy_ci.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/asy_ci.png
--------------------------------------------------------------------------------
/notebooks/hypotests/asy_ul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/asy_ul.png
--------------------------------------------------------------------------------
/notebooks/hypotests/confidenceinterval_asy_zfit.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "pycharm": {
7 | "name": "#%% md\n"
8 | }
9 | },
10 | "source": [
11 | "# Example of confidence interval computation"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {
18 | "pycharm": {
19 | "name": "#%%\n"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "from __future__ import annotations\n",
25 | "\n",
26 | "import matplotlib.pyplot as plt\n",
27 | "import numpy as np\n",
28 | "import zfit\n",
29 | "from utils import one_minus_cl_plot, plotfitresult, pltdist\n",
30 | "from zfit.loss import ExtendedUnbinnedNLL\n",
31 | "from zfit.minimize import Minuit\n",
32 | "\n",
33 | "from hepstats.hypotests import ConfidenceInterval\n",
34 | "from hepstats.hypotests.calculators import AsymptoticCalculator\n",
35 | "from hepstats.hypotests.parameters import POIarray"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "pycharm": {
43 | "name": "#%%\n"
44 | }
45 | },
46 | "outputs": [],
47 | "source": [
48 | "plt.rcParams[\"figure.figsize\"] = (9, 8)\n",
49 | "plt.rcParams[\"font.size\"] = 16"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "pycharm": {
56 | "name": "#%% md\n"
57 | }
58 | },
59 | "source": [
60 | "### Fit of a Gaussian signal over an exponential background:"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {
67 | "pycharm": {
68 | "name": "#%%\n"
69 | }
70 | },
71 | "outputs": [],
72 | "source": [
73 | "bounds = (0.1, 3.0)\n",
74 | "\n",
75 | "# Data and signal\n",
76 | "\n",
77 | "np.random.seed(0)\n",
78 | "tau = -2.0\n",
79 | "beta = -1 / tau\n",
80 | "data = np.random.exponential(beta, 300)\n",
81 | "peak = np.random.normal(1.2, 0.1, 80)\n",
82 | "data = np.concatenate((data, peak))\n",
83 | "data = data[(data > bounds[0]) & (data < bounds[1])]\n",
84 | "\n",
85 | "pltdist(data, bins=80, bounds=bounds)"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {
92 | "pycharm": {
93 | "name": "#%%\n"
94 | }
95 | },
96 | "outputs": [],
97 | "source": [
98 | "obs = zfit.Space(\"x\", limits=bounds)"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {
105 | "pycharm": {
106 | "name": "#%%\n"
107 | }
108 | },
109 | "outputs": [],
110 | "source": [
111 | "mean = zfit.Parameter(\"mean\", 1.2, 0.5, 2.0)\n",
112 | "sigma = zfit.Parameter(\"sigma\", 0.1, 0.02, 0.2)\n",
113 | "lambda_ = zfit.Parameter(\"lambda\", -2.0, -4.0, -1.0)\n",
114 | "Nsig = zfit.Parameter(\"Nsig\", 20.0, -20.0, len(data))\n",
115 | "Nbkg = zfit.Parameter(\"Nbkg\", len(data), 0.0, len(data) * 1.1)"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "pycharm": {
123 | "name": "#%%\n"
124 | }
125 | },
126 | "outputs": [],
127 | "source": [
128 | "signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)\n",
129 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)\n",
130 | "tot_model = zfit.pdf.SumPDF([signal, background])"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {
137 | "pycharm": {
138 | "name": "#%%\n"
139 | }
140 | },
141 | "outputs": [],
142 | "source": [
143 | "# Create the negative log likelihood\n",
144 | "data_ = zfit.data.Data.from_numpy(obs=obs, array=data)\n",
145 | "nll = ExtendedUnbinnedNLL(model=tot_model, data=data_)"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": null,
151 | "metadata": {
152 | "pycharm": {
153 | "name": "#%%\n"
154 | }
155 | },
156 | "outputs": [],
157 | "source": [
158 | "# Instantiate a minuit minimizer\n",
159 | "minimizer = Minuit()"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "pycharm": {
167 | "name": "#%%\n"
168 | }
169 | },
170 | "outputs": [],
171 | "source": [
172 | "# minimisation of the loss function\n",
173 | "minimum = minimizer.minimize(loss=nll)\n",
174 | "minimum.hesse()"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {
181 | "pycharm": {
182 | "name": "#%%\n"
183 | }
184 | },
185 | "outputs": [],
186 | "source": [
187 | "nbins = 80\n",
188 | "pltdist(data, nbins, bounds)\n",
189 | "plotfitresult(tot_model, bounds, nbins)\n",
190 | "plt.xlabel(\"m [GeV/c$^2$]\")\n",
191 | "plt.ylabel(\"number of events\")"
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {
197 | "pycharm": {
198 | "name": "#%% md\n"
199 | }
200 | },
201 | "source": [
202 | "### Confidence interval\n",
203 | "\n",
204 | "We want to compute the confidence interval of the mean of the Gaussian at 68% confidence level."
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {
211 | "pycharm": {
212 | "name": "#%%\n"
213 | }
214 | },
215 | "outputs": [],
216 | "source": [
217 | "# instantation of the calculator\n",
218 | "calculator = AsymptoticCalculator(nll, minimizer)\n",
219 | "calculator.bestfit = minimum # optionnal"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {
226 | "pycharm": {
227 | "name": "#%%\n"
228 | }
229 | },
230 | "outputs": [],
231 | "source": [
232 | "# parameter of interest of the null hypothesis\n",
233 | "poinull = POIarray(mean, np.linspace(1.15, 1.26, 100))"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "pycharm": {
241 | "name": "#%%\n"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "# instantation of the discovery test\n",
247 | "ci = ConfidenceInterval(calculator, poinull)"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {
254 | "pycharm": {
255 | "name": "#%%\n"
256 | }
257 | },
258 | "outputs": [],
259 | "source": [
260 | "ci.interval();"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {
267 | "pycharm": {
268 | "name": "#%%\n"
269 | }
270 | },
271 | "outputs": [],
272 | "source": [
273 | "f = plt.figure(figsize=(9, 8))\n",
274 | "one_minus_cl_plot(poinull.values, ci.pvalues())\n",
275 | "plt.xlabel(\"mean\")\n",
276 | "f.savefig(\"asy_ci.png\")"
277 | ]
278 | }
279 | ],
280 | "metadata": {
281 | "kernelspec": {
282 | "display_name": "Python 3",
283 | "language": "python",
284 | "name": "python3"
285 | },
286 | "language_info": {
287 | "codemirror_mode": {
288 | "name": "ipython",
289 | "version": 3
290 | },
291 | "file_extension": ".py",
292 | "mimetype": "text/x-python",
293 | "name": "python",
294 | "nbconvert_exporter": "python",
295 | "pygments_lexer": "ipython3",
296 | "version": "3.7.7"
297 | }
298 | },
299 | "nbformat": 4,
300 | "nbformat_minor": 4
301 | }
302 |
--------------------------------------------------------------------------------
/notebooks/hypotests/confidenceinterval_freq_zfit.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "pycharm": {
7 | "name": "#%% md\n"
8 | }
9 | },
10 | "source": [
11 | "# Example of confidence interval computation"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {
18 | "pycharm": {
19 | "name": "#%%\n"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "from __future__ import annotations\n",
25 | "\n",
26 | "import matplotlib.pyplot as plt\n",
27 | "import numpy as np\n",
28 | "import zfit\n",
29 | "from utils import one_minus_cl_plot, plotfitresult, pltdist\n",
30 | "from zfit.loss import ExtendedUnbinnedNLL\n",
31 | "from zfit.minimize import Minuit\n",
32 | "\n",
33 | "from hepstats.hypotests import ConfidenceInterval\n",
34 | "from hepstats.hypotests.calculators import FrequentistCalculator\n",
35 | "from hepstats.hypotests.parameters import POIarray"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "pycharm": {
42 | "name": "#%% md\n"
43 | }
44 | },
45 | "source": [
46 | "### Fit of a Gaussian signal over an exponential background:"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "pycharm": {
54 | "name": "#%%\n"
55 | }
56 | },
57 | "outputs": [],
58 | "source": [
59 | "bounds = (0.1, 3.0)\n",
60 | "\n",
61 | "# Data and signal\n",
62 | "\n",
63 | "np.random.seed(0)\n",
64 | "tau = -2.0\n",
65 | "beta = -1 / tau\n",
66 | "data = np.random.exponential(beta, 300)\n",
67 | "peak = np.random.normal(1.2, 0.1, 80)\n",
68 | "data = np.concatenate((data, peak))\n",
69 | "data = data[(data > bounds[0]) & (data < bounds[1])]\n",
70 | "\n",
71 | "plt.hist(data, bins=100, histtype=\"step\");"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {
78 | "pycharm": {
79 | "name": "#%%\n"
80 | }
81 | },
82 | "outputs": [],
83 | "source": [
84 | "obs = zfit.Space(\"x\", limits=bounds)"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {
91 | "pycharm": {
92 | "name": "#%%\n"
93 | }
94 | },
95 | "outputs": [],
96 | "source": [
97 | "mean = zfit.Parameter(\"mean\", 1.2, 0.5, 2.0)\n",
98 | "sigma = zfit.Parameter(\"sigma\", 0.1, 0.02, 0.2)\n",
99 | "lambda_ = zfit.Parameter(\"lambda\", -2.0, -4.0, -1.0)\n",
100 | "Nsig = zfit.Parameter(\"Nsig\", 20.0, -20.0, len(data))\n",
101 | "Nbkg = zfit.Parameter(\"Nbkg\", len(data), 0.0, len(data) * 1.1)"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "pycharm": {
109 | "name": "#%%\n"
110 | }
111 | },
112 | "outputs": [],
113 | "source": [
114 | "signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)\n",
115 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)\n",
116 | "tot_model = zfit.pdf.SumPDF([signal, background])"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {
123 | "pycharm": {
124 | "name": "#%%\n"
125 | }
126 | },
127 | "outputs": [],
128 | "source": [
129 | "# Create the negative log likelihood\n",
130 | "data_ = zfit.data.Data.from_numpy(obs=obs, array=data)\n",
131 | "nll = ExtendedUnbinnedNLL(model=tot_model, data=data_)"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "pycharm": {
139 | "name": "#%%\n"
140 | }
141 | },
142 | "outputs": [],
143 | "source": [
144 | "# Instantiate a minuit minimizer\n",
145 | "minimizer = Minuit()"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": null,
151 | "metadata": {
152 | "pycharm": {
153 | "name": "#%%\n"
154 | }
155 | },
156 | "outputs": [],
157 | "source": [
158 | "# minimisation of the loss function\n",
159 | "minimum = minimizer.minimize(loss=nll)"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "pycharm": {
167 | "name": "#%%\n"
168 | }
169 | },
170 | "outputs": [],
171 | "source": [
172 | "nbins = 80\n",
173 | "pltdist(data, nbins, bounds)\n",
174 | "plotfitresult(tot_model, bounds, nbins)\n",
175 | "plt.xlabel(\"m [GeV/c$^2$]\")\n",
176 | "plt.ylabel(\"number of events\")"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {
182 | "pycharm": {
183 | "name": "#%% md\n"
184 | }
185 | },
186 | "source": [
187 | "### Confidence interval\n",
188 | "\n",
189 | "We want to compute the confidence interval of the mean of the Gaussian at 68% confidence level."
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {
196 | "pycharm": {
197 | "name": "#%%\n"
198 | }
199 | },
200 | "outputs": [],
201 | "source": [
202 | "# instantation of the calculator\n",
203 | "# calculator = FrequentistCalculator(nll, minimizer, ntoysnull=100)\n",
204 | "calculator = FrequentistCalculator.from_yaml(\"toys/ci_freq_zfit_toys.yml\", nll, minimizer, ntoysnull=2000)\n",
205 | "calculator.bestfit = minimum # optionnal"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {
212 | "pycharm": {
213 | "name": "#%%\n"
214 | }
215 | },
216 | "outputs": [],
217 | "source": [
218 | "# parameter of interest of the null hypothesis\n",
219 | "poinull = POIarray(mean, np.linspace(1.15, 1.26, 50))"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {
226 | "pycharm": {
227 | "name": "#%%\n"
228 | }
229 | },
230 | "outputs": [],
231 | "source": [
232 | "# instantation of the discovery test\n",
233 | "ci = ConfidenceInterval(calculator, poinull)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "pycharm": {
241 | "name": "#%%\n"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "ci.interval();"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {
253 | "pycharm": {
254 | "name": "#%%\n"
255 | }
256 | },
257 | "outputs": [],
258 | "source": [
259 | "f = plt.figure(figsize=(9, 8))\n",
260 | "one_minus_cl_plot(poinull.values, ci.pvalues())\n",
261 | "plt.xlabel(\"mean\")"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {
268 | "pycharm": {
269 | "name": "#%%\n"
270 | }
271 | },
272 | "outputs": [],
273 | "source": [
274 | "calculator.to_yaml(\"toys/ci_freq_zfit_toys.yml\")"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {
281 | "pycharm": {
282 | "name": "#%%\n"
283 | }
284 | },
285 | "outputs": [],
286 | "source": []
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {
292 | "pycharm": {
293 | "name": "#%%\n"
294 | }
295 | },
296 | "outputs": [],
297 | "source": []
298 | }
299 | ],
300 | "metadata": {
301 | "kernelspec": {
302 | "display_name": "Python 3",
303 | "language": "python",
304 | "name": "python3"
305 | },
306 | "language_info": {
307 | "codemirror_mode": {
308 | "name": "ipython",
309 | "version": 3
310 | },
311 | "file_extension": ".py",
312 | "mimetype": "text/x-python",
313 | "name": "python",
314 | "nbconvert_exporter": "python",
315 | "pygments_lexer": "ipython3",
316 | "version": "3.7.7"
317 | }
318 | },
319 | "nbformat": 4,
320 | "nbformat_minor": 4
321 | }
322 |
--------------------------------------------------------------------------------
/notebooks/hypotests/discovery_asy_zfit.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "pycharm": {
7 | "name": "#%% md\n"
8 | }
9 | },
10 | "source": [
11 | "# Discovery test example"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {
18 | "pycharm": {
19 | "name": "#%%\n"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "from __future__ import annotations\n",
25 | "\n",
26 | "import matplotlib.pyplot as plt\n",
27 | "import numpy as np\n",
28 | "import zfit\n",
29 | "from utils import plotfitresult, pltdist\n",
30 | "from zfit.loss import ExtendedUnbinnedNLL\n",
31 | "from zfit.minimize import Minuit\n",
32 | "\n",
33 | "from hepstats.hypotests import Discovery\n",
34 | "from hepstats.hypotests.calculators import AsymptoticCalculator\n",
35 | "from hepstats.hypotests.parameters import POI"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "pycharm": {
43 | "name": "#%%\n"
44 | }
45 | },
46 | "outputs": [],
47 | "source": [
48 | "plt.rcParams[\"figure.figsize\"] = (8, 6)\n",
49 | "plt.rcParams[\"font.size\"] = 16"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "pycharm": {
56 | "name": "#%% md\n"
57 | }
58 | },
59 | "source": [
60 | "### Fit of a Gaussian signal over an exponential background:"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {
67 | "pycharm": {
68 | "name": "#%%\n"
69 | }
70 | },
71 | "outputs": [],
72 | "source": [
73 | "bounds = (0.1, 3.0)\n",
74 | "\n",
75 | "# Data and signal\n",
76 | "\n",
77 | "np.random.seed(0)\n",
78 | "tau = -2.0\n",
79 | "beta = -1 / tau\n",
80 | "data = np.random.exponential(beta, 300)\n",
81 | "peak = np.random.normal(1.2, 0.1, 25)\n",
82 | "data = np.concatenate((data, peak))\n",
83 | "data = data[(data > bounds[0]) & (data < bounds[1])]"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "pycharm": {
91 | "name": "#%%\n"
92 | }
93 | },
94 | "outputs": [],
95 | "source": [
96 | "pltdist(data, bins=80, bounds=bounds)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {
103 | "pycharm": {
104 | "name": "#%%\n"
105 | }
106 | },
107 | "outputs": [],
108 | "source": [
109 | "obs = zfit.Space(\"x\", limits=bounds)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {
116 | "pycharm": {
117 | "name": "#%%\n"
118 | }
119 | },
120 | "outputs": [],
121 | "source": [
122 | "lambda_ = zfit.Parameter(\"lambda\", -2.0, -4.0, -1.0)\n",
123 | "Nsig = zfit.Parameter(\"Nsig\", 20.0, -20.0, len(data))\n",
124 | "Nbkg = zfit.Parameter(\"Nbkg\", len(data), 0.0, len(data) * 1.1)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {
131 | "pycharm": {
132 | "name": "#%%\n"
133 | }
134 | },
135 | "outputs": [],
136 | "source": [
137 | "signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)\n",
138 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)\n",
139 | "tot_model = zfit.pdf.SumPDF([signal, background])"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": null,
145 | "metadata": {
146 | "pycharm": {
147 | "name": "#%%\n"
148 | }
149 | },
150 | "outputs": [],
151 | "source": [
152 | "# Create the negative log likelihood\n",
153 | "data_ = zfit.data.Data.from_numpy(obs=obs, array=data)\n",
154 | "nll = ExtendedUnbinnedNLL(model=[tot_model], data=[data_])"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "pycharm": {
162 | "name": "#%%\n"
163 | }
164 | },
165 | "outputs": [],
166 | "source": [
167 | "# Instantiate a minuit minimizer\n",
168 | "minimizer = Minuit()"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {
175 | "pycharm": {
176 | "name": "#%%\n"
177 | }
178 | },
179 | "outputs": [],
180 | "source": [
181 | "# minimisation of the loss function\n",
182 | "minimum = minimizer.minimize(loss=nll)\n",
183 | "minimum.hesse()"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {
190 | "pycharm": {
191 | "name": "#%%\n"
192 | }
193 | },
194 | "outputs": [],
195 | "source": [
196 | "nbins = 80\n",
197 | "pltdist(data, nbins, bounds)\n",
198 | "plotfitresult(tot_model, bounds, nbins)\n",
199 | "plt.xlabel(\"m [GeV/c$^2$]\")\n",
200 | "plt.ylabel(\"number of events\")"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {
206 | "pycharm": {
207 | "name": "#%% md\n"
208 | }
209 | },
210 | "source": [
211 | "### Discovery test\n",
212 | "\n",
213 | "In a discovery test the null hypothesis is the absence of signal, .i.e Nsig = 0."
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "pycharm": {
221 | "name": "#%%\n"
222 | }
223 | },
224 | "outputs": [],
225 | "source": [
226 | "# instantation of the calculator\n",
227 | "calculator = AsymptoticCalculator(nll, minimizer)\n",
228 | "calculator.bestfit = minimum # optionnal"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {
235 | "pycharm": {
236 | "name": "#%%\n"
237 | }
238 | },
239 | "outputs": [],
240 | "source": [
241 | "# parameter of interest of the null hypothesis\n",
242 | "poinull = POI(Nsig, 0)"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "pycharm": {
250 | "name": "#%%\n"
251 | }
252 | },
253 | "outputs": [],
254 | "source": [
255 | "# instantation of the discovery test\n",
256 | "discovery_test = Discovery(calculator, poinull)"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": null,
262 | "metadata": {
263 | "pycharm": {
264 | "name": "#%%\n"
265 | }
266 | },
267 | "outputs": [],
268 | "source": [
269 | "pnull, significance = discovery_test.result()"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": null,
275 | "metadata": {
276 | "pycharm": {
277 | "name": "#%%\n"
278 | }
279 | },
280 | "outputs": [],
281 | "source": []
282 | }
283 | ],
284 | "metadata": {
285 | "kernelspec": {
286 | "display_name": "Python 3",
287 | "language": "python",
288 | "name": "python3"
289 | },
290 | "language_info": {
291 | "codemirror_mode": {
292 | "name": "ipython",
293 | "version": 3
294 | },
295 | "file_extension": ".py",
296 | "mimetype": "text/x-python",
297 | "name": "python",
298 | "nbconvert_exporter": "python",
299 | "pygments_lexer": "ipython3",
300 | "version": "3.7.7"
301 | }
302 | },
303 | "nbformat": 4,
304 | "nbformat_minor": 4
305 | }
306 |
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-1.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-1.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-2.0.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-2.0.npz
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-2.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-2.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-3.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-3.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-4.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-4.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-5.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-5.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_-6.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-6.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_0.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_0.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_1.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_1.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_2.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_2.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_3.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_3.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_4.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_4.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_5.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_5.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/FC_toys_6.0.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_6.0.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/ci_freq_zfit_toys.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/ci_freq_zfit_toys.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/toys/upperlimit_freq_zfit_toys.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/upperlimit_freq_zfit_toys.yml
--------------------------------------------------------------------------------
/notebooks/hypotests/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 |
6 |
7 | def pltdist(data, bins, bounds):
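 |     """Histogram `data` with `bins` bins in `bounds` and draw it as points with sqrt(N) error bars."""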
8 | y, bin_edges = np.histogram(data, bins=bins, range=bounds)
9 | bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
10 | yerr = np.sqrt(y)
11 | plt.errorbar(bin_centers, y, yerr=yerr, fmt=".", color="royalblue")
12 |
13 |
14 | def plotfitresult(model, bounds, nbins):
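 |     """Draw the fitted `model` over `bounds`; an extended model is scaled to the expected number of events per bin."""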
15 | x = np.linspace(*bounds, num=1000)
16 | if model.is_extended:
17 | pdf = model.ext_pdf(x, norm_range=bounds) * ((bounds[1] - bounds[0]) / nbins)
18 | else:
19 | pdf = model.pdf(x, norm_range=bounds)
20 | plt.plot(x, pdf, "-r", label="fit result")
21 |
22 |
23 | def plotlimit(ul, alpha=0.05, CLs=True, ax=None):
24 | """
25 | plot pvalue scan for different values of a parameter of interest (observed, expected and +/- sigma bands)
26 |
27 | Args:
28 | ul: UpperLimit instance
29 | alpha (float, default=0.05): significance level
30 |         CLs (bool, optional): if `True` uses pvalues as $p_{cls} = p_{null}/p_{alt} = p_{clsb}/p_{clb}$,
31 |             else as $p_{clsb} = p_{null}$
32 |         ax (matplotlib axis, optional)
33 |
34 | """
35 | if ax is None:
36 | ax = plt.gca()
37 |
38 | poivalues = ul.poinull.values
39 | pvalues = ul.pvalues(CLs=CLs)
40 |
41 | if CLs:
42 | cls_clr = "r"
43 | clsb_clr = "b"
44 | else:
45 | cls_clr = "b"
46 | clsb_clr = "r"
47 |
48 | color_1sigma = "mediumseagreen"
49 | color_2sigma = "gold"
50 |
51 | ax.plot(
52 | poivalues,
53 | pvalues["cls"],
54 | label="Observed CL$_{s}$",
55 | marker=".",
56 | color="k",
57 | markerfacecolor=cls_clr,
58 | markeredgecolor=cls_clr,
59 | linewidth=2.0,
60 | ms=11,
61 | )
62 |
63 | ax.plot(
64 | poivalues,
65 | pvalues["clsb"],
66 | label="Observed CL$_{s+b}$",
67 | marker=".",
68 | color="k",
69 | markerfacecolor=clsb_clr,
70 | markeredgecolor=clsb_clr,
71 | linewidth=2.0,
72 | ms=11,
73 | linestyle=":",
74 | )
75 |
76 | ax.plot(
77 | poivalues,
78 | pvalues["clb"],
79 | label="Observed CL$_{b}$",
80 | marker=".",
81 | color="k",
82 | markerfacecolor="k",
83 | markeredgecolor="k",
84 | linewidth=2.0,
85 | ms=11,
86 | )
87 |
88 | ax.plot(
89 | poivalues,
90 | pvalues["expected"],
91 | label="Expected CL$_{s}-$Median",
92 | color="k",
93 | linestyle="--",
94 | linewidth=1.5,
95 | ms=10,
96 | )
97 |
98 | ax.plot(
99 | [poivalues[0], poivalues[-1]],
100 | [alpha, alpha],
101 | color="r",
102 | linestyle="-",
103 | linewidth=1.5,
104 | )
105 |
106 | ax.fill_between(
107 | poivalues,
108 | pvalues["expected"],
109 | pvalues["expected_p1"],
110 | facecolor=color_1sigma,
111 | label="Expected CL$_{s} \\pm 1 \\sigma$",
112 | alpha=0.8,
113 | )
114 |
115 | ax.fill_between(
116 | poivalues,
117 | pvalues["expected"],
118 | pvalues["expected_m1"],
119 | facecolor=color_1sigma,
120 | alpha=0.8,
121 | )
122 |
123 | ax.fill_between(
124 | poivalues,
125 | pvalues["expected_p1"],
126 | pvalues["expected_p2"],
127 | facecolor=color_2sigma,
128 | label="Expected CL$_{s} \\pm 2 \\sigma$",
129 | alpha=0.8,
130 | )
131 |
132 | ax.fill_between(
133 | poivalues,
134 | pvalues["expected_m1"],
135 | pvalues["expected_m2"],
136 | facecolor=color_2sigma,
137 | alpha=0.8,
138 | )
139 |
140 | ax.set_ylim(-0.01, 1.1)
141 | ax.set_ylabel("p-value")
142 | ax.set_xlabel("parameter of interest")
143 | ax.legend(loc="best", fontsize=14)
144 |
145 | return ax
146 |
147 |
148 | def one_minus_cl_plot(x, pvalues, alpha=None, ax=None):
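 |     """Plot the p-values (1-CL) versus the scanned values `x`, with a horizontal red line at each `alpha` level."""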
149 | if alpha is None:
150 | alpha = [0.32]
151 | if ax is None:
152 | ax = plt.gca()
153 |
154 | ax.plot(x, pvalues, ".--")
155 | for a in alpha:
156 | ax.axhline(a, color="red", label="$\\alpha = " + str(a) + "$")
157 | ax.set_ylabel("1-CL")
158 |
159 | return ax
160 |
--------------------------------------------------------------------------------
/notebooks/modeling/bayesian_blocks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Histogramming With Bayesian Blocks\n",
8 | "\n",
9 | "The Bayesian Block algorithm, originally developed for applications in astronomy, can be used to improve the binning of histograms in high energy physics (HEP). The visual improvement can be dramatic, and more importantly, this algorithm produces histograms that accurately represent the underlying distribution while being robust to statistical fluctuations. The key concept behind Bayesian Blocks is that variable-width bins are determined for a given distribution, such that the data within each bin is consistent with a uniform distribution across the range of that bin. This reduces the appearance of statistical fluctuations while still capturing the form of the underlying distribution.\n",
10 | "\n",
11 | "For more information on the algorithm and implementation, see:\n",
12 | "\n",
13 | "1. [Bayesian Blocks Algorithm, Scargle et al.](https://arxiv.org/pdf/1207.5578.pdf)\n",
14 | "2. [Bayesian Blocks in HEP, Pollack et al.](https://arxiv.org/pdf/1708.00810.pdf)"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Using Bayesian Blocks Binning\n",
22 | "\n",
23 | "Bayesian Blocks binning options are available as part of `hepstats/modeling` package. Below is a simple example:"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "from __future__ import annotations\n",
33 | "\n",
34 | "import matplotlib.pyplot as plt\n",
35 | "import numpy as np\n",
36 | "\n",
37 | "from hepstats.modeling import bayesian_blocks\n",
38 | "\n",
39 | "%matplotlib inline"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "plt.rcParams[\"figure.figsize\"] = (8, 6)\n",
49 | "plt.rcParams[\"font.size\"] = 16"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "np.random.seed(1001)\n",
59 | "data = np.random.laplace(size=10000)\n",
60 | "blocks = bayesian_blocks(data)"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "plt.hist(data, bins=1000, label=\"Fine Binning\", density=True, alpha=0.6)\n",
70 | "plt.hist(\n",
71 | " data,\n",
72 | " bins=blocks,\n",
73 | " label=\"Bayesian Blocks\",\n",
74 | " histtype=\"step\",\n",
75 | " density=True,\n",
76 | " linewidth=2,\n",
77 | ")\n",
78 | "plt.legend(loc=2);"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "For appropriate visualization, one should typically also use `density=True`. This divides each bin by its width, which is important for capturing the overall shape of the underlying distribution. Without using this argument, the histogram will look jagged (a consequence of using variable-width binning)."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "fig, axes = plt.subplots(ncols=2, figsize=(12, 5))\n",
95 | "axes[0].hist(data, bins=blocks, label=\"Bayesian Blocks\", histtype=\"step\", linewidth=2)\n",
96 | "axes[0].set_title(\"Unscaled\")\n",
97 | "axes[1].hist(\n",
98 | " data,\n",
99 | " bins=blocks,\n",
100 | " label=\"Bayesian Blocks\",\n",
101 | " histtype=\"step\",\n",
102 | " density=True,\n",
103 | " linewidth=2,\n",
104 | ")\n",
105 | "axes[1].set_title(\"Scaled by bin width\")\n",
106 | "plt.show()"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "The user has control over an additional parameter to determine how many bins are generated by the Bayesian Blocks algorithm. The `p0` parameter (valid between 0 and 1) determines how strictly the algorithm determines bin edges. A small `p0` will be more robust to statistical fluctuations in the data, but could be overly coarse. Conversely, a large `p0` will result in a finer binning, but could isolate spurious fluctuations in the data."
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))\n",
123 | "\n",
124 | "axes[0][0].hist(\n",
125 | " data,\n",
126 | " bins=bayesian_blocks(data, p0=1e-50),\n",
127 | " label=\"Bayesian Blocks\",\n",
128 | " histtype=\"step\",\n",
129 | " density=True,\n",
130 | " linewidth=2,\n",
131 | ")\n",
132 | "axes[0][0].set_title(\"p0=1e-50\")\n",
133 | "\n",
134 | "axes[0][1].hist(\n",
135 | " data,\n",
136 | " bins=bayesian_blocks(data, p0=1e-5),\n",
137 | " label=\"Bayesian Blocks\",\n",
138 | " histtype=\"step\",\n",
139 | " density=True,\n",
140 | " linewidth=2,\n",
141 | ")\n",
142 | "axes[0][1].set_title(\"p0=1e-5\")\n",
143 | "\n",
144 | "axes[1][0].hist(\n",
145 | " data,\n",
146 | " bins=bayesian_blocks(data, p0=1e-3),\n",
147 | " label=\"Bayesian Blocks\",\n",
148 | " histtype=\"step\",\n",
149 | " density=True,\n",
150 | " linewidth=2,\n",
151 | ")\n",
152 | "axes[1][0].set_title(\"p0=1e-3\")\n",
153 | "\n",
154 | "axes[1][1].hist(\n",
155 | " data,\n",
156 | " bins=bayesian_blocks(data, p0=0.5),\n",
157 | " label=\"Bayesian Blocks\",\n",
158 | " histtype=\"step\",\n",
159 | " density=True,\n",
160 | " linewidth=2,\n",
161 | ")\n",
162 | "axes[1][1].set_title(\"p0=0.5\")\n",
163 | "\n",
164 | "fig.suptitle(\"Varying the p0 parameter\")\n",
165 | "plt.show()"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "The optimal value of `p0` differs, depending on the number of data points and the nature of the underlying distribution. It typically must be determined empirically, but in general the value of `p0` should be inversely proportional the size of the input dataset."
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "## Comparison with Other Binning Methods\n",
180 | "\n",
181 | "Because Bayesian Blocks determines variable-width binning, the algorithm can provide a more optimal set of bins for a given distribution, especially if that distribution varies greatly in density. Below are some examples of Bayesian Blocks and other popular binning methods.\n",
182 | "\n",
183 | "**A rapidly falling distribution:**\n",
184 | "\n",
185 | "\n",
186 | "**An asymmetric, peaked distribution:**\n",
187 | "\n",
188 | "\n",
189 | "**Two peaks of different widths:**\n",
190 | "\n",
191 | "\n",
192 | "\n",
193 | "*Brian Pollack, 2018*"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "metadata": {},
200 | "outputs": [],
201 | "source": []
202 | }
203 | ],
204 | "metadata": {
205 | "kernelspec": {
206 | "display_name": "Python 3",
207 | "language": "python",
208 | "name": "python3"
209 | },
210 | "language_info": {
211 | "codemirror_mode": {
212 | "name": "ipython",
213 | "version": 3
214 | },
215 | "file_extension": ".py",
216 | "mimetype": "text/x-python",
217 | "name": "python",
218 | "nbconvert_exporter": "python",
219 | "pygments_lexer": "ipython3",
220 | "version": "3.7.6"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 4
225 | }
226 |
--------------------------------------------------------------------------------
/notebooks/modeling/bayesian_blocks_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/bayesian_blocks_example.png
--------------------------------------------------------------------------------
/notebooks/modeling/hists_2LP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/hists_2LP.png
--------------------------------------------------------------------------------
/notebooks/modeling/hists_MuPT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/hists_MuPT.png
--------------------------------------------------------------------------------
/notebooks/modeling/hists_jPT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/hists_jPT.png
--------------------------------------------------------------------------------
/notebooks/splots/splot_example_2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Generate and visualize toy data sets"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from __future__ import annotations\n",
17 | "\n",
18 | "import numpy as np\n",
19 | "import zfit\n",
20 | "from matplotlib import pyplot as plt\n",
21 | "from scipy.stats import expon, norm\n",
22 | "\n",
23 | "zfit.settings.set_seed(10) # fix seed\n",
24 | "\n",
25 | "bounds = (0, 10)\n",
26 | "obs = zfit.Space(\"x\", limits=bounds)\n",
27 | "\n",
28 | "# true parameters for signal and background\n",
29 | "truth_n_sig = 1000\n",
30 | "Nsig = zfit.Parameter(\"Nsig\", truth_n_sig)\n",
31 | "mean_sig = zfit.Parameter(\"mean_sig\", 5.0)\n",
32 | "sigma_sig = zfit.Parameter(\"sigma_sig\", 0.5)\n",
33 | "sig_pdf = zfit.pdf.Gauss(obs=obs, mu=mean_sig, sigma=sigma_sig).create_extended(Nsig)\n",
34 | "\n",
35 | "truth_n_bkg = 10000\n",
36 | "Nbkg = zfit.Parameter(\"Nbkg\", truth_n_bkg)\n",
37 | "lambda_bkg = zfit.Parameter(\"lambda_bkg\", -1 / 4.0)\n",
38 | "bkg_pdf = zfit.pdf.Exponential(obs=obs, lambda_=lambda_bkg).create_extended(Nbkg)\n",
39 | "\n",
40 | "truth_sig_t = (1.0,)\n",
41 | "truth_bkg_t = (2.5, 2.0)\n",
42 | "\n",
43 | "# make a data set\n",
44 | "m_sig = sig_pdf.sample(truth_n_sig).numpy()\n",
45 | "m_bkg = bkg_pdf.sample(truth_n_bkg).numpy()\n",
46 | "m = np.concatenate([m_sig, m_bkg]).flatten()\n",
47 | "\n",
48 | "# fill t variables\n",
49 | "t_sig = expon(0, *truth_sig_t).rvs(truth_n_sig)\n",
50 | "t_bkg = norm(*truth_bkg_t).rvs(truth_n_bkg)\n",
51 | "t = np.concatenate([t_sig, t_bkg])\n",
52 | "\n",
53 | "# cut out range (0, 10) in m, t\n",
54 | "ma = (bounds[0] < t) & (t < bounds[1])\n",
55 | "m = m[ma]\n",
56 | "t = t[ma]\n",
57 | "\n",
58 | "fig, ax = plt.subplots(1, 3, figsize=(16, 4.5))\n",
59 | "ax[0].hist2d(m, t, bins=(50, 50))\n",
60 | "ax[0].set_xlabel(\"m\")\n",
61 | "ax[0].set_ylabel(\"t\")\n",
62 | "ax[1].hist([m_bkg, m_sig], bins=50, stacked=True, label=(\"background\", \"signal\"))\n",
63 | "ax[1].set_xlabel(\"m\")\n",
64 | "ax[1].legend()\n",
65 | "ax[2].hist(\n",
66 | " (t[truth_n_sig:], t[:truth_n_sig]),\n",
67 | " bins=50,\n",
68 | " stacked=True,\n",
69 | " label=(\"background\", \"signal\"),\n",
70 | ")\n",
71 | "ax[2].set_xlabel(\"t\")\n",
72 | "ax[2].legend()\n",
73 | "\n",
74 | "sorter = np.argsort(m)\n",
75 | "m = m[sorter]\n",
76 | "t = t[sorter]"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "# Fit toy data set"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "from zfit.loss import ExtendedUnbinnedNLL\n",
93 | "from zfit.minimize import Minuit\n",
94 | "\n",
95 | "tot_pdf = zfit.pdf.SumPDF([sig_pdf, bkg_pdf])\n",
96 | "loss = ExtendedUnbinnedNLL(model=tot_pdf, data=zfit.data.Data.from_numpy(obs=obs, array=m))\n",
97 | "\n",
98 | "minimizer = Minuit()\n",
99 | "\n",
100 | "minimum = minimizer.minimize(loss=loss)\n",
101 | "minimum.hesse()"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "## Visualize fitted model"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "from utils import plotfitresult, pltdist\n",
118 | "\n",
119 | "fig = plt.figure(figsize=(8, 5.5))\n",
120 | "\n",
121 | "nbins = 80\n",
122 | "pltdist(m, nbins, bounds)\n",
123 | "plotfitresult(tot_pdf, bounds, nbins, label=\"total model\", color=\"crimson\")\n",
124 | "plotfitresult(bkg_pdf, bounds, nbins, label=\"background\", color=\"forestgreen\")\n",
125 | "plotfitresult(sig_pdf, bounds, nbins, label=\"signal\", color=\"orange\")\n",
126 | "plt.xlabel(\"m\")\n",
127 | "plt.ylabel(\"number of events\")\n",
128 | "plt.legend();"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {},
134 | "source": [
135 | "## Compute sWeights"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "from hepstats.splot import compute_sweights\n",
145 | "\n",
146 | "weights = compute_sweights(tot_pdf, m)"
147 | ]
148 | },
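{
"cell_type": "markdown",
"metadata": {},
"source": [
"`compute_sweights` returns a dictionary keyed by the yield parameters (`Nsig` and `Nbkg` here), with one array of per-event sWeights per species. As a quick sanity check (a known property of sWeights, shown here as a sketch), the weights of all species should sum to 1 for every event:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The per-event sum of the signal and background sWeights should be compatible with 1.\n",
"np.allclose(weights[Nsig] + weights[Nbkg], 1)"
]
},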
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "fig, ax = plt.subplots(1, 2, figsize=(16, 4.5))\n",
156 | "plt.sca(ax[0])\n",
157 | "nbins = 40\n",
158 | "\n",
159 | "plt.plot(m, weights[Nsig], label=\"$w_\\\\mathrm{sig}$\")\n",
160 | "plt.plot(m, weights[Nbkg], label=\"$w_\\\\mathrm{bkg}$\")\n",
161 | "plt.plot(m, weights[Nsig] + weights[Nbkg], \"-k\")\n",
162 | "plt.axhline(0, color=\"0.5\")\n",
163 | "plt.legend()\n",
164 | "plt.sca(ax[1])\n",
165 | "\n",
166 | "plt.hist(t, bins=nbins, range=bounds, weights=weights[Nsig], label=\"weighted histogram\")\n",
167 | "plt.hist(t_sig, bins=nbins, range=bounds, histtype=\"step\", label=\"true histogram\")\n",
168 | "t1 = np.linspace(*bounds, nbins)\n",
169 | "tcdf = expon(0, 1).pdf(t1) * np.sum(weights[Nsig]) * (bounds[1] - bounds[0]) / nbins\n",
170 | "plt.plot(t1, tcdf, label=\"model with $\\\\lambda_\\\\mathrm{sig}$\")\n",
171 | "plt.xlabel(\"t\")\n",
172 | "plt.legend();"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "np.average(t, weights=weights[Nsig])"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": [
190 | "np.average(t_sig)"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "metadata": {},
197 | "outputs": [],
198 | "source": []
199 | }
200 | ],
201 | "metadata": {
202 | "kernelspec": {
203 | "display_name": "Python 3",
204 | "language": "python",
205 | "name": "python3"
206 | },
207 | "language_info": {
208 | "codemirror_mode": {
209 | "name": "ipython",
210 | "version": 3
211 | },
212 | "file_extension": ".py",
213 | "mimetype": "text/x-python",
214 | "name": "python",
215 | "nbconvert_exporter": "python",
216 | "pygments_lexer": "ipython3",
217 | "version": "3.7.6"
218 | }
219 | },
220 | "nbformat": 4,
221 | "nbformat_minor": 4
222 | }
223 |
--------------------------------------------------------------------------------
/notebooks/splots/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import zfit
6 |
7 |
8 | def pltdist(data, bins, bounds, weights=None, label=None):
9 | y, bin_edges = np.histogram(data, bins=bins, range=bounds, weights=weights)
10 | bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
11 | yerr = np.sqrt(y)
12 | plt.errorbar(bin_centers, y, yerr=yerr, fmt=".", color="royalblue", label=label)
13 |
14 |
15 | def plotfitresult(model, bounds, nbins, **kwargs):
16 | x = np.linspace(*bounds, num=1000)
17 | pdf = zfit.run(model.pdf(x, norm_range=bounds) * model.get_yield())
18 | plt.plot(x, ((bounds[1] - bounds[0]) / nbins) * (pdf), **kwargs)
19 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling", "hatch-vcs"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "hepstats"
7 | description = "HEP statistics tools and utilities"
8 | authors = [{ name = "Matthieu Marinangeli", email = "matthieu.marinangeli@gmail.com" }]
9 | maintainers = [{ name = "Scikit-HEP", email = "scikit-hep-admins@googlegroups.com" }]
10 | license = { text = "BSD 3-Clause License" }
11 | classifiers = [
12 | "Development Status :: 4 - Beta",
13 | "Intended Audience :: Developers",
14 | "Intended Audience :: Information Technology",
15 | "Intended Audience :: Science/Research",
16 | "License :: OSI Approved :: BSD License",
17 | "Operating System :: MacOS",
18 | "Operating System :: Microsoft :: Windows",
19 | "Operating System :: POSIX",
20 | "Operating System :: Unix",
21 | "Programming Language :: C++",
22 | "Programming Language :: Python",
23 | "Programming Language :: Python :: 3",
24 | "Programming Language :: Python :: 3 :: Only",
25 | "Programming Language :: Python :: 3.9",
26 | "Programming Language :: Python :: 3.10",
27 | "Programming Language :: Python :: 3.11",
28 | "Programming Language :: Python :: 3.12",
29 | "Programming Language :: Python :: 3.13",
30 | "Topic :: Scientific/Engineering",
31 | "Topic :: Scientific/Engineering :: Information Analysis",
32 | "Topic :: Scientific/Engineering :: Mathematics",
33 | "Topic :: Scientific/Engineering :: Physics",
34 | "Topic :: Software Development",
35 | "Topic :: Utilities",
36 | ]
37 | urls = { Homepage = "https://github.com/scikit-hep/hepstats" }
38 | requires-python = ">=3.9"
39 | dependencies = [
40 | "asdf",
41 | "numpy",
42 | "pandas",
43 | "scipy",
44 | "tqdm",
45 | "uhi",
46 | ]
47 | dynamic = ["version"]
48 |
49 | [project.readme]
50 | file = "README.md"
51 | content-type = "text/markdown"
52 |
53 | [project.optional-dependencies]
54 | dev = [
55 | "hepstats[docs]",
56 | "hepstats[test]",
57 | "pre-commit",
58 | ]
59 | docs = [
60 | "matplotlib",
61 | "pydata-sphinx-theme",
62 | "sphinx>=3.1.2",
63 | "sphinx-autodoc-typehints",
64 | "sphinx-copybutton",
65 | "sphinxcontrib-bibtex>=2.0.0",
66 | ]
67 | doc = ["hepstats[docs]"] # alias
68 | test = [
69 | "pytest",
70 | "pytest-cov",
71 | "pytest-runner",
72 | "zfit>=0.20.0;python_version<'3.13'",
73 | # 'hepstats[zfit];python_version<"3.13"', # not working, why?
74 | ]
75 | zfit = ["zfit>=0.20.0"]
76 |
77 |
78 |
79 | [tool.pytest.ini_options]
80 | junit_family = "xunit2"
81 | testpaths = ["tests"]
82 |
83 | [tool.check-manifest]
84 | ignore = ["src/hepstats/_version.py"]
85 |
86 | [tool.build_sphinx]
87 | project = "hepstats"
88 | source-dir = "docs"
89 | build-dir = "docs/_build"
90 | all-files = "1"
91 | warning-is-error = "0"
92 |
93 |
94 | [tool.hatch]
95 | version.source = "vcs"
96 | build.hooks.vcs.version-file = "src/hepstats/_version.py"
97 |
98 | [tool.ruff]
99 | #src = ["src"]
100 | line-length = 120
101 | exclude = [
102 | ".tox/*",
103 | "*/test*",
104 | "*/__init__.py",
105 | "*/_version.py",
106 | ]
107 | [tool.ruff.lint]
108 | extend-select = [
109 | "B", # flake8-bugbear
110 | "I", # isort
111 | "ARG", # flake8-unused-arguments
112 | "C4", # flake8-comprehensions
113 | "EM", # flake8-errmsg
114 | "ICN", # flake8-import-conventions
115 | "G", # flake8-logging-format
116 | "PGH", # pygrep-hooks
117 | "PIE", # flake8-pie
118 | "PL", # pylint
119 | "PT", # flake8-pytest-style
120 | "PTH", # flake8-use-pathlib
121 | "RET", # flake8-return
122 | "RUF", # Ruff-specific
123 | "SIM", # flake8-simplify
124 | "T20", # flake8-print
125 | "UP", # pyupgrade
126 | "YTT", # flake8-2020
127 | "EXE", # flake8-executable
128 | "NPY", # NumPy specific rules
129 | "PD", # pandas-vet
130 | ]
131 | ignore = [
132 | "UP007", # type annotation upgrade, breaks pydantic for Python 3.9 (remove once above)
133 | "PLR09", # Too many <...>
134 | "PLR2004", # Magic value used in comparison
135 | "ISC001", # Conflicts with formatter
136 | "RET505", # This is sometimes wanted, protets against accidental intendation
137 | "PD901", # "avoid using `df[...].values`" -> no, this is a very good name if there is only one df
138 | "PD011", # "replace `df[...].values` with `df[...].to_numpy()`" -> not yet, it's not deprecated.
139 | # Prefer to have a single way to access the data if we don't care about whether it's a numpy array or not.
140 | "PLW0603", # updating global variables with a function is bad, but we use it for
141 | "PLW2901", # "for loop overwritten by assignment" -> we use this to update the loop variable
142 | "PD013", # "melt over stack": df function, but triggers on tensors
143 | "NPY002", # "Use rnd generator in numpy" -> we use np.random for some legacy stuff but do use the new one where we can
144 | "T201", # "print used" -> we use print for displaying information in verbose mode
145 |
146 | ]
147 | isort.required-imports = ["from __future__ import annotations"]
148 |
149 | [tool.ruff.lint.per-file-ignores]
150 | "tests/**" = ["T20"]
151 | "noxfile.py" = ["T20"]
152 |
--------------------------------------------------------------------------------
/src/hepstats/__init__.py:
--------------------------------------------------------------------------------
1 | from ._version import version as __version__
2 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/README.md:
--------------------------------------------------------------------------------
1 | # Hypotests
2 |
3 | This submodule provides tools to do hypothesis tests such as discovery tests and computations of upper limits or confidence intervals. hepstats needs a fitting backend to perform these computations, such as [zfit](https://github.com/zfit/zfit). Any fitting library can be used if its API is compatible with hepstats (see [api checks](https://github.com/scikit-hep/hepstats/blob/main/src/hepstats/utils/fit/api_check.py)).
4 |
5 | We give here a simple example of a discovery test, using the [zfit](https://github.com/zfit/zfit)
6 | fitting package as backend, of a Gaussian signal with known mean and sigma over an exponential background.
7 |
8 | ```python
9 | >>> import numpy as np
10 | >>> import zfit
10 | >>> from zfit.loss import ExtendedUnbinnedNLL
11 | >>> from zfit.minimize import Minuit
12 |
13 | >>> bounds = (0.1, 3.0)
14 | >>> obs = zfit.Space('x', limits=bounds)
15 |
16 | >>> bkg = np.random.exponential(0.5, 300)
17 | >>> peak = np.random.normal(1.2, 0.1, 25)
18 | >>> data = np.concatenate((bkg, peak))
19 | >>> data = data[(data > bounds[0]) & (data < bounds[1])]
20 | >>> N = data.size
21 | >>> data = zfit.Data.from_numpy(obs=obs, array=data)
22 |
23 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
24 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N)
25 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1)
26 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)
27 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
28 | >>> total = zfit.pdf.SumPDF([signal, background])
29 | >>> loss = ExtendedUnbinnedNLL(model=total, data=data)
30 |
31 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator
32 | >>> from hepstats.hypotests import Discovery
33 | >>> from hepstats.hypotests.parameters import POI
34 |
35 | >>> calculator = AsymptoticCalculator(input=loss, minimizer=Minuit())
36 | >>> poinull = POI(Nsig, 0)
37 | >>> discovery_test = Discovery(calculator, poinull)
38 | >>> discovery_test.result()
39 |
40 | p_value for the Null hypothesis = 0.0007571045424956679
41 | Significance (in units of sigma) = 3.1719464825102244
42 | ```
43 |
44 | The discovery test prints out the p-value and the significance with which the null hypothesis is rejected.
45 |
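The same test can also be run with toy experiments instead of asymptotic formulae by
swapping in the `FrequentistCalculator`. A minimal sketch (assuming the calculator
accepts the same `input`/`minimizer` arguments, with `ntoysnull` controlling the
number of null-hypothesis toys):

```python
>>> from hepstats.hypotests.calculators import FrequentistCalculator

>>> calculator = FrequentistCalculator(input=loss, minimizer=Minuit(), ntoysnull=5000)
>>> discovery_test = Discovery(calculator, poinull)
>>> discovery_test.result()
```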
--------------------------------------------------------------------------------
/src/hepstats/hypotests/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed under a 3-clause BSD style license, see LICENSE.
2 | """
3 | Module for hypothesis tests, upper limits and confidence intervals calculations.
4 | """
5 |
6 | # -----------------------------------------------------------------------------
7 | # Import statements
8 | # -----------------------------------------------------------------------------
9 | from .core import Discovery, UpperLimit, ConfidenceInterval
10 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/calculators/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module defining the calculators for statistical tests based on the likelihood ratio.
3 |
4 | Acronyms used in the code:
5 |     * nll = negative log-likelihood, which is the value of the `loss` attribute of a calculator;
6 | * obs = observed, i.e. measured on provided data.
7 |
8 | """
9 |
10 | from .asymptotic_calculator import AsymptoticCalculator
11 | from .frequentist_calculator import FrequentistCalculator
12 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .confidence_interval import ConfidenceInterval
2 | from .discovery import Discovery
3 | from .upperlimit import UpperLimit
4 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/core/basetest.py:
--------------------------------------------------------------------------------
1 | """
2 | Module defining the base class for hypothesis tests.
3 | """
4 | 
5 | from __future__ import annotations
6 | 
7 | from ..calculators.basecalculator import BaseCalculator
8 | from ..parameters import POI, POIarray
9 | 
10 |
11 | class BaseTest:
12 | def __init__(
13 | self,
14 | calculator: BaseCalculator,
15 | poinull: POI | POIarray,
16 | poialt: POI | POIarray | None = None,
17 | ):
18 | """Base class for hypothesis tests.
19 |
20 | Args:
21 | calculator: calculator to use for computing the pvalues
22 | poinull: parameters of interest for the null hypothesis
23 | poialt: parameters of interest for the alternative hypothesis
24 |
25 | Raises:
26 | TypeError: if calculator is not a BaseCalculator instance
27 | """
28 |
29 | if not isinstance(calculator, BaseCalculator):
30 | msg = "Invalid type, {0}, for calculator. Calculator required."
31 | raise TypeError(msg)
32 | self._calculator = calculator
33 |
34 | self.calculator.check_pois(poinull)
35 |         if poialt is not None:
36 | self.calculator.check_pois(poialt)
37 | self.calculator.check_pois_compatibility(poinull, poialt)
38 |
39 | self._poinull = poinull
40 | self._poialt = poialt
41 |
42 | @property
43 | def poinull(self):
44 | """
45 | Returns the POI for the null hypothesis.
46 | """
47 | return self._poinull
48 |
49 | @property
50 | def poialt(self):
51 | """
52 | Returns the POI for the alternative hypothesis.
53 | """
54 | return self._poialt
55 |
56 | @property
57 | def calculator(self):
58 | """
59 | Returns the calculator.
60 | """
61 | return self._calculator
62 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/core/confidence_interval.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import warnings
4 |
5 | import numpy as np
6 | from scipy import interpolate
7 |
8 | from ..calculators import FrequentistCalculator
9 | from ..calculators.basecalculator import BaseCalculator
10 | from ..exceptions import POIRangeError
11 | from ..parameters import POIarray
12 | from .basetest import BaseTest
13 |
14 |
15 | class ConfidenceInterval(BaseTest):
16 | """Class for confidence interval calculation."""
17 |
18 | def __init__(self, calculator: BaseCalculator, poinull: POIarray, qtilde: bool = False):
19 | """
20 | Args:
21 | calculator: calculator to use for computing the pvalues.
22 | poinull: parameters of interest for the null hypothesis.
23 | qtilde: if `True` use the :math:`\\widetilde{q}` test statistics else (default)
24 | use the :math:`q` test statistic.
25 |
26 | Example with **zfit**:
27 | >>> import numpy as np
28 | >>> import zfit
29 | >>> from zfit.loss import ExtendedUnbinnedNLL
30 | >>> from zfit.minimize import Minuit
31 | >>>
32 | >>> bounds = (0.1, 3.0)
33 |             >>> obs = zfit.Space('x', limits=bounds)
34 | >>>
35 | >>> bkg = np.random.exponential(0.5, 300)
36 | >>> peak = np.random.normal(1.2, 0.1, 80)
37 | >>> data = np.concatenate((bkg, peak))
38 | >>> data = data[(data > bounds[0]) & (data < bounds[1])]
39 | >>> N = data.size
40 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data)
41 | >>>
42 | >>> mean = zfit.Parameter("mean", 1.2, 0.5, 2.0)
43 | >>> sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2)
44 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
45 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N)
46 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1)
47 | >>> signal = Nsig * zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
48 | >>> background = Nbkg * zfit.pdf.Exponential(obs=obs, lambda_=lambda_)
49 | >>> loss = ExtendedUnbinnedNLL(model=signal + background, data=data)
50 | >>>
51 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator
52 | >>> from hepstats.hypotests import ConfidenceInterval
53 | >>> from hepstats.hypotests.parameters import POI, POIarray
54 | >>>
55 | >>> calculator = AsymptoticCalculator(loss, Minuit())
56 | >>> poinull = POIarray(mean, np.linspace(1.15, 1.26, 100))
57 | >>> ci = ConfidenceInterval(calculator, poinull)
58 | >>> ci.interval()
59 | Confidence interval on mean:
60 | 1.1810371356602791 < mean < 1.2156701172321935 at 68.0% C.L.
61 | """
62 | super().__init__(calculator, poinull)
63 |
64 | self._qtilde = qtilde
65 |
66 | @property
67 | def qtilde(self) -> bool:
68 | """
69 | Returns True if qtilde test statistic is used, else False.
70 | """
71 | return self._qtilde
72 |
73 | def pvalues(self) -> np.ndarray:
74 | """
75 | Returns p-values scanned for the values of the parameters of interest
76 | in the null hypothesis.
77 |
78 | Returns:
79 |             Array of p-values scanned over the POI values of the null hypothesis.
80 | """
81 |
82 | poialt = None
83 | return self.calculator.pvalue(poinull=self.poinull, poialt=poialt, qtilde=self.qtilde, onesided=False)[0]
84 |
85 | def interval(self, alpha: float = 0.32, printlevel: int = 1) -> dict[str, float]:
86 | """
87 |         Returns the confidence interval on the parameter of interest.
88 |
89 | Args:
90 | alpha: significance level.
91 | printlevel: if > 0 print the result.
92 |
93 | Returns:
94 | Dict of the values for the central, upper and lower bounds on the parameter of interest.
95 |
96 | """
97 |
98 | bands = {}
99 | poinull = self.poinull
100 | observed = self.calculator.bestfit.params[poinull.parameter]["value"]
101 | bands["observed"] = observed
102 |
103 |         pvalues = self.pvalues()
104 |         if min(pvalues) > alpha:
105 |             msg = f"The minimum of the scanned p-values is {min(pvalues)} which is larger than the"
106 |             msg += f" confidence level alpha = {alpha}. Try to increase the range of POI values."
107 |             raise POIRangeError(msg)
107 |
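        # Interpolate the p-value scan with a spline and find where it crosses alpha;
        # crossings below/above the observed best-fit value give the lower/upper bounds.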
108 |         tck = interpolate.splrep(poinull.values, pvalues - alpha, s=0)
109 | roots = np.array(interpolate.sproot(tck))
110 |
111 | msg = f" bound on the POI `{poinull.name}` cannot not be interpolated."
112 |
113 | if roots.size > 2:
114 |             msg_warn = "Multiple roots have been found."
115 | if isinstance(self.calculator, FrequentistCalculator):
116 | msg_warn += " Try to increase the number of toys, 'ntoysnull', to reduce fluctuations."
117 | warnings.warn(msg_warn, stacklevel=2)
118 |
119 | lower_roots = roots[roots < observed]
120 | upper_roots = roots[roots > observed]
121 |
122 | if upper_roots.size == 0:
123 | msg = "Upper" + msg + " Try to increase the maximum POI value."
124 | raise POIRangeError(msg)
125 | bands["upper"] = max(upper_roots)
126 |
127 | if lower_roots.size == 0:
128 | if self.qtilde:
129 | bands["lower"] = 0.0
130 | else:
131 | msg = "Low" + msg + " Try to decrease the minimum POI value."
132 | raise POIRangeError(msg)
133 | else:
134 | bands["lower"] = min(lower_roots)
135 |
136 | if self.qtilde and bands["lower"] < 0.0:
137 | bands["lower"] = 0.0
138 |
139 |         if printlevel > 0:
140 |             msg = f"\nConfidence interval on {poinull.name}:\n"
141 |             msg += f"\t{bands['lower']} < {poinull.name} < {bands['upper']} at {(1 - alpha) * 100:.1f}% C.L."
142 |             print(msg)
143 | return bands
144 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/core/discovery.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from scipy.stats import norm
4 |
5 | from ..calculators.basecalculator import BaseCalculator
6 | from ..parameters import POI
7 | from .basetest import BaseTest
8 |
9 |
10 | class Discovery(BaseTest):
11 | """Class for discovery test."""
12 |
13 | def __init__(self, calculator: BaseCalculator, poinull: POI):
14 | """
15 | Args:
16 | calculator: calculator to use for computing the pvalues.
17 | poinull: parameter of interest for the null hypothesis.
18 |
19 | Example with **zfit**:
20 |             >>> import numpy as np
21 |             >>> import zfit
21 | >>> from zfit.loss import ExtendedUnbinnedNLL
22 | >>> from zfit.minimize import Minuit
23 | >>>
24 | >>> bounds = (0.1, 3.0)
25 |             >>> obs = zfit.Space('x', limits=bounds)
26 | >>>
27 | >>> bkg = np.random.exponential(0.5, 300)
28 | >>> peak = np.random.normal(1.2, 0.1, 25)
29 | >>> data = np.concatenate((bkg, peak))
30 | >>> data = data[(data > bounds[0]) & (data < bounds[1])]
31 | >>> N = data.size
32 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data)
33 | >>>
34 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
35 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N)
36 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1)
37 | >>> signal = Nsig * zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1)
38 | >>> background = Nbkg * zfit.pdf.Exponential(obs=obs, lambda_=lambda_)
39 | >>> loss = ExtendedUnbinnedNLL(model=signal + background, data=data)
40 | >>>
41 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator
42 | >>> from hepstats.hypotests import Discovery
43 | >>> from hepstats.hypotests.parameters import POI
44 | >>>
45 | >>> calculator = AsymptoticCalculator(loss, Minuit())
46 | >>> poinull = POI(Nsig, 0)
47 | >>> discovery_test = Discovery(calculator, poinull)
48 | >>> discovery_test.result()
49 | p_value for the Null hypothesis = 0.0007571045424956679
50 | Significance (in units of sigma) = 3.1719464825102244
51 | """
52 |
53 | super().__init__(calculator, poinull)
54 |
55 | def result(self, printlevel: int = 1) -> tuple[float, float]:
56 | """Return the result of the discovery hypothesis test.
57 |
58 | The result can be (0.0, inf), which means that the numerical precision is not high enough or that the
59 | number of toys is not large enough. For example if all toys are rejected, the result is (0.0, inf).
60 |
61 | Args:
62 | printlevel: if > 0 print the result.
63 |
64 | Returns:
65 | Tuple of the p-value for the null hypothesis and the significance.
66 | """
67 | pnull, _ = self.calculator.pvalue(self.poinull, onesideddiscovery=True)
68 | pnull = pnull[0]
69 |
70 | significance = norm.ppf(1.0 - pnull)
71 |
72 |         if printlevel > 0:
73 |             print(f"\np_value for the Null hypothesis = {pnull}")
74 |             print(f"Significance (in units of sigma) = {significance}")
74 |
75 | return pnull, significance
76 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/core/upperlimit.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import numpy as np
4 | from scipy import interpolate
5 |
6 | from ..calculators.basecalculator import BaseCalculator
7 | from ..exceptions import POIRangeError
8 | from ..parameters import POI, POIarray
9 | from .basetest import BaseTest
10 |
11 |
12 | class UpperLimit(BaseTest):
13 | """Class for upper limit calculation."""
14 |
15 | def __init__(
16 | self,
17 | calculator: BaseCalculator,
18 | poinull: POI | POIarray,
19 | poialt: POI,
20 | qtilde: bool = False,
21 | ):
22 | """
23 | Args:
24 | calculator: calculator to use for computing the pvalues.
25 | poinull: parameters of interest for the null hypothesis.
26 | poialt: parameters of interest for the alternative hypothesis.
27 | qtilde: if `True` use the :math:`\\widetilde{q}` test statistics else (default) use the :math:`q`
28 | test statistic.
29 |
30 | Example with **zfit**:
31 | >>> import numpy as np
32 | >>> import zfit
33 | >>> from zfit.loss import ExtendedUnbinnedNLL
34 | >>> from zfit.minimize import Minuit
35 | >>>
36 | >>> bounds = (0.1, 3.0)
37 |             >>> obs = zfit.Space('x', limits=bounds)
38 | >>>
39 | >>> bkg = np.random.exponential(0.5, 300)
40 | >>> peak = np.random.normal(1.2, 0.1, 10)
41 | >>> data = np.concatenate((bkg, peak))
42 | >>> data = data[(data > bounds[0]) & (data < bounds[1])]
43 | >>> N = data.size
44 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data)
45 | >>>
46 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
47 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N)
48 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1)
49 | >>> signal = Nsig * zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1)
50 | >>> background = Nbkg * zfit.pdf.Exponential(obs=obs, lambda_=lambda_)
51 | >>> loss = ExtendedUnbinnedNLL(model=signal + background, data=data)
52 | >>>
53 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator
54 | >>> from hepstats.hypotests import UpperLimit
55 | >>> from hepstats.hypotests.parameters import POI, POIarray
56 | >>>
57 | >>> calculator = AsymptoticCalculator(loss, Minuit())
58 | >>> poinull = POIarray(Nsig, np.linspace(0.0, 25, 20))
59 | >>> poialt = POI(Nsig, 0)
60 | >>> ul = UpperLimit(calculator, poinull, poialt)
61 | >>> ul.upperlimit(alpha=0.05, CLs=True)
62 | Observed upper limit: Nsig = 15.725784747406346
63 | Expected upper limit: Nsig = 11.927442041887158
64 | Expected upper limit +1 sigma: Nsig = 16.596396280677116
65 | Expected upper limit -1 sigma: Nsig = 8.592750403611896
66 | Expected upper limit +2 sigma: Nsig = 22.24864429383046
67 | Expected upper limit -2 sigma: Nsig = 6.400549971360598
68 | """
69 |
70 | super().__init__(calculator, poinull, poialt)
71 |
72 | self._qtilde = qtilde
73 |
74 | @property
75 | def qtilde(self) -> bool:
76 | """
77 | Returns True if qtilde test statistic is used, else False.
78 | """
79 | return self._qtilde
80 |
81 |     def pvalues(self, CLs: bool = True) -> dict[str, np.ndarray]:
82 | """
83 | Returns p-values scanned for the values of the parameters of interest
84 | in the null hypothesis.
85 |
86 | Args:
87 | CLs: if `True` uses pvalues as :math:`p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}`
88 | else as :math:`p_{clsb} = p_{null}`.
89 |
90 | Returns:
91 | Dictionary of p-values for CLsb, CLs, expected (+/- sigma bands).
92 | """
93 | pvalue_func = self.calculator.pvalue
94 |
95 | pnull, palt = pvalue_func(poinull=self.poinull, poialt=self.poialt, qtilde=self.qtilde, onesided=True)
96 |
97 | pvalues = {"clsb": pnull, "clb": palt}
98 |
99 | sigmas = [0.0, 1.0, 2.0, -1.0, -2.0]
100 |
101 | exppvalue_func = self.calculator.expected_pvalue
102 |
103 | result = exppvalue_func(
104 | poinull=self.poinull,
105 | poialt=self.poialt,
106 | nsigma=sigmas,
107 | CLs=CLs,
108 | qtilde=self.qtilde,
109 | onesided=True,
110 | )
111 |
112 | pvalues["expected"] = result[0]
113 | pvalues["expected_p1"] = result[1]
114 | pvalues["expected_p2"] = result[2]
115 | pvalues["expected_m1"] = result[3]
116 | pvalues["expected_m2"] = result[4]
117 |
118 | pvalues["cls"] = pnull / palt
119 |
120 | return pvalues
121 |
122 | def upperlimit(self, alpha: float = 0.05, CLs: bool = True, printlevel: int = 1) -> dict[str, float]:
123 | """
124 | Returns the upper limit of the parameter of interest.
125 |
126 | Args:
127 | alpha: significance level.
128 | CLs: if `True` uses pvalues as :math:`p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}` else as
129 | :math:`p_{clsb} = p_{null}`.
130 | printlevel: if > 0 print the result.
131 |
132 | Returns:
133 |             Dictionary of upper limits for observed, expected (+/- sigma bands).
134 |
135 | """
136 |
137 | poinull = self.poinull
138 |
139 |         # mask restricting the observed scan to POI values above the best-fit value
140 |         bestfit = self.calculator.bestfit.params[poinull.parameter]["value"]
141 |         above_bestfit = poinull.values >= bestfit
142 |
143 | observed_key = "cls" if CLs else "clsb"
144 |
145 | to_interpolate = [observed_key] + [f"expected{i}" for i in ["", "_p1", "_m1", "_p2", "_m2"]]
146 |
147 | limits: dict = {}
148 |
149 | all_pvalues = self.pvalues(CLs)
150 | for k in to_interpolate:
151 | pvalues = all_pvalues[k]
152 | values = poinull.values
153 |
154 | if k == observed_key:
155 | k = "observed"
156 |                 pvalues = pvalues[above_bestfit]
157 |                 values = values[above_bestfit]
158 |
159 | if min(pvalues) > alpha:
160 | if k in ["expected", "observed"]:
161 | msg = f"The minimum of the scanned p-values is {min(pvalues)} which is larger than the"
162 | msg += f" confidence level alpha = {alpha}. Try to increase the maximum POI value."
163 | raise POIRangeError(msg)
164 |
165 | limits[k] = None
166 | continue
167 |
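            # Interpolate p-value(POI) - alpha with a spline; its root is where the
            # p-value curve falls to alpha, i.e. the limit for this band.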
168 | tck = interpolate.splrep(values, pvalues - alpha, s=0)
169 | root = interpolate.sproot(tck)
170 |
171 | if len(root) > 1:
172 | root = root[0]
173 |
174 | try:
175 | limits[k] = float(root)
176 | except TypeError:
177 | limits[k] = None
178 |
179 |         if printlevel > 0:
180 |             print(f"\nObserved upper limit: {poinull.name} = {limits['observed']}")
181 |             print(f"Expected upper limit: {poinull.name} = {limits['expected']}")
182 |             for sigma in ["+1", "-1", "+2", "-2"]:
183 |                 key = "expected_" + sigma.replace("+", "p").replace("-", "m")
184 |                 print(f"Expected upper limit {sigma} sigma: {poinull.name} = {limits[key]}")
182 |
183 | return limits
184 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/exceptions.py:
--------------------------------------------------------------------------------
1 | """
2 | Specific exceptions for the `hypotests` submodule
3 | """
4 |
5 | from __future__ import annotations
6 |
7 |
8 | class POIRangeError(Exception):
9 | """Exception class non adequate POI scan range"""
10 |
11 | def __init__(self, *args, **kwargs):
12 | Exception.__init__(self, *args, **kwargs)
13 |
14 |
15 | class ParameterNotFound(Exception):
16 | """Exception class raised if a parameter with a given name is not found"""
17 |
18 | def __init__(self, *args, **kwargs):
19 | Exception.__init__(self, *args, **kwargs)
20 |
21 |
22 | class FormatError(Exception):
23 | """Exception class raised when unexpected yaml format are read"""
24 |
25 | def __init__(self, *args, **kwargs):
26 | Exception.__init__(self, *args, **kwargs)
27 |
--------------------------------------------------------------------------------
/src/hepstats/hypotests/parameters.py:
--------------------------------------------------------------------------------
1 | # Licensed under a 3-clause BSD style license, see LICENSE
2 | """
3 | Module defining the parameter of interest classes, currently includes:
4 |
5 | * **POIarray**
6 | * **POI**
7 | """
8 |
9 | from __future__ import annotations
10 |
11 | from collections.abc import Collection
12 |
13 | import numpy as np
14 |
15 | from ..utils.fit.api_check import is_valid_parameter
16 |
17 |
18 | class POIarray:
19 | """
20 | Class for parameters of interest with multiple values:
21 | """
22 |
23 |     def __init__(self, parameter, values: Collection | np.ndarray):
24 | """
25 | Args:
26 | parameter: the parameter of interest
27 | values: values of the parameter of interest
28 |
29 | Raises:
30 | ValueError: if is_valid_parameter(parameter) returns False
31 |             TypeError: if values is not an iterable
32 |
33 | Example with `zfit`:
34 | >>> Nsig = zfit.Parameter("Nsig")
35 | >>> poi = POIarray(Nsig, value=np.linspace(0,10,10))
36 | """
37 |
38 | if not is_valid_parameter(parameter):
39 | msg = f"{parameter} is not a valid parameter!"
40 | raise ValueError(msg)
41 |
42 | if not isinstance(values, Collection):
43 | msg = "A list/array of values of the POI is required."
44 | raise TypeError(msg)
45 |
46 | self.parameter = parameter
47 | self.name = parameter.name
48 | self._values = np.array(values, dtype=np.float64)
49 | self._ndim = 1
50 | self._shape = (len(values),)
51 |
52 | @property
53 | def values(self):
54 | """
55 | Returns the values of the **POIarray**.
56 | """
57 | return self._values
58 |
59 | def __repr__(self):
60 | return f"POIarray('{self.name}', values={self.values})"
61 |
62 | def __getitem__(self, i):
63 | """
64 | Get the i-th element the array of values of the **POIarray**.
65 | """
66 | return POI(self.parameter, self.values[i])
67 |
68 | def __iter__(self):
69 | for v in self.values:
70 | yield POI(self.parameter, v)
71 |
72 | def __len__(self):
73 | return len(self.values)
74 |
75 | def __eq__(self, other):
76 | if not isinstance(other, POIarray):
77 | return NotImplemented
78 |
79 | if len(self) != len(other):
80 | return False
81 |
82 | values_equal = self.values == other.values
83 | name_equal = self.name == other.name
84 | return values_equal.all() and name_equal
85 |
86 | def __hash__(self):
87 |         return hash((self.name, self.values.tobytes()))
88 |
89 | @property
90 | def ndim(self):
91 | """
92 | Returns the number of dimension of the **POIarray**.
93 | """
94 | return self._ndim
95 |
96 | @property
97 | def shape(self):
98 | """
99 | Returns the shape of the **POIarray**.
100 | """
101 | return self._shape
102 |
103 | def append(self, values: int | float | Collection | np.ndarray):
104 | """
105 |         Returns a new **POIarray** with the given values appended.
106 |
107 | Args:
108 | values: values to append
109 | """
110 | if not isinstance(values, Collection):
111 | values = [values]
112 | values = np.concatenate([self.values, values])
113 | return POIarray(parameter=self.parameter, values=values)
114 |
115 |
116 | class POI(POIarray):
117 | """
118 | Class for single value parameter of interest:
119 | """
120 |
121 | def __init__(self, parameter, value: int | float):
122 | """
123 | Args:
124 | parameter: the parameter of interest
125 |             value: value of the parameter of interest
126 |
127 | Raises:
128 | TypeError: if value is an iterable
129 |
130 | Example with `zfit`:
131 | >>> Nsig = zfit.Parameter("Nsig")
132 | >>> poi = POI(Nsig, value=0)
133 | """
134 | if isinstance(value, Collection):
135 | msg = "A single value for the POI is required."
136 | raise TypeError(msg)
137 |
138 | super().__init__(parameter=parameter, values=[value])
139 | self._value = value
140 |
141 | @property
142 | def value(self):
143 | """
144 | Returns the value of the **POI**.
145 | """
146 | return self._value
147 |
148 | def __eq__(self, other):
149 | if not isinstance(other, POI):
150 | return NotImplemented
151 |
152 | value_equal = self.value == other.value
153 | name_equal = self.name == other.name
154 | return value_equal and name_equal
155 |
156 | def __repr__(self):
157 | return f"POI('{self.name}', value={self.value})"
158 |
159 | def __hash__(self):
160 | return hash((self.name, self.value))
161 |
162 |
163 | def asarray(poi: POI) -> POIarray:
164 | """
165 | Transforms a **POI** instance into a **POIarray** instance.
166 |
167 | Args:
168 | poi: the parameter of interest.
169 | """
170 | return POIarray(parameter=poi.parameter, values=poi.values)
171 |
--------------------------------------------------------------------------------
/src/hepstats/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed under a 3-clause BSD style license, see LICENSE.
2 | """
3 | Module for algorithms and methods used to model distributions.
4 |
5 | This module contains in particular:
6 |
7 | * Bayesian Blocks binning algorithm.
8 | """
9 |
10 | # -----------------------------------------------------------------------------
11 | # Import statements
12 | # -----------------------------------------------------------------------------
13 |
14 | from .bayesian_blocks import bayesian_blocks
15 |
--------------------------------------------------------------------------------
/src/hepstats/modeling/bayesian_blocks.py:
--------------------------------------------------------------------------------
1 | # Licensed under a 3-clause BSD style license, see LICENSE and LICENSE_ASTROML
2 | """
3 | Bayesian Block implementation
4 | =============================
5 |
6 | Dynamic programming algorithm for finding the optimal adaptive-width histogram. Modified from the
7 | bayesian blocks python implementation found in astroML :cite:`VanderPlas_2012`.
8 |
9 | * Based on Scargle et al 2012 :cite:`Scargle_2013`
10 | * Initial Python Implementation :cite:`BB_jakevdp`
11 | * Initial Examination in HEP context :cite:`Pollack:2017srh`
12 |
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | from collections.abc import Iterable
18 |
19 | import numpy as np
20 | import pandas as pd
21 |
22 |
23 | class Prior:
24 | """Helper class for calculating the prior on the fitness function."""
25 |
26 | def __init__(self, p0: float = 0.05, gamma: float | None = None):
27 | """
28 | Args:
29 | p0: False-positive rate, between 0 and 1. A lower number places a stricter penalty
30 | against creating more bin edges, thus reducing the potential for false-positive bin edges. In general,
31 |                 the larger the number of bins, the smaller p0 should be to prevent the creation of spurious, jagged
32 | bins. Defaults to 0.05.
33 |
34 | gamma: If specified, then use this gamma to compute the general prior form,
35 | :math:`p \\sim \\gamma^N`. If gamma is specified, p0 is ignored. Defaults to None.
36 | """
37 |
38 | self.p0 = p0
39 | self.gamma = gamma
40 |
41 | def calc(self, N: int) -> float:
42 | """
43 | Computes the prior.
44 |
45 | Args:
46 | N: N-th change point.
47 |
48 | Returns:
49 | the prior.
50 | """
51 | if self.gamma is not None:
52 | return -np.log(self.gamma)
53 | else:
54 | # eq. 21 from Scargle 2012
55 | return 4 - np.log(73.53 * self.p0 * (N**-0.478))
56 |
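# Example (a sketch): with the default p0 = 0.05 the per-changepoint penalty grows
# slowly with N, e.g. Prior(p0=0.05).calc(100) = 4 - log(73.53 * 0.05 * 100**-0.478) ≈ 4.90.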
57 |
58 | def bayesian_blocks(
59 | data: Iterable | np.ndarray,
60 | weights: Iterable | np.ndarray | None = None,
61 | p0: float = 0.05,
62 | gamma: float | None = None,
63 | ) -> np.ndarray:
64 | """Bayesian Blocks Implementation.
65 |
66 | This is a flexible implementation of the Bayesian Blocks algorithm described in :cite:`Scargle_2013`.
67 | It has been modified to natively accept weighted events, for ease of use in HEP applications.
68 |
69 | Args:
70 | data: Input data values (one dimensional, length N). Repeat values are allowed.
71 |
72 | weights: Weights for data (otherwise assume all data points have a weight of 1).
73 | Must be same length as data. Defaults to None.
74 |
75 | p0: False-positive rate, between 0 and 1. A lower number places a stricter penalty
76 | against creating more bin edges, thus reducing the potential for false-positive bin edges. In general,
77 |             the larger the number of bins, the smaller p0 should be to prevent the creation of spurious, jagged
78 | bins. Defaults to 0.05.
79 |
80 | gamma: If specified, then use this gamma to compute the general prior form,
81 | :math:`p \\sim \\gamma^N`. If gamma is specified, p0 is ignored. Defaults to None.
82 |
83 | Returns:
84 | Array containing the (N+1) bin edges
85 |
86 | Examples:
87 | Unweighted data:
88 |
89 | >>> d = np.random.normal(size=100)
90 | >>> bins = bayesian_blocks(d, p0=0.01)
91 |
92 | Unweighted data with repeats:
93 |
94 | >>> d = np.random.normal(size=100)
95 | >>> d[80:] = d[:20]
96 | >>> bins = bayesian_blocks(d, p0=0.01)
97 |
98 | Weighted data:
99 |
100 | >>> d = np.random.normal(size=100)
101 | >>> w = np.random.uniform(1,2, size=100)
102 | >>> bins = bayesian_blocks(d, w, p0=0.01)
103 |
104 | """
105 | # validate input data
106 | data = np.asarray(data, dtype=float)
107 | assert data.ndim == 1
108 |
109 | # validate input weights
110 | # set them to 1 if not given
111 | weights = np.asarray(weights) if weights is not None else np.ones_like(data)
112 |
113 | # initialize the prior
114 | prior = Prior(p0, gamma)
115 |
116 | # Place data and weights into a DataFrame.
117 | # We want to sort the data array (without losing the associated weights), and combine duplicate
118 | # data points by summing their weights together. We can accomplish all this with `groupby`
119 |
120 | df = pd.DataFrame({"data": data, "weights": weights})
121 | gb = df.groupby("data").sum()
122 | data = gb.index.values
123 | weights = gb.weights.values
124 |
125 | N = weights.size
126 |
127 | # create length-(N + 1) array of cell edges
128 | edges = np.concatenate([data[:1], 0.5 * (data[1:] + data[:-1]), data[-1:]])
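    # distance from each cell edge to the last data point; differences of its
    # entries give the widths T_k of the candidate blocks in the loop below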
129 | block_length = data[-1] - edges
130 |
131 | # arrays to store the best configuration
132 | best = np.zeros(N, dtype=float)
133 | last = np.zeros(N, dtype=int)
134 |
135 | # -----------------------------------------------------------------
136 | # Start with first data cell; add one cell at each iteration
137 | # -----------------------------------------------------------------
138 | # last = core_loop(N, block_length, weights, fitfunc, best, last)
139 | for R in range(N):
140 | # Compute fit_vec : fitness of putative last block (end at R)
141 |
142 | # T_k: width/duration of each block
143 | T_k = block_length[: R + 1] - block_length[R + 1]
144 |
145 | # N_k: number of elements in each block
146 | N_k = np.cumsum(weights[: R + 1][::-1])[::-1]
147 |
148 | # evaluate fitness function
149 | fit_vec = N_k * (np.log(N_k / T_k))
150 |
151 | # penalize function with prior
152 | A_R = fit_vec - prior.calc(R + 1)
153 | A_R[1:] += best[:R]
154 |
155 | i_max = np.argmax(A_R)
156 | last[R] = i_max
157 | best[R] = A_R[i_max]
158 |
159 | # -----------------------------------------------------------------
160 | # Now find changepoints by iteratively peeling off the last block
161 | # -----------------------------------------------------------------
162 | change_points = np.zeros(N, dtype=int)
163 | i_cp = N
164 | ind = N
165 | while True:
166 | i_cp -= 1
167 | change_points[i_cp] = ind
168 | if ind == 0:
169 | break
170 | ind = last[ind - 1]
171 | change_points = change_points[i_cp:]
172 |
173 | return edges[change_points]
174 |
--------------------------------------------------------------------------------
/src/hepstats/splot/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed under a 3-clause BSD style license, see LICENSE.
2 | """
3 | Module implementing the **sPlot** algorithm, see :cite:`Pivk:2004ty`.
4 |
5 |
6 | """
7 |
8 | from .sweights import compute_sweights
9 |
--------------------------------------------------------------------------------
/src/hepstats/splot/exceptions.py:
--------------------------------------------------------------------------------
1 | """
2 | Specific exceptions for the `splot` submodule
3 | """
4 |
5 | from __future__ import annotations
6 |
7 |
8 | class ModelNotFittedToData(Exception):
9 | """Exception class for model not fitted to data provided to compute sweights"""
10 |
--------------------------------------------------------------------------------
/src/hepstats/splot/sweights.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import warnings
4 | from typing import Any
5 |
6 | import numpy as np
7 |
8 | from ..utils import eval_pdf
9 | from ..utils.fit.api_check import is_valid_pdf
10 | from .exceptions import ModelNotFittedToData
11 | from .warnings import AboveToleranceWarning
12 |
13 |
14 | def is_sum_of_extended_pdfs(model) -> bool:
15 | """Checks if the input model is a sum of extended models.
16 |
17 | Args:
18 | model: the input model/pdf
19 |
20 | Returns:
21 | True if the model is a sum of extended models, False if not.
22 | """
23 | if not hasattr(model, "get_models"):
24 | return False
25 |
26 | return all(m.is_extended for m in model.get_models()) and model.is_extended
27 |
28 |
29 | def compute_sweights(model, x: np.ndarray, *, atol_exceptions: float | None = None) -> dict[Any, np.ndarray]:
30 | """Computes sWeights from probability density functions for different components/species in a fit model
31 | (for instance signal and background) fitted on some data `x`.
32 |
33 | i.e. model = Nsig * pdf_signal + Nbkg * pdf_bkg
34 |
35 | Args:
36 | model: sum of extended pdfs.
37 | x: data on which `model` is fitted
38 |         atol_exceptions: absolute tolerance for the Maximum Likelihood Sum Rule sanity check
39 |             described in equation 17 of arXiv:physics/0402083. Each per-species sum should be equal to 1
40 |             within `atol_exceptions`; if not, a `ModelNotFittedToData` exception is raised.
41 |
42 | Returns:
43 |         dictionary with yield parameters as keys, and sWeights for corresponding species as values.
44 |
45 | Example with **zfit**:
46 |
47 | Imports:
48 |
49 | >>> import numpy as np
50 | >>> import zfit
51 | >>> from zfit.loss import ExtendedUnbinnedNLL
52 | >>> from zfit.minimize import Minuit
53 |
54 | Definition of the bounds and yield of background and signal species:
55 |
56 | >>> bounds = (0.0, 3.0)
57 | >>> nbkg = 10000
58 | >>> nsig = 5000
59 | >>> obs = zfit.Space('x', limits=bounds)
60 |
61 | Generation of data:
62 |
63 | >>> bkg = np.random.exponential(0.5, nbkg)
64 | >>> peak = np.random.normal(1.2, 0.1, nsig)
65 | >>> data = np.concatenate((bkg, peak))
66 | >>> data = data[(data > bounds[0]) & (data < bounds[1])]
67 | >>> N = data.size
68 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data)
69 |
70 | Model definition:
71 |
72 | >>> mean = zfit.Parameter("mean", 1.2, 0.5, 2.0)
73 | >>> sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2)
74 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
75 | >>> Nsig = zfit.Parameter("Nsig", nsig, 0., N)
76 | >>> Nbkg = zfit.Parameter("Nbkg", nbkg, 0., N)
77 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)
78 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
79 | >>> tot_model = zfit.pdf.SumPDF([signal, background])
80 |
81 | Loss construction and minimization:
82 |
83 |         >>> loss = ExtendedUnbinnedNLL(model=tot_model, data=data)
84 | >>> minimizer = Minuit()
85 | >>> minimum = minimizer.minimize(loss)
86 |
87 | sWeights computation:
88 |
89 | >>> from hepstats.splot import compute_sweights
90 | >>> sweights = compute_sweights(tot_model, data)
91 | >>> print(sweights)
92 |         {<zfit.Parameter 'Nsig' ...>: array([-0.09953299, -0.09953299, -0.09953299, ...,
93 |         0.78689884, 1.08823111, 1.05948873]),
94 |         <zfit.Parameter 'Nbkg' ...>: array([ 1.09953348, 1.09953348, 1.09953348, ...,
95 |         0.21310097, -0.08823153, -0.05948912])}
96 | """
97 |
98 | if not is_valid_pdf(model):
99 | msg = f"{model} is not a valid pdf!"
100 | raise ValueError(msg)
101 | if not is_sum_of_extended_pdfs(model):
102 | msg = f"Input model, {model}, should be a sum of extended pdfs!"
103 | raise ValueError(msg)
104 |
105 | models = model.get_models()
106 | yields = [m.get_yield() for m in models]
107 |
108 | p = np.vstack([eval_pdf(m, x) for m in models]).T
109 | Nx = eval_pdf(model, x, allow_extended=True)
110 | pN = p / Nx[:, None]
111 |
112 | MLSR = pN.sum(axis=0)
113 | atol_warning = 5e-3
114 | if atol_exceptions is None:
115 | atol_exceptions = 5e-2
116 |
117 | def msg_fn(tolerance):
118 | msg = (
119 | "The Maximum Likelihood Sum Rule sanity check, described in equation 17 of"
120 | + " arXiv:physics/0402083, failed. According to this check the following quantities\n"
121 | )
122 | for y, mlsr in zip(yields, MLSR):
123 | msg += f"\t* {y.name}: {mlsr},\n"
124 | msg += f"should be equal to 1.0 with an absolute tolerance of {tolerance}."
125 | return msg
126 |
127 | if not np.allclose(MLSR, 1, atol=atol_exceptions):
128 | msg = msg_fn(atol_exceptions)
129 | msg += " The numbers suggest that the model is not fitted to the data. Please check your fit."
130 | raise ModelNotFittedToData(msg)
131 |
132 | if not np.allclose(MLSR, 1, atol=atol_warning):
133 | msg = msg_fn(atol_warning)
134 | msg += " If the fit to the data is good please ignore this warning."
135 | warnings.warn(msg, AboveToleranceWarning, stacklevel=2)
136 |
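    # Inverse of the sWeights covariance matrix from the sPlot formalism
    # (arXiv:physics/0402083): Vinv[k, l] = sum_e p_k(x_e) p_l(x_e) / (sum_j N_j p_j(x_e))**2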
137 | Vinv = (pN).T.dot(pN)
138 | V = np.linalg.inv(Vinv)
139 |
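    # sWeights: w_k(x_e) = sum_l V[k, l] p_l(x_e) / (sum_j N_j p_j(x_e))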
140 | sweights = p.dot(V) / Nx[:, None]
141 |
142 | return {y: sweights[:, i] for i, y in enumerate(yields)}
143 |
--------------------------------------------------------------------------------
/src/hepstats/splot/warnings.py:
--------------------------------------------------------------------------------
1 | """
2 | Specific warnings for the `splot` submodule
3 | """
4 |
5 | from __future__ import annotations
6 |
7 |
8 | class AboveToleranceWarning(UserWarning):
9 |     """Warning raised when the sWeights Maximum Likelihood Sum Rule check exceeds the warning tolerance."""
10 |
--------------------------------------------------------------------------------
/src/hepstats/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .fit import (
2 | eval_pdf,
3 | array2dataset,
4 | pll,
5 | base_sampler,
6 | base_sample,
7 | get_value,
8 | set_values,
9 | )
10 |
--------------------------------------------------------------------------------
/src/hepstats/utils/fit/__init__.py:
--------------------------------------------------------------------------------
1 | from .diverse import (
2 | get_value,
3 | eval_pdf,
4 | pll,
5 | array2dataset,
6 | get_nevents,
7 | set_values,
8 | set_values_once,
9 | )
10 | from .sampling import base_sampler, base_sample
11 |
--------------------------------------------------------------------------------
/src/hepstats/utils/fit/api_check.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for testing a fitting library validity with hepstats.
3 |
4 | A fitting library should provide six basic objects:
5 |
6 | * model / probability density function
7 | * parameters of the models
8 | * data
9 | * loss / likelihood function
10 | * minimizer
11 | * fitresult (optional)
12 |
13 | A function for each object is defined in this module, all should return `True` to work
14 | with hepstats.
15 |
16 | The `zfit` API is currently the standard fitting API in hepstats.
17 |
18 | """
19 |
20 | from __future__ import annotations
21 |
22 | import warnings
23 |
24 | import uhi.typing.plottable
25 |
26 |
27 | def is_valid_parameter(object):
28 | """
29 | Checks if a parameter has the following attributes/methods:
30 | * value
31 | * set_value
32 | * floating
33 | """
34 | has_value = hasattr(object, "value")
35 | has_set_value = hasattr(object, "set_value")
36 | has_floating = hasattr(object, "floating")
37 |
38 | return has_value and has_set_value and has_floating
39 |
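# Minimal duck-typed sketch (not part of the API): any object exposing `value`,
# `set_value` and `floating` passes this check, e.g.
#
#     class MyParam:
#         def __init__(self, value):
#             self.value = value
#             self.floating = True
#
#         def set_value(self, value):
#             self.value = value
#
#     assert is_valid_parameter(MyParam(1.0))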
40 |
41 | def is_valid_data(object):
42 | """
43 | Checks if the data object has the following attributes/methods:
44 | * nevents
45 | * weights
46 | * set_weights
47 | * space
48 | """
49 | is_sampled_data = hasattr(object, "resample")
50 |
51 | try:
52 | has_nevents = hasattr(object, "nevents")
53 | except RuntimeError:
54 | if is_sampled_data:
55 | object.resample()
56 | has_nevents = hasattr(object, "nevents")
57 | else:
58 | has_nevents = False
59 |
60 | has_weights = hasattr(object, "weights")
61 | has_set_weights = hasattr(object, "set_weights")
62 | has_space = hasattr(object, "space")
63 | is_histlike = isinstance(object, uhi.typing.plottable.PlottableHistogram)
64 | return (has_nevents and has_weights and has_set_weights and has_space) or is_histlike
65 |
66 |
67 | def is_valid_pdf(object):
68 | """
69 | Checks if the pdf object has the following attributes/methods:
70 | * get_params
71 | * pdf
72 | * integrate
73 | * sample
74 | * get_yield
75 |
76 | Also the function **is_valid_parameter** is called with each of the parameters returned by get_params
77 | as argument.
78 | """
79 | has_get_params = hasattr(object, "get_params")
80 | if not has_get_params:
81 | return False
82 | else:
83 | params = object.get_params()
84 |
85 | all_valid_params = all(is_valid_parameter(p) for p in params)
86 | has_pdf = hasattr(object, "pdf")
87 | has_integrate = hasattr(object, "integrate")
88 | has_sample = hasattr(object, "sample")
89 | has_space = hasattr(object, "space")
90 | has_get_yield = hasattr(object, "get_yield")
91 |
92 | return all_valid_params and has_pdf and has_integrate and has_sample and has_space and has_get_yield
93 |
94 |
95 | def is_valid_loss(object):
96 | """
97 | Checks if the loss object has the following attributes/methods:
98 | * model
99 | * data
100 | * get_params
101 | * constraints
102 | * fit_range
103 |
104 | Also the function **is_valid_pdf** is called with each of the models returned by model
105 |     as argument. Additionally the function **is_valid_data** is called with each of the data objects
106 |     returned by data as argument.
107 | """
108 | if not hasattr(object, "model"):
109 | return False
110 | else:
111 | model = object.model
112 |
113 | if not hasattr(object, "data"):
114 | return False
115 | else:
116 | data = object.data
117 |
118 | has_get_params = hasattr(object, "get_params")
119 | has_constraints = hasattr(object, "constraints")
120 | has_create_new = hasattr(object, "create_new")
121 | if not has_create_new:
122 | warnings.warn("Loss should have a `create_new` method.", FutureWarning, stacklevel=3)
123 | has_create_new = True # TODO: allowed now, will be dropped in the future
124 | all_valid_pdfs = all(is_valid_pdf(m) for m in model)
125 | all_valid_datasets = all(is_valid_data(d) for d in data)
126 |
127 | return all_valid_pdfs and all_valid_datasets and has_constraints and has_create_new and has_get_params
128 |
129 |
130 | def is_valid_fitresult(object):
131 | """
132 | Checks if the fit result object has the following attributes/methods:
133 | * loss
134 | * params
135 | * covariance
136 |
137 |     The function **is_valid_loss** is also called on the loss.
138 | """
139 | has_loss = hasattr(object, "loss")
140 |
141 | if not has_loss:
142 | return False
143 | else:
144 | loss = object.loss
145 | has_params = hasattr(object, "params")
146 | has_covariance = hasattr(object, "covariance")
147 | return is_valid_loss(loss) and has_params and has_covariance
148 |
149 |
150 | def is_valid_minimizer(object):
151 | """
152 |     Checks if the minimizer object has the following attributes/methods:
153 | * minimize
154 | """
155 | return hasattr(object, "minimize")
156 |
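
As a quick illustration of how these checks compose, here is a minimal sketch (assuming `zfit` is installed; all variable names are illustrative, not part of this module) that validates each ingredient of a simple fit before handing it to hepstats:

    import numpy as np
    import zfit

    from hepstats.utils.fit.api_check import (
        is_valid_data,
        is_valid_loss,
        is_valid_minimizer,
        is_valid_parameter,
        is_valid_pdf,
    )

    obs = zfit.Space("x", limits=(-5, 5))
    mu = zfit.Parameter("mu", 0.0, -1.0, 1.0)
    sigma = zfit.Parameter("sigma", 1.0, 0.1, 5.0)
    model = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma)
    data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(0, 1, 1000))
    loss = zfit.loss.UnbinnedNLL(model=model, data=data)

    # each check returns True for an API-compatible object
    assert is_valid_parameter(mu)
    assert is_valid_pdf(model)
    assert is_valid_data(data)
    assert is_valid_loss(loss)
    assert is_valid_minimizer(zfit.minimize.Minuit())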
--------------------------------------------------------------------------------
/src/hepstats/utils/fit/diverse.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from collections.abc import Mapping
4 | from contextlib import ExitStack, contextmanager, suppress
5 |
6 | import numpy as np
7 |
8 |
9 | def get_ndims(dataset):
10 | """Return the number of dimensions in the dataset"""
11 | return len(dataset.obs)
12 |
13 |
14 | def get_value(value):
15 | return np.array(value)
16 |
17 |
18 | def set_values_once(params, values):
19 | with suppress(ImportError):
20 | import zfit
21 |
22 | return zfit.param.set_values(params, values) # more efficient
23 |
24 | for p, v in zip(params, values):
25 | p.set_value(v)
26 | return None
27 |
28 |
29 | def eval_pdf(model, x, params=None, allow_extended=False):
30 | """Compute pdf of model at a given point x and for given parameters values"""
31 |
32 | if params is None:
33 | params = {}
34 |
35 | def pdf(model, x):
36 | ret = model.ext_pdf(x) if model.is_extended and allow_extended else model.pdf(x)
37 |
38 | return get_value(ret)
39 |
40 | with ExitStack() as stack:
41 | for param in model.get_params():
42 | if param in params:
43 | value = params[param]["value"]
44 | stack.enter_context(param.set_value(value))
45 | return pdf(model, x)
46 |
47 |
48 | def pll(minimizer, loss, pois, init=None) -> float:
49 | """Compute minimum profile likelihood for fixed given parameters values."""
50 | del init # unused currently
51 |
52 | with ExitStack() as stack:
53 | for p in pois:
54 | param = p.parameter
55 | stack.enter_context(param.set_value(p.value))
56 | param.floating = False
57 |
58 | if any(param_loss.floating for param_loss in loss.get_params()):
59 | minimum = minimizer.minimize(loss=loss) # TODO: add init?
60 | value = minimum.fmin
61 | else:
62 | value = get_value(loss.value())
63 |
64 | for p in pois:
65 | p.parameter.floating = True
66 |
67 | return value
68 |
69 |
70 | @contextmanager
71 | def set_values(params, values=None):
72 | if values is None:
73 | if isinstance(params, Mapping):
74 | values = tuple(params.values())
75 | params = tuple(params.keys())
76 | else:
77 | msg = "values must be provided if params is not a Mapping (dict-like)"
78 | raise ValueError(msg)
79 | old_values = [p.value() for p in params]
80 | for p, v in zip(params, values):
81 | p.set_value(v)
82 | yield
83 | for p, v in zip(params, old_values):
84 | p.set_value(v)
85 |
86 |
87 | def array2dataset(dataset_cls, obs, array, weights=None):
88 | """
89 |     dataset_cls: only used to determine the class into which array/weights
90 |     will be converted.
91 | """
92 |
93 | if hasattr(dataset_cls, "from_numpy"):
94 | return dataset_cls.from_numpy(obs, array=array, weights=weights)
95 | else:
96 | return dataset_cls(obs, array=array, weights=weights)
97 |
98 |
99 | def get_nevents(dataset):
100 | """Returns the number of events in the dataset"""
101 |
102 | return get_value(dataset.nevents)
103 |
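
`set_values` is the workhorse of this module: it temporarily sets parameter values and restores the previous ones on exit, which is what `eval_pdf` and `pll` build on. A minimal sketch of the pattern (assuming `zfit`; names are illustrative):

    import zfit

    from hepstats.utils.fit.diverse import set_values

    mu = zfit.Parameter("mu", 1.0)

    # explicit params/values form
    with set_values([mu], [2.5]):
        assert float(mu.value()) == 2.5  # temporary value inside the context
    assert float(mu.value()) == 1.0      # previous value restored on exit

    # equivalent dict form (values are taken from the mapping)
    with set_values({mu: 2.5}):
        pass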
--------------------------------------------------------------------------------
/src/hepstats/utils/fit/sampling.py:
--------------------------------------------------------------------------------
1 | """
2 | Module providing basic sampling methods.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | from .api_check import is_valid_pdf
8 | from .diverse import get_value, set_values
9 |
10 |
11 | def base_sampler(models, nevents):
12 | """
13 | Creates samplers from models.
14 |
15 | Args:
16 | models (list(model)): models to sample
17 |         nevents (list(int)): number of events in each sampler
18 |
19 | Returns:
20 | Samplers
21 | """
22 |
23 | assert all(is_valid_pdf(m) for m in models)
24 | assert len(nevents) == len(models)
25 |
26 | samplers = []
27 |
28 | for i, m in enumerate(models):
29 | sampler = m.create_sampler(n=nevents[i])
30 | samplers.append(sampler)
31 |
32 | return samplers
33 |
34 |
35 | def base_sample(samplers, ntoys, parameter=None, value=None, constraints=None):
36 | """
37 |     Samples from samplers. The parameters that are floating in the samplers can be set to a specific
38 |     value using the `parameter` and `value` arguments.
39 |
40 | Args:
41 | samplers (list): generators of samples
42 | ntoys (int): number of samples to generate
43 | parameter (optional): floating parameter in the sampler
44 | value (optional): value of the parameter
45 | constraints (optional): constraints to sample
46 |
47 |     Yields:
48 |         dict: sampled values of the constrained parameters, one dict per toy
49 | """
50 |
51 | sampled_constraints = {}
52 | if constraints is not None:
53 | for constr in constraints:
54 | try:
55 | sampled_constraints.update({k: get_value(v) for k, v in constr.sample(n=ntoys).items()})
56 | except AttributeError:
57 | continue
58 |
59 | params = {} if parameter is None or value is None else {parameter: value}
60 | for i in range(ntoys):
61 | with set_values(params):
62 | for s in samplers:
63 | s.resample() # do not pass parameters as arguments as it will fail in simultaneous fits
64 |
65 | if constraints is not None:
66 | yield {param: value[i] for param, value in sampled_constraints.items()}
67 | else:
68 | yield {}
69 |
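
Note that `base_sample` is a generator: each iteration resamples every sampler in place and yields the constraint values drawn for that toy. A usage sketch (assuming `model` is a valid extended pdf built elsewhere; illustrative only):

    from hepstats.utils.fit.sampling import base_sample, base_sampler

    samplers = base_sampler([model], nevents=[1000])

    for constraint_values in base_sample(samplers, ntoys=100):
        # each sampler now holds a fresh toy dataset; constraint_values is
        # empty here because no constraints were passed
        ...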
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import numpy as np
4 | import pytest
5 |
6 |
7 | def pytest_addoption(parser):
8 | parser.addoption(
9 | "--cmdopt",
10 | action="store",
11 | default="test",
12 | help="option: 'test' or \
13 | 'generate'. Only use 'generate' if you've changed the tests and need to update the expected \
14 | output!",
15 | )
16 |
17 |
18 | @pytest.fixture
19 | def cmdopt(request):
20 | return request.config.getoption("--cmdopt")
21 |
22 |
23 | @pytest.fixture(scope="session")
24 | def data_gen():
25 | np.random.seed(111)
26 | data1 = np.random.normal(size=1000)
27 | data2 = np.random.normal(2, 1, size=1000)
28 | weights = np.random.uniform(1, 2, size=1000)
29 | return data1, data2, weights
30 |
31 |
32 | # TODO: manually ported, use pre-made: https://github.com/zfit/zfit-development/issues/73
33 | @pytest.fixture(autouse=True)
34 | def _setup_teardown():
35 | try:
36 | import zfit
37 | except ImportError:
38 | yield
39 | return
40 |
41 | old_chunksize = zfit.run.chunking.max_n_points
42 | old_active = zfit.run.chunking.active
43 |
44 | yield
45 |
46 | from zfit.core.parameter import ZfitParameterMixin
47 |
48 | ZfitParameterMixin._existing_params.clear()
49 |
50 | from zfit.util.cache import clear_graph_cache
51 |
52 | clear_graph_cache()
53 | zfit.run.chunking.active = old_active
54 | zfit.run.chunking.max_n_points = old_chunksize
55 | zfit.run.set_graph_mode()
56 | zfit.run.set_autograd_mode()
57 |
58 |
59 | def create_loss_func(npeak, nbins=None, nbkg=None, nameadd="", obs=None):
60 | import zfit
61 |
62 | bounds = (0.1, 3.0)
63 | obs = "x" if obs is None else obs
64 | obs = zfit.Space(obs, limits=bounds)
65 |
66 | # Data and signal
67 | np.random.seed(0)
68 | tau = -2.0
69 | beta = -1 / tau
70 | nbkg = 300 if nbkg is None else nbkg
71 | bkg = np.random.exponential(beta, nbkg)
72 | peak = np.random.normal(1.2, 0.1, npeak)
73 | data = np.concatenate((bkg, peak))
74 | data = data[(data > bounds[0]) & (data < bounds[1])]
75 | N = len(data)
76 | data = zfit.data.Data.from_numpy(obs=obs, array=data)
77 |
78 | mean = zfit.Parameter("mean" + nameadd, 1.2, 0.5, 2.0)
79 | sigma = zfit.Parameter("sigma" + nameadd, 0.1, 0.02, 0.2)
80 | lambda_ = zfit.Parameter("lambda" + nameadd, -2.0, -4.0, -1.0)
81 | Nsig = zfit.Parameter("Nsig" + nameadd, 20.0, -20.0, N * 3)
82 | Nbkg = zfit.Parameter("Nbkg" + nameadd, N, 0.0, N * 3)
83 |
84 | signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)
85 | background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
86 |
87 | tot_model = zfit.pdf.SumPDF([signal, background])
88 |
89 | if nbins is not None:
90 | binned_space = obs.with_binning(nbins)
91 | data = data.to_binned(binned_space)
92 | tot_model = tot_model.to_binned(binned_space)
93 | loss = zfit.loss.ExtendedBinnedNLL(tot_model, data)
94 | else:
95 | loss = zfit.loss.ExtendedUnbinnedNLL(model=tot_model, data=data)
96 |
97 | return loss, (Nsig, Nbkg, mean, sigma)
98 |
99 |
100 | def create_sim_loss_func(npeak, nbins=None):
101 | loss1, params1 = create_loss_func(npeak, nbins=nbins, nameadd="_1", obs="x1")
102 | loss2, params2 = create_loss_func(npeak * 10, nbins=nbins, nameadd="_2", obs="x2", nbkg=500)
103 | loss = loss1 + loss2
104 |
105 | return loss, params1
106 |
107 |
108 | @pytest.fixture
109 | def create_loss():
110 | return create_loss_func
111 |
112 |
113 | @pytest.fixture
114 | def create_sim_loss():
115 | return create_sim_loss_func
116 |
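
The `--cmdopt` flag switches the reference-comparison tests between checking stored answers and regenerating them, e.g. `pytest --cmdopt=generate tests/modeling`. A test consumes it, together with the shared toy data, simply by naming the fixtures (hypothetical example):

    def test_example(cmdopt, data_gen):
        data1, data2, weights = data_gen  # reproducible toy samples (seed 111)
        if cmdopt == "generate":
            ...  # write new expected output
        elif cmdopt == "test":
            ...  # compare against the stored expected output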
--------------------------------------------------------------------------------
/tests/hypotests/data/cls_pvalues.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/hypotests/data/cls_pvalues.npz
--------------------------------------------------------------------------------
/tests/hypotests/data/clsb_pvalues.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/hypotests/data/clsb_pvalues.npz
--------------------------------------------------------------------------------
/tests/hypotests/test_basetest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | zfit = pytest.importorskip("zfit")
4 | from zfit.loss import UnbinnedNLL
5 | from zfit.minimize import Minuit
6 |
7 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator
8 | from hepstats.hypotests.core.basetest import BaseTest
9 | from hepstats.hypotests.parameters import POI, POIarray
10 |
11 |
12 |
13 | def create_loss():
14 | obs = zfit.Space("x", limits=(0.1, 2.0))
15 | data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
16 | mean = zfit.Parameter("mu", 1.2)
17 | sigma = zfit.Parameter("sigma", 0.1)
18 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
19 | loss = UnbinnedNLL(model=model, data=data)
20 |
21 | return loss, (mean, sigma)
22 |
23 |
24 | def test_constructor():
25 | with pytest.raises(TypeError):
26 | BaseTest()
27 |
28 | loss, (mean, sigma) = create_loss()
29 | calculator = BaseCalculator(loss, Minuit())
30 |
31 | poimean = POIarray(mean, [1.0, 1.1, 1.2, 1.3])
32 | poisigma = POI(sigma, 0.1)
33 |
34 | with pytest.raises(TypeError):
35 | BaseTest(calculator)
36 |
37 | with pytest.raises(TypeError):
38 | BaseTest(calculator, poimean, [poisigma])
39 |
40 | with pytest.raises(TypeError):
41 | BaseTest("calculator", poimean, poisigma)
42 |
43 |
44 | def test_attributes():
45 | loss, (mean, sigma) = create_loss()
46 | calculator = BaseCalculator(loss, Minuit())
47 |
48 | poimean_1 = POIarray(mean, [1.0, 1.1, 1.2, 1.3])
49 | poimean_2 = POI(mean, 1.2)
50 |
51 | test = BaseTest(calculator, poimean_1, poimean_2)
52 |
53 | assert test.poinull == poimean_1
54 | assert test.poialt == poimean_2
55 | assert test.calculator == calculator
56 |
--------------------------------------------------------------------------------
/tests/hypotests/test_calculators.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import numpy as np
4 | import pytest
5 | zfit = pytest.importorskip("zfit")
6 | from zfit.loss import UnbinnedNLL
7 | from zfit.minimize import Minuit
8 |
9 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator
10 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator
11 | from hepstats.hypotests.parameters import POI, POIarray
12 | from hepstats.utils.fit.api_check import is_valid_loss, is_valid_data
13 |
14 | true_mu = 1.2
15 | true_sigma = 0.1
16 |
17 |
18 | def create_loss(constraint=False, nbins=None, make2d=False):
19 | if not isinstance(nbins, list):
20 | nbins = [nbins] * 2 if make2d else [nbins]
21 | obs1 = zfit.Space("x", limits=(0.1, 2.0), binning=nbins[0])
22 | obs = obs1
23 | if make2d:
24 | obs2 = zfit.Space("y", limits=(-0.1, 3.0), binning=nbins[1])
25 | obs = obs1 * obs2
26 |
27 | array1 = np.random.normal(1.2, 0.1, (10000, 2 if make2d else 1))
28 | data = zfit.data.Data.from_numpy(obs=obs.with_binning(None), array=array1)
29 | if nbins[0] is not None:
30 | data = data.to_binned(obs)
31 | mean = zfit.Parameter("mu", true_mu, true_mu - 2, true_mu + 2)
32 | sigma = zfit.Parameter("sigma", true_sigma, 0.01, 1.0)
33 | model = zfit.pdf.Gauss(obs=obs1.with_binning(None), mu=mean, sigma=sigma)
34 | if make2d:
35 | model2 = zfit.pdf.Gauss(obs=obs2.with_binning(None), mu=mean, sigma=sigma)
36 | model = model * model2
37 | if nbins[0] is not None:
38 | model = zfit.pdf.BinnedFromUnbinnedPDF(model, space=obs)
39 | if constraint:
40 | constraint = zfit.constraint.GaussianConstraint(
41 | params=mean, observation=true_mu, uncertainty=0.01
42 | )
43 | else:
44 | constraint = None
45 | if nbins[0] is None:
46 | loss = UnbinnedNLL(model=model, data=data, constraints=constraint)
47 | else:
48 | loss = zfit.loss.BinnedNLL(model=model, data=data, constraints=constraint)
49 |
50 | return loss, (mean, sigma)
51 |
52 |
53 | @pytest.mark.parametrize(
54 | "calculator",
55 | [BaseCalculator, AsymptoticCalculator, FrequentistCalculator, "AsymptoticOld"],
56 | )
57 | @pytest.mark.parametrize("make2d", [False, True], ids=["1d", "2d"])
58 | @pytest.mark.parametrize(
59 | "nbins",
60 | [None, [10, 13], [9, 50]],
61 | ids=lambda x: f"Binning {x}" if x is not None else "Unbinned",
62 | )
63 | @pytest.mark.parametrize(
64 | "constraint", [False, True], ids=["No constraint", "With constraint"]
65 | )
66 | def test_base_calculator(calculator, make2d, nbins, constraint):
67 | if calculator == "AsymptoticOld":
68 | if make2d:
69 | pytest.skip("AsymptoticOld does not support 2D")
70 | if nbins is not None:
71 | pytest.skip("AsymptoticOld does not support binned")
72 |
73 | class calculator(AsymptoticCalculator): # we disable the converter
74 | UNBINNED_TO_BINNED_LOSS = {}
75 |
76 | assert calculator is not AsymptoticCalculator, "Must not be the same"
77 | assert AsymptoticCalculator.UNBINNED_TO_BINNED_LOSS, "Has to be filled"
78 | with pytest.raises(TypeError):
79 | calculator()
80 |
81 | loss, (mean, sigma) = create_loss(constraint=constraint, make2d=make2d, nbins=nbins)
82 |
83 | with pytest.raises(ValueError):
84 | calculator("loss", Minuit())
85 |
86 | with pytest.raises(ValueError):
87 | calculator(loss, "Minuit()")
88 |
89 | calc_loss = calculator(loss, Minuit())
90 |
91 | with pytest.raises(ValueError):
92 | calc_loss.bestfit = "bestfit"
93 |
94 | bestfit = calc_loss.bestfit
95 | calc_fitresult = calculator(bestfit, calc_loss.minimizer)
96 |
97 | assert calc_loss.bestfit == calc_fitresult.bestfit
98 | assert calc_loss.loss == calc_fitresult.loss
99 |
100 | mean_poi = POIarray(mean, [1.15, 1.2, 1.25])
101 | mean_nll = calc_loss.obs_nll(pois=mean_poi)
102 | calc_loss.obs_nll(pois=mean_poi) # get from cache
103 |
104 | assert mean_nll[0] >= mean_nll[1]
105 | assert mean_nll[2] >= mean_nll[1]
106 |
107 | assert calc_loss.obs_nll(mean_poi[0]) == mean_nll[0]
108 | assert calc_loss.obs_nll(mean_poi[1]) == mean_nll[1]
109 | assert calc_loss.obs_nll(mean_poi[2]) == mean_nll[2]
110 |
111 | mean_poialt = POI(mean, 1.2)
112 |
113 | def pvalue():
114 | return calc_loss.pvalue(poinull=mean_poi, poialt=mean_poialt)
115 |
116 | def exp_pvalue():
117 | return calc_loss.expected_pvalue(
118 | poinull=mean_poi, poialt=mean_poialt, nsigma=np.arange(-2, 3, 1)
119 | )
120 |
121 | def exp_poi():
122 | return calc_loss.expected_poi(
123 | poinull=mean_poi, poialt=mean_poialt, nsigma=np.arange(-2, 3, 1)
124 | )
125 |
126 | if calculator == BaseCalculator:
127 | with pytest.raises(NotImplementedError):
128 | pvalue()
129 | with pytest.raises(NotImplementedError):
130 | exp_pvalue()
131 | else:
132 | pvalue()
133 | exp_pvalue()
134 |
135 | model = calc_loss.model[0]
136 | sampler = model.create_sampler(n=10000)
137 | assert is_valid_data(sampler)
138 |
139 | loss = calc_loss.lossbuilder(model=[model], data=[sampler], weights=None)
140 | assert is_valid_loss(loss)
141 |
142 | with pytest.raises(ValueError):
143 | calc_loss.lossbuilder(model=[model, model], data=[sampler])
144 | with pytest.raises(ValueError):
145 | calc_loss.lossbuilder(model=[model], data=[sampler, calc_loss.data[0]])
146 | with pytest.raises(ValueError):
147 | calc_loss.lossbuilder(model=[model], data=[sampler], weights=[])
148 | with pytest.raises(ValueError):
149 | calc_loss.lossbuilder(
150 | model=[model], data=[sampler], weights=[np.ones(10000), np.ones(10000)]
151 | )
152 |
153 | assert calc_loss.get_parameter(mean_poi.name) == mean
154 | with pytest.raises(KeyError):
155 | calc_loss.get_parameter("dummy_parameter")
156 |
157 |
158 | def test_asymptotic_calculator_one_poi():
159 | with pytest.raises(TypeError):
160 | AsymptoticCalculator()
161 |
162 | loss, (mean, sigma) = create_loss()
163 | calc = AsymptoticCalculator(loss, Minuit())
164 |
165 | poi_null = POIarray(mean, [1.15, 1.2, 1.25])
166 | poi_alt = POI(mean, 1.2)
167 |
168 | dataset = calc.asimov_dataset(poi_alt)
169 | assert all(is_valid_data(d) for d in dataset)
170 | loss = calc.asimov_loss(poi_alt)
171 | assert is_valid_loss(loss)
172 |
173 | null_nll = calc.asimov_nll(pois=poi_null, poialt=poi_alt)
174 |
175 | assert null_nll[0] >= null_nll[1]
176 | assert null_nll[2] >= null_nll[1]
177 |
178 |
179 | @pytest.mark.parametrize("constraint", [False, True])
180 | def test_frequentist_calculator_one_poi(constraint):
181 | with pytest.raises(TypeError):
182 | FrequentistCalculator()
183 |
184 | loss, (mean, sigma) = create_loss(constraint=constraint)
185 | calc = FrequentistCalculator(loss, Minuit(), ntoysnull=100, ntoysalt=100)
186 |
187 | assert calc.ntoysnull == 100
188 | assert calc.ntoysalt == 100
189 |
190 | samplers = calc.sampler()
191 | assert all(is_valid_data(s) for s in samplers)
192 | loss = calc.toys_loss(mean.name)
193 | assert is_valid_loss(loss)
194 |
--------------------------------------------------------------------------------
/tests/hypotests/test_confidence_intervals.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | zfit = pytest.importorskip("zfit")
4 | import os
5 | from zfit.loss import UnbinnedNLL
6 | from zfit.minimize import Minuit
7 |
8 | import hepstats
9 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator
10 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator
11 | from hepstats.hypotests import ConfidenceInterval
12 | from hepstats.hypotests.parameters import POI, POIarray
13 | from hepstats.hypotests.exceptions import POIRangeError
14 |
15 | notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests"
16 |
17 |
18 | def test_constructor(create_loss):
19 | with pytest.raises(TypeError):
20 | ConfidenceInterval()
21 |
22 | loss, (_, __, mean, _) = create_loss(npeak=80)
23 | calculator = BaseCalculator(loss, Minuit())
24 |
25 | poi_1 = POI(mean, 1.5)
26 | poi_2 = POI(mean, 1.2)
27 |
28 | with pytest.raises(TypeError):
29 | ConfidenceInterval(calculator)
30 |
31 | with pytest.raises(TypeError):
32 | ConfidenceInterval(calculator, [poi_1], poi_2, qtilde=True)
33 |
34 | with pytest.raises(TypeError):
35 | ConfidenceInterval(calculator, [poi_1], [poi_2], qtilde=False)
36 |
37 |
38 | def asy_calc(create_loss, nbins=None):
39 | loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins)
40 | return mean, AsymptoticCalculator(loss, Minuit())
41 |
42 |
43 | def asy_calc_old(create_loss, nbins=None):
44 | loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins)
45 |
46 | class calculator(AsymptoticCalculator):
47 | UNBINNED_TO_BINNED_LOSS = {}
48 |
49 | assert calculator is not AsymptoticCalculator, "Must not be the same"
50 | assert AsymptoticCalculator.UNBINNED_TO_BINNED_LOSS, "Has to be filled"
51 | return mean, calculator(loss, Minuit())
52 |
53 |
54 | def freq_calc(create_loss, nbins=None):
55 | loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins)
56 | calculator = FrequentistCalculator.from_yaml(
57 | f"{notebooks_dir}/toys/ci_freq_zfit_toys.yml", loss, Minuit()
58 | )
59 | return mean, calculator
60 |
61 |
62 | @pytest.mark.parametrize("calculator", [asy_calc, freq_calc, asy_calc_old])
63 | @pytest.mark.parametrize("nbins", [None, 47, 300], ids=lambda x: f"nbins={x}")
64 | def test_with_gauss_exp_example(create_loss, calculator, nbins):
65 | if calculator is asy_calc_old and nbins is not None:
66 | pytest.skip("Not implemented for old calculator")
67 | mean, calculator = calculator(create_loss, nbins=nbins)
68 | scan_values = np.linspace(1.15, 1.26, 50)
69 | poinull = POIarray(mean, scan_values)
70 | ci = ConfidenceInterval(calculator, poinull)
71 | interval = ci.interval()
72 | assert interval["lower"] == pytest.approx(1.1810371356602791, rel=0.1)
73 | assert interval["upper"] == pytest.approx(1.2156701172321935, rel=0.1)
74 | with pytest.raises(POIRangeError):
75 | poinull = POIarray(
76 | mean, scan_values[(scan_values >= 1.2) & (scan_values <= 1.205)]
77 | )
78 |
79 | ci = ConfidenceInterval(calculator, poinull)
80 | ci.interval()
81 | with pytest.raises(POIRangeError):
82 | poinull = POIarray(mean, scan_values[scan_values >= 1.2])
83 | ci = ConfidenceInterval(calculator, poinull)
84 | ci.interval()
85 | with pytest.raises(POIRangeError):
86 | poinull = POIarray(mean, scan_values[scan_values <= 1.205])
87 | ci = ConfidenceInterval(calculator, poinull)
88 | ci.interval()
89 |
90 |
91 | def test_with_gauss_fluctuations():
92 | x_true = -2.0
93 |
94 | minimizer = Minuit()
95 | bounds = (-10, 10)
96 | obs = zfit.Space("x", limits=bounds)
97 |
98 | mean = zfit.Parameter("mean", 0)
99 | sigma = zfit.Parameter("sigma", 1.0)
100 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
101 |
102 | npzfile = f"{notebooks_dir}/toys/FC_toys_{x_true}.npz"
103 | data = zfit.data.Data.from_numpy(obs=obs, array=np.load(npzfile)["x"])
104 |
105 | nll = UnbinnedNLL(model=model, data=data)
106 |
107 | minimum = minimizer.minimize(loss=nll)
108 | minimum.hesse()
109 |
110 | toys_fname = f"{notebooks_dir}/toys/FC_toys_{x_true}.yml"
111 | calculator = FrequentistCalculator.from_yaml(toys_fname, minimum, minimizer)
112 | keys = np.unique([k[0].value for k in calculator.keys()])
113 | keys.sort()
114 | poinull = POIarray(mean, keys)
115 |
116 | ci = ConfidenceInterval(calculator, poinull, qtilde=False)
117 | with pytest.warns(UserWarning):
118 | ci.interval(alpha=0.05, printlevel=0)
119 |
120 | ci = ConfidenceInterval(calculator, poinull, qtilde=True)
121 | ci.interval(alpha=0.05, printlevel=0)
122 |
123 |
124 | @pytest.mark.parametrize("n", [0.5])
125 | @pytest.mark.parametrize("min_x", [0, -10])
126 | def test_with_gauss_qtilde(n, min_x):
127 | sigma_x = 0.032
128 |
129 | minimizer = Minuit()
130 | bounds = (-10, 10)
131 | obs = zfit.Space("x", limits=bounds)
132 |
133 | mean = zfit.Parameter("mean", n * sigma_x)
134 | sigma = zfit.Parameter("sigma", 1.0)
135 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
136 |
137 | data = model.sample(n=1000)
138 |
139 | nll = UnbinnedNLL(model=model, data=data)
140 |
141 | minimum = minimizer.minimize(loss=nll)
142 | minimum.hesse()
143 |
144 | x = minimum.params[mean]["value"]
145 | x_err = minimum.params[mean]["hesse"]["error"]
146 |
147 | x_min = x - x_err * 3
148 | x_max = x + x_err * 3
149 |
150 | x_min = max([x_min, min_x])
151 |
152 | poinull = POIarray(mean, np.linspace(x_min, x_max, 50))
153 | calculator = AsymptoticCalculator(nll, minimizer)
154 |
155 | ci = ConfidenceInterval(calculator, poinull, qtilde=True)
156 | ci.interval(alpha=0.05, printlevel=1)
157 |
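
For reference, the `qtilde=True` path exercised above switches to the boundary-aware test statistic of Cowan, Cranmer, Gross and Vitells (arXiv:1007.1727), which avoids the warning seen when the best-fit value fluctuates past a physical boundary. A sketch of the upper-limit-style definition, with \hat{\mu} the best-fit POI:

    \tilde{q}_\mu =
    \begin{cases}
        -2 \ln \frac{L(\mu)}{L(0)}         & \hat{\mu} < 0, \\
        -2 \ln \frac{L(\mu)}{L(\hat{\mu})} & 0 \le \hat{\mu} \le \mu, \\
        0                                  & \hat{\mu} > \mu .
    \end{cases}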
--------------------------------------------------------------------------------
/tests/hypotests/test_discovery.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from tests.conftest import create_loss_func
7 |
8 | zfit = pytest.importorskip("zfit")
9 | from zfit.loss import UnbinnedNLL
10 | from zfit.minimize import Minuit
11 |
12 | import hepstats
13 | from hepstats.hypotests import Discovery
14 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator
15 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator
16 | from hepstats.hypotests.parameters import POI
17 |
18 | notebooks_dir = f"{os.path.dirname(hepstats.__file__)}/../../notebooks/hypotests"
19 |
20 |
21 | @pytest.mark.parametrize("nbins", [None, 30], ids=["unbinned", "binned"])
22 | def test_constructor(create_loss, nbins):
23 | with pytest.raises(TypeError):
24 | Discovery()
25 |
26 | loss, (Nsig, Nbkg, _, _) = create_loss(nbins=nbins, npeak=25)
27 | calculator = BaseCalculator(loss, Minuit())
28 |
29 | poi_1 = POI(Nsig, 0.0)
30 | poi_2 = POI(Nsig, 2.0)
31 |
32 | with pytest.raises(TypeError):
33 | Discovery(calculator)
34 |
35 | with pytest.raises(TypeError):
36 | Discovery(calculator, [poi_1], poi_2)
37 |
38 | with pytest.raises(TypeError):
39 | Discovery(calculator, [poi_1], [poi_2])
40 |
41 |
42 | class AsymptoticCalculatorOld(AsymptoticCalculator):
43 | UNBINNED_TO_BINNED_LOSS = {}
44 |
45 |
46 | @pytest.mark.parametrize(
47 | "nbins", [None, 76, 253], ids=lambda x: "unbinned" if x is None else f"nbin={x}"
48 | )
49 | @pytest.mark.parametrize("Calculator", [AsymptoticCalculator, AsymptoticCalculatorOld])
50 | def test_with_asymptotic_calculator(create_loss, nbins, Calculator):
51 | if Calculator is AsymptoticCalculatorOld and nbins is not None:
52 | pytest.skip("Old AsymptoticCalculator does not support binned loss")
53 |
54 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=25, nbins=nbins)
55 | mean.floating = False
56 | sigma.floating = False
57 | calculator = Calculator(loss, Minuit())
58 |
59 | poinull = POI(Nsig, 0)
60 |
61 | discovery_test = Discovery(calculator, poinull)
62 | pnull, significance = discovery_test.result()
63 |
64 | uncertainty = 0.05
65 | if nbins is not None and nbins < 80:
66 | uncertainty *= 4
67 |
68 | # check absolute significance
69 | assert pnull == pytest.approx(0.000757, abs=uncertainty)
70 | assert significance == pytest.approx(3.17, abs=uncertainty)
71 | assert significance >= 3
72 |
73 |
74 | @pytest.mark.parametrize(
75 | "nbins", [None, 95, 153], ids=lambda x: "unbinned" if x is None else f"nbin={x}"
76 | )
77 | @pytest.mark.parametrize("losscreator", [create_loss_func,
78 | # create_sim_loss_func
79 | ], ids=["simple",
80 | # "sim"
81 | ])
82 | def test_with_frequentist_calculator(losscreator, nbins):
83 | loss, (Nsig, Nbkg, mean, sigma) = losscreator(npeak=25, nbins=nbins)
84 | mean.floating = False
85 | sigma.floating = False
86 | calculator = FrequentistCalculator.from_yaml(
87 | f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml", loss, Minuit()
88 | )
89 | # calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=500, ntoysalt=500)
90 |
91 | poinull = POI(Nsig, 0)
92 |
93 | discovery_test = Discovery(calculator, poinull)
94 | pnull, significance = discovery_test.result()
95 |
96 | abserr = 0.1
97 | if nbins is not None and nbins < 120:
98 | abserr *= 4
99 | abserr_pnull = 0.0005
100 | if nbins is not None and nbins < 120:
101 | abserr_pnull *= 4
102 | assert pnull == pytest.approx(0.0004, rel=0.05, abs=abserr_pnull)
103 | assert significance == pytest.approx(3.3427947805048592, rel=0.05, abs=abserr)
104 | assert significance >= 3
105 |
106 |
107 | def create_loss_counting():
108 | n = 370
109 | nbkg = 340
110 |
111 | Nsig = zfit.Parameter("Nsig", 0, -100.0, 100)
112 | Nbkg = zfit.Parameter("Nbkg", nbkg, floating=False)
113 | Nobs = zfit.ComposedParameter("Nobs", lambda a, b: a + b, params=[Nsig, Nbkg])
114 |
115 | obs = zfit.Space("N", limits=(0, 800))
116 | model = zfit.pdf.Poisson(obs=obs, lamb=Nobs)
117 |
118 | data = zfit.data.Data.from_numpy(obs=obs, array=np.array([n]))
119 |
120 | loss = UnbinnedNLL(model=model, data=data)
121 |
122 | return loss, Nsig
123 |
124 |
125 | def test_counting_with_asymptotic_calculator():
126 | (
127 | loss,
128 | Nsig,
129 | ) = create_loss_counting()
130 | calculator = AsymptoticCalculator(loss, Minuit())
131 |
132 | poinull = POI(Nsig, 0)
133 |
134 | discovery_test = Discovery(calculator, poinull)
135 | pnull, significance = discovery_test.result()
136 |
137 | assert significance < 2
138 |
139 |
140 | def test_counting_with_frequentist_calculator():
141 | (
142 | loss,
143 | Nsig,
144 | ) = create_loss_counting()
145 | calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=1000)
146 |
147 | poinull = POI(Nsig, 0)
148 |
149 | discovery_test = Discovery(calculator, poinull)
150 | pnull, significance = discovery_test.result()
151 |
152 | assert significance < 2
153 |
154 |
155 | def test_likelihood_ratio_fmin():
156 |     # all names used here are already imported at the top of this module
157 |     Nsig = zfit.Parameter("Nsig", 40, -100.0, 100)
158 |     Nbkg = zfit.Parameter("Nbkg", 340, 0, 500)
159 |     Nobs = zfit.ComposedParameter("Nobs", lambda a, b: a + b, params=[Nsig, Nbkg])
160 |
161 |     obs = zfit.Space("N", limits=(0, 800))
162 |     model = zfit.pdf.Poisson(obs=obs, lamb=Nobs)
163 |
164 |     n = 370
165 |     nbkg = 340
166 |
167 |     data = zfit.data.Data.from_numpy(obs=obs, array=np.array([n]))
168 |     Nbkg.set_value(nbkg)
169 |     Nbkg.floating = False
170 |
171 |     nll = UnbinnedNLL(model=model, data=data)
172 |     minimizer = Minuit(verbosity=0)
173 |     minimum = minimizer.minimize(loss=nll)
174 |
175 |     calculator = AsymptoticCalculator(nll, minimizer)
176 |     calculator.bestfit = minimum
177 |
178 |     discovery_test = Discovery(calculator, POI(Nsig, 0))
179 |     pnull, significance = discovery_test.result()
180 |     assert pytest.approx(pnull, abs=0.01) == 0.05
181 |     assert pytest.approx(significance, abs=0.1) == 1.6
182 |
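
The counting tests above can be cross-checked against the standard asymptotic approximation for a discovery significance in a single counting bin (also arXiv:1007.1727); for n = 370 observed events over an expected background b = 340, it reproduces the roughly 1.6 sigma asserted in `test_likelihood_ratio_fmin`:

    # asymptotic significance Z = sqrt(2 * (n * ln(n / b) - (n - b)))
    import numpy as np

    n, b = 370, 340
    Z = np.sqrt(2 * (n * np.log(n / b) - (n - b)))
    print(Z)  # ~1.60, consistent with the `significance < 2` checks above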
--------------------------------------------------------------------------------
/tests/hypotests/test_parameters.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import pytest
3 | import numpy as np
4 | zfit = pytest.importorskip("zfit")
5 |
6 | from hepstats.hypotests.parameters import POI, POIarray
7 |
8 |
9 | def test_pois():
10 | mean = zfit.Parameter("mu", 1.2, 0.1, 2)
11 |
12 | p0 = POI(mean, 0)
13 | p1 = POI(mean, 1.0)
14 | values = np.linspace(0.0, 1.0, 10)
15 | pn = POIarray(mean, values)
16 | pnc = POIarray(mean, values)
17 |
18 | for cls in [POI, POIarray]:
19 | with pytest.raises(ValueError):
20 | cls("mean", 0)
21 | with pytest.raises(TypeError):
22 | cls(mean)
23 |
24 | with pytest.raises(TypeError):
25 | POI(mean, values)
26 | with pytest.raises(TypeError):
27 | POIarray(mean, 0)
28 |
29 | repr(p0)
30 | repr(pn)
31 |
32 | assert p0.value == 0
33 | assert p0.name == mean.name
34 | assert p0 != p1
35 |
36 | assert all(pn.values == values)
37 | assert pn.name == mean.name
38 | assert len(pn) == len(values)
39 | iter(pn)
40 | assert pn == pnc
41 | assert hash(pn) == hash(pnc)
42 |
43 | assert pn != p0
44 | assert pn != p1
45 |
46 | assert pn[0] == p0
47 | assert pn[1] != p0
48 | assert pn[-1] == p1
49 |
50 | pn1 = pn.append(12)
51 | assert pn1.values[-1] == 12
52 | assert all(pn.values == values)
53 | assert pn1 != pn
54 | pn2 = pn.append([15, 20, 30])
55 | assert pn2.values[-1] == 30
56 | assert pn2.values[-2] == 20
57 | assert pn2.values[-3] == 15
58 | assert pn2 != pn
59 |
60 | {p0: "p0", p1: "p1", pn: "pn"}
61 |
--------------------------------------------------------------------------------
/tests/hypotests/test_toysutils.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | zfit = pytest.importorskip("zfit")
4 | import os
5 | from zfit.loss import ExtendedUnbinnedNLL, UnbinnedNLL
6 | from zfit.minimize import Minuit
7 |
8 | import hepstats
9 | from hepstats.hypotests.parameters import POI, POIarray
10 | from hepstats.hypotests.exceptions import ParameterNotFound
11 | from hepstats.hypotests.toyutils import ToyResult, ToysManager
12 | from hepstats.utils.fit.api_check import is_valid_loss, is_valid_data
13 |
14 | pwd = os.path.dirname(__file__)
15 | notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests"
16 |
17 |
18 | def create_loss():
19 | bounds = (0.1, 3.0)
20 | obs = zfit.Space("x", limits=bounds)
21 |
22 | # Data and signal
23 | np.random.seed(0)
24 | tau = -2.0
25 | beta = -1 / tau
26 | bkg = np.random.exponential(beta, 300)
27 | peak = np.random.normal(1.2, 0.1, 25)
28 | data = np.concatenate((bkg, peak))
29 | data = data[(data > bounds[0]) & (data < bounds[1])]
30 | N = len(data)
31 | data = zfit.data.Data.from_numpy(obs=obs, array=data)
32 |
33 | lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
34 | Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N)
35 | Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1)
36 |
37 | signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)
38 | background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
39 | tot_model = zfit.pdf.SumPDF([signal, background])
40 |
41 | loss = ExtendedUnbinnedNLL(model=tot_model, data=data)
42 |
43 | poigen = POI(Nsig, 0.0)
44 | poieval = POIarray(Nsig, [0.0])
45 |
46 | return loss, (Nsig, poigen, poieval)
47 |
48 |
49 | def create_loss_1():
50 | obs = zfit.Space("x", limits=(0.1, 2.0))
51 | data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
52 | mean = zfit.Parameter("mu", 1.2)
53 | sigma = zfit.Parameter("sigma", 0.1)
54 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
55 | loss = UnbinnedNLL(model=model, data=data)
56 |
57 | return loss
58 |
59 |
60 | def test_constructors():
61 | loss, (Nsig, poigen, poieval) = create_loss()
62 | ToyResult(poigen, poieval)
63 |
64 | with pytest.raises(TypeError):
65 | ToyResult(poigen, "poieval")
66 | with pytest.raises(TypeError):
67 | ToyResult(poieval, poieval)
68 |
69 | ToysManager(loss, Minuit())
70 |
71 |
72 | def test_toyresult_attributes():
73 | _, (_, poigen, poieval) = create_loss()
74 | tr = ToyResult(poigen, poieval)
75 |
76 | assert tr.ntoys == 0
77 | assert tr.poigen == poigen
78 | assert tr.poieval == poieval
79 |
80 | bf = np.array([0.5, 0.1, 0.2])
81 | nll_bf = np.array([-1000, -1001, -1002])
82 | nlls = {poieval[0]: np.array([-1001, -1002, -1003])}
83 |
84 | tr.add_entries(bestfit=bf, nll_bestfit=nll_bf, nlls=nlls)
85 | assert tr.ntoys == 3
86 |
87 | with pytest.raises(ValueError):
88 | tr.add_entries(bestfit=bf, nll_bestfit=nll_bf, nlls={})
89 |
90 | tr.add_entries(bestfit=bf, nll_bestfit=nll_bf, nlls=nlls)
91 | assert tr.ntoys == 6
92 |
93 | tr.to_dict()
94 |
95 |
96 | def test_toymanager_attributes():
97 | loss, (Nsig, poigen, poieval) = create_loss()
98 |
99 | tm = ToysManager.from_yaml(
100 | f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml", loss, Minuit()
101 | )
102 |
103 | with pytest.raises(ParameterNotFound):
104 | ToysManager.from_yaml(
105 | f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml",
106 | create_loss_1(),
107 | Minuit(),
108 | )
109 |
110 | tr = list(tm.values())[0]
111 | assert isinstance(tr, ToyResult)
112 | assert list(tm.keys())[0] == (poigen, poigen)
113 | assert (poigen, poieval) in tm.keys()
114 |
115 | assert tm.get_toyresult(poigen, poieval) == tr
116 | tr1 = ToyResult(poigen, poieval.append(1))
117 | tm.add_toyresult(tr1)
118 | with pytest.raises(TypeError):
119 | tm.add_toyresult("tr1")
120 | assert (tr1.poigen, tr1.poieval) in tm.keys()
121 |
122 | tm.to_yaml(f"{pwd}/test_toyutils.yml")
123 | tm.to_yaml(f"{pwd}/test_toyutils.yml")
124 | tmc = ToysManager.from_yaml(f"{pwd}/test_toyutils.yml", loss, Minuit())
125 | assert (
126 | tm.get_toyresult(poigen, poieval).ntoys
127 | == tmc.get_toyresult(poigen, poieval).ntoys
128 | )
129 |
130 | samplers = tm.sampler()
131 | assert all(is_valid_data(s) for s in samplers)
132 | loss = tm.toys_loss(poigen.name)
133 | assert is_valid_loss(loss)
134 |
135 | os.remove(f"{pwd}/test_toyutils.yml")
136 |
--------------------------------------------------------------------------------
/tests/hypotests/test_upperlimit.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | import os
4 | zfit = pytest.importorskip("zfit")
5 | from zfit.minimize import Minuit
6 |
7 | import hepstats
8 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator
9 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator
10 | from hepstats.hypotests import UpperLimit
11 | from hepstats.hypotests.parameters import POI, POIarray
12 | from hepstats.hypotests.exceptions import POIRangeError
13 |
14 | notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests"
15 |
16 |
17 | # def create_loss():
18 | #
19 | # bounds = (0.1, 3.0)
20 | # obs = zfit.Space("x", limits=bounds)
21 | #
22 | # # Data and signal
23 | # np.random.seed(0)
24 | # tau = -2.0
25 | # beta = -1 / tau
26 | # bkg = np.random.exponential(beta, 300)
27 | # peak = np.random.normal(1.2, 0.1, 10)
28 | # data = np.concatenate((bkg, peak))
29 | # data = data[(data > bounds[0]) & (data < bounds[1])]
30 | # N = len(data)
31 | # data = zfit.data.Data.from_numpy(obs=obs, array=data)
32 | #
33 | # lambda_ = zfit.Parameter("lambda", -2.0, -10.0, -0.1)
34 | # Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N)
35 | # Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 2)
36 | #
37 | # signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)
38 | # background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
39 | # tot_model = zfit.pdf.SumPDF([signal, background])
40 | #
41 | # loss = ExtendedUnbinnedNLL(model=tot_model, data=data)
42 | #
43 | # return loss, (Nsig, Nbkg)
44 |
45 |
46 | def test_constructor(create_loss):
47 | with pytest.raises(TypeError):
48 | UpperLimit()
49 |
50 | loss, (Nsig, Nbkg, _, _) = create_loss(npeak=10)
51 | calculator = BaseCalculator(loss, Minuit())
52 |
53 | poi_1 = POI(Nsig, 0.0)
54 | poi_2 = POI(Nsig, 2.0)
55 |
56 | with pytest.raises(TypeError):
57 | UpperLimit(calculator)
58 |
59 | with pytest.raises(TypeError):
60 | UpperLimit(calculator, poi_1)
61 |
62 | with pytest.raises(TypeError):
63 | UpperLimit(calculator, [poi_1], poi_2)
64 |
65 |
66 | class AsymptoticCalculatorOld(AsymptoticCalculator):
67 | UNBINNED_TO_BINNED_LOSS = {}
68 |
69 |
70 | def asy_calc(create_loss, nbins):
71 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins)
72 | mean.floating = False
73 | sigma.floating = False
74 | return Nsig, AsymptoticCalculator(loss, Minuit())
75 |
76 |
77 | def asy_calc_old(create_loss, nbins):
78 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins)
79 | mean.floating = False
80 | sigma.floating = False
81 | return Nsig, AsymptoticCalculatorOld(loss, Minuit())
82 |
83 |
84 | def freq_calc(create_loss, nbins):
85 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins)
86 | mean.floating = False
87 | sigma.floating = False
88 | calculator = FrequentistCalculator.from_yaml(
89 | f"{notebooks_dir}/toys/upperlimit_freq_zfit_toys.yml", loss, Minuit()
90 | )
91 | # calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=10000, ntoysalt=10000)
92 | return Nsig, calculator
93 |
94 |
95 | @pytest.mark.parametrize(
96 | "nbins", [None, 73, 211], ids=lambda x: "unbinned" if x is None else f"nbins={x}"
97 | )
98 | @pytest.mark.parametrize("calculator", [asy_calc, freq_calc, asy_calc_old])
99 | def test_with_gauss_exp_example(create_loss, calculator, nbins):
100 | if calculator is asy_calc_old and nbins is not None:
101 | pytest.skip("Old asymptotic calculator does not support binned loss")
102 | Nsig, calculator = calculator(create_loss, nbins)
103 |
104 | poinull = POIarray(Nsig, np.linspace(0.0, 25, 15))
105 | poialt = POI(Nsig, 0)
106 |
107 | ul = UpperLimit(calculator, poinull, poialt)
108 | ul_qtilde = UpperLimit(calculator, poinull, poialt, qtilde=True)
109 | limits = ul.upperlimit(alpha=0.05, CLs=True)
110 |
111 | assert limits["observed"] == pytest.approx(16.7, rel=0.15)
112 | assert limits["expected"] == pytest.approx(11.5, rel=0.15)
113 | assert limits["expected_p1"] == pytest.approx(16.729552184042365, rel=0.1)
114 | assert limits["expected_p2"] == pytest.approx(23.718823517614066, rel=0.15)
115 | assert limits["expected_m1"] == pytest.approx(7.977175378979202, rel=0.1)
116 | assert limits["expected_m2"] == pytest.approx(5.805298972983304, rel=0.15)
117 |
118 | ul.upperlimit(alpha=0.05, CLs=False)
119 | ul_qtilde.upperlimit(alpha=0.05, CLs=True)
120 |
121 | # test error when scan range is too small
122 |
123 | with pytest.raises(POIRangeError):
124 | poinull = POIarray(Nsig, poinull.values[:5])
125 | ul = UpperLimit(calculator, poinull, poialt)
126 | ul.upperlimit(alpha=0.05, CLs=True)
127 |
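
For context, the `CLs=True` limits computed above use the standard CLs convention, which divides the signal-plus-background p-value by one minus the background-only p-value; a one-line sketch (`p_sb` and `p_b` are hypothetical p-values, not hepstats API):

    def cls(p_sb: float, p_b: float) -> float:
        # the POI is excluded at 95% CL when CLs < alpha = 0.05
        return p_sb / (1 - p_b)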
--------------------------------------------------------------------------------
/tests/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed under a 3-clause BSD style license, see LICENSE.
2 |
--------------------------------------------------------------------------------
/tests/modeling/data/answers_bayesian_blocks.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/modeling/data/answers_bayesian_blocks.npz
--------------------------------------------------------------------------------
/tests/modeling/test_bayesianblocks.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import numpy as np
4 |
5 | import hepstats
6 | from hepstats.modeling import bayesian_blocks
7 |
8 | answer_dir = Path(__file__).parent / "data"
9 |
10 |
11 | def test_bayesian_blocks(cmdopt, data_gen):
12 | be1 = bayesian_blocks(data_gen[0], p0=0.05)
13 | be2 = bayesian_blocks(data_gen[0], gamma=0.1)
14 | be3 = bayesian_blocks(data_gen[0], weights=data_gen[2])
15 |
16 | if cmdopt == "generate":
17 | with open(answer_dir / "answers_bayesian_blocks.npz", "wb") as f:
18 | np.savez(f, be1=be1, be2=be2, be3=be3)
19 | elif cmdopt == "test":
20 | answers = np.load(answer_dir / "answers_bayesian_blocks.npz")
21 | np.testing.assert_array_equal(be1, answers["be1"])
22 | np.testing.assert_array_equal(be2, answers["be2"])
23 | np.testing.assert_array_equal(be3, answers["be3"])
24 | # assert(np.all(output[1] == answers['be']))
25 |
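
The arrays returned by `bayesian_blocks` are bin edges, so they can be passed straight to a histogramming routine; a minimal plotting sketch (assuming matplotlib is available):

    import matplotlib.pyplot as plt
    import numpy as np

    from hepstats.modeling import bayesian_blocks

    data = np.random.normal(size=1000)
    edges = bayesian_blocks(data, p0=0.05)  # edges of the optimal partition
    plt.hist(data, bins=edges, density=True)
    plt.show()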
--------------------------------------------------------------------------------
/tests/splots/test_splots.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from scipy.stats import ks_2samp
4 |
5 | zfit = pytest.importorskip("zfit")
6 | from zfit.loss import ExtendedUnbinnedNLL
7 | from zfit.minimize import Minuit
8 |
9 | from hepstats.splot import compute_sweights
10 | from hepstats.splot.sweights import is_sum_of_extended_pdfs
11 | from hepstats.utils.fit import get_value
12 | from hepstats.splot.exceptions import ModelNotFittedToData
13 | from hepstats.splot.warnings import AboveToleranceWarning
14 |
15 |
16 | def get_data_and_loss():
17 | bounds = (0.0, 3.0)
18 | obs = zfit.Space("x", limits=bounds)
19 | nbkg = 10000
20 | nsig = 5000
21 |
22 | # Data and signal
23 | def get_sel(arr):
24 | return (arr > bounds[0]) & (arr < bounds[1])
25 |
26 | np.random.seed(0)
27 |
28 | N = nsig + nbkg
29 | mean = zfit.Parameter("mean", 1.2, 0.5, 2.0)
30 | sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.3)
31 | lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
32 | Nsig = zfit.Parameter("Nsig", nsig, 0.0, N)
33 | Nbkg = zfit.Parameter("Nbkg", nbkg, 0.0, N)
34 |
35 | signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)
36 | background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)
37 | tot_model = zfit.pdf.SumPDF([signal, background])
38 |
39 | bkg = background.sample(nbkg, params={lambda_: -2.1})
40 |
41 | peak = signal.sample(nsig, params={sigma: 0.2})
42 |
43 | mass = np.concatenate((bkg["x"], peak["x"]))
44 |
45 | bck_p = np.random.normal(3, 1, size=nbkg)
46 | sig_p = np.random.normal(5, 1, size=nsig)
47 | p = np.concatenate([bck_p, sig_p])
48 |
49 | data = zfit.data.concat([bkg, peak], axis="index")
50 |
51 | loss = ExtendedUnbinnedNLL(model=tot_model, data=data)
52 |
53 | return mass, p, loss, Nsig, Nbkg, sig_p, bck_p
54 |
55 |
56 | def test_sweights_constructor():
57 | mass, p, loss, Nsig, Nbkg, sig_p, bkg_p = get_data_and_loss()
58 |
59 | with pytest.raises(ValueError):
60 | compute_sweights("model", mass)
61 |
62 | with pytest.raises(ValueError):
63 | compute_sweights(loss.model[0].get_models()[0], mass)
64 |
65 |
66 | def test_sweights():
67 | minimizer = Minuit()
68 | mass, p, loss, Nsig, Nbkg, sig_p, bkg_p = get_data_and_loss()
69 |
70 | with pytest.raises(ModelNotFittedToData):
71 | compute_sweights(loss.model[0], mass)
72 |
73 | result = minimizer.minimize(loss)
74 | assert result.valid
75 |
76 | model = loss.model[0]
77 | assert is_sum_of_extended_pdfs(model)
78 |
79 | yields = [Nsig, Nbkg]
80 |
81 | sweights = compute_sweights(loss.model[0], mass)
82 |
83 | assert np.allclose(
84 | [np.sum(sweights[y]) / get_value(y.value()) for y in yields], 1.0
85 | )
86 |
87 | nbins = 30
88 | hist_conf = dict(bins=nbins, range=[0, 10])
89 |
90 | hist_sig_true_p, _ = np.histogram(sig_p, **hist_conf)
91 | sel = hist_sig_true_p != 0
92 | hist_sig_true_p = hist_sig_true_p[sel]
93 | hist_sig_sweights_p = np.histogram(p, weights=sweights[Nsig], **hist_conf)[0][sel]
94 |
95 | assert ks_2samp(hist_sig_sweights_p, hist_sig_true_p)[-1] > 0.001
96 |
97 | hist_bkg_true_p, _ = np.histogram(bkg_p, **hist_conf)
98 | sel = hist_bkg_true_p != 0
99 | hist_bkg_true_p = hist_bkg_true_p[sel]
100 | hist_bkg_sweights_p = np.histogram(p, weights=sweights[Nbkg], **hist_conf)[0][sel]
101 |
102 | assert ks_2samp(hist_bkg_sweights_p, hist_bkg_true_p)[-1] > 0.001
103 |
104 | with pytest.warns(AboveToleranceWarning):
105 | compute_sweights(
106 | loss.model[0], np.concatenate([mass, np.random.normal(0.8, 0.1, 100)])
107 | )
108 |
109 | with pytest.raises(ModelNotFittedToData):
110 | compute_sweights(
111 | loss.model[0], np.concatenate([mass, np.random.normal(0.8, 0.1, 1000)])
112 | )
113 |
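
The yield-sum assertions above reflect a defining property of sWeights: for each species, the weights sum to the fitted yield. In the notation of Pivk and Le Diberder (physics/0402083), with V the covariance matrix of the yields N_k and f_k the normalised pdfs, a sketch of the weight for species n at point x:

    w_n(x) = \frac{\sum_j V_{nj} \, f_j(x)}{\sum_k N_k \, f_k(x)},
    \qquad
    \sum_i w_n(x_i) = N_n .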
--------------------------------------------------------------------------------