├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.rst ├── LICENSES ├── LICENSE └── LICENSE_ASTROML.rst ├── README.md ├── docs ├── Makefile ├── api │ ├── hepstats.hypotests.calculators.asymptotic_calculator.rst │ ├── hepstats.hypotests.calculators.basecalculator.rst │ ├── hepstats.hypotests.calculators.frequentist_calculator.rst │ ├── hepstats.hypotests.calculators.rst │ ├── hepstats.hypotests.core.basetest.rst │ ├── hepstats.hypotests.core.confidence_interval.rst │ ├── hepstats.hypotests.core.discovery.rst │ ├── hepstats.hypotests.core.rst │ ├── hepstats.hypotests.core.upperlimit.rst │ ├── hepstats.hypotests.exceptions.rst │ ├── hepstats.hypotests.hypotests_object.rst │ ├── hepstats.hypotests.parameters.rst │ ├── hepstats.hypotests.rst │ ├── hepstats.hypotests.toyutils.rst │ ├── hepstats.modeling.bayesian_blocks.rst │ ├── hepstats.modeling.rst │ ├── hepstats.rst │ ├── hepstats.splot.exceptions.rst │ ├── hepstats.splot.rst │ ├── hepstats.splot.sweights.rst │ ├── hepstats.splot.warnings.rst │ ├── hepstats.utils.fit.api_check.rst │ ├── hepstats.utils.fit.diverse.rst │ ├── hepstats.utils.fit.rst │ ├── hepstats.utils.fit.sampling.rst │ ├── hepstats.utils.rst │ ├── hepstats.version.rst │ ├── hypotests.rst │ ├── index.rst │ ├── modeling.rst │ ├── modules.rst │ ├── splot.rst │ └── utils.rst ├── bib │ └── references.bib ├── bibliography.rst ├── conf.py ├── getting_started │ ├── hypotests.rst │ ├── index.rst │ ├── modeling.rst │ └── splot.rst ├── images │ ├── logo.pdf │ ├── logo.png │ ├── logo.xcf │ ├── logo_medium.png │ └── logo_small.png ├── index.rst ├── make.bat ├── make_docs.sh └── whats_new.rst ├── environment.yml ├── notebooks ├── README.md ├── hypotests │ ├── FC_interval_asy.ipynb │ ├── FC_interval_freq.ipynb │ ├── Simultaneous_fit_discovery_splot.ipynb │ ├── __init__.py │ ├── asy_ci.png │ ├── asy_ul.png │ ├── confidenceinterval_asy_zfit.ipynb │ ├── confidenceinterval_freq_zfit.ipynb │ ├── counting.ipynb │ ├── discovery_asy_zfit.ipynb │ ├── discovery_freq_zfit.ipynb │ ├── toys │ │ ├── FC_toys_-1.0.yml │ │ ├── FC_toys_-2.0.npz │ │ ├── FC_toys_-2.0.yml │ │ ├── FC_toys_-3.0.yml │ │ ├── FC_toys_-4.0.yml │ │ ├── FC_toys_-5.0.yml │ │ ├── FC_toys_-6.0.yml │ │ ├── FC_toys_0.0.yml │ │ ├── FC_toys_1.0.yml │ │ ├── FC_toys_2.0.yml │ │ ├── FC_toys_3.0.yml │ │ ├── FC_toys_4.0.yml │ │ ├── FC_toys_5.0.yml │ │ ├── FC_toys_6.0.yml │ │ ├── ci_freq_zfit_toys.yml │ │ ├── discovery_freq_zfit_toys.yml │ │ └── upperlimit_freq_zfit_toys.yml │ ├── upperlimit_asy_zfit.ipynb │ ├── upperlimit_freq_zfit.ipynb │ └── utils.py ├── modeling │ ├── bayesian_blocks.ipynb │ ├── bayesian_blocks_example.png │ ├── hists_2LP.png │ ├── hists_MuPT.png │ └── hists_jPT.png └── splots │ ├── splot_example.ipynb │ ├── splot_example_2.ipynb │ └── utils.py ├── pyproject.toml ├── src └── hepstats │ ├── __init__.py │ ├── hypotests │ ├── README.md │ ├── __init__.py │ ├── calculators │ │ ├── __init__.py │ │ ├── asymptotic_calculator.py │ │ ├── basecalculator.py │ │ └── frequentist_calculator.py │ ├── core │ │ ├── __init__.py │ │ ├── basetest.py │ │ ├── confidence_interval.py │ │ ├── discovery.py │ │ └── upperlimit.py │ ├── exceptions.py │ ├── hypotests_object.py │ ├── parameters.py │ └── toyutils.py │ ├── modeling │ ├── __init__.py │ └── bayesian_blocks.py │ ├── splot │ ├── __init__.py │ ├── exceptions.py │ ├── sweights.py │ └── warnings.py │ └── utils │ ├── __init__.py │ └── fit │ ├── __init__.py │ ├── api_check.py │ ├── diverse.py │ └── 
sampling.py └── tests ├── __init__.py ├── conftest.py ├── hypotests ├── data │ ├── cls_pvalues.npz │ └── clsb_pvalues.npz ├── test_basetest.py ├── test_calculators.py ├── test_confidence_intervals.py ├── test_discovery.py ├── test_parameters.py ├── test_toysutils.py └── test_upperlimit.py ├── modeling ├── __init__.py ├── data │ └── answers_bayesian_blocks.npz └── test_bayesianblocks.py └── splots └── test_splots.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | concurrency: 4 | group: ${{ github.ref }} 5 | cancel-in-progress: true 6 | 7 | on: 8 | pull_request: 9 | push: 10 | branches: [ "main"] 11 | release: 12 | types: 13 | - "published" 14 | 15 | jobs: 16 | 17 | pre-commit: 18 | name: Format 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | with: 23 | fetch-depth: 0 24 | - uses: actions/setup-python@v5 25 | 26 | checks: 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | os: 32 | - ubuntu-latest 33 | python-version: 34 | - "3.9" 35 | - "3.12" 36 | - "3.13" 37 | include: 38 | - os: windows-latest 39 | python-version: "3.9" 40 | - os: macos-13 41 | python-version: "3.9" 42 | - os: macos-latest 43 | python-version: "3.12" # old versions not supported 44 | name: Check Python ${{ matrix.python-version }} ${{ matrix.os }} 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | fetch-depth: 0 49 | - name: Setup Python ${{ matrix.python-version }} 50 | uses: actions/setup-python@v5 51 | with: 52 | python-version: ${{ matrix.python-version }} 53 | 54 | - name: Install package 55 | run: | 56 | pip install uv 57 | uv pip install --system -e .[test] pytest-xdist # for multiprocessing, -e needed for paths etc. 58 | 59 | - name: Test package 60 | run: python -m pytest --doctest-modules --cov=hepstats --cov-report=xml -n auto 61 | 62 | - name: Upload coverage to Codecov 63 | if: matrix.python-version == '3.9' && matrix.os == 'ubuntu-latest' 64 | uses: codecov/codecov-action@v5 65 | with: 66 | token: ${{ secrets.CODECOV_TOKEN }} # technically not needed, but prevents failures: https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954 67 | file: ./coverage.xml 68 | flags: unittests 69 | name: codecov-umbrella 70 | fail_ci_if_error: false # flaky upload...
71 | 72 | dist: 73 | runs-on: ubuntu-latest 74 | steps: 75 | - uses: actions/checkout@v4 76 | with: 77 | fetch-depth: 0 78 | 79 | - name: Build 80 | run: pipx run build 81 | 82 | - uses: actions/upload-artifact@v4 83 | with: 84 | path: dist/* 85 | 86 | - name: Check metadata 87 | run: pipx run twine check dist/* 88 | 89 | docs: 90 | runs-on: ubuntu-latest 91 | steps: 92 | - uses: actions/checkout@v4 93 | with: 94 | fetch-depth: 0 95 | 96 | - name: Setup Python 3.9 97 | uses: actions/setup-python@v5 98 | with: 99 | python-version: 3.9 100 | 101 | - name: Install dependencies 102 | run: | 103 | pip install uv 104 | uv pip install --system -e .[docs] 105 | - name: build docs 106 | run: | 107 | sphinx-build -b html docs docs/_build/html 108 | touch docs/_build/html/.nojekyll 109 | 110 | - name: Deploy docs to GitHub Pages 111 | if: success() && github.event_name == 'push' && github.ref == 'refs/heads/main' 112 | uses: peaceiris/actions-gh-pages@v4 113 | with: 114 | github_token: ${{ secrets.GITHUB_TOKEN }} 115 | publish_dir: docs/_build/html 116 | force_orphan: true 117 | user_name: 'github-actions[bot]' 118 | user_email: 'github-actions[bot]@users.noreply.github.com' 119 | commit_message: Deploy to GitHub pages 120 | 121 | publish: 122 | needs: [ dist ] 123 | environment: pypi 124 | permissions: 125 | id-token: write 126 | runs-on: ubuntu-latest 127 | if: github.event_name == 'release' && github.event.action == 'published' 128 | steps: 129 | - uses: actions/download-artifact@v4 130 | with: 131 | name: artifact 132 | path: dist 133 | 134 | - uses: pypa/gh-action-pypi-publish@release/v1 135 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *checkpoint* 3 | *egg* 4 | build/ 5 | dist/ 6 | .tox/ 7 | .python-version 8 | .ipynb_checkpoints 9 | .pytest_cache 10 | *.gv* 11 | /src/hepstats/version.py 12 | /.mypy_cache/* 13 | /pip-wheel-metadata 14 | /docs/_build/* 15 | /docs/source/* 16 | /_build/** 17 | /.idea/** 18 | /src/hepstats/_version.py 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: quarterly 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v5.0.0 7 | hooks: 8 | - id: check-added-large-files 9 | args: [ '--maxkb=1000' ] 10 | - id: mixed-line-ending 11 | exclude: ^notebooks/ 12 | - id: trailing-whitespace 13 | exclude: ^notebooks/ 14 | - id: check-merge-conflict 15 | - id: check-case-conflict 16 | - id: check-symlinks 17 | - id: check-yaml 18 | exclude: ^notebooks/ 19 | - id: requirements-txt-fixer 20 | - id: debug-statements 21 | - id: end-of-file-fixer 22 | # - repo: https://github.com/mgedmin/check-manifest 23 | # rev: "0.50" 24 | # hooks: 25 | # - id: check-manifest 26 | # args: 27 | # - --update 28 | # - --no-build-isolation 29 | # additional_dependencies: 30 | # - hatchling 31 | # - hatch-vcs 32 | 33 | - repo: https://github.com/pre-commit/mirrors-mypy 34 | rev: v1.15.0 35 | hooks: 36 | - id: mypy 37 | files: src 38 | 39 | - repo: 
https://github.com/roy-ht/pre-commit-jupyter 40 | rev: v1.2.1 41 | hooks: 42 | - id: jupyter-notebook-cleanup 43 | 44 | - repo: https://github.com/pre-commit/pygrep-hooks 45 | rev: v1.10.0 46 | hooks: 47 | - id: python-use-type-annotations 48 | - id: python-check-mock-methods 49 | - id: python-no-eval 50 | - id: rst-backticks 51 | - id: rst-directive-colons 52 | 53 | - repo: https://github.com/asottile/pyupgrade 54 | rev: v3.19.1 55 | hooks: 56 | - id: pyupgrade 57 | args: [ --py39-plus ] 58 | 59 | - repo: https://github.com/asottile/setup-cfg-fmt 60 | rev: v2.8.0 61 | hooks: 62 | - id: setup-cfg-fmt 63 | args: [ --max-py-version=3.13, --include-version-classifiers ] 64 | 65 | # Notebook formatting 66 | - repo: https://github.com/nbQA-dev/nbQA 67 | rev: 1.9.1 68 | hooks: 69 | 70 | - id: nbqa-pyupgrade 71 | additional_dependencies: [ pyupgrade ] 72 | args: [ --py39-plus ] 73 | 74 | 75 | - repo: https://github.com/roy-ht/pre-commit-jupyter 76 | rev: v1.2.1 77 | hooks: 78 | - id: jupyter-notebook-cleanup 79 | 80 | - repo: https://github.com/sondrelg/pep585-upgrade 81 | rev: 'v1.0' 82 | hooks: 83 | - id: upgrade-type-hints 84 | args: [ '--futures=true' ] 85 | 86 | 87 | - repo: https://github.com/dannysepler/rm_unneeded_f_str 88 | rev: v0.2.0 89 | hooks: 90 | - id: rm-unneeded-f-str 91 | 92 | - repo: https://github.com/python-jsonschema/check-jsonschema 93 | rev: 0.32.1 94 | hooks: 95 | - id: check-github-workflows 96 | - id: check-github-actions 97 | - id: check-dependabot 98 | - id: check-readthedocs 99 | 100 | - repo: https://github.com/MarcoGorelli/auto-walrus 101 | rev: 0.3.4 102 | hooks: 103 | - id: auto-walrus 104 | 105 | - repo: https://github.com/astral-sh/ruff-pre-commit 106 | rev: "v0.11.4" 107 | hooks: 108 | - id: ruff 109 | types_or: [ python, pyi, jupyter ] 110 | args: [ --fix, --unsafe-fixes, --show-fixes , --line-length=120 ] 111 | # Run the formatter. 112 | - id: ruff-format 113 | types_or: [ python, pyi, jupyter ] 114 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | # Build documentation in the docs/ directory with Sphinx 7 | sphinx: 8 | configuration: docs/conf.py 9 | 10 | build: 11 | os: ubuntu-22.04 12 | tools: 13 | python: "3.11" 14 | 15 | python: 16 | install: 17 | - method: pip 18 | path: . 19 | extra_requirements: 20 | - docs 21 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | main 5 | ************* 6 | 7 | Version 0.9.2 8 | ************** 9 | 10 | * fix wrong import with optional dependencies 11 | 12 | Version 0.9.1 13 | ************** 14 | 15 | * fix dumping of fitresult in tests; require ASDF version < 1.6.0 when writing to file 16 | * fix sampling of model in FrequentistCalculator with simultaneous fits 17 | 18 | 19 | Version 0.9.0 20 | ************** 21 | 22 | * Add support for Python 3.13 23 | 24 | Version 0.8.1 25 | ************** 26 | 27 | * Add support for Python 3.12, drop support for Python 3.8 28 | * Improved support for zfit 0.20+ 29 | 30 | Thanks to @MoritzNeuberger for finding and proposing a hypothesis test fix.
31 | 32 | Version 0.7.0 33 | ************* 34 | 35 | * Add support for Python 3.11, drop support for Python 3.7 36 | 37 | Version 0.6.1 38 | ************* 39 | 40 | * fix toy generation with constraints 41 | 42 | Version 0.6.0 43 | ************* 44 | 45 | * Upgrade to Python 3.10 and zfit >= 0.10.0 46 | * Enhanced speed of the toy limit calculation 47 | * Add multidimensional PDF support 48 | * Add support for binned data and models 49 | 50 | Version 0.5.0 51 | ************* 52 | * Upgrade to Python 3.9 and drop support for 3.6 53 | 54 | Version 0.4.0 55 | ************* 56 | * loss: upgrade API to use ``create_new`` to make sure that the losses are comparable. Compatible with zfit 0.6.4+ 57 | 58 | Version 0.3.1 59 | ************* 60 | * sPlot: Increase the tolerance of the sanity check from 1e-3 to 5e-2; if the check is above this tolerance, a ModelNotFittedToData 61 | exception is raised. In addition, if the check is above the 5e-3 tolerance, a warning message is printed. 62 | 63 | 64 | Version 0.3.0 65 | ************* 66 | * New documentation style 67 | * **hepstats** can now do hypothesis tests, and compute upper limits and confidence intervals for counting analyses 68 | * Progress bars are used to show the progress of the toy generation 69 | 70 | Version 0.2.5 71 | ************* 72 | * ConfidenceInterval can compute Feldman-Cousins intervals with boundaries (i.e. ``qtilde=True``) 73 | * **AsymptoticCalculator** Asimov weights are now scaled to the number of entries in the dataset from the loss 74 | function if the loss is not extended 75 | * **hepstats.hypotests** can now be used even if there are no nuisances. The **pll** function in **utils/fit/diverse.py** 76 | had to be modified such that if there are no nuisances, the **pll** function returns the value of the loss function. 77 | * add notebook demos for FC intervals with the ``FrequentistCalculator`` and ``AsymptoticCalculator``. 78 | * add warnings when multiple roots are found in ``ConfidenceInterval`` 79 | * move toys .yml files from notebook to notebook/toys 80 | 81 | Version 0.2.4 82 | ************* 83 | * Redesigned packaging system, GHA deployment. 84 | * **expected_poi** removed from **BaseCalculator** and **AsymptoticCalculator** 85 | * add type checks in the **hypotests** submodule 86 | 87 | Version 0.2.3 88 | ************** 89 | * **hepstats** is now compatible with the zfit > 0.5 API 90 | * expected intervals in upper limit are now calculated from the p-values and not from the **expected_poi** 91 | function anymore. 92 | 93 | Version 0.2.2 94 | ************** 95 | * Addition of the **sPlot** algorithm 96 | 97 | Version 0.2.1 98 | ************** 99 | * Addition of the **FrequentistCalculator** to perform hypothesis test, upper limit and interval calculations 100 | with toys. Toys can be saved and loaded in / from yaml files using the methods: 101 | 102 | * ``to_yaml`` 103 | * ``from_yaml`` 104 | 105 | Version 0.2.0 106 | ************** 107 | * New version for the new **hepstats** name of the package 108 | 109 | Version 0.1.3 110 | ************** 111 | * Package name changed from **scikit-stats** to **hepstats** 112 | 113 | Version 0.1.2 114 | ************** 115 | * Addition of classes to compute upper limits and confidence intervals.
116 | 117 | Version 0.1.1 118 | ************** 119 | * Release for Zenodo DOI 120 | 121 | Version 0.1.0 122 | ************** 123 | * First release of **scikit-stats** 124 | * Addition of the **modeling** submodule with the ``Bayesian Blocks algorithm`` 125 | * Addition of the **hypotests** submodule 126 | -------------------------------------------------------------------------------- /LICENSES/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019-2025, The Scikit-HEP Administrators 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /LICENSES/LICENSE_ASTROML.rst: -------------------------------------------------------------------------------- 1 | https://github.com/astroML/astroML 2 | 3 | Copyright (c) 2012-2013, Jacob Vanderplas 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | 8 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # `hepstats` package: statistics tools and utilities 5 | 6 | [![Scikit-HEP][sk-badge]](https://scikit-hep.org/) 7 | 8 | [![PyPI](https://img.shields.io/pypi/v/hepstats)](https://pypi.org/project/hepstats/) 9 | [![Conda latest release](https://img.shields.io/conda/vn/conda-forge/hepstats.svg)](https://anaconda.org/conda-forge/hepstats) 10 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/hepstats)](https://pypi.org/project/hepstats/) 11 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3519200.svg)](https://doi.org/10.5281/zenodo.3519200) 12 | 13 | [![GitHub Actions Status: CI](https://github.com/scikit-hep/hepstats/workflows/CI/badge.svg)](https://github.com/scikit-hep/hepstats/actions) 14 | [![Code Coverage](https://codecov.io/gh/scikit-hep/hepstats/graph/badge.svg?branch=main)](https://codecov.io/gh/scikit-hep/hepstats?branch=main) 15 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 16 | 17 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) 18 | 19 | hepstats is a library for statistical inference aiming to cover the needs of High Energy Physics. 20 | It is part of the [Scikit-HEP project](https://scikit-hep.org/). 21 | 22 | **Questions**: for usage questions, use [StackOverflow with the hepstats tag](https://stackoverflow.com/questions/ask?tags=hepstats) 23 | **Bugs and odd behavior**: open [an issue with hepstats](https://github.com/scikit-hep/hepstats/issues/new) 24 | 25 | ## Installation 26 | 27 | Install `hepstats` like any other Python package: 28 | 29 | ``` 30 | pip install hepstats 31 | ``` 32 | 33 | or similar (use e.g. `virtualenv` if you wish). 34 | 35 | ## Changelog 36 | See the [changelog](https://github.com/scikit-hep/hepstats/blob/main/CHANGELOG.rst) for a history of notable changes. 37 | 38 | ## Getting Started 39 | 40 | The `hepstats` module includes `modeling`, `hypotests` and `splot` submodules. This is a quick user guide to each submodule. The [binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) examples are also a good way to get started. 41 | 42 | ### modeling 43 | 44 | The modeling submodule includes the [Bayesian Blocks algorithm](https://arxiv.org/pdf/1207.5578.pdf) that can be used to improve the binning of histograms. The visual improvement can be dramatic, and more importantly, this algorithm produces histograms that accurately represent the underlying distribution while being robust to statistical fluctuations. Here is a small example of the algorithm applied to Laplacian sampled data, compared to a histogram of this sample with a fine binning.
45 | 46 | ```python 47 | >>> import numpy as np 48 | >>> import matplotlib.pyplot as plt 49 | >>> from hepstats.modeling import bayesian_blocks 50 | 51 | >>> data = np.random.laplace(size=10000) 52 | >>> blocks = bayesian_blocks(data) 53 | 54 | >>> plt.hist(data, bins=1000, label='Fine Binning', density=True, alpha=0.6) 55 | >>> plt.hist(data, bins=blocks, label='Bayesian Blocks', histtype='step', density=True, linewidth=2) 56 | >>> plt.legend(loc=2) 57 | ``` 58 | 59 | ![bayesian blocks example](https://raw.githubusercontent.com/scikit-hep/hepstats/main/notebooks/modeling/bayesian_blocks_example.png) 60 | 61 | ### hypotests 62 | 63 | This submodule provides tools for hypothesis tests, such as discovery tests, and for computations of upper limits or confidence intervals. hepstats needs a fitting backend to perform computations, such as [zfit](https://github.com/zfit/zfit). Any fitting library can be used if its API is compatible with hepstats (see [api checks](https://github.com/scikit-hep/hepstats/blob/main/src/hepstats/utils/fit/api_check.py)). 64 | 65 | We give here a simple example of an upper limit calculation of the yield of a Gaussian signal with known mean and sigma over an exponential background. The fitting backend used is the [zfit](https://github.com/zfit/zfit) package. An example with a **counting experiment** analysis is also given in the [binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) examples. 66 | 67 | ```python 68 | >>> import zfit 69 | >>> from zfit.loss import ExtendedUnbinnedNLL 70 | >>> from zfit.minimize import Minuit 71 | 72 | >>> bounds = (0.1, 3.0) 73 | >>> obs = zfit.Space('x', limits=bounds) 74 | 75 | >>> bkg = np.random.exponential(0.5, 300) 76 | >>> peak = np.random.normal(1.2, 0.1, 10) 77 | >>> data = np.concatenate((bkg, peak)) 78 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 79 | >>> N = data.size 80 | >>> data = zfit.Data.from_numpy(obs=obs, array=data) 81 | 82 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 83 | >>> Nsig = zfit.Parameter("Nsig", 1., -20., N) 84 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1) 85 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) 86 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 87 | >>> total = zfit.pdf.SumPDF([signal, background]) 88 | >>> loss = ExtendedUnbinnedNLL(model=total, data=data) 89 | 90 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator 91 | >>> from hepstats.hypotests import UpperLimit 92 | >>> from hepstats.hypotests.parameters import POI, POIarray 93 | 94 | >>> calculator = AsymptoticCalculator(loss, Minuit(), asimov_bins=100) 95 | >>> poinull = POIarray(Nsig, np.linspace(0.0, 25, 20)) 96 | >>> poialt = POI(Nsig, 0) 97 | >>> ul = UpperLimit(calculator, poinull, poialt) 98 | >>> ul.upperlimit(alpha=0.05, CLs=True) 99 | 100 | Observed upper limit: Nsig = 15.725784747406346 101 | Expected upper limit: Nsig = 11.927442041887158 102 | Expected upper limit +1 sigma: Nsig = 16.596396280677116 103 | Expected upper limit -1 sigma: Nsig = 8.592750403611896 104 | Expected upper limit +2 sigma: Nsig = 22.24864429383046 105 | Expected upper limit -2 sigma: Nsig = 6.400549971360598 106 | ``` 107 | 108 | ![upper limit example](https://raw.githubusercontent.com/scikit-hep/hepstats/main/notebooks/hypotests/asy_ul.png) 109 | 110 | ### splots 111 | 112 | A full example using the **sPlot** algorithm can be found [here](https://github.com/scikit-hep/hepstats/tree/main/notebooks/splots/splot_example.ipynb). **sWeights** for different components in a data sample, modeled with a sum of extended probability density functions, are derived using the `compute_sweights` function: 113 | 114 | ```python 115 | >>> from hepstats.splot import compute_sweights 116 | 117 | # using same model as above for illustration 118 | >>> sweights = compute_sweights(zfit.pdf.SumPDF([signal, background]), data) 119 | 120 | >>> bkg_sweights = sweights[Nbkg] 121 | >>> sig_sweights = sweights[Nsig] 122 | ``` 123 | 124 | The model needs to be fitted to the data before the **sWeights** are computed; if it is not, an error is raised.
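As a minimal sketch of that prerequisite (this snippet is illustrative, not part of the original example; it reuses the `loss`, `total` and `data` objects defined in the hypotests example above):

```python
>>> from zfit.minimize import Minuit

>>> # fit the summed model to the data first, then derive the sWeights
>>> minimum = Minuit().minimize(loss)
>>> sweights = compute_sweights(total, data)
```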
125 | 126 | [sk-badge]: https://img.shields.io/badge/Scikit--HEP-Project-blue?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABoAAAAcCAYAAAB/E6/TAAAACXBIWXMAAAEZAAABGQHyCY1sAAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAAA6dJREFUSImdlktonFUUx3+TTDKTmUnaTB4mTRsbB6OtLaYWC4uCjyC2Cr5QCq3d2I2U4krRlRs3bkQQABSVuVNSCompqCaRtKkxpmkSmfd7yGQm8/gcnDsfX+czmaQHBobvnnvO/3fPvec7AEwyM8xsiJnNmNn6/zRHZvbT9M+c2R1m1gDcZWauCWb2cWxpZk7/l2aiDcyczswdZuYAPGFm/3629rZmE5zzmdmVjHzFM6Q5KZWYPVdV2fy+Vyks/nW7quX2ia9gshRM1Kp9OtWq32vRDik8XFxT9XV1dTqqqKYrFYKhQKjViOmRkAqKqqKIriJxaLxQtZltMSicRtRVF+WiwWf5pOp28tLy9fm52d/WJ6evrzgYGBC2EYXiyVSj/o6ur6ZaVS+XUQBP9QVfV3QRBcCcPwcqVS+bXP5/tlpVL5dRAE/6iq6u+DILgShuHlSqXyawD4fwH4fwH4fwH4fwH4fwH4fwH4fwF4/1+1kqbkAAAAAElFTkSuQmCC 127 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.calculators.asymptotic_calculator.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.calculators.asymptotic\_calculator module 2 | ============================================================ 3 | 4 | .. 
automodule:: hepstats.hypotests.calculators.asymptotic_calculator 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.calculators.basecalculator.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.calculators.basecalculator module 2 | ==================================================== 3 | 4 | .. automodule:: hepstats.hypotests.calculators.basecalculator 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.calculators.frequentist_calculator.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.calculators.frequentist\_calculator module 2 | ============================================================= 3 | 4 | .. automodule:: hepstats.hypotests.calculators.frequentist_calculator 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.calculators.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.calculators package 2 | ====================================== 3 | 4 | .. automodule:: hepstats.hypotests.calculators 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Submodules 11 | ---------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.hypotests.calculators.asymptotic_calculator 17 | hepstats.hypotests.calculators.basecalculator 18 | hepstats.hypotests.calculators.frequentist_calculator 19 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.core.basetest.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.core.basetest module 2 | ======================================= 3 | 4 | .. automodule:: hepstats.hypotests.core.basetest 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.core.confidence_interval.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.core.confidence\_interval module 2 | =================================================== 3 | 4 | .. automodule:: hepstats.hypotests.core.confidence_interval 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.core.discovery.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.core.discovery module 2 | ======================================== 3 | 4 | .. automodule:: hepstats.hypotests.core.discovery 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.core.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.core package 2 | =============================== 3 | 4 | .. 
automodule:: hepstats.hypotests.core 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Submodules 11 | ---------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.hypotests.core.basetest 17 | hepstats.hypotests.core.confidence_interval 18 | hepstats.hypotests.core.discovery 19 | hepstats.hypotests.core.upperlimit 20 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.core.upperlimit.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.core.upperlimit module 2 | ========================================= 3 | 4 | .. automodule:: hepstats.hypotests.core.upperlimit 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.exceptions.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.exceptions module 2 | ==================================== 3 | 4 | .. automodule:: hepstats.hypotests.exceptions 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.hypotests_object.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.hypotests\_object module 2 | =========================================== 3 | 4 | .. automodule:: hepstats.hypotests.hypotests_object 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.parameters.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.parameters module 2 | ==================================== 3 | 4 | .. automodule:: hepstats.hypotests.parameters 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests package 2 | ========================== 3 | 4 | .. automodule:: hepstats.hypotests 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Subpackages 11 | ----------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.hypotests.calculators 17 | hepstats.hypotests.core 18 | 19 | Submodules 20 | ---------- 21 | 22 | .. toctree:: 23 | :maxdepth: 4 24 | 25 | hepstats.hypotests.exceptions 26 | hepstats.hypotests.hypotests_object 27 | hepstats.hypotests.parameters 28 | hepstats.hypotests.toyutils 29 | -------------------------------------------------------------------------------- /docs/api/hepstats.hypotests.toyutils.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests.toyutils module 2 | ================================== 3 | 4 | .. 
automodule:: hepstats.hypotests.toyutils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.modeling.bayesian_blocks.rst: -------------------------------------------------------------------------------- 1 | hepstats.modeling.bayesian\_blocks module 2 | ========================================= 3 | 4 | .. automodule:: hepstats.modeling.bayesian_blocks 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.modeling.rst: -------------------------------------------------------------------------------- 1 | hepstats.modeling package 2 | ========================= 3 | 4 | .. automodule:: hepstats.modeling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Submodules 11 | ---------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.modeling.bayesian_blocks 17 | -------------------------------------------------------------------------------- /docs/api/hepstats.rst: -------------------------------------------------------------------------------- 1 | hepstats package 2 | ================ 3 | 4 | .. automodule:: hepstats 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Subpackages 11 | ----------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.hypotests 17 | hepstats.modeling 18 | hepstats.splot 19 | hepstats.utils 20 | 21 | Submodules 22 | ---------- 23 | 24 | .. toctree:: 25 | :maxdepth: 4 26 | 27 | hepstats.version 28 | -------------------------------------------------------------------------------- /docs/api/hepstats.splot.exceptions.rst: -------------------------------------------------------------------------------- 1 | hepstats.splot.exceptions module 2 | ================================ 3 | 4 | .. automodule:: hepstats.splot.exceptions 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.splot.rst: -------------------------------------------------------------------------------- 1 | hepstats.splot package 2 | ====================== 3 | 4 | .. automodule:: hepstats.splot 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Submodules 11 | ---------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.splot.exceptions 17 | hepstats.splot.sweights 18 | hepstats.splot.warnings 19 | -------------------------------------------------------------------------------- /docs/api/hepstats.splot.sweights.rst: -------------------------------------------------------------------------------- 1 | hepstats.splot.sweights module 2 | ============================== 3 | 4 | .. automodule:: hepstats.splot.sweights 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.splot.warnings.rst: -------------------------------------------------------------------------------- 1 | hepstats.splot.warnings module 2 | ================================ 3 | 4 | .. 
automodule:: hepstats.splot.warnings 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.utils.fit.api_check.rst: -------------------------------------------------------------------------------- 1 | hepstats.utils.fit.api\_check module 2 | ==================================== 3 | 4 | .. automodule:: hepstats.utils.fit.api_check 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.utils.fit.diverse.rst: -------------------------------------------------------------------------------- 1 | hepstats.utils.fit.diverse module 2 | ================================= 3 | 4 | .. automodule:: hepstats.utils.fit.diverse 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.utils.fit.rst: -------------------------------------------------------------------------------- 1 | hepstats.utils.fit package 2 | ========================== 3 | 4 | .. automodule:: hepstats.utils.fit 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Submodules 11 | ---------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.utils.fit.api_check 17 | hepstats.utils.fit.diverse 18 | hepstats.utils.fit.sampling 19 | -------------------------------------------------------------------------------- /docs/api/hepstats.utils.fit.sampling.rst: -------------------------------------------------------------------------------- 1 | hepstats.utils.fit.sampling module 2 | ================================== 3 | 4 | .. automodule:: hepstats.utils.fit.sampling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hepstats.utils.rst: -------------------------------------------------------------------------------- 1 | hepstats.utils package 2 | ====================== 3 | 4 | .. automodule:: hepstats.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | Subpackages 11 | ----------- 12 | 13 | .. toctree:: 14 | :maxdepth: 4 15 | 16 | hepstats.utils.fit 17 | -------------------------------------------------------------------------------- /docs/api/hepstats.version.rst: -------------------------------------------------------------------------------- 1 | hepstats.version module 2 | ======================= 3 | 4 | .. automodule:: hepstats.version 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | -------------------------------------------------------------------------------- /docs/api/hypotests.rst: -------------------------------------------------------------------------------- 1 | hepstats.hypotests 2 | ------------------ 3 | 4 | .. currentmodule:: hepstats.hypotests.core.discovery 5 | 6 | .. autosummary:: 7 | 8 | Discovery 9 | 10 | .. currentmodule:: hepstats.hypotests.core.upperlimit 11 | 12 | .. autosummary:: 13 | 14 | UpperLimit 15 | 16 | .. currentmodule:: hepstats.hypotests.core.confidence_interval 17 | 18 | .. autosummary:: 19 | 20 | ConfidenceInterval 21 | 22 | Parameters 23 | """""""""" 24 | 25 | .. currentmodule:: hepstats.hypotests.parameters 26 | 27 | .. 
autosummary:: 28 | 29 | POIarray 30 | POI 31 | 32 | Calculators 33 | """"""""""" 34 | 35 | Module defining the base class for the calculators for statistical tests based on the likelihood ratio. 36 | 37 | Acronyms used in the code: 38 | * nll = negative log-likelihood, which is the value of the `loss` attribute of a calculator; 39 | * obs = observed, i.e. measured on provided data. 40 | 41 | .. currentmodule:: hepstats.hypotests.calculators.asymptotic_calculator 42 | 43 | .. autosummary:: 44 | 45 | AsymptoticCalculator 46 | 47 | .. currentmodule:: hepstats.hypotests.calculators.frequentist_calculator 48 | 49 | .. autosummary:: 50 | 51 | FrequentistCalculator 52 | 53 | Toys utils 54 | """""""""" 55 | 56 | .. currentmodule:: hepstats.hypotests.toyutils 57 | 58 | .. autosummary:: 59 | 60 | ToyResult 61 | ToysManager 62 | -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | API reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 6 6 | 7 | modeling 8 | hypotests 9 | splot 10 | utils 11 | -------------------------------------------------------------------------------- /docs/api/modeling.rst: -------------------------------------------------------------------------------- 1 | hepstats.modeling 2 | ----------------- 3 | 4 | .. currentmodule:: hepstats.modeling 5 | 6 | .. autosummary:: 7 | 8 | bayesian_blocks 9 | -------------------------------------------------------------------------------- /docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | hepstats 2 | ======== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | hepstats 8 | -------------------------------------------------------------------------------- /docs/api/splot.rst: -------------------------------------------------------------------------------- 1 | hepstats.splot 2 | -------------- 3 | 4 | hepstats.splot.sweights 5 | ======================= 6 | 7 | .. currentmodule:: hepstats.splot.sweights 8 | 9 | .. autosummary:: 10 | 11 | compute_sweights 12 | is_sum_of_extended_pdfs 13 | 14 | hepstats.splot.exceptions 15 | ========================= 16 | 17 | .. currentmodule:: hepstats.splot.exceptions 18 | 19 | .. autosummary:: 20 | 21 | ModelNotFittedToData 22 | -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- 1 | hepstats.utils 2 | ============== 3 | 4 | hepstats.utils.fit 5 | ------------------ 6 | 7 | hepstats.utils.fit.api_check 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | Module for testing a fitting library's validity with hepstats. 11 | 12 | A fitting library should provide six basic objects: 13 | 14 | * model / probability density function 15 | * parameters of the models 16 | * data 17 | * loss / likelihood function 18 | * minimizer 19 | * fitresult (optional) 20 | 21 | A function for each object is defined in this module; all should return ``True`` for the library to work 22 | with hepstats. 23 | 24 | The **zfit** API is currently the standard fitting API in hepstats. 25 | 26 | 27 | .. currentmodule:: hepstats.utils.fit.api_check 28 | 29 | .. autosummary:: 30 | 31 | is_valid_parameter 32 | is_valid_data 33 | is_valid_pdf 34 | is_valid_loss 35 | is_valid_fitresult 36 | is_valid_minimizer 37 |
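For illustration, a minimal sketch of how such a check might be used on a zfit object (the model and observable below are assumptions made up for this example):

.. code-block:: pycon

    >>> import zfit
    >>> from hepstats.utils.fit.api_check import is_valid_pdf
    >>> obs = zfit.Space("x", limits=(0.1, 3.0))
    >>> model = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1)
    >>> is_valid_pdf(model)  # a zfit pdf is expected to pass the check
    True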
38 | hepstats.utils.fit.diverse 39 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 40 | 41 | .. currentmodule:: hepstats.utils.fit.diverse 42 | 43 | .. autosummary:: 44 | 45 | get_value 46 | eval_pdf 47 | pll 48 | array2dataset 49 | get_nevents 50 | 51 | hepstats.utils.fit.sampling 52 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 53 | 54 | .. currentmodule:: hepstats.utils.fit.sampling 55 | 56 | .. autosummary:: 57 | 58 | base_sampler 59 | base_sample 60 | -------------------------------------------------------------------------------- /docs/bib/references.bib: -------------------------------------------------------------------------------- 1 | @article{Cowan:2010js, 2 | author = "Cowan, Glen and Cranmer, Kyle and Gross, Eilam and Vitells, Ofer", 3 | title = "{Asymptotic formulae for likelihood-based tests of new physics}", 4 | eprint = "1007.1727", 5 | archivePrefix = "arXiv", 6 | primaryClass = "physics.data-an", 7 | doi = "10.1140/epjc/s10052-011-1554-0", 8 | journal = "Eur. Phys. J. C", 9 | volume = "71", 10 | pages = "1554", 11 | year = "2011", 12 | note = "[Erratum: Eur.Phys.J.C 73, 2501 (2013)]" 13 | } 14 | 15 | @article{Pivk:2004ty, 16 | author = "Pivk, Muriel and Le Diberder, Francois R.", 17 | title = "{SPlot: A Statistical tool to unfold data distributions}", 18 | journal = "Nucl. Instrum. Meth.", 19 | volume = "A555", 20 | year = "2005", 21 | pages = "356-369", 22 | doi = "10.1016/j.nima.2005.08.106", 23 | eprint = "physics/0402083", 24 | archivePrefix = "arXiv", 25 | primaryClass = "physics.data-an", 26 | reportNumber = "LAL-04-07", 27 | SLACcitation = "" 28 | } 29 | 30 | @article{Scargle_2013, 31 | title = {STUDIES IN ASTRONOMICAL TIME SERIES ANALYSIS. VI. BAYESIAN BLOCK REPRESENTATIONS}, 32 | volume = {764}, 33 | ISSN = {1538-4357}, 34 | url = {http://dx.doi.org/10.1088/0004-637X/764/2/167}, 35 | DOI = {10.1088/0004-637x/764/2/167}, 36 | number = {2}, 37 | journal = {The Astrophysical Journal}, 38 | publisher = {IOP Publishing}, 39 | author = {Scargle, Jeffrey D. and Norris, Jay P. and Jackson, Brad and Chiang, James}, 40 | year = {2013}, 41 | month = {Feb}, 42 | pages = {167} 43 | } 44 | 45 | @misc{BB_jakevdp, 46 | title = {Dynamic Programming in Python: Bayesian Blocks}, 47 | howpublished = {http://jakevdp.github.io/blog/2012/09/12/dynamic-programming-in-python/}, 48 | note = {Accessed: 2020-11-03} 49 | } 50 | 51 | @article{VanderPlas_2012, 52 | title = {Introduction to astroML: Machine learning for astrophysics}, 53 | ISBN = {9781467346269}, 54 | url = {http://dx.doi.org/10.1109/CIDU.2012.6382200}, 55 | DOI = {10.1109/cidu.2012.6382200}, 56 | journal = {2012 Conference on Intelligent Data Understanding}, 57 | publisher = {IEEE}, 58 | author = {VanderPlas, Jacob and Connolly, Andrew J. and Ivezic, Zeljko and Gray, Alex}, 59 | year = {2012}, 60 | month = {Oct} 61 | } 62 | 63 | @article{Pollack:2017srh, 64 | author = "Pollack, Brian and Bhattacharya, Saptaparna and Schmitt, 65 | Michael", 66 | title = "{Bayesian Block Histogramming for High Energy Physics}", 67 | year = "2017", 68 | eprint = "1708.00810", 69 | archivePrefix = "arXiv", 70 | primaryClass = "physics.data-an", 71 | reportNumber = "nuhep-exp/17-05, NUHEP-EXP-17-05", 72 | SLACcitation = "", 73 | 74 | } 75 | -------------------------------------------------------------------------------- /docs/bibliography.rst: -------------------------------------------------------------------------------- 1 | Bibliography 2 | ============ 3 | 4 | .. 
bibliography:: bib/references.bib 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | from __future__ import annotations 9 | 10 | from pathlib import Path 11 | 12 | from hepstats import __version__ as version 13 | 14 | project_dir = Path(__file__).parents[1] 15 | 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | project = "hepstats" 20 | copyright = "2019-2025, The Scikit-HEP Administrators" 21 | author = "Matthieu Marinangeli" 22 | 23 | # The full version, including alpha/beta/rc tags 24 | 25 | release = version 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx.ext.autodoc", 35 | "sphinx.ext.mathjax", 36 | "sphinx.ext.ifconfig", 37 | "sphinx.ext.githubpages", 38 | "sphinx.ext.viewcode", 39 | "sphinx.ext.napoleon", 40 | "sphinx.ext.autosummary", 41 | "sphinx.ext.inheritance_diagram", 42 | # "sphinxcontrib.bibtex", 43 | "matplotlib.sphinxext.plot_directive", 44 | "sphinx_copybutton", 45 | "sphinx_autodoc_typehints", 46 | ] 47 | 48 | bibtex_bibfiles = [ 49 | str(project_dir / "docs" / "bib" / "references.bib") 50 | ] # TODO: currently string, Path doesn't work: https://github.com/mcmtroffaes/sphinxcontrib-bibtex/issues/314 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ["_templates"] 53 | 54 | # List of patterns, relative to source directory, that match files and 55 | # directories to ignore when looking for source files. 56 | # This pattern also affects html_static_path and html_extra_path. 57 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 58 | 59 | # The name of the Pygments (syntax highlighting) style to use. 60 | pygments_style = "sphinx" 61 | 62 | # -- Options for HTML output ------------------------------------------------- 63 | 64 | # The theme to use for HTML and HTML Help pages. See the documentation for 65 | # a list of builtin themes. 66 | # 67 | html_theme = "pydata_sphinx_theme" 68 | 69 | # Add any paths that contain custom static files (such as style sheets) here, 70 | # relative to this directory. They are copied after the builtin static files, 71 | # so a file named "default.css" will overwrite the builtin "default.css".
72 | 73 | copybutton_prompt_text = ">>> " 74 | 75 | # -- autodoc settings --------------------------------------------- 76 | 77 | # also doc __init__ docstrings 78 | autoclass_content = "both" 79 | autodoc_member_order = "bysource" 80 | autodoc_default_options = { 81 | "show-inheritance": True, 82 | } 83 | autodoc_inherit_docstrings = False 84 | 85 | html_static_path = [] # "_static" 86 | 87 | 88 | html_logo = "images/logo.png" 89 | 90 | html_theme_options = { 91 | "github_url": "https://github.com/scikit-hep/hepstats", 92 | "use_edit_page_button": True, 93 | "search_bar_text": "Search hepstats...", 94 | "navigation_with_keys": True, 95 | "search_bar_position": "sidebar", 96 | } 97 | 98 | html_context = { 99 | "github_user": "scikit-hep", 100 | "github_repo": "hepstats", 101 | "github_version": "main", 102 | "doc_path": "docs", 103 | } 104 | -------------------------------------------------------------------------------- /docs/getting_started/hypotests.rst: -------------------------------------------------------------------------------- 1 | 2 | hypotests 3 | ######### 4 | 5 | This submodule provides tools for statistical inference, such as discovery tests and computations of 6 | upper limits or confidence intervals. ``hepstats`` needs a fitting backend to perform computations, such as 7 | `zfit <https://github.com/zfit/zfit>`_. Any fitting library can be used if its API is compatible with hepstats 8 | (see `api checks <https://github.com/scikit-hep/hepstats/blob/main/src/hepstats/utils/fit/api_check.py>`_). 9 | 10 | We give here a simple example of an upper limit calculation of the yield of a Gaussian signal with known mean 11 | and sigma over an exponential background. The fitting backend used is the `zfit <https://github.com/zfit/zfit>`_ 12 | package. If you are unfamiliar with zfit you can have a look at the `zfit documentation <https://zfit.readthedocs.io/en/latest/>`_. 13 | 14 | First we import what's necessary from zfit, such as the **ExtendedUnbinnedNLL** class, as we want to construct 15 | an extended unbinned likelihood. **Minuit** is also imported; it is a zfit wrapper of the minuit minimizer 16 | from `iminuit <https://github.com/scikit-hep/iminuit>`_. 17 | 18 | .. code-block:: pycon 19 | 20 | >>> import zfit 21 | >>> from zfit.loss import ExtendedUnbinnedNLL 22 | >>> from zfit.minimize import Minuit 23 | >>> import numpy as np 24 | 25 | Then we construct the data sample, which consists of 300 points drawn from an exponential distribution with slope -2, 26 | constituting the background, and 10 points drawn from a Gaussian distribution of mean 1.2 and width 0.1, constituting the signal. The 27 | fit range is defined between 0.1 and 3.0, meaning that some points of the background distribution are filtered 28 | out. The data, which is a numpy array, is then transformed into a zfit **Data** object. 29 | 30 | .. code-block:: pycon 31 | 32 | >>> bounds = (0.1, 3.0) 33 | >>> obs = zfit.Space('x', limits=bounds) 34 | >>> bkg = np.random.exponential(1/2, 300) 35 | >>> peak = np.random.normal(1.2, 0.1, 10) 36 | >>> data = np.concatenate((bkg, peak)) 37 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 38 | >>> data = zfit.Data.from_numpy(obs=obs, array=data) 39 | 40 | Now we build the model. For the background, an exponential pdf is used with **lambda_**, the slope of the exponential, as 41 | a free parameter. For the signal, a Gaussian pdf is used with mean and width fixed to 1.2 and 0.1, respectively. 42 | The background and signal pdfs are extended using the yield parameters **Nbkg** and **Nsig** respectively, which 43 | are free. The extended negative log-likelihood is then constructed using the sum of the background and signal models, 44 | and the data. 45 | 46 | .. 
code-block:: pycon 47 | 48 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 49 | >>> Nsig = zfit.Parameter("Nsig", 1., -20., 500) 50 | >>> Nbkg = zfit.Parameter("Nbkg", 250, 0., 500) 51 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) 52 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 53 | >>> total = zfit.pdf.SumPDF([signal, background]) 54 | >>> nll = ExtendedUnbinnedNLL(model=total, data=data) 55 | 56 | The background plus signal model can then be fitted to the data. 57 | 58 | .. code-block:: pycon 59 | 60 | >>> # Instantiate a minuit minimizer 61 | >>> minimizer = Minuit() 62 | >>> # minimisation of the loss function 63 | >>> minimum = minimizer.minimize(loss=nll) 64 | >>> minimum.hesse() 65 | >>> print(minimum) 66 | 67 | +---------+-------------+------------------+---------+-------------+ 68 | | valid | converged | param at limit | edm | min value | 69 | +=========+=============+==================+=========+=============+ 70 | | True | True | False | 4.9e-05 | -1077 | 71 | +---------+-------------+------------------+---------+-------------+ 72 | 73 | Parameters 74 | 75 | +--------+--------+---------------+-----------+ 76 | | name | value | hesse | at limit | 77 | +========+========+===============+===========+ 78 | | Nsig | 4.518 | +/- 5.8 | False | 79 | +--------+--------+---------------+-----------+ 80 | | Nbkg | 251.6 | +/- 17 | False | 81 | +--------+--------+---------------+-----------+ 82 | | lambda | -1.93 | +/- 0.14 | False | 83 | +--------+--------+---------------+-----------+ 84 | 85 | So the fitted number of signal candidates is 4.518 +/- 5.8, which is consistent with zero. We can then compute an 86 | upper limit on this number, which should be approximately equal to 4.5 + 2 * 5.8 ≈ 16. 87 | First we import from the :py:mod:`~hepstats.hypotests.calculators` submodule of :py:mod:`~hepstats.hypotests` 88 | the :py:class:`~hepstats.hypotests.calculators.asymptotic_calculator.AsymptoticCalculator` which takes as input 89 | the loss function and minimizer. 90 | 91 | .. code-block:: pycon 92 | 93 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator 94 | >>> calculator = AsymptoticCalculator(nll, Minuit(), asimov_bins=100) 95 | 96 | The :py:class:`~hepstats.hypotests.parameters.POI` and :py:class:`~hepstats.hypotests.parameters.POIarray` 97 | classes are also imported; POI stands for parameter of interest. In our case the POI is **Nsig**. To compute 98 | an upper limit you need to explicitly specify the background plus signal hypothesis (the null, **poinull**) and the 99 | background-only hypothesis (the alternative, **poialt**); in hepstats this is done using :py:class:`~hepstats.hypotests.parameters.POI`/ :py:class:`~hepstats.hypotests.parameters.POIarray`: 100 | 101 | .. code-block:: pycon 102 | 103 | >>> from hepstats.hypotests.parameters import POI, POIarray 104 | >>> 105 | >>> # background only 106 | >>> poialt = POI(Nsig, 0) 107 | >>> # background + signal 108 | >>> poinull = POIarray(Nsig, np.linspace(0.0, 25, 20)) 109 | 110 | A :py:class:`~hepstats.hypotests.parameters.POI` takes as input the parameter **Nsig** and a single value for a 111 | given hypothesis; for **poialt** it is 0 because this is the background-only hypothesis. Similarly :py:class:`~hepstats.hypotests.parameters.POIarray` 112 | takes as input the parameter **Nsig** and an array of values to scan for **Nsig**, from 0 to 25.
117 | We can now create an :py:class:`~hepstats.hypotests.core.upperlimit.UpperLimit` instance, which takes as input
118 | the **calculator**, **poinull** and **poialt**. The :py:class:`~hepstats.hypotests.core.upperlimit.UpperLimit`
119 | instance will ask the **calculator** to compute the *p-values* for each value in **poinull** and eventually find
120 | the value of the upper limit on **Nsig** (if the upper limit is in the range of the **poinull** values). Below
121 | is an example of how to compute a CLs upper limit at 95 % confidence level.
122 |
123 | .. code-block:: pycon
124 |
125 | >>> from hepstats.hypotests import UpperLimit
126 | >>> ul = UpperLimit(calculator, poinull, poialt)
127 | >>> ul.upperlimit(alpha=0.05, CLs=True)
128 |
129 | Observed upper limit: Nsig = 15.725784747406346
130 | Expected upper limit: Nsig = 11.927442041887158
131 | Expected upper limit +1 sigma: Nsig = 16.596396280677116
132 | Expected upper limit -1 sigma: Nsig = 8.592750403611896
133 | Expected upper limit +2 sigma: Nsig = 22.24864429383046
134 | Expected upper limit -2 sigma: Nsig = 6.400549971360598
135 |
136 | In the result you obtain the observed and expected limits. The observed limit is the limit based on the observation
137 | of 4.518 +/- 5.8 signal candidates in data. The expected limit is the limit expected under the background-only hypothesis.
138 | A graphical representation of how the upper limit is computed is shown in the following figure.
139 |
140 | .. image:: https://raw.githubusercontent.com/scikit-hep/hepstats/main/notebooks/hypotests/asy_ul.png
141 |
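Such a figure can be reproduced with a few lines of matplotlib. The sketch below is only illustrative: it reuses the
**ul** and **poinull** objects defined above, and the keys of the dictionary returned by ``pvalues`` (``cls``,
``expected``, ...) follow the usage in the ``plotlimit`` helper of the example notebooks:

.. code-block:: pycon

    >>> import matplotlib.pyplot as plt
    >>> pvalues = ul.pvalues(CLs=True)
    >>> plt.plot(poinull.values, pvalues["cls"], "k.-", label="Observed CL$_s$")
    >>> plt.plot(poinull.values, pvalues["expected"], "k--", label="Expected CL$_s$")
    >>> plt.axhline(0.05, color="r")  # the upper limit is where a curve crosses alpha = 0.05
    >>> plt.xlabel("Nsig")
    >>> plt.ylabel("p-value")
    >>> plt.legend()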
-------------------------------------------------------------------------------- /docs/getting_started/index.rst: --------------------------------------------------------------------------------
1 | **********
2 | Quickstart
3 | **********
4 |
5 | The ``hepstats`` module includes the modeling, hypotests and splot submodules. This is a quick user
6 | guide to each submodule:
7 |
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 | modeling
13 | hypotests
14 | splot
15 |
16 | The `binder <https://mybinder.org/v2/gh/scikit-hep/hepstats/main>`_ examples are also a good way to get started.
17 |
-------------------------------------------------------------------------------- /docs/getting_started/modeling.rst: --------------------------------------------------------------------------------
1 | modeling
2 | ########
3 |
4 | The modeling submodule includes the `Bayesian Block algorithm <https://arxiv.org/pdf/1207.5578.pdf>`_ that
5 | can be used to improve the binning of histograms. The visual improvement can be dramatic, and more importantly,
6 | this algorithm produces histograms that accurately represent the underlying distribution while being robust
7 | to statistical fluctuations. Here is a small example of the algorithm applied to data sampled from a Laplace distribution,
8 | compared to a histogram of this sample with a fine binning.
9 |
10 | .. code-block:: pycon
11 |
12 | >>> import numpy as np
13 | >>> import matplotlib.pyplot as plt
14 | >>> from hepstats.modeling import bayesian_blocks
15 |
16 | >>> # sample data from a Laplace distribution
17 | >>> data = np.random.laplace(size=10000)
18 | >>> blocks = bayesian_blocks(data)
19 |
20 | >>> # plot the histograms of the data with 1000 equally spaced bins and the bins from the
21 | >>> # bayesian_blocks function
22 | >>> plt.hist(data, bins=1000, label='Fine Binning', density=True, alpha=0.6)
23 | >>> plt.hist(data, bins=blocks, label='Bayesian Blocks', histtype='step', density=True,
24 | ...          linewidth=2)
25 | >>> plt.legend(loc=2)
26 |
27 | .. image:: https://raw.githubusercontent.com/scikit-hep/hepstats/main/notebooks/modeling/bayesian_blocks_example.png
28 |
-------------------------------------------------------------------------------- /docs/getting_started/splot.rst: --------------------------------------------------------------------------------
1 | splot
2 | #####
3 |
4 | A full example using the **sPlot** algorithm can be found `here `_. **sWeights** for different components in a data sample, modeled with a sum of extended probability density functions, are derived using the ``compute_sweights`` function:
5 |
6 | .. code-block:: pycon
7 |
8 | >>> from hepstats.splot import compute_sweights
9 | >>> # using same model as above for illustration
10 | >>> sweights = compute_sweights(zfit.pdf.SumPDF([signal, background]), data)
11 | >>> bkg_sweights = sweights[Nbkg]
12 | >>> sig_sweights = sweights[Nsig]
13 |
14 |
15 | The model needs to be fitted to the data before the **sWeights** are computed; if it is not, an error is raised.
16 |
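As a quick sanity check of the result, the sWeights of each species should sum up (approximately) to the
corresponding fitted yield; this is a general property of sWeights, sketched here with the variable names from the
snippet above:

.. code-block:: pycon

    >>> import numpy as np
    >>> np.sum(sig_sweights)  # should be close to the fitted value of Nsig
    >>> np.sum(bkg_sweights)  # should be close to the fitted value of Nbkg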
-------------------------------------------------------------------------------- /docs/images/logo.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo.pdf
-------------------------------------------------------------------------------- /docs/images/logo.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo.png
-------------------------------------------------------------------------------- /docs/images/logo.xcf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo.xcf
-------------------------------------------------------------------------------- /docs/images/logo_medium.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo_medium.png
-------------------------------------------------------------------------------- /docs/images/logo_small.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/docs/images/logo_small.png
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------
1 |
2 |
3 | |hepstats_logo|
4 |
5 | ==============================
6 | Statistics tools and utilities
7 | ==============================
8 |
9 |
10 | The hepstats package is a Python library providing statistics tools and utilities for particle physics.
11 | In particular, hepstats can work with a fitting library, such as `zfit <https://github.com/zfit/zfit>`_, to build
12 | likelihood functions that hepstats uses to perform statistical inference. hepstats offers a Pythonic
13 | alternative to the RooStats library from the `ROOT <https://root.cern>`_ data analysis package, but
14 | also other tools.
15 |
16 | You can install hepstats from PyPI_ with pip:
17 |
18 | .. code-block:: console
19 |
20 | $ pip install hepstats
21 |
22 |
23 | .. toctree::
24 | :maxdepth: 2
25 |
26 | getting_started/index
27 | whats_new
28 | api/index
29 | bibliography
30 |
31 |
32 | Indices and tables
33 | ==================
34 |
35 | * :ref:`genindex`
36 | * :ref:`modindex`
37 | * :ref:`search`
38 |
39 | .. |hepstats_logo| image:: images/logo_small.png
40 | :target: https://github.com/scikit-hep/hepstats
41 | :alt: hepstats logo
42 |
43 |
44 | .. _PyPI: https://pypi.org/project/hepstats
45 |
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
-------------------------------------------------------------------------------- /docs/make_docs.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # script has to be executed inside folder `docs`
3 | # get current directory name
4 | pushd `dirname $0` > /dev/null
5 | MAKE_DOCS_PATH="$( cd "$(dirname "$0")" ; pwd -P )"
6 | #MAKE_DOCS_PATH=$(pwd -P)
7 | popd > /dev/null
8 |
9 | # generate the ReST files
10 | echo "debug"
11 | echo ${MAKE_DOCS_PATH}/../src/hepstats
12 | #ls ${MAKE_DOCS_PATH}
13 | SPHINX_APIDOC_OPTIONS=members,undoc-members,show-inheritance,inherited-members sphinx-apidoc -e -o ${MAKE_DOCS_PATH}/api ${MAKE_DOCS_PATH}/../src/hepstats -fMeT && \
14 | make -C ${MAKE_DOCS_PATH} clean && make -C ${MAKE_DOCS_PATH} html -j8 && \
15 | echo "Documentation successfully built!" || echo "FAILED to build Documentation"
16 |
-------------------------------------------------------------------------------- /docs/whats_new.rst: --------------------------------------------------------------------------------
1 | ==========
2 | What's new
3 | ==========
4 |
5 | ..
include:: ../CHANGELOG.rst
6 |
-------------------------------------------------------------------------------- /environment.yml: --------------------------------------------------------------------------------
1 | name: hepstats-demo
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - numpy
7 | - scipy
8 | - iminuit
9 | - tensorflow-probability
10 | - zfit
11 | - asdf
12 | - matplotlib
13 | - pip:
14 | - .
15 |
-------------------------------------------------------------------------------- /notebooks/README.md: --------------------------------------------------------------------------------
1 | # Notebooks
2 |
3 | This directory stores all the demo notebooks, which you can either run with [binder](https://mybinder.org/v2/gh/scikit-hep/hepstats/main) or download as jupyter notebook `ipynb` files.
4 |
5 | The notebooks are organized by `submodule`:
6 | - `hypotests`:
7 | * discovery_asy_zfit.ipynb: computes the significance of a gaussian signal over an exponential background, fitted with `zfit`, using the `AsymptoticCalculator`.
8 | * discovery_freq_zfit.ipynb: computes the significance of a gaussian signal over an exponential background, fitted with `zfit`, using the `FrequentistCalculator`.
9 | * upperlimit_asy_zfit.ipynb: computes the upper limit on the signal yield of a gaussian signal over an exponential background, fitted with `zfit`, using the `AsymptoticCalculator`.
10 | * upperlimit_freq_zfit.ipynb: computes the upper limit on the signal yield of a gaussian signal over an exponential background, fitted with `zfit`, using the `FrequentistCalculator`.
11 | * confidenceinterval_asy_zfit.ipynb: computes the 68% confidence level interval on the mean of a gaussian signal over an exponential background, fitted with `zfit`, using the `AsymptoticCalculator`.
12 | * confidenceinterval_freq_zfit.ipynb: computes the 68% confidence level interval on the mean of a gaussian signal over an exponential background, fitted with `zfit`, using the `FrequentistCalculator`.
13 | * FC_interval_asy.ipynb: computes the 90% confidence level Feldman and Cousins interval on the measured mean 𝑥 of a gaussian for several values of the true mean μ, using the `AsymptoticCalculator`.
14 | * FC_interval_freq.ipynb: computes the 90% confidence level Feldman and Cousins interval on the measured mean 𝑥 of a gaussian for several values of the true mean μ, using the `FrequentistCalculator`.
15 | * counting.ipynb: shows examples of inferences with `hepstats` using a counting analysis instead of a shape analysis.
16 |
17 | - `modeling`
18 | * bayesian_blocks.ipynb: presentation of the Bayesian Blocks algorithm and comparison with other binning methods.
19 |
20 | - `splots`
21 | * splot_example.ipynb: example of `sPlot` on fake mass and momentum distributions for some signal and some background. The `sWeights` are derived using a mass fit of a gaussian signal over an exponential background with `zfit`. The `sWeights` are applied to the momentum distribution to retrieve the signal distribution. This example is a reproduction of the example in [hep_ml](https://github.com/arogozhnikov/hep_ml/blob/main/notebooks/sPlot.ipynb) using `hepstats`.
22 | * splot_example_2.ipynb: example of `sPlot` on fake mass and lifetime distributions for some signal and some background. The `sWeights` are derived using a mass fit of a gaussian signal over an exponential background with `zfit`. The `sWeights` are applied to the lifetime distribution to retrieve the signal distribution.
This example is a reproduction of the example of the [LHCb statistics guidelines](https://gitlab.cern.ch/lhcb/statistics-guidelines/-/blob/add_sweights_item/resources/appendix_f4.ipynb) using `hepstats`. 23 | -------------------------------------------------------------------------------- /notebooks/hypotests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/__init__.py -------------------------------------------------------------------------------- /notebooks/hypotests/asy_ci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/asy_ci.png -------------------------------------------------------------------------------- /notebooks/hypotests/asy_ul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/asy_ul.png -------------------------------------------------------------------------------- /notebooks/hypotests/confidenceinterval_asy_zfit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# Example of confidence interval computation" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "pycharm": { 19 | "name": "#%%\n" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from __future__ import annotations\n", 25 | "\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np\n", 28 | "import zfit\n", 29 | "from utils import one_minus_cl_plot, plotfitresult, pltdist\n", 30 | "from zfit.loss import ExtendedUnbinnedNLL\n", 31 | "from zfit.minimize import Minuit\n", 32 | "\n", 33 | "from hepstats.hypotests import ConfidenceInterval\n", 34 | "from hepstats.hypotests.calculators import AsymptoticCalculator\n", 35 | "from hepstats.hypotests.parameters import POIarray" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "pycharm": { 43 | "name": "#%%\n" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "plt.rcParams[\"figure.figsize\"] = (9, 8)\n", 49 | "plt.rcParams[\"font.size\"] = 16" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": { 55 | "pycharm": { 56 | "name": "#%% md\n" 57 | } 58 | }, 59 | "source": [ 60 | "### Fit of a Gaussian signal over an exponential background:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "pycharm": { 68 | "name": "#%%\n" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "bounds = (0.1, 3.0)\n", 74 | "\n", 75 | "# Data and signal\n", 76 | "\n", 77 | "np.random.seed(0)\n", 78 | "tau = -2.0\n", 79 | "beta = -1 / tau\n", 80 | "data = np.random.exponential(beta, 300)\n", 81 | "peak = np.random.normal(1.2, 0.1, 80)\n", 82 | "data = np.concatenate((data, peak))\n", 83 | "data = data[(data > bounds[0]) & (data < bounds[1])]\n", 84 | "\n", 85 | "pltdist(data, bins=80, bounds=bounds)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "pycharm": { 93 | "name": "#%%\n" 94 | } 95 | }, 96 | 
"outputs": [], 97 | "source": [ 98 | "obs = zfit.Space(\"x\", limits=bounds)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "pycharm": { 106 | "name": "#%%\n" 107 | } 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "mean = zfit.Parameter(\"mean\", 1.2, 0.5, 2.0)\n", 112 | "sigma = zfit.Parameter(\"sigma\", 0.1, 0.02, 0.2)\n", 113 | "lambda_ = zfit.Parameter(\"lambda\", -2.0, -4.0, -1.0)\n", 114 | "Nsig = zfit.Parameter(\"Nsig\", 20.0, -20.0, len(data))\n", 115 | "Nbkg = zfit.Parameter(\"Nbkg\", len(data), 0.0, len(data) * 1.1)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "pycharm": { 123 | "name": "#%%\n" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)\n", 129 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)\n", 130 | "tot_model = zfit.pdf.SumPDF([signal, background])" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "pycharm": { 138 | "name": "#%%\n" 139 | } 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "# Create the negative log likelihood\n", 144 | "data_ = zfit.data.Data.from_numpy(obs=obs, array=data)\n", 145 | "nll = ExtendedUnbinnedNLL(model=tot_model, data=data_)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "pycharm": { 153 | "name": "#%%\n" 154 | } 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "# Instantiate a minuit minimizer\n", 159 | "minimizer = Minuit()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "pycharm": { 167 | "name": "#%%\n" 168 | } 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "# minimisation of the loss function\n", 173 | "minimum = minimizer.minimize(loss=nll)\n", 174 | "minimum.hesse()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "pycharm": { 182 | "name": "#%%\n" 183 | } 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "nbins = 80\n", 188 | "pltdist(data, nbins, bounds)\n", 189 | "plotfitresult(tot_model, bounds, nbins)\n", 190 | "plt.xlabel(\"m [GeV/c$^2$]\")\n", 191 | "plt.ylabel(\"number of events\")" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "pycharm": { 198 | "name": "#%% md\n" 199 | } 200 | }, 201 | "source": [ 202 | "### Confidence interval\n", 203 | "\n", 204 | "We want to compute the confidence interval of the mean of the Gaussian at 68% confidence level." 
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {
211 | "pycharm": {
212 | "name": "#%%\n"
213 | }
214 | },
215 | "outputs": [],
216 | "source": [
217 | "# instantiation of the calculator\n",
218 | "calculator = AsymptoticCalculator(nll, minimizer)\n",
219 | "calculator.bestfit = minimum # optional"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {
226 | "pycharm": {
227 | "name": "#%%\n"
228 | }
229 | },
230 | "outputs": [],
231 | "source": [
232 | "# parameter of interest of the null hypothesis\n",
233 | "poinull = POIarray(mean, np.linspace(1.15, 1.26, 100))"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "pycharm": {
241 | "name": "#%%\n"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "# instantiation of the confidence interval test\n",
247 | "ci = ConfidenceInterval(calculator, poinull)"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {
254 | "pycharm": {
255 | "name": "#%%\n"
256 | }
257 | },
258 | "outputs": [],
259 | "source": [
260 | "ci.interval();"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {
267 | "pycharm": {
268 | "name": "#%%\n"
269 | }
270 | },
271 | "outputs": [],
272 | "source": [
273 | "f = plt.figure(figsize=(9, 8))\n",
274 | "one_minus_cl_plot(poinull.values, ci.pvalues())\n",
275 | "plt.xlabel(\"mean\")\n",
276 | "f.savefig(\"asy_ci.png\")"
277 | ]
278 | }
279 | ],
280 | "metadata": {
281 | "kernelspec": {
282 | "display_name": "Python 3",
283 | "language": "python",
284 | "name": "python3"
285 | },
286 | "language_info": {
287 | "codemirror_mode": {
288 | "name": "ipython",
289 | "version": 3
290 | },
291 | "file_extension": ".py",
292 | "mimetype": "text/x-python",
293 | "name": "python",
294 | "nbconvert_exporter": "python",
295 | "pygments_lexer": "ipython3",
296 | "version": "3.7.7"
297 | }
298 | },
299 | "nbformat": 4,
300 | "nbformat_minor": 4
301 | }
302 |
-------------------------------------------------------------------------------- /notebooks/hypotests/confidenceinterval_freq_zfit.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "pycharm": {
7 | "name": "#%% md\n"
8 | }
9 | },
10 | "source": [
11 | "# Example of confidence interval computation"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {
18 | "pycharm": {
19 | "name": "#%%\n"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "from __future__ import annotations\n",
25 | "\n",
26 | "import matplotlib.pyplot as plt\n",
27 | "import numpy as np\n",
28 | "import zfit\n",
29 | "from utils import one_minus_cl_plot, plotfitresult, pltdist\n",
30 | "from zfit.loss import ExtendedUnbinnedNLL\n",
31 | "from zfit.minimize import Minuit\n",
32 | "\n",
33 | "from hepstats.hypotests import ConfidenceInterval\n",
34 | "from hepstats.hypotests.calculators import FrequentistCalculator\n",
35 | "from hepstats.hypotests.parameters import POIarray"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "pycharm": {
42 | "name": "#%% md\n"
43 | }
44 | },
45 | "source": [
46 | "### Fit of a Gaussian signal over an exponential background:"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "pycharm": {
54 |
"name": "#%%\n" 55 | } 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "bounds = (0.1, 3.0)\n", 60 | "\n", 61 | "# Data and signal\n", 62 | "\n", 63 | "np.random.seed(0)\n", 64 | "tau = -2.0\n", 65 | "beta = -1 / tau\n", 66 | "data = np.random.exponential(beta, 300)\n", 67 | "peak = np.random.normal(1.2, 0.1, 80)\n", 68 | "data = np.concatenate((data, peak))\n", 69 | "data = data[(data > bounds[0]) & (data < bounds[1])]\n", 70 | "\n", 71 | "plt.hist(data, bins=100, histtype=\"step\");" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "pycharm": { 79 | "name": "#%%\n" 80 | } 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "obs = zfit.Space(\"x\", limits=bounds)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "pycharm": { 92 | "name": "#%%\n" 93 | } 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "mean = zfit.Parameter(\"mean\", 1.2, 0.5, 2.0)\n", 98 | "sigma = zfit.Parameter(\"sigma\", 0.1, 0.02, 0.2)\n", 99 | "lambda_ = zfit.Parameter(\"lambda\", -2.0, -4.0, -1.0)\n", 100 | "Nsig = zfit.Parameter(\"Nsig\", 20.0, -20.0, len(data))\n", 101 | "Nbkg = zfit.Parameter(\"Nbkg\", len(data), 0.0, len(data) * 1.1)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "pycharm": { 109 | "name": "#%%\n" 110 | } 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)\n", 115 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)\n", 116 | "tot_model = zfit.pdf.SumPDF([signal, background])" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "pycharm": { 124 | "name": "#%%\n" 125 | } 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "# Create the negative log likelihood\n", 130 | "data_ = zfit.data.Data.from_numpy(obs=obs, array=data)\n", 131 | "nll = ExtendedUnbinnedNLL(model=tot_model, data=data_)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "pycharm": { 139 | "name": "#%%\n" 140 | } 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "# Instantiate a minuit minimizer\n", 145 | "minimizer = Minuit()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "pycharm": { 153 | "name": "#%%\n" 154 | } 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "# minimisation of the loss function\n", 159 | "minimum = minimizer.minimize(loss=nll)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "pycharm": { 167 | "name": "#%%\n" 168 | } 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "nbins = 80\n", 173 | "pltdist(data, nbins, bounds)\n", 174 | "plotfitresult(tot_model, bounds, nbins)\n", 175 | "plt.xlabel(\"m [GeV/c$^2$]\")\n", 176 | "plt.ylabel(\"number of events\")" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": { 182 | "pycharm": { 183 | "name": "#%% md\n" 184 | } 185 | }, 186 | "source": [ 187 | "### Confidence interval\n", 188 | "\n", 189 | "We want to compute the confidence interval of the mean of the Gaussian at 68% confidence level." 
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {
196 | "pycharm": {
197 | "name": "#%%\n"
198 | }
199 | },
200 | "outputs": [],
201 | "source": [
202 | "# instantiation of the calculator\n",
203 | "# calculator = FrequentistCalculator(nll, minimizer, ntoysnull=100)\n",
204 | "calculator = FrequentistCalculator.from_yaml(\"toys/ci_freq_zfit_toys.yml\", nll, minimizer, ntoysnull=2000)\n",
205 | "calculator.bestfit = minimum # optional"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {
212 | "pycharm": {
213 | "name": "#%%\n"
214 | }
215 | },
216 | "outputs": [],
217 | "source": [
218 | "# parameter of interest of the null hypothesis\n",
219 | "poinull = POIarray(mean, np.linspace(1.15, 1.26, 50))"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {
226 | "pycharm": {
227 | "name": "#%%\n"
228 | }
229 | },
230 | "outputs": [],
231 | "source": [
232 | "# instantiation of the confidence interval test\n",
233 | "ci = ConfidenceInterval(calculator, poinull)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "pycharm": {
241 | "name": "#%%\n"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "ci.interval();"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {
253 | "pycharm": {
254 | "name": "#%%\n"
255 | }
256 | },
257 | "outputs": [],
258 | "source": [
259 | "f = plt.figure(figsize=(9, 8))\n",
260 | "one_minus_cl_plot(poinull.values, ci.pvalues())\n",
261 | "plt.xlabel(\"mean\")"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {
268 | "pycharm": {
269 | "name": "#%%\n"
270 | }
271 | },
272 | "outputs": [],
273 | "source": [
274 | "calculator.to_yaml(\"toys/ci_freq_zfit_toys.yml\")"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {
281 | "pycharm": {
282 | "name": "#%%\n"
283 | }
284 | },
285 | "outputs": [],
286 | "source": []
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {
292 | "pycharm": {
293 | "name": "#%%\n"
294 | }
295 | },
296 | "outputs": [],
297 | "source": []
298 | }
299 | ],
300 | "metadata": {
301 | "kernelspec": {
302 | "display_name": "Python 3",
303 | "language": "python",
304 | "name": "python3"
305 | },
306 | "language_info": {
307 | "codemirror_mode": {
308 | "name": "ipython",
309 | "version": 3
310 | },
311 | "file_extension": ".py",
312 | "mimetype": "text/x-python",
313 | "name": "python",
314 | "nbconvert_exporter": "python",
315 | "pygments_lexer": "ipython3",
316 | "version": "3.7.7"
317 | }
318 | },
319 | "nbformat": 4,
320 | "nbformat_minor": 4
321 | }
322 |
-------------------------------------------------------------------------------- /notebooks/hypotests/discovery_asy_zfit.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "pycharm": {
7 | "name": "#%% md\n"
8 | }
9 | },
10 | "source": [
11 | "# Discovery test example"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {
18 | "pycharm": {
19 | "name": "#%%\n"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "from __future__ import annotations\n",
25 | "\n",
26 | "import matplotlib.pyplot as plt\n",
27 | "import numpy as np\n",
28 |
"import zfit\n", 29 | "from utils import plotfitresult, pltdist\n", 30 | "from zfit.loss import ExtendedUnbinnedNLL\n", 31 | "from zfit.minimize import Minuit\n", 32 | "\n", 33 | "from hepstats.hypotests import Discovery\n", 34 | "from hepstats.hypotests.calculators import AsymptoticCalculator\n", 35 | "from hepstats.hypotests.parameters import POI" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "pycharm": { 43 | "name": "#%%\n" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "plt.rcParams[\"figure.figsize\"] = (8, 6)\n", 49 | "plt.rcParams[\"font.size\"] = 16" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": { 55 | "pycharm": { 56 | "name": "#%% md\n" 57 | } 58 | }, 59 | "source": [ 60 | "### Fit of a Gaussian signal over an exponential background:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "pycharm": { 68 | "name": "#%%\n" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "bounds = (0.1, 3.0)\n", 74 | "\n", 75 | "# Data and signal\n", 76 | "\n", 77 | "np.random.seed(0)\n", 78 | "tau = -2.0\n", 79 | "beta = -1 / tau\n", 80 | "data = np.random.exponential(beta, 300)\n", 81 | "peak = np.random.normal(1.2, 0.1, 25)\n", 82 | "data = np.concatenate((data, peak))\n", 83 | "data = data[(data > bounds[0]) & (data < bounds[1])]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "pycharm": { 91 | "name": "#%%\n" 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "pltdist(data, bins=80, bounds=bounds)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "pycharm": { 104 | "name": "#%%\n" 105 | } 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "obs = zfit.Space(\"x\", limits=bounds)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "pycharm": { 117 | "name": "#%%\n" 118 | } 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "lambda_ = zfit.Parameter(\"lambda\", -2.0, -4.0, -1.0)\n", 123 | "Nsig = zfit.Parameter(\"Nsig\", 20.0, -20.0, len(data))\n", 124 | "Nbkg = zfit.Parameter(\"Nbkg\", len(data), 0.0, len(data) * 1.1)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "pycharm": { 132 | "name": "#%%\n" 133 | } 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig)\n", 138 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg)\n", 139 | "tot_model = zfit.pdf.SumPDF([signal, background])" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "pycharm": { 147 | "name": "#%%\n" 148 | } 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "# Create the negative log likelihood\n", 153 | "data_ = zfit.data.Data.from_numpy(obs=obs, array=data)\n", 154 | "nll = ExtendedUnbinnedNLL(model=[tot_model], data=[data_])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "pycharm": { 162 | "name": "#%%\n" 163 | } 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "# Instantiate a minuit minimizer\n", 168 | "minimizer = Minuit()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "pycharm": { 176 | "name": "#%%\n" 177 | } 178 | }, 179 | "outputs": [], 180 | 
"source": [ 181 | "# minimisation of the loss function\n", 182 | "minimum = minimizer.minimize(loss=nll)\n", 183 | "minimum.hesse()" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "pycharm": { 191 | "name": "#%%\n" 192 | } 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "nbins = 80\n", 197 | "pltdist(data, nbins, bounds)\n", 198 | "plotfitresult(tot_model, bounds, nbins)\n", 199 | "plt.xlabel(\"m [GeV/c$^2$]\")\n", 200 | "plt.ylabel(\"number of events\")" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": { 206 | "pycharm": { 207 | "name": "#%% md\n" 208 | } 209 | }, 210 | "source": [ 211 | "### Discovery test\n", 212 | "\n", 213 | "In a discovery test the null hypothesis is the absence of signal, .i.e Nsig = 0." 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "pycharm": { 221 | "name": "#%%\n" 222 | } 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "# instantation of the calculator\n", 227 | "calculator = AsymptoticCalculator(nll, minimizer)\n", 228 | "calculator.bestfit = minimum # optionnal" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "pycharm": { 236 | "name": "#%%\n" 237 | } 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "# parameter of interest of the null hypothesis\n", 242 | "poinull = POI(Nsig, 0)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "pycharm": { 250 | "name": "#%%\n" 251 | } 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "# instantation of the discovery test\n", 256 | "discovery_test = Discovery(calculator, poinull)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": { 263 | "pycharm": { 264 | "name": "#%%\n" 265 | } 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "pnull, significance = discovery_test.result()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "pycharm": { 277 | "name": "#%%\n" 278 | } 279 | }, 280 | "outputs": [], 281 | "source": [] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.7.7" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 4 305 | } 306 | -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-1.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-1.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-2.0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-2.0.npz -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-2.0.yml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-2.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-3.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-3.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-4.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-4.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-5.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-5.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_-6.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_-6.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_0.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_0.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_1.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_1.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_2.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_2.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_3.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_3.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_4.0.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_4.0.yml -------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_5.0.yml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_5.0.yml
-------------------------------------------------------------------------------- /notebooks/hypotests/toys/FC_toys_6.0.yml: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/FC_toys_6.0.yml
-------------------------------------------------------------------------------- /notebooks/hypotests/toys/ci_freq_zfit_toys.yml: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/ci_freq_zfit_toys.yml
-------------------------------------------------------------------------------- /notebooks/hypotests/toys/discovery_freq_zfit_toys.yml: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/discovery_freq_zfit_toys.yml
-------------------------------------------------------------------------------- /notebooks/hypotests/toys/upperlimit_freq_zfit_toys.yml: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/hypotests/toys/upperlimit_freq_zfit_toys.yml
-------------------------------------------------------------------------------- /notebooks/hypotests/utils.py: --------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 |
6 |
7 | def pltdist(data, bins, bounds):
8 | y, bin_edges = np.histogram(data, bins=bins, range=bounds)
9 | bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
10 | yerr = np.sqrt(y)
11 | plt.errorbar(bin_centers, y, yerr=yerr, fmt=".", color="royalblue")
12 |
13 |
14 | def plotfitresult(model, bounds, nbins):
15 | x = np.linspace(*bounds, num=1000)
16 | if model.is_extended:
17 | pdf = model.ext_pdf(x, norm_range=bounds) * ((bounds[1] - bounds[0]) / nbins)
18 | else:
19 | pdf = model.pdf(x, norm_range=bounds)
20 | plt.plot(x, pdf, "-r", label="fit result")
21 |
22 |
23 | def plotlimit(ul, alpha=0.05, CLs=True, ax=None):
24 | """
25 | plot pvalue scan for different values of a parameter of interest (observed, expected and +/- sigma bands)
26 |
27 | Args:
28 | ul: UpperLimit instance
29 | alpha (float, default=0.05): significance level
30 | CLs (bool, optional): if `True` uses pvalues as $$p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}$$
31 | else as $$p_{clsb} = p_{null}$$
32 | ax (matplotlib axis, optional)
33 |
34 | """
35 | if ax is None:
36 | ax = plt.gca()
37 |
38 | poivalues = ul.poinull.values
39 | pvalues = ul.pvalues(CLs=CLs)
40 |
41 | if CLs:
42 | cls_clr = "r"
43 | clsb_clr = "b"
44 | else:
45 | cls_clr = "b"
46 | clsb_clr = "r"
47 |
48 | color_1sigma = "mediumseagreen"
49 | color_2sigma = "gold"
50 |
51 | ax.plot(
52 | poivalues,
53 | pvalues["cls"],
54 | label="Observed CL$_{s}$",
55 | marker=".",
56 | color="k",
57 | markerfacecolor=cls_clr,
58 | markeredgecolor=cls_clr,
59 | linewidth=2.0,
60 | ms=11,
61 | )
62 |
63 | ax.plot(
64 | poivalues,
65 | pvalues["clsb"],
66 | label="Observed CL$_{s+b}$",
67 | marker=".",
68 | color="k",
69 |
markerfacecolor=clsb_clr, 70 | markeredgecolor=clsb_clr, 71 | linewidth=2.0, 72 | ms=11, 73 | linestyle=":", 74 | ) 75 | 76 | ax.plot( 77 | poivalues, 78 | pvalues["clb"], 79 | label="Observed CL$_{b}$", 80 | marker=".", 81 | color="k", 82 | markerfacecolor="k", 83 | markeredgecolor="k", 84 | linewidth=2.0, 85 | ms=11, 86 | ) 87 | 88 | ax.plot( 89 | poivalues, 90 | pvalues["expected"], 91 | label="Expected CL$_{s}-$Median", 92 | color="k", 93 | linestyle="--", 94 | linewidth=1.5, 95 | ms=10, 96 | ) 97 | 98 | ax.plot( 99 | [poivalues[0], poivalues[-1]], 100 | [alpha, alpha], 101 | color="r", 102 | linestyle="-", 103 | linewidth=1.5, 104 | ) 105 | 106 | ax.fill_between( 107 | poivalues, 108 | pvalues["expected"], 109 | pvalues["expected_p1"], 110 | facecolor=color_1sigma, 111 | label="Expected CL$_{s} \\pm 1 \\sigma$", 112 | alpha=0.8, 113 | ) 114 | 115 | ax.fill_between( 116 | poivalues, 117 | pvalues["expected"], 118 | pvalues["expected_m1"], 119 | facecolor=color_1sigma, 120 | alpha=0.8, 121 | ) 122 | 123 | ax.fill_between( 124 | poivalues, 125 | pvalues["expected_p1"], 126 | pvalues["expected_p2"], 127 | facecolor=color_2sigma, 128 | label="Expected CL$_{s} \\pm 2 \\sigma$", 129 | alpha=0.8, 130 | ) 131 | 132 | ax.fill_between( 133 | poivalues, 134 | pvalues["expected_m1"], 135 | pvalues["expected_m2"], 136 | facecolor=color_2sigma, 137 | alpha=0.8, 138 | ) 139 | 140 | ax.set_ylim(-0.01, 1.1) 141 | ax.set_ylabel("p-value") 142 | ax.set_xlabel("parameter of interest") 143 | ax.legend(loc="best", fontsize=14) 144 | 145 | return ax 146 | 147 | 148 | def one_minus_cl_plot(x, pvalues, alpha=None, ax=None): 149 | if alpha is None: 150 | alpha = [0.32] 151 | if ax is None: 152 | ax = plt.gca() 153 | 154 | ax.plot(x, pvalues, ".--") 155 | for a in alpha: 156 | ax.axhline(a, color="red", label="$\\alpha = " + str(a) + "$") 157 | ax.set_ylabel("1-CL") 158 | 159 | return ax 160 | -------------------------------------------------------------------------------- /notebooks/modeling/bayesian_blocks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Histogramming With Bayesian Blocks\n", 8 | "\n", 9 | "The Bayesian Block algorithm, originally developed for applications in astronomy, can be used to improve the binning of histograms in high energy physics (HEP). The visual improvement can be dramatic, and more importantly, this algorithm produces histograms that accurately represent the underlying distribution while being robust to statistical fluctuations. The key concept behind Bayesian Blocks is that variable-width bins are determined for a given distribution, such that the data within each bin is consistent with a uniform distribution across the range of that bin. This reduces the appearance of statistical fluctuations while still capturing the form of the underlying distribution.\n", 10 | "\n", 11 | "For more information on the algorithm and implementation, see:\n", 12 | "\n", 13 | "1. [Bayesian Blocks Algorithm, Scargle et al.](https://arxiv.org/pdf/1207.5578.pdf)\n", 14 | "2. [Bayesian Blocks in HEP, Pollack et al.](https://arxiv.org/pdf/1708.00810.pdf)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Using Bayesian Blocks Binning\n", 22 | "\n", 23 | "Bayesian Blocks binning options are available as part of `hepstats/modeling` package. 
Below is a simple example:" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from __future__ import annotations\n", 33 | "\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "import numpy as np\n", 36 | "\n", 37 | "from hepstats.modeling import bayesian_blocks\n", 38 | "\n", 39 | "%matplotlib inline" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "plt.rcParams[\"figure.figsize\"] = (8, 6)\n", 49 | "plt.rcParams[\"font.size\"] = 16" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "np.random.seed(1001)\n", 59 | "data = np.random.laplace(size=10000)\n", 60 | "blocks = bayesian_blocks(data)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "plt.hist(data, bins=1000, label=\"Fine Binning\", density=True, alpha=0.6)\n", 70 | "plt.hist(\n", 71 | " data,\n", 72 | " bins=blocks,\n", 73 | " label=\"Bayesian Blocks\",\n", 74 | " histtype=\"step\",\n", 75 | " density=True,\n", 76 | " linewidth=2,\n", 77 | ")\n", 78 | "plt.legend(loc=2);" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "For appropriate visualization, one should typically also use `density=True`. This divides each bin by its width, which is important for capturing the overall shape of the underlying distribution. Without using this argument, the histogram will look jagged (a consequence of using variable-width binning)." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "fig, axes = plt.subplots(ncols=2, figsize=(12, 5))\n", 95 | "axes[0].hist(data, bins=blocks, label=\"Bayesian Blocks\", histtype=\"step\", linewidth=2)\n", 96 | "axes[0].set_title(\"Unscaled\")\n", 97 | "axes[1].hist(\n", 98 | " data,\n", 99 | " bins=blocks,\n", 100 | " label=\"Bayesian Blocks\",\n", 101 | " histtype=\"step\",\n", 102 | " density=True,\n", 103 | " linewidth=2,\n", 104 | ")\n", 105 | "axes[1].set_title(\"Scaled by bin width\")\n", 106 | "plt.show()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "The user has control over an additional parameter to determine how many bins are generated by the Bayesian Blocks algorithm. The `p0` parameter (valid between 0 and 1) determines how strictly the algorithm determines bin edges. A small `p0` will be more robust to statistical fluctuations in the data, but could be overly coarse. Conversely, a large `p0` will result in a finer binning, but could isolate spurious fluctuations in the data." 
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))\n",
123 | "\n",
124 | "axes[0][0].hist(\n",
125 | " data,\n",
126 | " bins=bayesian_blocks(data, p0=1e-50),\n",
127 | " label=\"Bayesian Blocks\",\n",
128 | " histtype=\"step\",\n",
129 | " density=True,\n",
130 | " linewidth=2,\n",
131 | ")\n",
132 | "axes[0][0].set_title(\"p0=1e-50\")\n",
133 | "\n",
134 | "axes[0][1].hist(\n",
135 | " data,\n",
136 | " bins=bayesian_blocks(data, p0=1e-5),\n",
137 | " label=\"Bayesian Blocks\",\n",
138 | " histtype=\"step\",\n",
139 | " density=True,\n",
140 | " linewidth=2,\n",
141 | ")\n",
142 | "axes[0][1].set_title(\"p0=1e-5\")\n",
143 | "\n",
144 | "axes[1][0].hist(\n",
145 | " data,\n",
146 | " bins=bayesian_blocks(data, p0=1e-3),\n",
147 | " label=\"Bayesian Blocks\",\n",
148 | " histtype=\"step\",\n",
149 | " density=True,\n",
150 | " linewidth=2,\n",
151 | ")\n",
152 | "axes[1][0].set_title(\"p0=1e-3\")\n",
153 | "\n",
154 | "axes[1][1].hist(\n",
155 | " data,\n",
156 | " bins=bayesian_blocks(data, p0=0.5),\n",
157 | " label=\"Bayesian Blocks\",\n",
158 | " histtype=\"step\",\n",
159 | " density=True,\n",
160 | " linewidth=2,\n",
161 | ")\n",
162 | "axes[1][1].set_title(\"p0=0.5\")\n",
163 | "\n",
164 | "fig.suptitle(\"Varying the p0 parameter\")\n",
165 | "plt.show()"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "The optimal value of `p0` differs depending on the number of data points and the nature of the underlying distribution. It typically must be determined empirically, but in general the value of `p0` should be inversely proportional to the size of the input dataset."
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "## Comparison with Other Binning Methods\n",
180 | "\n",
181 | "Because Bayesian Blocks determines variable-width binning, the algorithm can provide a more suitable set of bins for a given distribution, especially if that distribution varies greatly in density.
Below are some examples of Bayesian Blocks and other popular binning methods.\n", 182 | "\n", 183 | "**A rapidly falling distribution:**\n", 184 | "![Jet Pt Spectrum](./hists_jPT.png)\n", 185 | "\n", 186 | "**An asymmetric, peaked distribution:**\n", 187 | "![Muon Pt Spectrum](./hists_MuPT.png)\n", 188 | "\n", 189 | "**Two peaks of different widths:**\n", 190 | "![Double Laplacians](./hists_2LP.png)\n", 191 | "\n", 192 | "\n", 193 | "*Brian Pollack, 2018*" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.7.6" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 4 225 | } 226 | -------------------------------------------------------------------------------- /notebooks/modeling/bayesian_blocks_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/bayesian_blocks_example.png -------------------------------------------------------------------------------- /notebooks/modeling/hists_2LP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/hists_2LP.png -------------------------------------------------------------------------------- /notebooks/modeling/hists_MuPT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/hists_MuPT.png -------------------------------------------------------------------------------- /notebooks/modeling/hists_jPT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/notebooks/modeling/hists_jPT.png -------------------------------------------------------------------------------- /notebooks/splots/splot_example_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generate and visualize toy data sets" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from __future__ import annotations\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "import zfit\n", 20 | "from matplotlib import pyplot as plt\n", 21 | "from scipy.stats import expon, norm\n", 22 | "\n", 23 | "zfit.settings.set_seed(10) # fix seed\n", 24 | "\n", 25 | "bounds = (0, 10)\n", 26 | "obs = zfit.Space(\"x\", limits=bounds)\n", 27 | "\n", 28 | "# true parameters for signal and background\n", 29 | "truth_n_sig = 1000\n", 30 | "Nsig = zfit.Parameter(\"Nsig\", truth_n_sig)\n", 31 | "mean_sig = zfit.Parameter(\"mean_sig\", 5.0)\n", 32 | "sigma_sig = zfit.Parameter(\"sigma_sig\", 0.5)\n", 33 | 
"sig_pdf = zfit.pdf.Gauss(obs=obs, mu=mean_sig, sigma=sigma_sig).create_extended(Nsig)\n", 34 | "\n", 35 | "truth_n_bkg = 10000\n", 36 | "Nbkg = zfit.Parameter(\"Nbkg\", truth_n_bkg)\n", 37 | "lambda_bkg = zfit.Parameter(\"lambda_bkg\", -1 / 4.0)\n", 38 | "bkg_pdf = zfit.pdf.Exponential(obs=obs, lambda_=lambda_bkg).create_extended(Nbkg)\n", 39 | "\n", 40 | "truth_sig_t = (1.0,)\n", 41 | "truth_bkg_t = (2.5, 2.0)\n", 42 | "\n", 43 | "# make a data set\n", 44 | "m_sig = sig_pdf.sample(truth_n_sig).numpy()\n", 45 | "m_bkg = bkg_pdf.sample(truth_n_bkg).numpy()\n", 46 | "m = np.concatenate([m_sig, m_bkg]).flatten()\n", 47 | "\n", 48 | "# fill t variables\n", 49 | "t_sig = expon(0, *truth_sig_t).rvs(truth_n_sig)\n", 50 | "t_bkg = norm(*truth_bkg_t).rvs(truth_n_bkg)\n", 51 | "t = np.concatenate([t_sig, t_bkg])\n", 52 | "\n", 53 | "# cut out range (0, 10) in m, t\n", 54 | "ma = (bounds[0] < t) & (t < bounds[1])\n", 55 | "m = m[ma]\n", 56 | "t = t[ma]\n", 57 | "\n", 58 | "fig, ax = plt.subplots(1, 3, figsize=(16, 4.5))\n", 59 | "ax[0].hist2d(m, t, bins=(50, 50))\n", 60 | "ax[0].set_xlabel(\"m\")\n", 61 | "ax[0].set_ylabel(\"t\")\n", 62 | "ax[1].hist([m_bkg, m_sig], bins=50, stacked=True, label=(\"background\", \"signal\"))\n", 63 | "ax[1].set_xlabel(\"m\")\n", 64 | "ax[1].legend()\n", 65 | "ax[2].hist(\n", 66 | " (t[truth_n_sig:], t[:truth_n_sig]),\n", 67 | " bins=50,\n", 68 | " stacked=True,\n", 69 | " label=(\"background\", \"signal\"),\n", 70 | ")\n", 71 | "ax[2].set_xlabel(\"t\")\n", 72 | "ax[2].legend()\n", 73 | "\n", 74 | "sorter = np.argsort(m)\n", 75 | "m = m[sorter]\n", 76 | "t = t[sorter]" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Fit toy data set" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "from zfit.loss import ExtendedUnbinnedNLL\n", 93 | "from zfit.minimize import Minuit\n", 94 | "\n", 95 | "tot_pdf = zfit.pdf.SumPDF([sig_pdf, bkg_pdf])\n", 96 | "loss = ExtendedUnbinnedNLL(model=tot_pdf, data=zfit.data.Data.from_numpy(obs=obs, array=m))\n", 97 | "\n", 98 | "minimizer = Minuit()\n", 99 | "\n", 100 | "minimum = minimizer.minimize(loss=loss)\n", 101 | "minimum.hesse()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Visualize fitted model" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "from utils import plotfitresult, pltdist\n", 118 | "\n", 119 | "fig = plt.figure(figsize=(8, 5.5))\n", 120 | "\n", 121 | "nbins = 80\n", 122 | "pltdist(m, nbins, bounds)\n", 123 | "plotfitresult(tot_pdf, bounds, nbins, label=\"total model\", color=\"crimson\")\n", 124 | "plotfitresult(bkg_pdf, bounds, nbins, label=\"background\", color=\"forestgreen\")\n", 125 | "plotfitresult(sig_pdf, bounds, nbins, label=\"signal\", color=\"orange\")\n", 126 | "plt.xlabel(\"m\")\n", 127 | "plt.ylabel(\"number of events\")\n", 128 | "plt.legend();" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Compute sWeights" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "from hepstats.splot import compute_sweights\n", 145 | "\n", 146 | "weights = compute_sweights(tot_pdf, m)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | 
"metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "fig, ax = plt.subplots(1, 2, figsize=(16, 4.5))\n", 156 | "plt.sca(ax[0])\n", 157 | "nbins = 40\n", 158 | "\n", 159 | "plt.plot(m, weights[Nsig], label=\"$w_\\\\mathrm{sig}$\")\n", 160 | "plt.plot(m, weights[Nbkg], label=\"$w_\\\\mathrm{bkg}$\")\n", 161 | "plt.plot(m, weights[Nsig] + weights[Nbkg], \"-k\")\n", 162 | "plt.axhline(0, color=\"0.5\")\n", 163 | "plt.legend()\n", 164 | "plt.sca(ax[1])\n", 165 | "\n", 166 | "plt.hist(t, bins=nbins, range=bounds, weights=weights[Nsig], label=\"weighted histogram\")\n", 167 | "plt.hist(t_sig, bins=nbins, range=bounds, histtype=\"step\", label=\"true histogram\")\n", 168 | "t1 = np.linspace(*bounds, nbins)\n", 169 | "tcdf = expon(0, 1).pdf(t1) * np.sum(weights[Nsig]) * (bounds[1] - bounds[0]) / nbins\n", 170 | "plt.plot(t1, tcdf, label=\"model with $\\\\lambda_\\\\mathrm{sig}$\")\n", 171 | "plt.xlabel(\"t\")\n", 172 | "plt.legend();" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "np.average(t, weights=weights[Nsig])" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "np.average(t_sig)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.7.6" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 4 222 | } 223 | -------------------------------------------------------------------------------- /notebooks/splots/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import zfit 6 | 7 | 8 | def pltdist(data, bins, bounds, weights=None, label=None): 9 | y, bin_edges = np.histogram(data, bins=bins, range=bounds, weights=weights) 10 | bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1]) 11 | yerr = np.sqrt(y) 12 | plt.errorbar(bin_centers, y, yerr=yerr, fmt=".", color="royalblue", label=label) 13 | 14 | 15 | def plotfitresult(model, bounds, nbins, **kwargs): 16 | x = np.linspace(*bounds, num=1000) 17 | pdf = zfit.run(model.pdf(x, norm_range=bounds) * model.get_yield()) 18 | plt.plot(x, ((bounds[1] - bounds[0]) / nbins) * (pdf), **kwargs) 19 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "hepstats" 7 | description = "HEP statistics tools and utilities" 8 | authors = [{ name = "Matthieu Marinangeli", email = "matthieu.marinangeli@gmail.com" }] 9 | maintainers = [{ name = "Scikit-HEP", email = "scikit-hep-admins@googlegroups.com" }] 10 | license = { text = "BSD 3-Clause License" } 11 | classifiers = [ 12 | "Development Status :: 4 - Beta", 13 | "Intended Audience :: 
Developers", 14 | "Intended Audience :: Information Technology", 15 | "Intended Audience :: Science/Research", 16 | "License :: OSI Approved :: BSD License", 17 | "Operating System :: MacOS", 18 | "Operating System :: Microsoft :: Windows", 19 | "Operating System :: POSIX", 20 | "Operating System :: Unix", 21 | "Programming Language :: C++", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3 :: Only", 25 | "Programming Language :: Python :: 3.9", 26 | "Programming Language :: Python :: 3.10", 27 | "Programming Language :: Python :: 3.11", 28 | "Programming Language :: Python :: 3.12", 29 | "Programming Language :: Python :: 3.13", 30 | "Topic :: Scientific/Engineering", 31 | "Topic :: Scientific/Engineering :: Information Analysis", 32 | "Topic :: Scientific/Engineering :: Mathematics", 33 | "Topic :: Scientific/Engineering :: Physics", 34 | "Topic :: Software Development", 35 | "Topic :: Utilities", 36 | ] 37 | urls = { Homepage = "https://github.com/scikit-hep/hepstats" } 38 | requires-python = ">=3.9" 39 | dependencies = [ 40 | "asdf", 41 | "numpy", 42 | "pandas", 43 | "scipy", 44 | "tqdm", 45 | "uhi", 46 | ] 47 | dynamic = ["version"] 48 | 49 | [project.readme] 50 | file = "README.md" 51 | content-type = "text/markdown" 52 | 53 | [project.optional-dependencies] 54 | dev = [ 55 | "hepstats[docs]", 56 | "hepstats[test]", 57 | "pre-commit", 58 | ] 59 | docs = [ 60 | "matplotlib", 61 | "pydata-sphinx-theme", 62 | "sphinx>=3.1.2", 63 | "sphinx-autodoc-typehints", 64 | "sphinx-copybutton", 65 | "sphinxcontrib-bibtex>=2.0.0", 66 | ] 67 | doc = ["hepstats[docs]"] # alias 68 | test = [ 69 | "pytest", 70 | "pytest-cov", 71 | "pytest-runner", 72 | "zfit>=0.20.0;python_version<'3.13'", 73 | # 'hepstats[zfit];python_version<"3.13"', # not working, why? 
74 | ] 75 | zfit = ["zfit>=0.20.0"] 76 | 77 | 78 | 79 | [tool.pytest.ini_options] 80 | junit_family = "xunit2" 81 | testpaths = ["tests"] 82 | 83 | [tool.check-manifest] 84 | ignore = ["src/hepstats/_version.py"] 85 | 86 | [tool.build_sphinx] 87 | project = "hepstats" 88 | source-dir = "docs" 89 | build-dir = "docs/_build" 90 | all-files = "1" 91 | warning-is-error = "0" 92 | 93 | 94 | [tool.hatch] 95 | version.source = "vcs" 96 | build.hooks.vcs.version-file = "src/hepstats/_version.py" 97 | 98 | [tool.ruff] 99 | #src = ["src"] 100 | line-length = 120 101 | exclude = [ 102 | ".tox/*", 103 | "*/test*", 104 | "*/__init__.py", 105 | "*/_version.py", 106 | ] 107 | [tool.ruff.lint] 108 | extend-select = [ 109 | "B", # flake8-bugbear 110 | "I", # isort 111 | "ARG", # flake8-unused-arguments 112 | "C4", # flake8-comprehensions 113 | "EM", # flake8-errmsg 114 | "ICN", # flake8-import-conventions 115 | "G", # flake8-logging-format 116 | "PGH", # pygrep-hooks 117 | "PIE", # flake8-pie 118 | "PL", # pylint 119 | "PT", # flake8-pytest-style 120 | "PTH", # flake8-use-pathlib 121 | "RET", # flake8-return 122 | "RUF", # Ruff-specific 123 | "SIM", # flake8-simplify 124 | "T20", # flake8-print 125 | "UP", # pyupgrade 126 | "YTT", # flake8-2020 127 | "EXE", # flake8-executable 128 | "NPY", # NumPy specific rules 129 | "PD", # pandas-vet 130 | ] 131 | ignore = [ 132 | "UP007", # type annotation upgrade, breaks pydantic for Python 3.9 (remove once above) 133 | "PLR09", # Too many <...> 134 | "PLR2004", # Magic value used in comparison 135 | "ISC001", # Conflicts with formatter 136 | "RET505", # This is sometimes wanted, protects against accidental indentation 137 | "PD901", # "avoid using `df[...].values`" -> no, this is a very good name if there is only one df 138 | "PD011", # "replace `df[...].values` with `df[...].to_numpy()`" -> not yet, it's not deprecated. 139 | # Prefer to have a single way to access the data if we don't care about whether it's a numpy array or not. 140 | "PLW0603", # updating global variables with a function is bad, but we use it for 141 | "PLW2901", # "for loop overwritten by assignment" -> we use this to update the loop variable 142 | "PD013", # "melt over stack": df function, but triggers on tensors 143 | "NPY002", # "Use rnd generator in numpy" -> we use np.random for some legacy stuff but do use the new one where we can 144 | "T201", # "print used" -> we use print for displaying information in verbose mode 145 | 146 | ] 147 | isort.required-imports = ["from __future__ import annotations"] 148 | 149 | [tool.ruff.lint.per-file-ignores] 150 | "tests/**" = ["T20"] 151 | "noxfile.py" = ["T20"] 152 | -------------------------------------------------------------------------------- /src/hepstats/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import version as __version__ 2 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/README.md: -------------------------------------------------------------------------------- 1 | # Hypotests 2 | 3 | This submodule provides tools to do hypothesis tests such as discovery tests and computations of upper limits or confidence intervals. hepstats needs a fitting backend, such as [zfit](https://github.com/zfit/zfit), to perform the computations. Any fitting library can be used if its API is compatible with hepstats (see [api checks](https://github.com/scikit-hep/hepstats/blob/main/src/hepstats/utils/fit/api_check.py)).
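In practice the backend needs to provide the basic fit objects (model/pdf, parameters, data, loss, minimizer) with the attributes that hepstats expects; the `is_valid_*` helper functions in `hepstats.utils.fit.api_check` can be used to verify each of them.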
4 | 5 | We give here a simple example of a discovery test, using the [zfit](https://github.com/zfit/zfit) 6 | fitting package as backend, of a Gaussian signal with known mean and sigma over an exponential background. 7 | 8 | ```python 9 | >>> import numpy as np 10 | >>> import zfit 11 | >>> from zfit.loss import ExtendedUnbinnedNLL 12 | >>> from zfit.minimize import Minuit 13 | 14 | >>> bounds = (0.1, 3.0) 15 | >>> obs = zfit.Space('x', limits=bounds) 16 | 17 | >>> bkg = np.random.exponential(0.5, 300) 18 | >>> peak = np.random.normal(1.2, 0.1, 25) 19 | >>> data = np.concatenate((bkg, peak)) 20 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 21 | >>> N = data.size 22 | >>> data = zfit.Data.from_numpy(obs=obs, array=data) 23 | 24 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 25 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N) 26 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1) 27 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) 28 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 29 | >>> total = zfit.pdf.SumPDF([signal, background]) 30 | >>> loss = ExtendedUnbinnedNLL(model=total, data=data) 31 | 32 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator 33 | >>> from hepstats.hypotests import Discovery 34 | >>> from hepstats.hypotests.parameters import POI 35 | 36 | >>> calculator = AsymptoticCalculator(input=loss, minimizer=Minuit()) 37 | >>> poinull = POI(Nsig, 0) 38 | >>> discovery_test = Discovery(calculator, poinull) 39 | >>> discovery_test.result() 40 | 41 | p_value for the Null hypothesis = 0.0007571045424956679 42 | Significance (in units of sigma) = 3.1719464825102244 43 | ``` 44 | 45 | The discovery test prints out the p-value for the null hypothesis and the corresponding significance in units of standard deviations. 46 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license, see LICENSE. 2 | """ 3 | Module for hypothesis tests, upper limits and confidence intervals calculations. 4 | """ 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Import statements 8 | # ----------------------------------------------------------------------------- 9 | from .core import Discovery, UpperLimit, ConfidenceInterval 10 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/calculators/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module defining the base class for the calculators for statistical tests based on the likelihood ratio. 3 | 4 | Acronyms used in the code: 5 | * nll = negative log-likelihood, which is the value of the `loss` attribute of a calculator; 6 | * obs = observed, i.e. measured on provided data.
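* alt = alternative, i.e. computed under the alternative hypothesis (see e.g. `poialt`, `palt` in the hypotests code).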
7 | 8 | """ 9 | 10 | from .asymptotic_calculator import AsymptoticCalculator 11 | from .frequentist_calculator import FrequentistCalculator 12 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .confidence_interval import ConfidenceInterval 2 | from .discovery import Discovery 3 | from .upperlimit import UpperLimit 4 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/core/basetest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module defining the base class for hypothesis tests. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from ..calculators.basecalculator import BaseCalculator 8 | from ..parameters import POI, POIarray 9 | 10 | 11 | class BaseTest: 12 | def __init__( 13 | self, 14 | calculator: BaseCalculator, 15 | poinull: POI | POIarray, 16 | poialt: POI | POIarray | None = None, 17 | ): 18 | """Base class for hypothesis tests. 19 | 20 | Args: 21 | calculator: calculator to use for computing the pvalues 22 | poinull: parameters of interest for the null hypothesis 23 | poialt: parameters of interest for the alternative hypothesis 24 | 25 | Raises: 26 | TypeError: if calculator is not a BaseCalculator instance 27 | """ 28 | 29 | if not isinstance(calculator, BaseCalculator): 30 | msg = f"Invalid type, {type(calculator)}, for calculator. A BaseCalculator instance is required." 31 | raise TypeError(msg) 32 | self._calculator = calculator 33 | 34 | self.calculator.check_pois(poinull) 35 | if poialt: 36 | self.calculator.check_pois(poialt) 37 | self.calculator.check_pois_compatibility(poinull, poialt) 38 | 39 | self._poinull = poinull 40 | self._poialt = poialt 41 | 42 | @property 43 | def poinull(self): 44 | """ 45 | Returns the POI for the null hypothesis. 46 | """ 47 | return self._poinull 48 | 49 | @property 50 | def poialt(self): 51 | """ 52 | Returns the POI for the alternative hypothesis. 53 | """ 54 | return self._poialt 55 | 56 | @property 57 | def calculator(self): 58 | """ 59 | Returns the calculator. 60 | """ 61 | return self._calculator 62 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/core/confidence_interval.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | 5 | import numpy as np 6 | from scipy import interpolate 7 | 8 | from ..calculators import FrequentistCalculator 9 | from ..calculators.basecalculator import BaseCalculator 10 | from ..exceptions import POIRangeError 11 | from ..parameters import POIarray 12 | from .basetest import BaseTest 13 | 14 | 15 | class ConfidenceInterval(BaseTest): 16 | """Class for confidence interval calculation.""" 17 | 18 | def __init__(self, calculator: BaseCalculator, poinull: POIarray, qtilde: bool = False): 19 | """ 20 | Args: 21 | calculator: calculator to use for computing the pvalues. 22 | poinull: parameters of interest for the null hypothesis. 23 | qtilde: if `True` use the :math:`\\widetilde{q}` test statistic else (default) 24 | use the :math:`q` test statistic.
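The :math:`\\widetilde{q}` statistic is intended for POIs that are bounded from below, such as a yield; with `qtilde=True` a lower interval bound below zero is clipped to zero (see :func:`interval`).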
25 | 26 | Example with **zfit**: 27 | >>> import numpy as np 28 | >>> import zfit 29 | >>> from zfit.loss import ExtendedUnbinnedNLL 30 | >>> from zfit.minimize import Minuit 31 | >>> 32 | >>> bounds = (0.1, 3.0) 33 | >>> obs = zfit.Space('x', limits=bounds) 34 | >>> 35 | >>> bkg = np.random.exponential(0.5, 300) 36 | >>> peak = np.random.normal(1.2, 0.1, 80) 37 | >>> data = np.concatenate((bkg, peak)) 38 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 39 | >>> N = data.size 40 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data) 41 | >>> 42 | >>> mean = zfit.Parameter("mean", 1.2, 0.5, 2.0) 43 | >>> sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2) 44 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 45 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N) 46 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1) 47 | >>> signal = Nsig * zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) 48 | >>> background = Nbkg * zfit.pdf.Exponential(obs=obs, lambda_=lambda_) 49 | >>> loss = ExtendedUnbinnedNLL(model=signal + background, data=data) 50 | >>> 51 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator 52 | >>> from hepstats.hypotests import ConfidenceInterval 53 | >>> from hepstats.hypotests.parameters import POI, POIarray 54 | >>> 55 | >>> calculator = AsymptoticCalculator(loss, Minuit()) 56 | >>> poinull = POIarray(mean, np.linspace(1.15, 1.26, 100)) 57 | >>> ci = ConfidenceInterval(calculator, poinull) 58 | >>> ci.interval() 59 | Confidence interval on mean: 60 | 1.1810371356602791 < mean < 1.2156701172321935 at 68.0% C.L. 61 | """ 62 | super().__init__(calculator, poinull) 63 | 64 | self._qtilde = qtilde 65 | 66 | @property 67 | def qtilde(self) -> bool: 68 | """ 69 | Returns True if qtilde test statistic is used, else False. 70 | """ 71 | return self._qtilde 72 | 73 | def pvalues(self) -> np.ndarray: 74 | """ 75 | Returns p-values scanned for the values of the parameters of interest 76 | in the null hypothesis. 77 | 78 | Returns: 79 | Array of p-values for the null hypothesis, one for each scanned POI value. 80 | """ 81 | 82 | poialt = None 83 | return self.calculator.pvalue(poinull=self.poinull, poialt=poialt, qtilde=self.qtilde, onesided=False)[0] 84 | 85 | def interval(self, alpha: float = 0.32, printlevel: int = 1) -> dict[str, float]: 86 | """ 87 | Returns the confidence interval on the parameter of interest. 88 | 89 | Args: 90 | alpha: significance level. 91 | printlevel: if > 0 print the result. 92 | 93 | Returns: 94 | Dict of the values for the central, upper and lower bounds on the parameter of interest. 95 | 96 | """ 97 | 98 | bands = {} 99 | poinull = self.poinull 100 | observed = self.calculator.bestfit.params[poinull.parameter]["value"] 101 | bands["observed"] = observed 102 | 103 | if min(self.pvalues()) > alpha: 104 | msg = f"The minimum of the scanned p-values is {min(self.pvalues())} which is larger than the" 105 | msg += f" confidence level alpha = {alpha}. Try to increase the range of POI values." 106 | raise POIRangeError(msg) 107 | 108 | tck = interpolate.splrep(poinull.values, self.pvalues() - alpha, s=0) 109 | roots = np.array(interpolate.sproot(tck)) 110 | 111 | msg = f" bound on the POI `{poinull.name}` cannot be interpolated." 112 | 113 | if roots.size > 2: 114 | msg_warn = "Multiple roots have been found." 115 | if isinstance(self.calculator, FrequentistCalculator): 116 | msg_warn += " Try to increase the number of toys, 'ntoysnull', to reduce fluctuations."
117 | warnings.warn(msg_warn, stacklevel=2) 118 | 119 | lower_roots = roots[roots < observed] 120 | upper_roots = roots[roots > observed] 121 | 122 | if upper_roots.size == 0: 123 | msg = "Upper" + msg + " Try to increase the maximum POI value." 124 | raise POIRangeError(msg) 125 | bands["upper"] = max(upper_roots) 126 | 127 | if lower_roots.size == 0: 128 | if self.qtilde: 129 | bands["lower"] = 0.0 130 | else: 131 | msg = "Lower" + msg + " Try to decrease the minimum POI value." 132 | raise POIRangeError(msg) 133 | else: 134 | bands["lower"] = min(lower_roots) 135 | 136 | if self.qtilde and bands["lower"] < 0.0: 137 | bands["lower"] = 0.0 138 | 139 | if printlevel > 0: 140 | msg = f"\nConfidence interval on {poinull.name}:\n" 141 | msg += f"\t{bands['lower']} < {poinull.name} < {bands['upper']} at {(1 - alpha) * 100:.1f}% C.L." 142 | print(msg) 143 | 144 | return bands 145 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/core/discovery.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from scipy.stats import norm 4 | 5 | from ..calculators.basecalculator import BaseCalculator 6 | from ..parameters import POI 7 | from .basetest import BaseTest 8 | 9 | 10 | class Discovery(BaseTest): 11 | """Class for discovery test.""" 12 | 13 | def __init__(self, calculator: BaseCalculator, poinull: POI): 14 | """ 15 | Args: 16 | calculator: calculator to use for computing the pvalues. 17 | poinull: parameter of interest for the null hypothesis. 18 | 19 | Example with **zfit**: 20 | >>> import numpy as np 21 | >>> import zfit 22 | >>> from zfit.loss import ExtendedUnbinnedNLL 23 | >>> from zfit.minimize import Minuit 24 | >>> 25 | >>> bounds = (0.1, 3.0) 26 | >>> obs = zfit.Space('x', limits=bounds) 27 | >>> 28 | >>> bkg = np.random.exponential(0.5, 300) 29 | >>> peak = np.random.normal(1.2, 0.1, 25) 30 | >>> data = np.concatenate((bkg, peak)) 31 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 32 | >>> N = data.size 33 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data) 34 | >>> 35 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 36 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N) 37 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1) 38 | >>> signal = Nsig * zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1) 39 | >>> background = Nbkg * zfit.pdf.Exponential(obs=obs, lambda_=lambda_) 40 | >>> loss = ExtendedUnbinnedNLL(model=signal + background, data=data) 41 | >>> 42 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator 43 | >>> from hepstats.hypotests import Discovery 44 | >>> from hepstats.hypotests.parameters import POI 45 | >>> 46 | >>> calculator = AsymptoticCalculator(loss, Minuit()) 47 | >>> poinull = POI(Nsig, 0) 48 | >>> discovery_test = Discovery(calculator, poinull) 49 | >>> discovery_test.result() 50 | p_value for the Null hypothesis = 0.0007571045424956679 51 | Significance (in units of sigma) = 3.1719464825102244 52 | """ 53 | 54 | super().__init__(calculator, poinull) 55 | 56 | def result(self, printlevel: int = 1) -> tuple[float, float]: 57 | """Return the result of the discovery hypothesis test. 58 | 59 | The result can be (0.0, inf), which means that the numerical precision is not high enough or that the 60 | number of toys is not large enough. For example if all toys are rejected, the result is (0.0, inf). 61 | 62 | Args: 63 | printlevel: if > 0 print the result. 64 | 65 | Returns: 66 | Tuple of the p-value for the null hypothesis and the significance.
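The significance is the one-sided Gaussian quantile of the p-value, :math:`Z = \\Phi^{-1}(1 - p_{null})`.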
66 | """ 67 | pnull, _ = self.calculator.pvalue(self.poinull, onesideddiscovery=True) 68 | pnull = pnull[0] 69 | 70 | significance = norm.ppf(1.0 - pnull) 71 | 72 | if printlevel > 0: 73 | pass 74 | 75 | return pnull, significance 76 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/core/upperlimit.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import numpy as np 4 | from scipy import interpolate 5 | 6 | from ..calculators.basecalculator import BaseCalculator 7 | from ..exceptions import POIRangeError 8 | from ..parameters import POI, POIarray 9 | from .basetest import BaseTest 10 | 11 | 12 | class UpperLimit(BaseTest): 13 | """Class for upper limit calculation.""" 14 | 15 | def __init__( 16 | self, 17 | calculator: BaseCalculator, 18 | poinull: POI | POIarray, 19 | poialt: POI, 20 | qtilde: bool = False, 21 | ): 22 | """ 23 | Args: 24 | calculator: calculator to use for computing the pvalues. 25 | poinull: parameters of interest for the null hypothesis. 26 | poialt: parameters of interest for the alternative hypothesis. 27 | qtilde: if `True` use the :math:`\\widetilde{q}` test statistics else (default) use the :math:`q` 28 | test statistic. 29 | 30 | Example with **zfit**: 31 | >>> import numpy as np 32 | >>> import zfit 33 | >>> from zfit.loss import ExtendedUnbinnedNLL 34 | >>> from zfit.minimize import Minuit 35 | >>> 36 | >>> bounds = (0.1, 3.0) 37 | >>> zfit.Space('x', limits=bounds) 38 | >>> 39 | >>> bkg = np.random.exponential(0.5, 300) 40 | >>> peak = np.random.normal(1.2, 0.1, 10) 41 | >>> data = np.concatenate((bkg, peak)) 42 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 43 | >>> N = data.size 44 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data) 45 | >>> 46 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 47 | >>> Nsig = zfit.Parameter("Ns", 20., -20., N) 48 | >>> Nbkg = zfit.Parameter("Nbkg", N, 0., N*1.1) 49 | >>> signal = Nsig * zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1) 50 | >>> background = Nbkg * zfit.pdf.Exponential(obs=obs, lambda_=lambda_) 51 | >>> loss = ExtendedUnbinnedNLL(model=signal + background, data=data) 52 | >>> 53 | >>> from hepstats.hypotests.calculators import AsymptoticCalculator 54 | >>> from hepstats.hypotests import UpperLimit 55 | >>> from hepstats.hypotests.parameters import POI, POIarray 56 | >>> 57 | >>> calculator = AsymptoticCalculator(loss, Minuit()) 58 | >>> poinull = POIarray(Nsig, np.linspace(0.0, 25, 20)) 59 | >>> poialt = POI(Nsig, 0) 60 | >>> ul = UpperLimit(calculator, poinull, poialt) 61 | >>> ul.upperlimit(alpha=0.05, CLs=True) 62 | Observed upper limit: Nsig = 15.725784747406346 63 | Expected upper limit: Nsig = 11.927442041887158 64 | Expected upper limit +1 sigma: Nsig = 16.596396280677116 65 | Expected upper limit -1 sigma: Nsig = 8.592750403611896 66 | Expected upper limit +2 sigma: Nsig = 22.24864429383046 67 | Expected upper limit -2 sigma: Nsig = 6.400549971360598 68 | """ 69 | 70 | super().__init__(calculator, poinull, poialt) 71 | 72 | self._qtilde = qtilde 73 | 74 | @property 75 | def qtilde(self) -> bool: 76 | """ 77 | Returns True if qtilde test statistic is used, else False. 78 | """ 79 | return self._qtilde 80 | 81 | def pvalues(self, CLs: int = True) -> dict[str, np.ndarray]: 82 | """ 83 | Returns p-values scanned for the values of the parameters of interest 84 | in the null hypothesis. 
85 | 86 | Args: 87 | CLs: if `True` uses pvalues as :math:`p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}` 88 | else as :math:`p_{clsb} = p_{null}`. 89 | 90 | Returns: 91 | Dictionary of p-values for CLsb, CLs, expected (+/- sigma bands). 92 | """ 93 | pvalue_func = self.calculator.pvalue 94 | 95 | pnull, palt = pvalue_func(poinull=self.poinull, poialt=self.poialt, qtilde=self.qtilde, onesided=True) 96 | 97 | pvalues = {"clsb": pnull, "clb": palt} 98 | 99 | sigmas = [0.0, 1.0, 2.0, -1.0, -2.0] 100 | 101 | exppvalue_func = self.calculator.expected_pvalue 102 | 103 | result = exppvalue_func( 104 | poinull=self.poinull, 105 | poialt=self.poialt, 106 | nsigma=sigmas, 107 | CLs=CLs, 108 | qtilde=self.qtilde, 109 | onesided=True, 110 | ) 111 | 112 | pvalues["expected"] = result[0] 113 | pvalues["expected_p1"] = result[1] 114 | pvalues["expected_p2"] = result[2] 115 | pvalues["expected_m1"] = result[3] 116 | pvalues["expected_m2"] = result[4] 117 | 118 | pvalues["cls"] = pnull / palt 119 | 120 | return pvalues 121 | 122 | def upperlimit(self, alpha: float = 0.05, CLs: bool = True, printlevel: int = 1) -> dict[str, float]: 123 | """ 124 | Returns the upper limit of the parameter of interest. 125 | 126 | Args: 127 | alpha: significance level. 128 | CLs: if `True` uses pvalues as :math:`p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}` else as 129 | :math:`p_{clsb} = p_{null}`. 130 | printlevel: if > 0 print the result. 131 | 132 | Returns: 133 | Dictionary of upper limits for observed, expected (+/- sigma bands). 134 | 135 | """ 136 | 137 | poinull = self.poinull 138 | 139 | # keep only the POI values above the best fit value; this filter is applied to the observed limit 140 | bestfit = self.calculator.bestfit.params[poinull.parameter]["value"] 141 | filter = poinull.values >= bestfit 142 | 143 | observed_key = "cls" if CLs else "clsb" 144 | 145 | to_interpolate = [observed_key] + [f"expected{i}" for i in ["", "_p1", "_m1", "_p2", "_m2"]] 146 | 147 | limits: dict = {} 148 | 149 | all_pvalues = self.pvalues(CLs) 150 | for k in to_interpolate: 151 | pvalues = all_pvalues[k] 152 | values = poinull.values 153 | 154 | if k == observed_key: 155 | k = "observed" 156 | pvalues = pvalues[filter] 157 | values = values[filter] 158 | 159 | if min(pvalues) > alpha: 160 | if k in ["expected", "observed"]: 161 | msg = f"The minimum of the scanned p-values is {min(pvalues)} which is larger than the" 162 | msg += f" confidence level alpha = {alpha}. Try to increase the maximum POI value."
163 | raise POIRangeError(msg) 164 | 165 | limits[k] = None 166 | continue 167 | 168 | tck = interpolate.splrep(values, pvalues - alpha, s=0) 169 | root = interpolate.sproot(tck) 170 | 171 | if len(root) > 1: 172 | root = root[0] 173 | 174 | try: 175 | limits[k] = float(root) 176 | except TypeError: 177 | limits[k] = None 178 | 179 | if printlevel > 0: 180 | print(f"\nObserved upper limit: {poinull.name} = {limits['observed']}") 181 | print(f"Expected upper limit: {poinull.name} = {limits['expected']}") 182 | for sigma in ["+1", "-1", "+2", "-2"]: 183 | key = "expected_" + sigma.replace("+", "p").replace("-", "m") 184 | print(f"Expected upper limit {sigma} sigma: {poinull.name} = {limits[key]}") 185 | 186 | return limits 187 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Specific exceptions for the `hypotests` submodule 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | 8 | class POIRangeError(Exception): 9 | """Exception class for a non-adequate POI scan range""" 10 | 11 | def __init__(self, *args, **kwargs): 12 | Exception.__init__(self, *args, **kwargs) 13 | 14 | 15 | class ParameterNotFound(Exception): 16 | """Exception class raised if a parameter with a given name is not found""" 17 | 18 | def __init__(self, *args, **kwargs): 19 | Exception.__init__(self, *args, **kwargs) 20 | 21 | 22 | class FormatError(Exception): 23 | """Exception class raised when an unexpected yaml format is read""" 24 | 25 | def __init__(self, *args, **kwargs): 26 | Exception.__init__(self, *args, **kwargs) 27 | -------------------------------------------------------------------------------- /src/hepstats/hypotests/parameters.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license, see LICENSE 2 | """ 3 | Module defining the parameter of interest classes, currently includes: 4 | 5 | * **POIarray** 6 | * **POI** 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | from collections.abc import Collection 12 | 13 | import numpy as np 14 | 15 | from ..utils.fit.api_check import is_valid_parameter 16 | 17 | 18 | class POIarray: 19 | """ 20 | Class for parameters of interest with multiple values: 21 | """ 22 | 23 | def __init__(self, parameter, values: Collection | np.ndarray): 24 | """ 25 | Args: 26 | parameter: the parameter of interest 27 | values: values of the parameter of interest 28 | 29 | Raises: 30 | ValueError: if is_valid_parameter(parameter) returns False 31 | TypeError: if values is not a list/array (Collection) 32 | 33 | Example with `zfit`: 34 | >>> Nsig = zfit.Parameter("Nsig", 0) 35 | >>> poi = POIarray(Nsig, values=np.linspace(0, 10, 10)) 36 | """ 37 | 38 | if not is_valid_parameter(parameter): 39 | msg = f"{parameter} is not a valid parameter!" 40 | raise ValueError(msg) 41 | 42 | if not isinstance(values, Collection): 43 | msg = "A list/array of values of the POI is required." 44 | raise TypeError(msg) 45 | 46 | self.parameter = parameter 47 | self.name = parameter.name 48 | self._values = np.array(values, dtype=np.float64) 49 | self._ndim = 1 50 | self._shape = (len(values),) 51 | 52 | @property 53 | def values(self): 54 | """ 55 | Returns the values of the **POIarray**. 56 | """ 57 | return self._values 58 | 59 | def __repr__(self): 60 | return f"POIarray('{self.name}', values={self.values})" 61 | 62 | def __getitem__(self, i): 63 | """ 64 | Get the i-th element of the array of values of the **POIarray**.
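For instance, `POIarray(Nsig, [0.0, 0.5, 1.0])[1]` is `POI(Nsig, 0.5)` (illustrative values).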
65 | """ 66 | return POI(self.parameter, self.values[i]) 67 | 68 | def __iter__(self): 69 | for v in self.values: 70 | yield POI(self.parameter, v) 71 | 72 | def __len__(self): 73 | return len(self.values) 74 | 75 | def __eq__(self, other): 76 | if not isinstance(other, POIarray): 77 | return NotImplemented 78 | 79 | if len(self) != len(other): 80 | return False 81 | 82 | values_equal = self.values == other.values 83 | name_equal = self.name == other.name 84 | return values_equal.all() and name_equal 85 | 86 | def __hash__(self): 87 | return hash((self.name, self.values.tostring())) 88 | 89 | @property 90 | def ndim(self): 91 | """ 92 | Returns the number of dimension of the **POIarray**. 93 | """ 94 | return self._ndim 95 | 96 | @property 97 | def shape(self): 98 | """ 99 | Returns the shape of the **POIarray**. 100 | """ 101 | return self._shape 102 | 103 | def append(self, values: int | float | Collection | np.ndarray): 104 | """ 105 | Append values in the **POIarray**. 106 | 107 | Args: 108 | values: values to append 109 | """ 110 | if not isinstance(values, Collection): 111 | values = [values] 112 | values = np.concatenate([self.values, values]) 113 | return POIarray(parameter=self.parameter, values=values) 114 | 115 | 116 | class POI(POIarray): 117 | """ 118 | Class for single value parameter of interest: 119 | """ 120 | 121 | def __init__(self, parameter, value: int | float): 122 | """ 123 | Args: 124 | parameter: the parameter of interest 125 | values: value of the parameter of interest 126 | 127 | Raises: 128 | TypeError: if value is an iterable 129 | 130 | Example with `zfit`: 131 | >>> Nsig = zfit.Parameter("Nsig") 132 | >>> poi = POI(Nsig, value=0) 133 | """ 134 | if isinstance(value, Collection): 135 | msg = "A single value for the POI is required." 136 | raise TypeError(msg) 137 | 138 | super().__init__(parameter=parameter, values=[value]) 139 | self._value = value 140 | 141 | @property 142 | def value(self): 143 | """ 144 | Returns the value of the **POI**. 145 | """ 146 | return self._value 147 | 148 | def __eq__(self, other): 149 | if not isinstance(other, POI): 150 | return NotImplemented 151 | 152 | value_equal = self.value == other.value 153 | name_equal = self.name == other.name 154 | return value_equal and name_equal 155 | 156 | def __repr__(self): 157 | return f"POI('{self.name}', value={self.value})" 158 | 159 | def __hash__(self): 160 | return hash((self.name, self.value)) 161 | 162 | 163 | def asarray(poi: POI) -> POIarray: 164 | """ 165 | Transforms a **POI** instance into a **POIarray** instance. 166 | 167 | Args: 168 | poi: the parameter of interest. 169 | """ 170 | return POIarray(parameter=poi.parameter, values=poi.values) 171 | -------------------------------------------------------------------------------- /src/hepstats/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license, see LICENSE. 2 | """ 3 | Module for algorithms and methods used to model distributions. 4 | 5 | This module contains in particular: 6 | 7 | * Bayesian Blocks binning algorithm. 
8 | """ 9 | 10 | # ----------------------------------------------------------------------------- 11 | # Import statements 12 | # ----------------------------------------------------------------------------- 13 | 14 | from .bayesian_blocks import bayesian_blocks 15 | -------------------------------------------------------------------------------- /src/hepstats/modeling/bayesian_blocks.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license, see LICENSE and LICENSE_ASTROML 2 | """ 3 | Bayesian Block implementation 4 | ============================= 5 | 6 | Dynamic programming algorithm for finding the optimal adaptive-width histogram. Modified from the 7 | bayesian blocks python implementation found in astroML :cite:`VanderPlas_2012`. 8 | 9 | * Based on Scargle et al 2012 :cite:`Scargle_2013` 10 | * Initial Python Implementation :cite:`BB_jakevdp` 11 | * Initial Examination in HEP context :cite:`Pollack:2017srh` 12 | 13 | """ 14 | 15 | from __future__ import annotations 16 | 17 | from collections.abc import Iterable 18 | 19 | import numpy as np 20 | import pandas as pd 21 | 22 | 23 | class Prior: 24 | """Helper class for calculating the prior on the fitness function.""" 25 | 26 | def __init__(self, p0: float = 0.05, gamma: float | None = None): 27 | """ 28 | Args: 29 | p0: False-positive rate, between 0 and 1. A lower number places a stricter penalty 30 | against creating more bin edges, thus reducing the potential for false-positive bin edges. In general, 31 | the larger the number of bins, the small the p0 should be to prevent the creation of spurious, jagged 32 | bins. Defaults to 0.05. 33 | 34 | gamma: If specified, then use this gamma to compute the general prior form, 35 | :math:`p \\sim \\gamma^N`. If gamma is specified, p0 is ignored. Defaults to None. 36 | """ 37 | 38 | self.p0 = p0 39 | self.gamma = gamma 40 | 41 | def calc(self, N: int) -> float: 42 | """ 43 | Computes the prior. 44 | 45 | Args: 46 | N: N-th change point. 47 | 48 | Returns: 49 | the prior. 50 | """ 51 | if self.gamma is not None: 52 | return -np.log(self.gamma) 53 | else: 54 | # eq. 21 from Scargle 2012 55 | return 4 - np.log(73.53 * self.p0 * (N**-0.478)) 56 | 57 | 58 | def bayesian_blocks( 59 | data: Iterable | np.ndarray, 60 | weights: Iterable | np.ndarray | None = None, 61 | p0: float = 0.05, 62 | gamma: float | None = None, 63 | ) -> np.ndarray: 64 | """Bayesian Blocks Implementation. 65 | 66 | This is a flexible implementation of the Bayesian Blocks algorithm described in :cite:`Scargle_2013`. 67 | It has been modified to natively accept weighted events, for ease of use in HEP applications. 68 | 69 | Args: 70 | data: Input data values (one dimensional, length N). Repeat values are allowed. 71 | 72 | weights: Weights for data (otherwise assume all data points have a weight of 1). 73 | Must be same length as data. Defaults to None. 74 | 75 | p0: False-positive rate, between 0 and 1. A lower number places a stricter penalty 76 | against creating more bin edges, thus reducing the potential for false-positive bin edges. In general, 77 | the larger the number of bins, the small the p0 should be to prevent the creation of spurious, jagged 78 | bins. Defaults to 0.05. 79 | 80 | gamma: If specified, then use this gamma to compute the general prior form, 81 | :math:`p \\sim \\gamma^N`. If gamma is specified, p0 is ignored. Defaults to None. 
82 | 83 | Returns: 84 | Array containing the (N+1) bin edges 85 | 86 | Examples: 87 | Unweighted data: 88 | 89 | >>> d = np.random.normal(size=100) 90 | >>> bins = bayesian_blocks(d, p0=0.01) 91 | 92 | Unweighted data with repeats: 93 | 94 | >>> d = np.random.normal(size=100) 95 | >>> d[80:] = d[:20] 96 | >>> bins = bayesian_blocks(d, p0=0.01) 97 | 98 | Weighted data: 99 | 100 | >>> d = np.random.normal(size=100) 101 | >>> w = np.random.uniform(1,2, size=100) 102 | >>> bins = bayesian_blocks(d, w, p0=0.01) 103 | 104 | """ 105 | # validate input data 106 | data = np.asarray(data, dtype=float) 107 | assert data.ndim == 1 108 | 109 | # validate input weights 110 | # set them to 1 if not given 111 | weights = np.asarray(weights) if weights is not None else np.ones_like(data) 112 | 113 | # initialize the prior 114 | prior = Prior(p0, gamma) 115 | 116 | # Place data and weights into a DataFrame. 117 | # We want to sort the data array (without losing the associated weights), and combine duplicate 118 | # data points by summing their weights together. We can accomplish all this with `groupby` 119 | 120 | df = pd.DataFrame({"data": data, "weights": weights}) 121 | gb = df.groupby("data").sum() 122 | data = gb.index.values 123 | weights = gb.weights.values 124 | 125 | N = weights.size 126 | 127 | # create length-(N + 1) array of cell edges 128 | edges = np.concatenate([data[:1], 0.5 * (data[1:] + data[:-1]), data[-1:]]) 129 | block_length = data[-1] - edges 130 | 131 | # arrays to store the best configuration 132 | best = np.zeros(N, dtype=float) 133 | last = np.zeros(N, dtype=int) 134 | 135 | # ----------------------------------------------------------------- 136 | # Start with first data cell; add one cell at each iteration 137 | # ----------------------------------------------------------------- 138 | # last = core_loop(N, block_length, weights, fitfunc, best, last) 139 | for R in range(N): 140 | # Compute fit_vec : fitness of putative last block (end at R) 141 | 142 | # T_k: width/duration of each block 143 | T_k = block_length[: R + 1] - block_length[R + 1] 144 | 145 | # N_k: number of elements in each block 146 | N_k = np.cumsum(weights[: R + 1][::-1])[::-1] 147 | 148 | # evaluate fitness function 149 | fit_vec = N_k * (np.log(N_k / T_k)) 150 | 151 | # penalize function with prior 152 | A_R = fit_vec - prior.calc(R + 1) 153 | A_R[1:] += best[:R] 154 | 155 | i_max = np.argmax(A_R) 156 | last[R] = i_max 157 | best[R] = A_R[i_max] 158 | 159 | # ----------------------------------------------------------------- 160 | # Now find changepoints by iteratively peeling off the last block 161 | # ----------------------------------------------------------------- 162 | change_points = np.zeros(N, dtype=int) 163 | i_cp = N 164 | ind = N 165 | while True: 166 | i_cp -= 1 167 | change_points[i_cp] = ind 168 | if ind == 0: 169 | break 170 | ind = last[ind - 1] 171 | change_points = change_points[i_cp:] 172 | 173 | return edges[change_points] 174 | -------------------------------------------------------------------------------- /src/hepstats/splot/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license, see LICENSE. 2 | """ 3 | Module implementing the **sPlot** algorithm, see :cite:`Pivk:2004ty`. 
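Given an extended maximum likelihood fit of a sum of species (e.g. signal plus background) to a discriminating variable, `compute_sweights` returns per-event weights that statistically unfold each species. A minimal sketch, assuming `model` is a sum of extended pdfs already fitted to the data `x`:

>>> from hepstats.splot import compute_sweights
>>> weights = compute_sweights(model, x)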
4 | 5 | 6 | """ 7 | 8 | from .sweights import compute_sweights 9 | -------------------------------------------------------------------------------- /src/hepstats/splot/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Specific exceptions for the `splot` submodule 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | 8 | class ModelNotFittedToData(Exception): 9 | """Exception class for model not fitted to data provided to compute sweights""" 10 | -------------------------------------------------------------------------------- /src/hepstats/splot/sweights.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | from typing import Any 5 | 6 | import numpy as np 7 | 8 | from ..utils import eval_pdf 9 | from ..utils.fit.api_check import is_valid_pdf 10 | from .exceptions import ModelNotFittedToData 11 | from .warnings import AboveToleranceWarning 12 | 13 | 14 | def is_sum_of_extended_pdfs(model) -> bool: 15 | """Checks if the input model is a sum of extended models. 16 | 17 | Args: 18 | model: the input model/pdf 19 | 20 | Returns: 21 | True if the model is a sum of extended models, False if not. 22 | """ 23 | if not hasattr(model, "get_models"): 24 | return False 25 | 26 | return all(m.is_extended for m in model.get_models()) and model.is_extended 27 | 28 | 29 | def compute_sweights(model, x: np.ndarray, *, atol_exceptions: float | None = None) -> dict[Any, np.ndarray]: 30 | """Computes sWeights from probability density functions for different components/species in a fit model 31 | (for instance signal and background) fitted on some data `x`. 32 | 33 | i.e. model = Nsig * pdf_signal + Nbkg * pdf_bkg 34 | 35 | Args: 36 | model: sum of extended pdfs. 37 | x: data on which `model` is fitted 38 | atol_exceptions: absolute tolerance for the Maximum Likelihood Sum Rule sanity check, 39 | described in equation 17 of arXiv:physics/0402083. For each species the sum rule should give 1 40 | within an absolute tolerance of `atol_exceptions`; otherwise an exception is raised. 41 | 42 | Returns: 43 | dictionary with yield parameters as keys, and sWeights for the corresponding species as values.
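Note: the weights follow the sPlot formalism. With the normalized pdfs :math:`p_j` and the extended model :math:`N(x) = \\sum_k N_k p_k(x)`, the matrix :math:`(V^{-1})_{jk} = \\sum_i p_j(x_i) p_k(x_i) / N(x_i)^2` is built and the weight of species :math:`j` for event :math:`x_i` is :math:`w_j(x_i) = \\sum_k V_{jk} p_k(x_i) / N(x_i)`; this summarizes the computation implemented below.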
44 | 45 | Example with **zfit**: 46 | 47 | Imports: 48 | 49 | >>> import numpy as np 50 | >>> import zfit 51 | >>> from zfit.loss import ExtendedUnbinnedNLL 52 | >>> from zfit.minimize import Minuit 53 | 54 | Definition of the bounds and yield of background and signal species: 55 | 56 | >>> bounds = (0.0, 3.0) 57 | >>> nbkg = 10000 58 | >>> nsig = 5000 59 | >>> obs = zfit.Space('x', limits=bounds) 60 | 61 | Generation of data: 62 | 63 | >>> bkg = np.random.exponential(0.5, nbkg) 64 | >>> peak = np.random.normal(1.2, 0.1, nsig) 65 | >>> data = np.concatenate((bkg, peak)) 66 | >>> data = data[(data > bounds[0]) & (data < bounds[1])] 67 | >>> N = data.size 68 | >>> data = zfit.data.Data.from_numpy(obs=obs, array=data) 69 | 70 | Model definition: 71 | 72 | >>> mean = zfit.Parameter("mean", 1.2, 0.5, 2.0) 73 | >>> sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.2) 74 | >>> lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 75 | >>> Nsig = zfit.Parameter("Nsig", nsig, 0., N) 76 | >>> Nbkg = zfit.Parameter("Nbkg", nbkg, 0., N) 77 | >>> signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig) 78 | >>> background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 79 | >>> tot_model = zfit.pdf.SumPDF([signal, background]) 80 | 81 | Loss construction and minimization: 82 | 83 | >>> loss = ExtendedUnbinnedNLL(model=tot_model, data=data) 84 | >>> minimizer = Minuit() 85 | >>> minimum = minimizer.minimize(loss) 86 | 87 | sWeights computation: 88 | 89 | >>> from hepstats.splot import compute_sweights 90 | >>> sweights = compute_sweights(tot_model, data) 91 | >>> print(sweights) 92 | {<zfit.Parameter 'Nsig' floating=True value=...>: array([-0.09953299, -0.09953299, -0.09953299, ..., 93 | 0.78689884, 1.08823111, 1.05948873]), 94 | <zfit.Parameter 'Nbkg' floating=True value=...>: array([ 1.09953348, 1.09953348, 1.09953348, ..., 95 | 0.21310097, -0.08823153, -0.05948912])} 96 | """ 97 | 98 | if not is_valid_pdf(model): 99 | msg = f"{model} is not a valid pdf!" 100 | raise ValueError(msg) 101 | if not is_sum_of_extended_pdfs(model): 102 | msg = f"Input model, {model}, should be a sum of extended pdfs!" 103 | raise ValueError(msg) 104 | 105 | models = model.get_models() 106 | yields = [m.get_yield() for m in models] 107 | 108 | p = np.vstack([eval_pdf(m, x) for m in models]).T 109 | Nx = eval_pdf(model, x, allow_extended=True) 110 | pN = p / Nx[:, None] 111 | 112 | MLSR = pN.sum(axis=0) 113 | atol_warning = 5e-3 114 | if atol_exceptions is None: 115 | atol_exceptions = 5e-2 116 | 117 | def msg_fn(tolerance): 118 | msg = ( 119 | "The Maximum Likelihood Sum Rule sanity check, described in equation 17 of" 120 | + " arXiv:physics/0402083, failed. According to this check the following quantities\n" 121 | ) 122 | for y, mlsr in zip(yields, MLSR): 123 | msg += f"\t* {y.name}: {mlsr},\n" 124 | msg += f"should be equal to 1.0 with an absolute tolerance of {tolerance}." 125 | return msg 126 | 127 | if not np.allclose(MLSR, 1, atol=atol_exceptions): 128 | msg = msg_fn(atol_exceptions) 129 | msg += " The numbers suggest that the model is not fitted to the data. Please check your fit." 130 | raise ModelNotFittedToData(msg) 131 | 132 | if not np.allclose(MLSR, 1, atol=atol_warning): 133 | msg = msg_fn(atol_warning) 134 | msg += " If the fit to the data is good please ignore this warning."
135 | warnings.warn(msg, AboveToleranceWarning, stacklevel=2) 136 | 137 | Vinv = (pN).T.dot(pN) 138 | V = np.linalg.inv(Vinv) 139 | 140 | sweights = p.dot(V) / Nx[:, None] 141 | 142 | return {y: sweights[:, i] for i, y in enumerate(yields)} 143 | -------------------------------------------------------------------------------- /src/hepstats/splot/warnings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Specific warnings for the `splot` submodule 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | 8 | class AboveToleranceWarning(UserWarning): 9 | pass 10 | -------------------------------------------------------------------------------- /src/hepstats/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .fit import ( 2 | eval_pdf, 3 | array2dataset, 4 | pll, 5 | base_sampler, 6 | base_sample, 7 | get_value, 8 | set_values, 9 | ) 10 | -------------------------------------------------------------------------------- /src/hepstats/utils/fit/__init__.py: -------------------------------------------------------------------------------- 1 | from .diverse import ( 2 | get_value, 3 | eval_pdf, 4 | pll, 5 | array2dataset, 6 | get_nevents, 7 | set_values, 8 | set_values_once, 9 | ) 10 | from .sampling import base_sampler, base_sample 11 | -------------------------------------------------------------------------------- /src/hepstats/utils/fit/api_check.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for testing a fitting library validity with hepstats. 3 | 4 | A fitting library should provide six basic objects: 5 | 6 | * model / probability density function 7 | * parameters of the models 8 | * data 9 | * loss / likelihood function 10 | * minimizer 11 | * fitresult (optional) 12 | 13 | A function for each object is defined in this module, all should return `True` to work 14 | with hepstats. 15 | 16 | The `zfit` API is currently the standard fitting API in hepstats. 
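For example, a backend object can be checked before running a test (a sketch; `my_pdf` stands for any candidate model object):

>>> from hepstats.utils.fit.api_check import is_valid_pdf
>>> is_valid_pdf(my_pdf)
True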
17 | 18 | """ 19 | 20 | from __future__ import annotations 21 | 22 | import warnings 23 | 24 | import uhi.typing.plottable 25 | 26 | 27 | def is_valid_parameter(object): 28 | """ 29 | Checks if a parameter has the following attributes/methods: 30 | * value 31 | * set_value 32 | * floating 33 | """ 34 | has_value = hasattr(object, "value") 35 | has_set_value = hasattr(object, "set_value") 36 | has_floating = hasattr(object, "floating") 37 | 38 | return has_value and has_set_value and has_floating 39 | 40 | 41 | def is_valid_data(object): 42 | """ 43 | Checks if the data object has the following attributes/methods: 44 | * nevents 45 | * weights 46 | * set_weights 47 | * space 48 | """ 49 | is_sampled_data = hasattr(object, "resample") 50 | 51 | try: 52 | has_nevents = hasattr(object, "nevents") 53 | except RuntimeError: 54 | if is_sampled_data: 55 | object.resample() 56 | has_nevents = hasattr(object, "nevents") 57 | else: 58 | has_nevents = False 59 | 60 | has_weights = hasattr(object, "weights") 61 | has_set_weights = hasattr(object, "set_weights") 62 | has_space = hasattr(object, "space") 63 | is_histlike = isinstance(object, uhi.typing.plottable.PlottableHistogram) 64 | return (has_nevents and has_weights and has_set_weights and has_space) or is_histlike 65 | 66 | 67 | def is_valid_pdf(object): 68 | """ 69 | Checks if the pdf object has the following attributes/methods: 70 | * get_params 71 | * pdf 72 | * integrate 73 | * sample 74 | * get_yield 75 | 76 | Also the function **is_valid_parameter** is called with each of the parameters returned by get_params 77 | as argument. 78 | """ 79 | has_get_params = hasattr(object, "get_params") 80 | if not has_get_params: 81 | return False 82 | else: 83 | params = object.get_params() 84 | 85 | all_valid_params = all(is_valid_parameter(p) for p in params) 86 | has_pdf = hasattr(object, "pdf") 87 | has_integrate = hasattr(object, "integrate") 88 | has_sample = hasattr(object, "sample") 89 | has_space = hasattr(object, "space") 90 | has_get_yield = hasattr(object, "get_yield") 91 | 92 | return all_valid_params and has_pdf and has_integrate and has_sample and has_space and has_get_yield 93 | 94 | 95 | def is_valid_loss(object): 96 | """ 97 | Checks if the loss object has the following attributes/methods: 98 | * model 99 | * data 100 | * get_params 101 | * constraints 102 | * create_new 103 | 104 | Also the function **is_valid_pdf** is called with each of the models returned by model 105 | as argument. Additionally the function **is_valid_data** is called with each of the data objects 106 | returned by data as argument.
107 | """ 108 | if not hasattr(object, "model"): 109 | return False 110 | else: 111 | model = object.model 112 | 113 | if not hasattr(object, "data"): 114 | return False 115 | else: 116 | data = object.data 117 | 118 | has_get_params = hasattr(object, "get_params") 119 | has_constraints = hasattr(object, "constraints") 120 | has_create_new = hasattr(object, "create_new") 121 | if not has_create_new: 122 | warnings.warn("Loss should have a `create_new` method.", FutureWarning, stacklevel=3) 123 | has_create_new = True # TODO: allowed now, will be dropped in the future 124 | all_valid_pdfs = all(is_valid_pdf(m) for m in model) 125 | all_valid_datasets = all(is_valid_data(d) for d in data) 126 | 127 | return all_valid_pdfs and all_valid_datasets and has_constraints and has_create_new and has_get_params 128 | 129 | 130 | def is_valid_fitresult(object): 131 | """ 132 | Checks if the fit result object has the following attributes/methods: 133 | * loss 134 | * params 135 | * covariance 136 | 137 | Also the function **is_valid_loss** is called with the loss as argument. 138 | """ 139 | has_loss = hasattr(object, "loss") 140 | 141 | if not has_loss: 142 | return False 143 | else: 144 | loss = object.loss 145 | has_params = hasattr(object, "params") 146 | has_covariance = hasattr(object, "covariance") 147 | return is_valid_loss(loss) and has_params and has_covariance 148 | 149 | 150 | def is_valid_minimizer(object): 151 | """ 152 | Checks if the minimzer object has the following attributes/methods: 153 | * minimize 154 | """ 155 | return hasattr(object, "minimize") 156 | -------------------------------------------------------------------------------- /src/hepstats/utils/fit/diverse.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping 4 | from contextlib import ExitStack, contextmanager, suppress 5 | 6 | import numpy as np 7 | 8 | 9 | def get_ndims(dataset): 10 | """Return the number of dimensions in the dataset""" 11 | return len(dataset.obs) 12 | 13 | 14 | def get_value(value): 15 | return np.array(value) 16 | 17 | 18 | def set_values_once(params, values): 19 | with suppress(ImportError): 20 | import zfit 21 | 22 | return zfit.param.set_values(params, values) # more efficient 23 | 24 | for p, v in zip(params, values): 25 | p.set_value(v) 26 | return None 27 | 28 | 29 | def eval_pdf(model, x, params=None, allow_extended=False): 30 | """Compute pdf of model at a given point x and for given parameters values""" 31 | 32 | if params is None: 33 | params = {} 34 | 35 | def pdf(model, x): 36 | ret = model.ext_pdf(x) if model.is_extended and allow_extended else model.pdf(x) 37 | 38 | return get_value(ret) 39 | 40 | with ExitStack() as stack: 41 | for param in model.get_params(): 42 | if param in params: 43 | value = params[param]["value"] 44 | stack.enter_context(param.set_value(value)) 45 | return pdf(model, x) 46 | 47 | 48 | def pll(minimizer, loss, pois, init=None) -> float: 49 | """Compute minimum profile likelihood for fixed given parameters values.""" 50 | del init # unused currently 51 | 52 | with ExitStack() as stack: 53 | for p in pois: 54 | param = p.parameter 55 | stack.enter_context(param.set_value(p.value)) 56 | param.floating = False 57 | 58 | if any(param_loss.floating for param_loss in loss.get_params()): 59 | minimum = minimizer.minimize(loss=loss) # TODO: add init? 
60 | value = minimum.fmin 61 | else: 62 | value = get_value(loss.value()) 63 | 64 | for p in pois: 65 | p.parameter.floating = True 66 | 67 | return value 68 | 69 | 70 | @contextmanager 71 | def set_values(params, values=None): 72 | if values is None: 73 | if isinstance(params, Mapping): 74 | values = tuple(params.values()) 75 | params = tuple(params.keys()) 76 | else: 77 | msg = "values must be provided if params is not a Mapping (dict-like)" 78 | raise ValueError(msg) 79 | old_values = [p.value() for p in params] 80 | for p, v in zip(params, values): 81 | p.set_value(v) 82 | try: 83 | yield 84 | finally: 85 | # restore the previous values even if an exception is raised in the with block 86 | for p, v in zip(params, old_values): 87 | p.set_value(v) 88 | 89 | 90 | def array2dataset(dataset_cls, obs, array, weights=None): 91 | """ 92 | dataset_cls: only used to get the class in which array/weights will be 93 | converted. 94 | """ 95 | 96 | if hasattr(dataset_cls, "from_numpy"): 97 | return dataset_cls.from_numpy(obs, array=array, weights=weights) 98 | else: 99 | return dataset_cls(obs, array=array, weights=weights) 100 | 101 | 102 | def get_nevents(dataset): 103 | """Returns the number of events in the dataset""" 104 | 105 | return get_value(dataset.nevents) 106 | -------------------------------------------------------------------------------- /src/hepstats/utils/fit/sampling.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module providing basic sampling methods. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from .api_check import is_valid_pdf 8 | from .diverse import get_value, set_values 9 | 10 | 11 | def base_sampler(models, nevents): 12 | """ 13 | Creates samplers from models. 14 | 15 | Args: 16 | models (list(model)): models to sample 17 | nevents (list(int)): number of events to generate for each sampler 18 | 19 | Returns: 20 | Samplers 21 | """ 22 | 23 | assert all(is_valid_pdf(m) for m in models) 24 | assert len(nevents) == len(models) 25 | 26 | samplers = [] 27 | 28 | for i, m in enumerate(models): 29 | sampler = m.create_sampler(n=nevents[i]) 30 | samplers.append(sampler) 31 | 32 | return samplers 33 | 34 | 35 | def base_sample(samplers, ntoys, parameter=None, value=None, constraints=None): 36 | """ 37 | Samples from samplers. The parameters that are floating in the samplers can be set to a specific value 38 | using the `parameter` and `value` arguments.
39 | 40 | Args: 41 | samplers (list): generators of samples 42 | ntoys (int): number of samples to generate 43 | parameter (optional): floating parameter in the sampler 44 | value (optional): value of the parameter 45 | constraints (optional): constraints to sample 46 | 47 | Yields: 48 | dict: sampled values for each constraint (empty if no constraints are given) 49 | """ 50 | 51 | sampled_constraints = {} 52 | if constraints is not None: 53 | for constr in constraints: 54 | try: 55 | sampled_constraints.update({k: get_value(v) for k, v in constr.sample(n=ntoys).items()}) 56 | except AttributeError: 57 | continue 58 | 59 | params = {} if parameter is None or value is None else {parameter: value} 60 | for i in range(ntoys): 61 | with set_values(params): 62 | for s in samplers: 63 | s.resample() # do not pass parameters as arguments as it will fail in simultaneous fits 64 | 65 | if constraints is not None: 66 | yield {param: value[i] for param, value in sampled_constraints.items()} 67 | else: 68 | yield {} 69 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | 7 | def pytest_addoption(parser): 8 | parser.addoption( 9 | "--cmdopt", 10 | action="store", 11 | default="test", 12 | help="option: 'test' or \ 13 | 'generate'. Only use 'generate' if you've changed the tests and need to update the expected \ 14 | output!", 15 | ) 16 | 17 | 18 | @pytest.fixture 19 | def cmdopt(request): 20 | return request.config.getoption("--cmdopt") 21 | 22 | 23 | @pytest.fixture(scope="session") 24 | def data_gen(): 25 | np.random.seed(111) 26 | data1 = np.random.normal(size=1000) 27 | data2 = np.random.normal(2, 1, size=1000) 28 | weights = np.random.uniform(1, 2, size=1000) 29 | return data1, data2, weights 30 | 31 | 32 | # TODO: manually ported, use pre-made: https://github.com/zfit/zfit-development/issues/73 33 | @pytest.fixture(autouse=True) 34 | def _setup_teardown(): 35 | try: 36 | import zfit 37 | except ImportError: 38 | yield 39 | return 40 | 41 | old_chunksize = zfit.run.chunking.max_n_points 42 | old_active = zfit.run.chunking.active 43 | 44 | yield 45 | 46 | from zfit.core.parameter import ZfitParameterMixin 47 | 48 | ZfitParameterMixin._existing_params.clear() 49 | 50 | from zfit.util.cache import clear_graph_cache 51 | 52 | clear_graph_cache() 53 | zfit.run.chunking.active = old_active 54 | zfit.run.chunking.max_n_points = old_chunksize 55 | zfit.run.set_graph_mode() 56 | zfit.run.set_autograd_mode() 57 | 58 | 59 | def create_loss_func(npeak, nbins=None, nbkg=None, nameadd="", obs=None): 60 | import zfit 61 | 62 | bounds = (0.1, 3.0) 63 | obs = "x" if obs is None else obs 64 | obs = zfit.Space(obs, limits=bounds) 65 | 66 | # Data and signal 67 | np.random.seed(0) 68 | tau = -2.0 69 | beta = -1 / tau 70 | nbkg = 300 if nbkg is None else nbkg 71 | bkg = np.random.exponential(beta, nbkg) 72 | peak = np.random.normal(1.2, 0.1, npeak) 73 | data = np.concatenate((bkg, peak)) 74 | data = data[(data > bounds[0]) & (data < bounds[1])] 75 | N = len(data) 76 | data = zfit.data.Data.from_numpy(obs=obs, array=data) 77 | 78 | mean =
zfit.Parameter("mean" + nameadd, 1.2, 0.5, 2.0) 79 | sigma = zfit.Parameter("sigma" + nameadd, 0.1, 0.02, 0.2) 80 | lambda_ = zfit.Parameter("lambda" + nameadd, -2.0, -4.0, -1.0) 81 | Nsig = zfit.Parameter("Nsig" + nameadd, 20.0, -20.0, N * 3) 82 | Nbkg = zfit.Parameter("Nbkg" + nameadd, N, 0.0, N * 3) 83 | 84 | signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig) 85 | background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 86 | 87 | tot_model = zfit.pdf.SumPDF([signal, background]) 88 | 89 | if nbins is not None: 90 | binned_space = obs.with_binning(nbins) 91 | data = data.to_binned(binned_space) 92 | tot_model = tot_model.to_binned(binned_space) 93 | loss = zfit.loss.ExtendedBinnedNLL(tot_model, data) 94 | else: 95 | loss = zfit.loss.ExtendedUnbinnedNLL(model=tot_model, data=data) 96 | 97 | return loss, (Nsig, Nbkg, mean, sigma) 98 | 99 | 100 | def create_sim_loss_func(npeak, nbins=None): 101 | loss1, params1 = create_loss_func(npeak, nbins=nbins, nameadd="_1", obs="x1") 102 | loss2, params2 = create_loss_func(npeak * 10, nbins=nbins, nameadd="_2", obs="x2", nbkg=500) 103 | loss = loss1 + loss2 104 | 105 | return loss, params1 106 | 107 | 108 | @pytest.fixture 109 | def create_loss(): 110 | return create_loss_func 111 | 112 | 113 | @pytest.fixture 114 | def create_sim_loss(): 115 | return create_sim_loss_func 116 | -------------------------------------------------------------------------------- /tests/hypotests/data/cls_pvalues.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/hypotests/data/cls_pvalues.npz -------------------------------------------------------------------------------- /tests/hypotests/data/clsb_pvalues.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/hypotests/data/clsb_pvalues.npz -------------------------------------------------------------------------------- /tests/hypotests/test_basetest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | zfit = pytest.importorskip("zfit") 4 | from zfit.loss import UnbinnedNLL 5 | from zfit.minimize import Minuit 6 | 7 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator 8 | from hepstats.hypotests.core.basetest import BaseTest 9 | from hepstats.hypotests.parameters import POI, POIarray 10 | 11 | 12 | 13 | def create_loss(): 14 | obs = zfit.Space("x", limits=(0.1, 2.0)) 15 | data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000)) 16 | mean = zfit.Parameter("mu", 1.2) 17 | sigma = zfit.Parameter("sigma", 0.1) 18 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) 19 | loss = UnbinnedNLL(model=model, data=data) 20 | 21 | return loss, (mean, sigma) 22 | 23 | 24 | def test_constructor(): 25 | with pytest.raises(TypeError): 26 | BaseTest() 27 | 28 | loss, (mean, sigma) = create_loss() 29 | calculator = BaseCalculator(loss, Minuit()) 30 | 31 | poimean = POIarray(mean, [1.0, 1.1, 1.2, 1.3]) 32 | poisigma = POI(sigma, 0.1) 33 | 34 | with pytest.raises(TypeError): 35 | BaseTest(calculator) 36 | 37 | with pytest.raises(TypeError): 38 | BaseTest(calculator, poimean, [poisigma]) 39 | 40 | with pytest.raises(TypeError): 41 | BaseTest("calculator", poimean, poisigma) 42 | 43 | 44 | def 
test_attributes(): 45 | loss, (mean, sigma) = create_loss() 46 | calculator = BaseCalculator(loss, Minuit()) 47 | 48 | poimean_1 = POIarray(mean, [1.0, 1.1, 1.2, 1.3]) 49 | poimean_2 = POI(mean, 1.2) 50 | 51 | test = BaseTest(calculator, poimean_1, poimean_2) 52 | 53 | assert test.poinull == poimean_1 54 | assert test.poialt == poimean_2 55 | assert test.calculator == calculator 56 | -------------------------------------------------------------------------------- /tests/hypotests/test_calculators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import pytest 5 | zfit = pytest.importorskip("zfit") 6 | from zfit.loss import UnbinnedNLL 7 | from zfit.minimize import Minuit 8 | 9 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator 10 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator 11 | from hepstats.hypotests.parameters import POI, POIarray 12 | from hepstats.utils.fit.api_check import is_valid_loss, is_valid_data 13 | 14 | true_mu = 1.2 15 | true_sigma = 0.1 16 | 17 | 18 | def create_loss(constraint=False, nbins=None, make2d=False): 19 | if not isinstance(nbins, list): 20 | nbins = [nbins] * 2 if make2d else [nbins] 21 | obs1 = zfit.Space("x", limits=(0.1, 2.0), binning=nbins[0]) 22 | obs = obs1 23 | if make2d: 24 | obs2 = zfit.Space("y", limits=(-0.1, 3.0), binning=nbins[1]) 25 | obs = obs1 * obs2 26 | 27 | array1 = np.random.normal(1.2, 0.1, (10000, 2 if make2d else 1)) 28 | data = zfit.data.Data.from_numpy(obs=obs.with_binning(None), array=array1) 29 | if nbins[0] is not None: 30 | data = data.to_binned(obs) 31 | mean = zfit.Parameter("mu", true_mu, true_mu - 2, true_mu + 2) 32 | sigma = zfit.Parameter("sigma", true_sigma, 0.01, 1.0) 33 | model = zfit.pdf.Gauss(obs=obs1.with_binning(None), mu=mean, sigma=sigma) 34 | if make2d: 35 | model2 = zfit.pdf.Gauss(obs=obs2.with_binning(None), mu=mean, sigma=sigma) 36 | model = model * model2 37 | if nbins[0] is not None: 38 | model = zfit.pdf.BinnedFromUnbinnedPDF(model, space=obs) 39 | if constraint: 40 | constraint = zfit.constraint.GaussianConstraint( 41 | params=mean, observation=true_mu, uncertainty=0.01 42 | ) 43 | else: 44 | constraint = None 45 | if nbins[0] is None: 46 | loss = UnbinnedNLL(model=model, data=data, constraints=constraint) 47 | else: 48 | loss = zfit.loss.BinnedNLL(model=model, data=data, constraints=constraint) 49 | 50 | return loss, (mean, sigma) 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "calculator", 55 | [BaseCalculator, AsymptoticCalculator, FrequentistCalculator, "AsymptoticOld"], 56 | ) 57 | @pytest.mark.parametrize("make2d", [False, True], ids=["1d", "2d"]) 58 | @pytest.mark.parametrize( 59 | "nbins", 60 | [None, [10, 13], [9, 50]], 61 | ids=lambda x: f"Binning {x}" if x is not None else "Unbinned", 62 | ) 63 | @pytest.mark.parametrize( 64 | "constraint", [False, True], ids=["No constraint", "With constraint"] 65 | ) 66 | def test_base_calculator(calculator, make2d, nbins, constraint): 67 | if calculator == "AsymptoticOld": 68 | if make2d: 69 | pytest.skip("AsymptoticOld does not support 2D") 70 | if nbins is not None: 71 | pytest.skip("AsymptoticOld does not support binned") 72 | 73 | class calculator(AsymptoticCalculator): # we disable the converter 74 | UNBINNED_TO_BINNED_LOSS = {} 75 | 76 | assert calculator is not AsymptoticCalculator, "Must not be the same" 77 | assert AsymptoticCalculator.UNBINNED_TO_BINNED_LOSS, "Has to be filled" 78 | with 
pytest.raises(TypeError): 79 | calculator() 80 | 81 | loss, (mean, sigma) = create_loss(constraint=constraint, make2d=make2d, nbins=nbins) 82 | 83 | with pytest.raises(ValueError): 84 | calculator("loss", Minuit()) 85 | 86 | with pytest.raises(ValueError): 87 | calculator(loss, "Minuit()") 88 | 89 | calc_loss = calculator(loss, Minuit()) 90 | 91 | with pytest.raises(ValueError): 92 | calc_loss.bestfit = "bestfit" 93 | 94 | bestfit = calc_loss.bestfit 95 | calc_fitresult = calculator(bestfit, calc_loss.minimizer) 96 | 97 | assert calc_loss.bestfit == calc_fitresult.bestfit 98 | assert calc_loss.loss == calc_fitresult.loss 99 | 100 | mean_poi = POIarray(mean, [1.15, 1.2, 1.25]) 101 | mean_nll = calc_loss.obs_nll(pois=mean_poi) 102 | calc_loss.obs_nll(pois=mean_poi) # get from cache 103 | 104 | assert mean_nll[0] >= mean_nll[1] 105 | assert mean_nll[2] >= mean_nll[1] 106 | 107 | assert calc_loss.obs_nll(mean_poi[0]) == mean_nll[0] 108 | assert calc_loss.obs_nll(mean_poi[1]) == mean_nll[1] 109 | assert calc_loss.obs_nll(mean_poi[2]) == mean_nll[2] 110 | 111 | mean_poialt = POI(mean, 1.2) 112 | 113 | def pvalue(): 114 | return calc_loss.pvalue(poinull=mean_poi, poialt=mean_poialt) 115 | 116 | def exp_pvalue(): 117 | return calc_loss.expected_pvalue( 118 | poinull=mean_poi, poialt=mean_poialt, nsigma=np.arange(-2, 3, 1) 119 | ) 120 | 121 | def exp_poi(): 122 | return calc_loss.expected_poi( 123 | poinull=mean_poi, poialt=mean_poialt, nsigma=np.arange(-2, 3, 1) 124 | ) 125 | 126 | if calculator == BaseCalculator: 127 | with pytest.raises(NotImplementedError): 128 | pvalue() 129 | with pytest.raises(NotImplementedError): 130 | exp_pvalue() 131 | else: 132 | pvalue() 133 | exp_pvalue() 134 | 135 | model = calc_loss.model[0] 136 | sampler = model.create_sampler(n=10000) 137 | assert is_valid_data(sampler) 138 | 139 | loss = calc_loss.lossbuilder(model=[model], data=[sampler], weights=None) 140 | assert is_valid_loss(loss) 141 | 142 | with pytest.raises(ValueError): 143 | calc_loss.lossbuilder(model=[model, model], data=[sampler]) 144 | with pytest.raises(ValueError): 145 | calc_loss.lossbuilder(model=[model], data=[sampler, calc_loss.data[0]]) 146 | with pytest.raises(ValueError): 147 | calc_loss.lossbuilder(model=[model], data=[sampler], weights=[]) 148 | with pytest.raises(ValueError): 149 | calc_loss.lossbuilder( 150 | model=[model], data=[sampler], weights=[np.ones(10000), np.ones(10000)] 151 | ) 152 | 153 | assert calc_loss.get_parameter(mean_poi.name) == mean 154 | with pytest.raises(KeyError): 155 | calc_loss.get_parameter("dummy_parameter") 156 | 157 | 158 | def test_asymptotic_calculator_one_poi(): 159 | with pytest.raises(TypeError): 160 | AsymptoticCalculator() 161 | 162 | loss, (mean, sigma) = create_loss() 163 | calc = AsymptoticCalculator(loss, Minuit()) 164 | 165 | poi_null = POIarray(mean, [1.15, 1.2, 1.25]) 166 | poi_alt = POI(mean, 1.2) 167 | 168 | dataset = calc.asimov_dataset(poi_alt) 169 | assert all(is_valid_data(d) for d in dataset) 170 | loss = calc.asimov_loss(poi_alt) 171 | assert is_valid_loss(loss) 172 | 173 | null_nll = calc.asimov_nll(pois=poi_null, poialt=poi_alt) 174 | 175 | assert null_nll[0] >= null_nll[1] 176 | assert null_nll[2] >= null_nll[1] 177 | 178 | 179 | @pytest.mark.parametrize("constraint", [False, True]) 180 | def test_frequentist_calculator_one_poi(constraint): 181 | with pytest.raises(TypeError): 182 | FrequentistCalculator() 183 | 184 | loss, (mean, sigma) = create_loss(constraint=constraint) 185 | calc = FrequentistCalculator(loss, Minuit(), 
ntoysnull=100, ntoysalt=100) 186 | 187 | assert calc.ntoysnull == 100 188 | assert calc.ntoysalt == 100 189 | 190 | samplers = calc.sampler() 191 | assert all(is_valid_data(s) for s in samplers) 192 | loss = calc.toys_loss(mean.name) 193 | assert is_valid_loss(loss) 194 | -------------------------------------------------------------------------------- /tests/hypotests/test_confidence_intervals.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | zfit = pytest.importorskip("zfit") 4 | import os 5 | from zfit.loss import UnbinnedNLL 6 | from zfit.minimize import Minuit 7 | 8 | import hepstats 9 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator 10 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator 11 | from hepstats.hypotests import ConfidenceInterval 12 | from hepstats.hypotests.parameters import POI, POIarray 13 | from hepstats.hypotests.exceptions import POIRangeError 14 | 15 | notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests" 16 | 17 | 18 | def test_constructor(create_loss): 19 | with pytest.raises(TypeError): 20 | ConfidenceInterval() 21 | 22 | loss, (_, __, mean, _) = create_loss(npeak=80) 23 | calculator = BaseCalculator(loss, Minuit()) 24 | 25 | poi_1 = POI(mean, 1.5) 26 | poi_2 = POI(mean, 1.2) 27 | 28 | with pytest.raises(TypeError): 29 | ConfidenceInterval(calculator) 30 | 31 | with pytest.raises(TypeError): 32 | ConfidenceInterval(calculator, [poi_1], poi_2, qtilde=True) 33 | 34 | with pytest.raises(TypeError): 35 | ConfidenceInterval(calculator, [poi_1], [poi_2], qtilde=False) 36 | 37 | 38 | def asy_calc(create_loss, nbins=None): 39 | loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins) 40 | return mean, AsymptoticCalculator(loss, Minuit()) 41 | 42 | 43 | def asy_calc_old(create_loss, nbins=None): 44 | loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins) 45 | 46 | class calculator(AsymptoticCalculator): 47 | UNBINNED_TO_BINNED_LOSS = {} 48 | 49 | assert calculator is not AsymptoticCalculator, "Must not be the same" 50 | assert AsymptoticCalculator.UNBINNED_TO_BINNED_LOSS, "Has to be filled" 51 | return mean, calculator(loss, Minuit()) 52 | 53 | 54 | def freq_calc(create_loss, nbins=None): 55 | loss, (_, __, mean, ___) = create_loss(npeak=80, nbins=nbins) 56 | calculator = FrequentistCalculator.from_yaml( 57 | f"{notebooks_dir}/toys/ci_freq_zfit_toys.yml", loss, Minuit() 58 | ) 59 | return mean, calculator 60 | 61 | 62 | @pytest.mark.parametrize("calculator", [asy_calc, freq_calc, asy_calc_old]) 63 | @pytest.mark.parametrize("nbins", [None, 47, 300], ids=lambda x: f"nbins={x}") 64 | def test_with_gauss_exp_example(create_loss, calculator, nbins): 65 | if calculator is asy_calc_old and nbins is not None: 66 | pytest.skip("Not implemented for old calculator") 67 | mean, calculator = calculator(create_loss, nbins=nbins) 68 | scan_values = np.linspace(1.15, 1.26, 50) 69 | poinull = POIarray(mean, scan_values) 70 | ci = ConfidenceInterval(calculator, poinull) 71 | interval = ci.interval() 72 | assert interval["lower"] == pytest.approx(1.1810371356602791, rel=0.1) 73 | assert interval["upper"] == pytest.approx(1.2156701172321935, rel=0.1) 74 | with pytest.raises(POIRangeError): 75 | poinull = POIarray( 76 | mean, scan_values[(scan_values >= 1.2) & (scan_values <= 1.205)] 77 | ) 78 | 79 | ci = ConfidenceInterval(calculator, poinull) 80 | ci.interval() 81 | with pytest.raises(POIRangeError): 82 | 
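# the scan now covers only values at or above the best-fit mean (~1.2), so the lower limit cannot be bracketed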
poinull = POIarray(mean, scan_values[scan_values >= 1.2]) 83 | ci = ConfidenceInterval(calculator, poinull) 84 | ci.interval() 85 | with pytest.raises(POIRangeError): 86 | poinull = POIarray(mean, scan_values[scan_values <= 1.205]) 87 | ci = ConfidenceInterval(calculator, poinull) 88 | ci.interval() 89 | 90 | 91 | def test_with_gauss_fluctuations(): 92 | x_true = -2.0 93 | 94 | minimizer = Minuit() 95 | bounds = (-10, 10) 96 | obs = zfit.Space("x", limits=bounds) 97 | 98 | mean = zfit.Parameter("mean", 0) 99 | sigma = zfit.Parameter("sigma", 1.0) 100 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) 101 | 102 | npzfile = f"{notebooks_dir}/toys/FC_toys_{x_true}.npz" 103 | data = zfit.data.Data.from_numpy(obs=obs, array=np.load(npzfile)["x"]) 104 | 105 | nll = UnbinnedNLL(model=model, data=data) 106 | 107 | minimum = minimizer.minimize(loss=nll) 108 | minimum.hesse() 109 | 110 | toys_fname = f"{notebooks_dir}/toys/FC_toys_{x_true}.yml" 111 | calculator = FrequentistCalculator.from_yaml(toys_fname, minimum, minimizer) 112 | keys = np.unique([k[0].value for k in calculator.keys()]) 113 | keys.sort() 114 | poinull = POIarray(mean, keys) 115 | 116 | ci = ConfidenceInterval(calculator, poinull, qtilde=False) 117 | with pytest.warns(UserWarning): 118 | ci.interval(alpha=0.05, printlevel=0) 119 | 120 | ci = ConfidenceInterval(calculator, poinull, qtilde=True) 121 | ci.interval(alpha=0.05, printlevel=0) 122 | 123 | 124 | @pytest.mark.parametrize("n", [0.5]) 125 | @pytest.mark.parametrize("min_x", [0, -10]) 126 | def test_with_gauss_qtilde(n, min_x): 127 | sigma_x = 0.032 128 | 129 | minimizer = Minuit() 130 | bounds = (-10, 10) 131 | obs = zfit.Space("x", limits=bounds) 132 | 133 | mean = zfit.Parameter("mean", n * sigma_x) 134 | sigma = zfit.Parameter("sigma", 1.0) 135 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) 136 | 137 | data = model.sample(n=1000) 138 | 139 | nll = UnbinnedNLL(model=model, data=data) 140 | 141 | minimum = minimizer.minimize(loss=nll) 142 | minimum.hesse() 143 | 144 | x = minimum.params[mean]["value"] 145 | x_err = minimum.params[mean]["hesse"]["error"] 146 | 147 | x_min = x - x_err * 3 148 | x_max = x + x_err * 3 149 | 150 | x_min = max([x_min, min_x]) 151 | 152 | poinull = POIarray(mean, np.linspace(x_min, x_max, 50)) 153 | calculator = AsymptoticCalculator(nll, minimizer) 154 | 155 | ci = ConfidenceInterval(calculator, poinull, qtilde=True) 156 | ci.interval(alpha=0.05, printlevel=1) 157 | -------------------------------------------------------------------------------- /tests/hypotests/test_discovery.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from tests.conftest import create_loss_func 7 | 8 | zfit = pytest.importorskip("zfit") 9 | from zfit.loss import UnbinnedNLL 10 | from zfit.minimize import Minuit 11 | 12 | import hepstats 13 | from hepstats.hypotests import Discovery 14 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator 15 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator 16 | from hepstats.hypotests.parameters import POI 17 | 18 | notebooks_dir = f"{os.path.dirname(hepstats.__file__)}/../../notebooks/hypotests" 19 | 20 | 21 | @pytest.mark.parametrize("nbins", [None, 30], ids=["unbinned", "binned"]) 22 | def test_constructor(create_loss, nbins): 23 | with pytest.raises(TypeError): 24 | Discovery() 25 | 26 | loss, (Nsig, Nbkg, _, _) = create_loss(nbins=nbins, npeak=25) 27 | calculator 
= BaseCalculator(loss, Minuit()) 28 | 29 | poi_1 = POI(Nsig, 0.0) 30 | poi_2 = POI(Nsig, 2.0) 31 | 32 | with pytest.raises(TypeError): 33 | Discovery(calculator) 34 | 35 | with pytest.raises(TypeError): 36 | Discovery(calculator, [poi_1], poi_2) 37 | 38 | with pytest.raises(TypeError): 39 | Discovery(calculator, [poi_1], [poi_2]) 40 | 41 | 42 | class AsymptoticCalculatorOld(AsymptoticCalculator): 43 | UNBINNED_TO_BINNED_LOSS = {} 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "nbins", [None, 76, 253], ids=lambda x: "unbinned" if x is None else f"nbin={x}" 48 | ) 49 | @pytest.mark.parametrize("Calculator", [AsymptoticCalculator, AsymptoticCalculatorOld]) 50 | def test_with_asymptotic_calculator(create_loss, nbins, Calculator): 51 | if Calculator is AsymptoticCalculatorOld and nbins is not None: 52 | pytest.skip("Old AsymptoticCalculator does not support binned loss") 53 | 54 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=25, nbins=nbins) 55 | mean.floating = False 56 | sigma.floating = False 57 | calculator = Calculator(loss, Minuit()) 58 | 59 | poinull = POI(Nsig, 0) 60 | 61 | discovery_test = Discovery(calculator, poinull) 62 | pnull, significance = discovery_test.result() 63 | 64 | uncertainty = 0.05 65 | if nbins is not None and nbins < 80: 66 | uncertainty *= 4 67 | 68 | # check absolute significance 69 | assert pnull == pytest.approx(0.000757, abs=uncertainty) 70 | assert significance == pytest.approx(3.17, abs=uncertainty) 71 | assert significance >= 3 72 | 73 | 74 | @pytest.mark.parametrize( 75 | "nbins", [None, 95, 153], ids=lambda x: "unbinned" if x is None else f"nbin={x}" 76 | ) 77 | @pytest.mark.parametrize("losscreator", [create_loss_func, 78 | # create_sim_loss_func 79 | ], ids=["simple", 80 | # "sim" 81 | ]) 82 | def test_with_frequentist_calculator(losscreator, nbins): 83 | loss, (Nsig, Nbkg, mean, sigma) = losscreator(npeak=25, nbins=nbins) 84 | mean.floating = False 85 | sigma.floating = False 86 | calculator = FrequentistCalculator.from_yaml( 87 | f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml", loss, Minuit() 88 | ) 89 | # calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=500, ntoysalt=500) 90 | 91 | poinull = POI(Nsig, 0) 92 | 93 | discovery_test = Discovery(calculator, poinull) 94 | pnull, significance = discovery_test.result() 95 | 96 | abserr = 0.1 97 | if nbins is not None and nbins < 120: 98 | abserr *= 4 99 | abserr_pnull = 0.0005 100 | if nbins is not None and nbins < 120: 101 | abserr_pnull *= 4 102 | assert pnull == pytest.approx(0.0004, rel=0.05, abs=abserr_pnull) 103 | assert significance == pytest.approx(3.3427947805048592, rel=0.05, abs=abserr) 104 | assert significance >= 3 105 | 106 | 107 | def create_loss_counting(): 108 | n = 370 109 | nbkg = 340 110 | 111 | Nsig = zfit.Parameter("Nsig", 0, -100.0, 100) 112 | Nbkg = zfit.Parameter("Nbkg", nbkg, floating=False) 113 | Nobs = zfit.ComposedParameter("Nobs", lambda a, b: a + b, params=[Nsig, Nbkg]) 114 | 115 | obs = zfit.Space("N", limits=(0, 800)) 116 | model = zfit.pdf.Poisson(obs=obs, lamb=Nobs) 117 | 118 | data = zfit.data.Data.from_numpy(obs=obs, array=np.array([n])) 119 | 120 | loss = UnbinnedNLL(model=model, data=data) 121 | 122 | return loss, Nsig 123 | 124 | 125 | def test_counting_with_asymptotic_calculator(): 126 | ( 127 | loss, 128 | Nsig, 129 | ) = create_loss_counting() 130 | calculator = AsymptoticCalculator(loss, Minuit()) 131 | 132 | poinull = POI(Nsig, 0) 133 | 134 | discovery_test = Discovery(calculator, poinull) 135 | pnull, significance = discovery_test.result() 
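# counting experiment: 370 events observed for 340 expected from background, an excess of roughly 30 / sqrt(340) ≈ 1.6 sigma, hence no discovery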
136 | 137 | assert significance < 2 138 | 139 | 140 | def test_counting_with_frequentist_calculator(): 141 | ( 142 | loss, 143 | Nsig, 144 | ) = create_loss_counting() 145 | calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=1000) 146 | 147 | poinull = POI(Nsig, 0) 148 | 149 | discovery_test = Discovery(calculator, poinull) 150 | pnull, significance = discovery_test.result() 151 | 152 | assert significance < 2 153 | 154 | 155 | def test_likelihood_ratio_fmin(): 156 | import numpy as np 157 | import zfit 158 | from zfit.loss import UnbinnedNLL 159 | from zfit.minimize import Minuit 160 | from hepstats.hypotests import Discovery 161 | from hepstats.hypotests.calculators import (AsymptoticCalculator) 162 | from hepstats.hypotests.parameters import POI 163 | 164 | Nsig = zfit.Parameter("Nsig", 40, -100., 100) 165 | Nbkg = zfit.Parameter("Nbkg", 340, 0, 500) 166 | Nobs = zfit.ComposedParameter("Nobs", lambda a, b: a + b, params=[Nsig, Nbkg]) 167 | 168 | 169 | 170 | obs = zfit.Space('N', limits=(0, 800)) 171 | model = zfit.pdf.Poisson(obs=obs, lamb=Nobs) 172 | 173 | n = 370 174 | nbkg = 340 175 | 176 | data = zfit.data.Data.from_numpy(obs=obs, array=np.array([n])) 177 | Nbkg.set_value(nbkg) 178 | Nbkg.floating = False 179 | 180 | nll = UnbinnedNLL(model=model, data=data) 181 | minimizer = Minuit(verbosity=0) 182 | minimum = minimizer.minimize(loss=nll) 183 | 184 | calculator = AsymptoticCalculator(nll, minimizer) 185 | calculator.bestfit = minimum 186 | 187 | discovery_test = Discovery(calculator, POI(Nsig, 0)) 188 | pnull, significance = discovery_test.result() 189 | assert pytest.approx(pnull, abs=0.01) == 0.05 190 | assert pytest.approx(significance, abs=0.1) == 1.6 191 | -------------------------------------------------------------------------------- /tests/hypotests/test_parameters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pytest 3 | import numpy as np 4 | zfit = pytest.importorskip("zfit") 5 | 6 | from hepstats.hypotests.parameters import POI, POIarray 7 | 8 | 9 | def test_pois(): 10 | mean = zfit.Parameter("mu", 1.2, 0.1, 2) 11 | 12 | p0 = POI(mean, 0) 13 | p1 = POI(mean, 1.0) 14 | values = np.linspace(0.0, 1.0, 10) 15 | pn = POIarray(mean, values) 16 | pnc = POIarray(mean, values) 17 | 18 | for cls in [POI, POIarray]: 19 | with pytest.raises(ValueError): 20 | cls("mean", 0) 21 | with pytest.raises(TypeError): 22 | cls(mean) 23 | 24 | with pytest.raises(TypeError): 25 | POI(mean, values) 26 | with pytest.raises(TypeError): 27 | POIarray(mean, 0) 28 | 29 | repr(p0) 30 | repr(pn) 31 | 32 | assert p0.value == 0 33 | assert p0.name == mean.name 34 | assert p0 != p1 35 | 36 | assert all(pn.values == values) 37 | assert pn.name == mean.name 38 | assert len(pn) == len(values) 39 | iter(pn) 40 | assert pn == pnc 41 | assert hash(pn) == hash(pnc) 42 | 43 | assert pn != p0 44 | assert pn != p1 45 | 46 | assert pn[0] == p0 47 | assert pn[1] != p0 48 | assert pn[-1] == p1 49 | 50 | pn1 = pn.append(12) 51 | assert pn1.values[-1] == 12 52 | assert all(pn.values == values) 53 | assert pn1 != pn 54 | pn2 = pn.append([15, 20, 30]) 55 | assert pn2.values[-1] == 30 56 | assert pn2.values[-2] == 20 57 | assert pn2.values[-3] == 15 58 | assert pn2 != pn 59 | 60 | {p0: "p0", p1: "p1", pn: "pn"} 61 | -------------------------------------------------------------------------------- /tests/hypotests/test_toysutils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import 
numpy as np 3 | zfit = pytest.importorskip("zfit") 4 | import os 5 | from zfit.loss import ExtendedUnbinnedNLL, UnbinnedNLL 6 | from zfit.minimize import Minuit 7 | 8 | import hepstats 9 | from hepstats.hypotests.parameters import POI, POIarray 10 | from hepstats.hypotests.exceptions import ParameterNotFound 11 | from hepstats.hypotests.toyutils import ToyResult, ToysManager 12 | from hepstats.utils.fit.api_check import is_valid_loss, is_valid_data 13 | 14 | pwd = os.path.dirname(__file__) 15 | notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests" 16 | 17 | 18 | def create_loss(): 19 | bounds = (0.1, 3.0) 20 | obs = zfit.Space("x", limits=bounds) 21 | 22 | # Data and signal 23 | np.random.seed(0) 24 | tau = -2.0 25 | beta = -1 / tau 26 | bkg = np.random.exponential(beta, 300) 27 | peak = np.random.normal(1.2, 0.1, 25) 28 | data = np.concatenate((bkg, peak)) 29 | data = data[(data > bounds[0]) & (data < bounds[1])] 30 | N = len(data) 31 | data = zfit.data.Data.from_numpy(obs=obs, array=data) 32 | 33 | lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 34 | Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N) 35 | Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 1.1) 36 | 37 | signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) 38 | background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 39 | tot_model = zfit.pdf.SumPDF([signal, background]) 40 | 41 | loss = ExtendedUnbinnedNLL(model=tot_model, data=data) 42 | 43 | poigen = POI(Nsig, 0.0) 44 | poieval = POIarray(Nsig, [0.0]) 45 | 46 | return loss, (Nsig, poigen, poieval) 47 | 48 | 49 | def create_loss_1(): 50 | obs = zfit.Space("x", limits=(0.1, 2.0)) 51 | data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000)) 52 | mean = zfit.Parameter("mu", 1.2) 53 | sigma = zfit.Parameter("sigma", 0.1) 54 | model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma) 55 | loss = UnbinnedNLL(model=model, data=data) 56 | 57 | return loss 58 | 59 | 60 | def test_constructors(): 61 | loss, (Nsig, poigen, poieval) = create_loss() 62 | ToyResult(poigen, poieval) 63 | 64 | with pytest.raises(TypeError): 65 | ToyResult(poigen, "poieval") 66 | with pytest.raises(TypeError): 67 | ToyResult(poieval, poieval) 68 | 69 | ToysManager(loss, Minuit()) 70 | 71 | 72 | def test_toyresult_attributes(): 73 | _, (_, poigen, poieval) = create_loss() 74 | tr = ToyResult(poigen, poieval) 75 | 76 | assert tr.ntoys == 0 77 | assert tr.poigen == poigen 78 | assert tr.poieval == poieval 79 | 80 | bf = np.array([0.5, 0.1, 0.2]) 81 | nll_bf = np.array([-1000, -1001, -1002]) 82 | nlls = {poieval[0]: np.array([-1001, -1002, -1003])} 83 | 84 | tr.add_entries(bestfit=bf, nll_bestfit=nll_bf, nlls=nlls) 85 | assert tr.ntoys == 3 86 | 87 | with pytest.raises(ValueError): 88 | tr.add_entries(bestfit=bf, nll_bestfit=nll_bf, nlls={}) 89 | 90 | tr.add_entries(bestfit=bf, nll_bestfit=nll_bf, nlls=nlls) 91 | assert tr.ntoys == 6 92 | 93 | tr.to_dict() 94 | 95 | 96 | def test_toymanager_attributes(): 97 | loss, (Nsig, poigen, poieval) = create_loss() 98 | 99 | tm = ToysManager.from_yaml( 100 | f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml", loss, Minuit() 101 | ) 102 | 103 | with pytest.raises(ParameterNotFound): 104 | ToysManager.from_yaml( 105 | f"{notebooks_dir}/toys/discovery_freq_zfit_toys.yml", 106 | create_loss_1(), 107 | Minuit(), 108 | ) 109 | 110 | tr = list(tm.values())[0] 111 | assert isinstance(tr, ToyResult) 112 | assert list(tm.keys())[0] == (poigen, poigen) 113 | assert (poigen, poieval) in 
tm.keys() 114 | 115 | assert tm.get_toyresult(poigen, poieval) == tr 116 | tr1 = ToyResult(poigen, poieval.append(1)) 117 | tm.add_toyresult(tr1) 118 | with pytest.raises(TypeError): 119 | tm.add_toyresult("tr1") 120 | assert (tr1.poigen, tr1.poieval) in tm.keys() 121 | 122 | tm.to_yaml(f"{pwd}/test_toyutils.yml") 123 | tm.to_yaml(f"{pwd}/test_toyutils.yml") 124 | tmc = ToysManager.from_yaml(f"{pwd}/test_toyutils.yml", loss, Minuit()) 125 | assert ( 126 | tm.get_toyresult(poigen, poieval).ntoys 127 | == tmc.get_toyresult(poigen, poieval).ntoys 128 | ) 129 | 130 | samplers = tm.sampler() 131 | assert all(is_valid_data(s) for s in samplers) 132 | loss = tm.toys_loss(poigen.name) 133 | assert is_valid_loss(loss) 134 | 135 | os.remove(f"{pwd}/test_toyutils.yml") 136 | -------------------------------------------------------------------------------- /tests/hypotests/test_upperlimit.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import os 4 | zfit = pytest.importorskip("zfit") 5 | from zfit.minimize import Minuit 6 | 7 | import hepstats 8 | from hepstats.hypotests.calculators.basecalculator import BaseCalculator 9 | from hepstats.hypotests.calculators import AsymptoticCalculator, FrequentistCalculator 10 | from hepstats.hypotests import UpperLimit 11 | from hepstats.hypotests.parameters import POI, POIarray 12 | from hepstats.hypotests.exceptions import POIRangeError 13 | 14 | notebooks_dir = os.path.dirname(hepstats.__file__) + "/../../notebooks/hypotests" 15 | 16 | 17 | # def create_loss(): 18 | # 19 | # bounds = (0.1, 3.0) 20 | # obs = zfit.Space("x", limits=bounds) 21 | # 22 | # # Data and signal 23 | # np.random.seed(0) 24 | # tau = -2.0 25 | # beta = -1 / tau 26 | # bkg = np.random.exponential(beta, 300) 27 | # peak = np.random.normal(1.2, 0.1, 10) 28 | # data = np.concatenate((bkg, peak)) 29 | # data = data[(data > bounds[0]) & (data < bounds[1])] 30 | # N = len(data) 31 | # data = zfit.data.Data.from_numpy(obs=obs, array=data) 32 | # 33 | # lambda_ = zfit.Parameter("lambda", -2.0, -10.0, -0.1) 34 | # Nsig = zfit.Parameter("Nsig", 20.0, -20.0, N) 35 | # Nbkg = zfit.Parameter("Nbkg", N, 0.0, N * 2) 36 | # 37 | # signal = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1).create_extended(Nsig) 38 | # background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 39 | # tot_model = zfit.pdf.SumPDF([signal, background]) 40 | # 41 | # loss = ExtendedUnbinnedNLL(model=tot_model, data=data) 42 | # 43 | # return loss, (Nsig, Nbkg) 44 | 45 | 46 | def test_constructor(create_loss): 47 | with pytest.raises(TypeError): 48 | UpperLimit() 49 | 50 | loss, (Nsig, Nbkg, _, _) = create_loss(npeak=10) 51 | calculator = BaseCalculator(loss, Minuit()) 52 | 53 | poi_1 = POI(Nsig, 0.0) 54 | poi_2 = POI(Nsig, 2.0) 55 | 56 | with pytest.raises(TypeError): 57 | UpperLimit(calculator) 58 | 59 | with pytest.raises(TypeError): 60 | UpperLimit(calculator, poi_1) 61 | 62 | with pytest.raises(TypeError): 63 | UpperLimit(calculator, [poi_1], poi_2) 64 | 65 | 66 | class AsymptoticCalculatorOld(AsymptoticCalculator): 67 | UNBINNED_TO_BINNED_LOSS = {} 68 | 69 | 70 | def asy_calc(create_loss, nbins): 71 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins) 72 | mean.floating = False 73 | sigma.floating = False 74 | return Nsig, AsymptoticCalculator(loss, Minuit()) 75 | 76 | 77 | def asy_calc_old(create_loss, nbins): 78 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins) 79 | mean.floating = False 
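# sigma is fixed as well: with both shape parameters frozen, only the yields float in the limit scan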
80 | sigma.floating = False 81 | return Nsig, AsymptoticCalculatorOld(loss, Minuit()) 82 | 83 | 84 | def freq_calc(create_loss, nbins): 85 | loss, (Nsig, Nbkg, mean, sigma) = create_loss(npeak=10, nbins=nbins) 86 | mean.floating = False 87 | sigma.floating = False 88 | calculator = FrequentistCalculator.from_yaml( 89 | f"{notebooks_dir}/toys/upperlimit_freq_zfit_toys.yml", loss, Minuit() 90 | ) 91 | # calculator = FrequentistCalculator(loss, Minuit(), ntoysnull=10000, ntoysalt=10000) 92 | return Nsig, calculator 93 | 94 | 95 | @pytest.mark.parametrize( 96 | "nbins", [None, 73, 211], ids=lambda x: "unbinned" if x is None else f"nbins={x}" 97 | ) 98 | @pytest.mark.parametrize("calculator", [asy_calc, freq_calc, asy_calc_old]) 99 | def test_with_gauss_exp_example(create_loss, calculator, nbins): 100 | if calculator is asy_calc_old and nbins is not None: 101 | pytest.skip("Old asymptotic calculator does not support binned loss") 102 | Nsig, calculator = calculator(create_loss, nbins) 103 | 104 | poinull = POIarray(Nsig, np.linspace(0.0, 25, 15)) 105 | poialt = POI(Nsig, 0) 106 | 107 | ul = UpperLimit(calculator, poinull, poialt) 108 | ul_qtilde = UpperLimit(calculator, poinull, poialt, qtilde=True) 109 | limits = ul.upperlimit(alpha=0.05, CLs=True) 110 | 111 | assert limits["observed"] == pytest.approx(16.7, rel=0.15) 112 | assert limits["expected"] == pytest.approx(11.5, rel=0.15) 113 | assert limits["expected_p1"] == pytest.approx(16.729552184042365, rel=0.1) 114 | assert limits["expected_p2"] == pytest.approx(23.718823517614066, rel=0.15) 115 | assert limits["expected_m1"] == pytest.approx(7.977175378979202, rel=0.1) 116 | assert limits["expected_m2"] == pytest.approx(5.805298972983304, rel=0.15) 117 | 118 | ul.upperlimit(alpha=0.05, CLs=False) 119 | ul_qtilde.upperlimit(alpha=0.05, CLs=True) 120 | 121 | # test error when scan range is too small 122 | 123 | with pytest.raises(POIRangeError): 124 | poinull = POIarray(Nsig, poinull.values[:5]) 125 | ul = UpperLimit(calculator, poinull, poialt) 126 | ul.upperlimit(alpha=0.05, CLs=True) 127 | -------------------------------------------------------------------------------- /tests/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license, see LICENSE. 
2 | -------------------------------------------------------------------------------- /tests/modeling/data/answers_bayesian_blocks.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-hep/hepstats/8f5d2d5553d5a5ba2c90d6119fa5481ea4a9cbf5/tests/modeling/data/answers_bayesian_blocks.npz -------------------------------------------------------------------------------- /tests/modeling/test_bayesianblocks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | 5 | import hepstats 6 | from hepstats.modeling import bayesian_blocks 7 | 8 | answer_dir = Path(__file__).parent / "data" 9 | 10 | 11 | def test_bayesian_blocks(cmdopt, data_gen): 12 | be1 = bayesian_blocks(data_gen[0], p0=0.05) 13 | be2 = bayesian_blocks(data_gen[0], gamma=0.1) 14 | be3 = bayesian_blocks(data_gen[0], weights=data_gen[2]) 15 | 16 | if cmdopt == "generate": 17 | with open(answer_dir / "answers_bayesian_blocks.npz", "wb") as f: 18 | np.savez(f, be1=be1, be2=be2, be3=be3) 19 | elif cmdopt == "test": 20 | answers = np.load(answer_dir / "answers_bayesian_blocks.npz") 21 | np.testing.assert_array_equal(be1, answers["be1"]) 22 | np.testing.assert_array_equal(be2, answers["be2"]) 23 | np.testing.assert_array_equal(be3, answers["be3"]) 24 | # assert(np.all(output[1] == answers['be'])) 25 | -------------------------------------------------------------------------------- /tests/splots/test_splots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from scipy.stats import ks_2samp 4 | 5 | zfit = pytest.importorskip("zfit") 6 | from zfit.loss import ExtendedUnbinnedNLL 7 | from zfit.minimize import Minuit 8 | 9 | from hepstats.splot import compute_sweights 10 | from hepstats.splot.sweights import is_sum_of_extended_pdfs 11 | from hepstats.utils.fit import get_value 12 | from hepstats.splot.exceptions import ModelNotFittedToData 13 | from hepstats.splot.warnings import AboveToleranceWarning 14 | 15 | 16 | def get_data_and_loss(): 17 | bounds = (0.0, 3.0) 18 | obs = zfit.Space("x", limits=bounds) 19 | nbkg = 10000 20 | nsig = 5000 21 | 22 | # Data and signal 23 | def get_sel(arr): 24 | return (arr > bounds[0]) & (arr < bounds[1]) 25 | 26 | np.random.seed(0) 27 | 28 | N = nsig + nbkg 29 | mean = zfit.Parameter("mean", 1.2, 0.5, 2.0) 30 | sigma = zfit.Parameter("sigma", 0.1, 0.02, 0.3) 31 | lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0) 32 | Nsig = zfit.Parameter("Nsig", nsig, 0.0, N) 33 | Nbkg = zfit.Parameter("Nbkg", nbkg, 0.0, N) 34 | 35 | signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig) 36 | background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_).create_extended(Nbkg) 37 | tot_model = zfit.pdf.SumPDF([signal, background]) 38 | 39 | bkg = background.sample(nbkg, params={lambda_: -2.1}) 40 | 41 | peak = signal.sample(nsig, params={sigma: 0.2}) 42 | 43 | mass = np.concatenate((bkg["x"], peak["x"])) 44 | 45 | bck_p = np.random.normal(3, 1, size=nbkg) 46 | sig_p = np.random.normal(5, 1, size=nsig) 47 | p = np.concatenate([bck_p, sig_p]) 48 | 49 | data = zfit.data.concat([bkg, peak], axis="index") 50 | 51 | loss = ExtendedUnbinnedNLL(model=tot_model, data=data) 52 | 53 | return mass, p, loss, Nsig, Nbkg, sig_p, bck_p 54 | 55 | 56 | def test_sweights_constructor(): 57 | mass, p, loss, Nsig, Nbkg, sig_p, bkg_p = get_data_and_loss() 58 | 59 | with 
pytest.raises(ValueError): 60 | compute_sweights("model", mass) 61 | 62 | with pytest.raises(ValueError): 63 | compute_sweights(loss.model[0].get_models()[0], mass) 64 | 65 | 66 | def test_sweights(): 67 | minimizer = Minuit() 68 | mass, p, loss, Nsig, Nbkg, sig_p, bkg_p = get_data_and_loss() 69 | 70 | with pytest.raises(ModelNotFittedToData): 71 | compute_sweights(loss.model[0], mass) 72 | 73 | result = minimizer.minimize(loss) 74 | assert result.valid 75 | 76 | model = loss.model[0] 77 | assert is_sum_of_extended_pdfs(model) 78 | 79 | yields = [Nsig, Nbkg] 80 | 81 | sweights = compute_sweights(loss.model[0], mass) 82 | 83 | assert np.allclose( 84 | [np.sum(sweights[y]) / get_value(y.value()) for y in yields], 1.0 85 | ) 86 | 87 | nbins = 30 88 | hist_conf = dict(bins=nbins, range=[0, 10]) 89 | 90 | hist_sig_true_p, _ = np.histogram(sig_p, **hist_conf) 91 | sel = hist_sig_true_p != 0 92 | hist_sig_true_p = hist_sig_true_p[sel] 93 | hist_sig_sweights_p = np.histogram(p, weights=sweights[Nsig], **hist_conf)[0][sel] 94 | 95 | assert ks_2samp(hist_sig_sweights_p, hist_sig_true_p)[-1] > 0.001 96 | 97 | hist_bkg_true_p, _ = np.histogram(bkg_p, **hist_conf) 98 | sel = hist_bkg_true_p != 0 99 | hist_bkg_true_p = hist_bkg_true_p[sel] 100 | hist_bkg_sweights_p = np.histogram(p, weights=sweights[Nbkg], **hist_conf)[0][sel] 101 | 102 | assert ks_2samp(hist_bkg_sweights_p, hist_bkg_true_p)[-1] > 0.001 103 | 104 | with pytest.warns(AboveToleranceWarning): 105 | compute_sweights( 106 | loss.model[0], np.concatenate([mass, np.random.normal(0.8, 0.1, 100)]) 107 | ) 108 | 109 | with pytest.raises(ModelNotFittedToData): 110 | compute_sweights( 111 | loss.model[0], np.concatenate([mass, np.random.normal(0.8, 0.1, 1000)]) 112 | ) 113 | --------------------------------------------------------------------------------