├── _static └── zfit_workflow.png ├── _website ├── images │ ├── P5p_Value.png │ ├── zfit-favicon.png │ ├── hepstats-pvalue.png │ ├── zfit_workflow_v2.png │ ├── zfit-logo_400x168.png │ └── logo_graph_tensorflow.png ├── _static │ └── zfit_workflow.png ├── tutorials │ ├── _static │ │ └── zfit_workflow.png │ ├── components │ │ ├── 72 - Custom Loss.ipynb │ │ ├── 32 - Templated fits.ipynb │ │ ├── 31 - Fitting a histogram with binned losses.ipynb │ │ ├── README.rst │ │ ├── 20 - Composite Models.ipynb │ │ ├── 80 - Toy Study.ipynb │ │ ├── 60 - Custom PDF.ipynb │ │ ├── 77 - Custom Minimizer.ipynb │ │ ├── 62 - Multidim Custom PDF.ipynb │ │ ├── 71 - Simple Loss.ipynb │ │ ├── 30 - Binned models.ipynb │ │ ├── 40 - Bayesian Inference.ipynb │ │ ├── 90 - Serialization basics.ipynb │ │ ├── 61 - Custom Binned PDF.ipynb │ │ ├── 05 - Exploring the FitResult.ipynb │ │ └── 33 - Binned fits.ipynb │ ├── introduction │ │ ├── README.rst │ │ ├── utils.py │ │ ├── Quickstart.ipynb │ │ └── upgrade_guide_020.ipynb │ ├── guides │ │ ├── README.rst │ │ └── utils.py │ └── TensorFlow │ │ ├── README.rst │ │ └── 01 - TensorFlow-Probability.ipynb ├── Makefile ├── make.bat ├── index.rst └── conf.py ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── requirements.txt ├── environment.yml ├── .readthedocs.yaml ├── README.rst ├── LICENSE ├── .gitignore └── .pre-commit-config.yaml /_static/zfit_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_static/zfit_workflow.png -------------------------------------------------------------------------------- /_website/images/P5p_Value.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/images/P5p_Value.png -------------------------------------------------------------------------------- /_website/_static/zfit_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/_static/zfit_workflow.png -------------------------------------------------------------------------------- /_website/images/zfit-favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/images/zfit-favicon.png -------------------------------------------------------------------------------- /_website/images/hepstats-pvalue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/images/hepstats-pvalue.png -------------------------------------------------------------------------------- /_website/images/zfit_workflow_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/images/zfit_workflow_v2.png -------------------------------------------------------------------------------- /_website/images/zfit-logo_400x168.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/images/zfit-logo_400x168.png -------------------------------------------------------------------------------- /_website/images/logo_graph_tensorflow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/images/logo_graph_tensorflow.png -------------------------------------------------------------------------------- /_website/tutorials/_static/zfit_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zfit/zfit-tutorials/HEAD/_website/tutorials/_static/zfit_workflow.png -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | # Check for updates to GitHub Actions every month 8 | interval: "monthly" 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hepstats 2 | hepunits 3 | hist 4 | jupyter-book # docs 5 | matplotlib 6 | mplhep 7 | myst-nb 8 | nbval 9 | numba 10 | numpy 11 | pandas 12 | particle 13 | scikit-hep-testdata 14 | seaborn 15 | sphinx>5 16 | sphinx-book-theme 17 | sphinx-copybutton 18 | sphinx-panels # docs 19 | sphinx-thebe 20 | sphinx-togglebutton 21 | sysrsync # docs 22 | zfit-physics>=0.6 23 | zfit[all]>=0.28 24 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: zfit-tutorials 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.12 6 | - hepunits 7 | - matplotlib 8 | - numpy 9 | - numba 10 | - pandas 11 | - particle 12 | - pip 13 | - uv 14 | - scipy 15 | - xgboost 16 | - mplhep 17 | - hist 18 | - scikit-hep-testdata 19 | - hepstats 20 | - zfit >=0.26 21 | - zfit-physics 22 | - pip: 23 | - formulate 24 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | build: 8 | os: ubuntu-lts-latest 9 | tools: 10 | python: "3.12" 11 | 12 | # Build documentation in the docs/ directory with Sphinx 13 | sphinx: 14 | configuration: _website/conf.py 15 | 16 | formats: [ ] 17 | 18 | python: 19 | install: 20 | - requirements: requirements.txt 21 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | `Interactive zfit tutorials `_ 3 | ================================================================================ 4 | 5 | `Click here to view, interactively execute or download the tutorials `_ 6 | 7 | 8 | Tutorials for the zfit project 9 | ---------------------------------- 10 | 11 | If you have a good use case or a notebook that demonstrates something of zfit, please let us know, 12 | contributions are very welcome! 13 | 14 | The whole repository can also be launched in binder here (instead of over the website) 15 | 16 | .. 
image:: https://mybinder.org/badge.svg
17 |     :target: https://mybinder.org/v2/gh/zfit/zfit-tutorials/main
18 | 
--------------------------------------------------------------------------------
/_website/tutorials/components/72 - Custom Loss.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Custom Loss\n",
8 |     "\n",
9 |     "\n",
10 |     "Building a custom loss function in zfit."
11 |    ]
12 |   }
13 |  ],
14 |  "metadata": {
15 |   "kernelspec": {
16 |    "display_name": "Python 3 (ipykernel)",
17 |    "language": "python",
18 |    "name": "python3"
19 |   },
20 |   "language_info": {
21 |    "codemirror_mode": {
22 |     "name": "ipython",
23 |     "version": 3
24 |    },
25 |    "file_extension": ".py",
26 |    "mimetype": "text/x-python",
27 |    "name": "python",
28 |    "nbconvert_exporter": "python",
29 |    "pygments_lexer": "ipython3",
30 |    "version": "3.10.4"
31 |   }
32 |  },
33 |  "nbformat": 4,
34 |  "nbformat_minor": 1
35 | }
36 | 
--------------------------------------------------------------------------------
/_website/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | CURRENT_DIR = $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
11 | export ZFIT_TUTORIALS_ROOT = $(CURRENT_DIR)..
12 | # Available for all targets; the variable name must match the one read in conf.py.
13 | # Put it first so that "make" without argument is like "make help".
14 | help:
15 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
16 | 
17 | .PHONY: help Makefile
18 | 
19 | # Catch-all target: route all unknown targets to Sphinx using the new
20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
21 | %: Makefile
22 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
23 | 
--------------------------------------------------------------------------------
/_website/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 
--------------------------------------------------------------------------------
/_website/tutorials/introduction/README.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 | 
4 | Welcome to zfit!
5 | 
6 | 
7 | Quickstart with zfit
8 | +++++++++++++++++++++
9 | 
10 | A short introduction to zfit with a minimal example.
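For orientation, here is a minimal sketch of such a fit, using only API calls that appear in the notebooks of this repository (this block is an illustration added for this overview, not one of the repository files):

.. code-block:: python

    import numpy as np
    import zfit

    # observable space, model and toy data
    obs = zfit.Space('x', -5, 5)
    mu = zfit.Parameter('mu', 0.2, -1, 1)
    sigma = zfit.Parameter('sigma', 1.3, 0.1, 10)
    model = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma)
    data = zfit.Data(np.random.normal(0., 1., size=10_000), obs=obs)

    # build the negative log-likelihood and minimize it
    nll = zfit.loss.UnbinnedNLL(model, data)
    result = zfit.minimize.Minuit().minimize(nll)
    result.hesse()  # compute parameter uncertainties
    print(result)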
11 | 
12 | .. toctree::
13 |     :maxdepth: 2
14 | 
15 |     Quickstart.ipynb
16 | 
17 | Complete Tutorial
18 | +++++++++++++++++
19 | 
20 | A comprehensive, modern introduction that explains the elementary aspects of zfit.
21 | 
22 | .. toctree::
23 |     :maxdepth: 2
24 | 
25 |     Introduction.ipynb
26 | 
27 | Extended Tutorial
28 | +++++++++++++++++
29 | 
30 | A longer, more detailed introduction covering additional topics.
31 | 
32 | .. toctree::
33 |     :maxdepth: 2
34 | 
35 |     Introduction_long.ipynb
36 | 
37 | What's new
38 | +++++++++++
39 | 
40 | To learn about the newest features in version 0.20+, see the changelog and the guide.
41 | 
42 | .. toctree::
43 |     :maxdepth: 2
44 | 
45 |     upgrade_guide_020.ipynb
46 | 
--------------------------------------------------------------------------------
/_website/tutorials/guides/README.rst:
--------------------------------------------------------------------------------
1 | Guides
2 | ============
3 | 
4 | These guides, or applied examples, are larger tutorials
5 | that include several advanced aspects of zfit.
6 | 
7 | 
8 | Custom model guide
9 | +++++++++++++++++++
10 | 
11 | From building a simple custom model to multidimensional models of an angular analysis and functors that depend
12 | on other PDFs, with a full explanation of how models work internally.
13 | 
14 | .. toctree::
15 |     :maxdepth: 2
16 | 
17 |     custom_models.ipynb
18 | 
19 | 
20 | Constraints, simultaneous fits, discovery and sPlot
21 | ++++++++++++++++++++++++++++++++++++++++++++++++++++
22 | 
23 | Adding additional knowledge to fits can be done with constraints or through simultaneous fits. Furthermore,
24 | how to make a discovery and use the sPlot technique in conjunction with hepstats is explained.
25 | 
26 | 
27 | .. toctree::
28 |     :maxdepth: 2
29 | 
30 |     constraints_simultaneous_fit_discovery_splot.ipynb
31 | 
--------------------------------------------------------------------------------
/_website/tutorials/components/32 - Templated fits.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {
6 |     "pycharm": {
7 |      "name": "#%%\n"
8 |     },
9 |     "tags": []
10 |    },
11 |    "source": [
12 |     "# Templated fits\n",
13 |     "\n",
14 |     "This tutorial is currently under development. Please check back later for content on templated fitting techniques in zfit."
15 |    ]
16 |   }
17 |  ],
18 |  "metadata": {
19 |   "kernelspec": {
20 |    "display_name": "Python 3 (ipykernel)",
21 |    "language": "python",
22 |    "name": "python3"
23 |   },
24 |   "language_info": {
25 |    "codemirror_mode": {
26 |     "name": "ipython",
27 |     "version": 3
28 |    },
29 |    "file_extension": ".py",
30 |    "mimetype": "text/x-python",
31 |    "name": "python",
32 |    "nbconvert_exporter": "python",
33 |    "pygments_lexer": "ipython3",
34 |    "version": "3.10.4"
35 |   }
36 |  },
37 |  "nbformat": 4,
38 |  "nbformat_minor": 4
39 | }
40 | 
--------------------------------------------------------------------------------
/_website/tutorials/TensorFlow/README.rst:
--------------------------------------------------------------------------------
1 | HPC with TensorFlow
2 | =======================
3 | 
4 | TensorFlow is a high-performance computing library that is mainly
5 | used for Machine Learning on large datasets.
6 | 
7 | It also serves as the backend of zfit, as it allows
8 | computations to be sped up and parallelized through different
9 | techniques.
10 | 
11 | These tutorials cover TensorFlow as a computing engine.
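To make the graph-speedup idea above concrete, here is a small standalone sketch (an illustration added for this overview, not one of the repository files); the timings are machine-dependent:

.. code-block:: python

    import timeit
    import tensorflow as tf

    def payload(x):
        # a chain of elementwise kernels, similar in spirit to a likelihood evaluation
        for _ in range(100):
            x = tf.sin(x) + tf.cos(x)
        return tf.reduce_sum(x)

    payload_graph = tf.function(payload)  # traced once, then executed as a compiled graph

    x = tf.random.uniform(shape=(100_000,), dtype=tf.float64)
    payload_graph(x)  # warm-up call, so the one-time tracing cost is not timed
    print("eager:", timeit.timeit(lambda: payload(x), number=20))
    print("graph:", timeit.timeit(lambda: payload_graph(x), number=20))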
12 | 
13 | HPC in Python
14 | +++++++++++++++++++++++++++++++++++++++++++
15 | 
16 | 
17 | Introduction to TensorFlow from an HPC perspective, with explanations of the graph and a comparison
18 | to other frameworks such as NumPy and Numba.
19 | 
20 | 
21 | .. toctree::
22 |     :maxdepth: 2
23 | 
24 |     HPC_with_TensorFlow.ipynb
25 | 
26 | Statistics with TensorFlow
27 | ++++++++++++++++++++++++++++
28 | 
29 | 
30 | Introduces TensorFlow Probability, a framework for probabilistic reasoning in TensorFlow.
31 | 
32 | .. toctree::
33 |     :maxdepth: 2
34 | 
35 |     01 - TensorFlow-Probability.ipynb
36 | 
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 | 
3 | concurrency:
4 |   group: ${{ github.ref }}
5 |   cancel-in-progress: true
6 | 
7 | on:
8 |   workflow_dispatch:
9 |   push:
10 | 
11 | jobs:
12 |   notebooks:
13 |     runs-on: ${{ matrix.os }}
14 |     timeout-minutes: 150
15 |     strategy:
16 |       max-parallel: 8
17 |       fail-fast: False
18 |       matrix:
19 |         os: [ ubuntu-latest ]
20 |         use-graph: [ 0, 1 ]
21 |         python-version: [ "3.10", "3.13" ]
22 |         include:
23 |           - os: macos-latest
24 |             python-version: "3.13"
25 |             use-graph: 1
26 |           - os: windows-latest
27 |             python-version: "3.13"
28 |             use-graph: 1
29 |     name: Run notebooks, ${{ matrix.os }}, Python ${{ matrix.python-version }}, compiled = ${{ matrix.use-graph }}
30 |     steps:
31 |       - uses: actions/checkout@v5
32 |       - uses: actions/setup-python@v6
33 |         name: Set up Python ${{ matrix.python-version }}
34 |         with:
35 |           python-version: ${{ matrix.python-version }}
36 |       - uses: astral-sh/setup-uv@v7
37 |         with:
38 |           enable-cache: true
39 |       - shell: bash -l {0}
40 |         name: Install dependencies
41 |         run: |
42 |           uv pip install --system -r requirements.txt pytest-xdist nbval git+https://github.com/zfit/zfit
43 |       - name: Run notebooks
44 |         run: |
45 |           export ZFIT_DO_JIT=${{ matrix.use-graph }}
46 |           pytest --nbval-lax --dist loadscope -n auto
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2025, zfit
2 | All rights reserved.
3 | 
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |     * Redistributions of source code must retain the above copyright
7 |       notice, this list of conditions and the following disclaimer.
8 |     * Redistributions in binary form must reproduce the above copyright
9 |       notice, this list of conditions and the following disclaimer in the
10 |       documentation and/or other materials provided with the distribution.
11 |     * Neither the name of the nor the
12 |       names of its contributors may be used to endorse or promote products
13 |       derived from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED.
IN NO EVENT SHALL BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | /_website/_build/ 106 | /_website/.jupiter_cache/ 107 | /.idea/** 108 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: quarterly 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v6.0.0 7 | hooks: 8 | - id: check-added-large-files 9 | - id: check-case-conflict 10 | - id: check-merge-conflict 11 | - id: check-symlinks 12 | - id: check-yaml 13 | - id: check-toml 14 | - id: debug-statements 15 | - id: mixed-line-ending 16 | - id: requirements-txt-fixer 17 | - id: detect-private-key 18 | - id: fix-byte-order-marker 19 | - id: check-ast 20 | - id: end-of-file-fixer 21 | 22 | 23 | - repo: https://github.com/pre-commit/pygrep-hooks 24 | rev: v1.10.0 # Use the ref you want to point at 25 | hooks: 26 | - id: python-use-type-annotations 27 | - id: python-check-mock-methods 28 | - id: python-no-eval 29 | - id: rst-directive-colons 30 | 31 | 32 | - repo: https://github.com/asottile/pyupgrade 33 | rev: v3.20.0 34 | hooks: 35 | - id: pyupgrade 36 | args: [ 
--py310-plus ]
37 | 
38 |   - repo: https://github.com/asottile/setup-cfg-fmt
39 |     rev: v2.8.0
40 |     hooks:
41 |       - id: setup-cfg-fmt
42 | 
43 |   # Notebook formatting
44 |   - repo: https://github.com/nbQA-dev/nbQA
45 |     rev: 1.9.1
46 |     hooks:
47 |       - id: nbqa-isort
48 |         additional_dependencies: [ isort ]
49 | 
50 |       - id: nbqa-pyupgrade
51 |         additional_dependencies: [ pyupgrade ]
52 |         args: [ --py310-plus ]
53 | 
54 |   - repo: https://github.com/sondrelg/pep585-upgrade
55 |     rev: 'v1.0'
56 |     hooks:
57 |       - id: upgrade-type-hints
58 | 
59 |   - repo: https://github.com/janosh/format-ipy-cells
60 |     rev: v0.1.11
61 |     hooks:
62 |       - id: format-ipy-cells
63 | 
64 |   - repo: https://github.com/kynan/nbstripout
65 |     rev: 0.8.1
66 |     hooks:
67 |       - id: nbstripout
68 | 
--------------------------------------------------------------------------------
/_website/index.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | zfit tutorials
3 | ==============
4 | 
5 | A collection of interactive guides and tutorials
6 | for the zfit project.
7 | 
8 | 
9 | 
10 | .. panels::
11 |     :header: text-center
12 |     :img-top-cls: pl-2 pr-2 bw-success
13 | 
14 |     :img-top: images/P5p_Value.png
15 | 
16 |     .. link-button:: tutorials/introduction/README
17 |         :type: ref
18 |         :text: Introduction
19 |         :classes: btn-outline-primary btn-block stretched-link
20 | 
21 | 
22 |     ---
23 |     :img-top-cls: + pt-4
24 |     :img-top: images/zfit_workflow_v2.png
25 |     .. link-button:: tutorials/components/README
26 |         :type: ref
27 |         :text: Components
28 |         :classes: btn-outline-primary btn-block stretched-link
29 | 
30 |     ---
31 |     :img-top: images/hepstats-pvalue.png
32 | 
33 |     .. link-button:: tutorials/guides/README
34 |         :type: ref
35 |         :text: Guides
36 |         :classes: btn-outline-primary btn-block stretched-link
37 | 
38 |     ---
39 |     :img-top-cls: + pt-4
40 |     :img-top: images/logo_graph_tensorflow.png
41 | 
42 |     .. link-button:: tutorials/TensorFlow/README
43 |         :type: ref
44 |         :text: TensorFlow
45 |         :classes: btn-outline-primary btn-block stretched-link
46 | 
47 | 
48 | .. toctree::
49 |     :maxdepth: 2
50 | 
51 |     tutorials/introduction/README
52 |     tutorials/components/README
53 |     tutorials/guides/README
54 |     tutorials/TensorFlow/README
55 | 
56 | 
57 | 
58 | | **Components** focus on a specific component of zfit. They are rather short and function as a lookup reference.
59 | | **Guides** are more extensive notebooks that go through several aspects of zfit and combine it with hepstats.
60 | 
61 | 
62 | They can all be launched individually (rocket button, upper right), but you can also launch all of them in
63 | 
64 | ..
image:: https://mybinder.org/badge.svg 65 | :target: https://mybinder.org/v2/gh/zfit/zfit-tutorials/main 66 | -------------------------------------------------------------------------------- /_website/tutorials/components/31 - Fitting a histogram with binned losses.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# Histograms in zfit\n", 12 | "\n", 13 | "We can fit histograms directly loaded from a root histogram.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "jupyter": { 21 | "outputs_hidden": false 22 | }, 23 | "pycharm": { 24 | "name": "#%%\n" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import mplhep\n", 30 | "import uproot\n", 31 | "import zfit\n", 32 | "from skhep_testdata import data_path\n", 33 | "\n", 34 | "filename = data_path(\"uproot-histograms.root\", raise_missing=False)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "jupyter": { 42 | "outputs_hidden": false 43 | }, 44 | "pycharm": { 45 | "name": "#%%\n" 46 | } 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "histos = [h.to_hist() for h in uproot.open(filename).values()]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "jupyter": { 58 | "outputs_hidden": false 59 | }, 60 | "pycharm": { 61 | "name": "#%%\n" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "data = zfit.data.BinnedData.from_hist(histos[0])" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "mplhep.histplot(histos[0])" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "jupyter": { 83 | "outputs_hidden": false 84 | }, 85 | "pycharm": { 86 | "name": "#%%\n" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "jupyter": { 97 | "outputs_hidden": false 98 | }, 99 | "pycharm": { 100 | "name": "#%%\n" 101 | } 102 | }, 103 | "outputs": [], 104 | "source": [] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 3 (ipykernel)", 110 | "language": "python", 111 | "name": "python3" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.10.4" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 4 128 | } 129 | -------------------------------------------------------------------------------- /_website/tutorials/guides/utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | def plotlimit(ul, alpha=0.05, CLs=True, ax=None): 5 | """Plot pvalue scan for different values of a parameter of interest (observed, expected and +/- sigma bands) 6 | 7 | Args: 8 | ul: UpperLimit instance 9 | alpha (float, default=0.05): significance level 10 | CLs (bool, optional): if `True` uses pvalues as $$p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}$$ 11 | else as $$p_{clsb} = p_{null}$ 12 | ax (matplotlib axis, optionnal) 13 | """ 14 | 
if ax is None: 15 | ax = plt.gca() 16 | 17 | poivalues = ul.poinull.values 18 | pvalues = ul.pvalues(CLs=CLs) 19 | 20 | if CLs: 21 | cls_clr = "r" 22 | clsb_clr = "b" 23 | else: 24 | cls_clr = "b" 25 | clsb_clr = "r" 26 | 27 | color_1sigma = "mediumseagreen" 28 | color_2sigma = "gold" 29 | 30 | ax.plot( 31 | poivalues, 32 | pvalues["cls"], 33 | label="Observed CL$_{s}$", 34 | marker=".", 35 | color="k", 36 | markerfacecolor=cls_clr, 37 | markeredgecolor=cls_clr, 38 | linewidth=2.0, 39 | ms=11, 40 | ) 41 | 42 | ax.plot( 43 | poivalues, 44 | pvalues["clsb"], 45 | label="Observed CL$_{s+b}$", 46 | marker=".", 47 | color="k", 48 | markerfacecolor=clsb_clr, 49 | markeredgecolor=clsb_clr, 50 | linewidth=2.0, 51 | ms=11, 52 | linestyle=":", 53 | ) 54 | 55 | ax.plot( 56 | poivalues, 57 | pvalues["clb"], 58 | label="Observed CL$_{b}$", 59 | marker=".", 60 | color="k", 61 | markerfacecolor="k", 62 | markeredgecolor="k", 63 | linewidth=2.0, 64 | ms=11, 65 | ) 66 | 67 | ax.plot( 68 | poivalues, 69 | pvalues["expected"], 70 | label="Expected CL$_{s}-$Median", 71 | color="k", 72 | linestyle="--", 73 | linewidth=1.5, 74 | ms=10, 75 | ) 76 | 77 | ax.plot( 78 | [poivalues[0], poivalues[-1]], 79 | [alpha, alpha], 80 | color="r", 81 | linestyle="-", 82 | linewidth=1.5, 83 | ) 84 | 85 | ax.fill_between( 86 | poivalues, 87 | pvalues["expected"], 88 | pvalues["expected_p1"], 89 | facecolor=color_1sigma, 90 | label="Expected CL$_{s} \\pm 1 \\sigma$", 91 | alpha=0.8, 92 | ) 93 | 94 | ax.fill_between( 95 | poivalues, 96 | pvalues["expected"], 97 | pvalues["expected_m1"], 98 | facecolor=color_1sigma, 99 | alpha=0.8, 100 | ) 101 | 102 | ax.fill_between( 103 | poivalues, 104 | pvalues["expected_p1"], 105 | pvalues["expected_p2"], 106 | facecolor=color_2sigma, 107 | label="Expected CL$_{s} \\pm 2 \\sigma$", 108 | alpha=0.8, 109 | ) 110 | 111 | ax.fill_between( 112 | poivalues, 113 | pvalues["expected_m1"], 114 | pvalues["expected_m2"], 115 | facecolor=color_2sigma, 116 | alpha=0.8, 117 | ) 118 | 119 | ax.set_ylim(-0.01, 1.1) 120 | ax.set_ylabel("p-value") 121 | ax.set_xlabel("parameter of interest") 122 | ax.legend(loc="best", fontsize=14) 123 | 124 | return ax 125 | 126 | 127 | def one_minus_cl_plot(ci, alpha=[0.32], ax=None): 128 | x = ci.poinull.values 129 | pvalues = ci.pvalues() 130 | 131 | if ax is None: 132 | ax = plt.gca() 133 | 134 | ax.plot(x, pvalues, ".--") 135 | for a in alpha: 136 | ax.axhline(a, color="red", label="$\\alpha = " + str(a) + "$") 137 | ax.set_ylabel("1-CL") 138 | 139 | return ax 140 | -------------------------------------------------------------------------------- /_website/tutorials/introduction/utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | def plotlimit(ul, alpha=0.05, CLs=True, ax=None): 5 | """Plot pvalue scan for different values of a parameter of interest (observed, expected and +/- sigma bands) 6 | 7 | Args: 8 | ul: UpperLimit instance 9 | alpha (float, default=0.05): significance level 10 | CLs (bool, optional): if `True` uses pvalues as $$p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}$$ 11 | else as $$p_{clsb} = p_{null}$ 12 | ax (matplotlib axis, optionnal) 13 | """ 14 | if ax is None: 15 | ax = plt.gca() 16 | 17 | poivalues = ul.poinull.values 18 | pvalues = ul.pvalues(CLs=CLs) 19 | 20 | if CLs: 21 | cls_clr = "r" 22 | clsb_clr = "b" 23 | else: 24 | cls_clr = "b" 25 | clsb_clr = "r" 26 | 27 | color_1sigma = "mediumseagreen" 28 | color_2sigma = "gold" 29 | 30 | ax.plot( 31 | poivalues, 32 | 
pvalues["cls"], 33 | label="Observed CL$_{s}$", 34 | marker=".", 35 | color="k", 36 | markerfacecolor=cls_clr, 37 | markeredgecolor=cls_clr, 38 | linewidth=2.0, 39 | ms=11, 40 | ) 41 | 42 | ax.plot( 43 | poivalues, 44 | pvalues["clsb"], 45 | label="Observed CL$_{s+b}$", 46 | marker=".", 47 | color="k", 48 | markerfacecolor=clsb_clr, 49 | markeredgecolor=clsb_clr, 50 | linewidth=2.0, 51 | ms=11, 52 | linestyle=":", 53 | ) 54 | 55 | ax.plot( 56 | poivalues, 57 | pvalues["clb"], 58 | label="Observed CL$_{b}$", 59 | marker=".", 60 | color="k", 61 | markerfacecolor="k", 62 | markeredgecolor="k", 63 | linewidth=2.0, 64 | ms=11, 65 | ) 66 | 67 | ax.plot( 68 | poivalues, 69 | pvalues["expected"], 70 | label="Expected CL$_{s}-$Median", 71 | color="k", 72 | linestyle="--", 73 | linewidth=1.5, 74 | ms=10, 75 | ) 76 | 77 | ax.plot( 78 | [poivalues[0], poivalues[-1]], 79 | [alpha, alpha], 80 | color="r", 81 | linestyle="-", 82 | linewidth=1.5, 83 | ) 84 | 85 | ax.fill_between( 86 | poivalues, 87 | pvalues["expected"], 88 | pvalues["expected_p1"], 89 | facecolor=color_1sigma, 90 | label="Expected CL$_{s} \\pm 1 \\sigma$", 91 | alpha=0.8, 92 | ) 93 | 94 | ax.fill_between( 95 | poivalues, 96 | pvalues["expected"], 97 | pvalues["expected_m1"], 98 | facecolor=color_1sigma, 99 | alpha=0.8, 100 | ) 101 | 102 | ax.fill_between( 103 | poivalues, 104 | pvalues["expected_p1"], 105 | pvalues["expected_p2"], 106 | facecolor=color_2sigma, 107 | label="Expected CL$_{s} \\pm 2 \\sigma$", 108 | alpha=0.8, 109 | ) 110 | 111 | ax.fill_between( 112 | poivalues, 113 | pvalues["expected_m1"], 114 | pvalues["expected_m2"], 115 | facecolor=color_2sigma, 116 | alpha=0.8, 117 | ) 118 | 119 | ax.set_ylim(-0.01, 1.1) 120 | ax.set_ylabel("p-value") 121 | ax.set_xlabel("parameter of interest") 122 | ax.legend(loc="best", fontsize=14) 123 | 124 | return ax 125 | 126 | 127 | def one_minus_cl_plot(ci, alpha=[0.32], ax=None): 128 | x = ci.poinull.values 129 | pvalues = ci.pvalues() 130 | 131 | if ax is None: 132 | ax = plt.gca() 133 | 134 | ax.plot(x, pvalues, ".--") 135 | for a in alpha: 136 | ax.axhline(a, color="red", label="$\\alpha = " + str(a) + "$") 137 | ax.set_ylabel("1-CL") 138 | 139 | return ax 140 | -------------------------------------------------------------------------------- /_website/tutorials/components/README.rst: -------------------------------------------------------------------------------- 1 | Components 2 | ============ 3 | 4 | This tutorials introduce in more depth specific components 5 | of zfit. 6 | 7 | 05 FitResult 8 | ++++++++++++++ 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | 05 - Exploring the FitResult.ipynb 14 | 15 | 16 | 13 Kernel Density Estimation 17 | ++++++++++++++++++++++++++++++++ 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | 13 - Kernel Density Estimation.ipynb 23 | 24 | 20 Composite Models 25 | +++++++++++++++++++++++ 26 | 27 | Building models out of other models using sums, products and more is an essential part of model building. 28 | This tutorial starts with the basics of it. 29 | 30 | .. toctree:: 31 | :maxdepth: 2 32 | 33 | 20 - Composite Models.ipynb 34 | 35 | 30 Binned Models 36 | +++++++++++++++++++++++ 37 | 38 | Binned models and data handle differently than their unbinned counterparts. 39 | 40 | .. 
toctree:: 41 | :maxdepth: 2 42 | 43 | 30 - Binned models.ipynb 44 | 31 - Fitting a histogram with binned losses.ipynb 45 | 32 - Templated fits.ipynb 46 | 33 - Binned fits.ipynb 47 | 48 | 40 Bayesian Inference 49 | ++++++++++++++++++++++++++++ 50 | 51 | Bayesian inference is a powerful tool to infer parameters given data and a model. 52 | 53 | .. toctree:: 54 | :maxdepth: 2 55 | 56 | 40 - Bayesian Inference.ipynb 57 | 58 | 59 | 50 Custom code and different run modes 60 | ++++++++++++++++++++++++++++++++++++++++ 61 | 62 | 63 | .. toctree:: 64 | :maxdepth: 2 65 | 66 | 50 - Custom code and run mode.ipynb 67 | 68 | 69 | 70 | 60 Custom PDF 71 | +++++++++++++++++++++++ 72 | 73 | Being able to build a custom model simply is an essential feature of zfit. 74 | This tutorial introduces the two main ways of doing it, a simpler and a more advanced, more flexible way. 75 | 76 | 77 | .. toctree:: 78 | :maxdepth: 2 79 | 80 | 60 - Custom PDF.ipynb 81 | 82 | 61 Custom Binned PDF 83 | ++++++++++++++++++++++++++++ 84 | 85 | Building a binned pdf in zfit. 86 | 87 | .. toctree:: 88 | :maxdepth: 2 89 | 90 | 61 - Custom Binned PDF.ipynb 91 | 92 | 93 | 62 Multidimensional custom PDF 94 | ++++++++++++++++++++++++++++++++++++++++++++++ 95 | 96 | Building a pdf in multiple dimensions and registering an analytic integral. 97 | 98 | .. toctree:: 99 | :maxdepth: 2 100 | 101 | 62 - Multidim Custom PDF.ipynb 102 | 103 | 71 - Simple Loss 104 | ++++++++++++++++++++++++++++ 105 | 106 | A simple loss doesn't need a distribution or data, it just needs a function to minimize. 107 | 108 | .. toctree:: 109 | :maxdepth: 2 110 | 111 | 71 - Simple Loss.ipynb 112 | 113 | 72 - Custom Loss 114 | ++++++++++++++++++++++++++++ 115 | 116 | Building a custom loss function in zfit. 117 | 118 | .. toctree:: 119 | :maxdepth: 2 120 | 121 | 72 - Custom Loss.ipynb 122 | 123 | 124 | 77 - Custom Minimizers 125 | ++++++++++++++++++++++++++++ 126 | 127 | Building a custom minimizer in zfit. 128 | 129 | .. toctree:: 130 | :maxdepth: 2 131 | 132 | 77 - Custom Minimizer.ipynb 133 | 134 | 135 | 136 | 80 Toy Study 137 | ++++++++++++ 138 | 139 | A minimal example of how to manually perform toy studies with zfit. 140 | 141 | .. toctree:: 142 | :maxdepth: 2 143 | 144 | 80 - Toy Study.ipynb 145 | 146 | 90 Serialization 147 | ++++++++++++++++ 148 | 149 | There are multiple ways of serializing zfit objects, this tutorial introduces them. 150 | 151 | .. warning:: Parts of it, namely the HS3-like human-readable serialization is still highly experimental and will change in every release. 152 | 153 | .. toctree:: 154 | :maxdepth: 2 155 | 156 | 90 - Serialization basics.ipynb 157 | 158 | If any component is missing, please open an issue on `github `_. 159 | -------------------------------------------------------------------------------- /_website/conf.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """Configuration file for the Sphinx documentation builder. 3 | 4 | This file only contains a selection of the most common options. 
For a full list see the documentation: 5 | https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | """ 7 | 8 | import os 9 | 10 | # -- Project information ----------------------------------------------------- 11 | import sys 12 | from pathlib import Path 13 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 14 | 15 | 16 | project_dir = Path(__file__).parents[1] 17 | sys.path.insert(0, str(project_dir)) 18 | os.environ["ZFIT_TUTORIALS_ROOT"] = f"{project_dir}/" 19 | 20 | project = "zfit" 21 | package = "zfit-tutorials" 22 | repo_name = "zfit-tutorials" 23 | copyright = "2021, zfit" 24 | author = "zfit" 25 | 26 | # sphinx can't handle relative pathes, so add repo as symlink 27 | project_dir = Path(__file__).parents[1] 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | html_logo = str(project_dir.joinpath("_website", "images", "zfit-logo_400x168.png")) 32 | source_suffix = { 33 | ".ipynb": "myst-nb", 34 | ".md": "myst-nb", 35 | ".rst": "restructuredtext", 36 | } 37 | 38 | # The master toctree document. 39 | master_doc = "index" 40 | modindex_common_prefix = [ 41 | f"{package}.", 42 | ] 43 | 44 | extensions = [ 45 | "myst_nb", 46 | "sphinx.ext.autodoc", 47 | "sphinx.ext.autosectionlabel", 48 | "sphinx.ext.doctest", 49 | "sphinx.ext.intersphinx", 50 | "sphinx.ext.mathjax", 51 | "sphinx.ext.napoleon", 52 | "sphinx.ext.viewcode", 53 | "sphinx_copybutton", 54 | "sphinx_panels", 55 | "sphinx_thebe", 56 | "sphinx_togglebutton", 57 | ] 58 | exclude_patterns = [ 59 | "**.ipynb_checkpoints", 60 | "*build", 61 | "adr*", 62 | "tests", 63 | ".jupiter_cache", 64 | ".jupiter_cache/*", 65 | ".jupiter_cache/**/*", 66 | ] 67 | 68 | html_copy_source = True # needed for download notebook button 69 | html_favicon = str(project_dir.joinpath("_website", "images", "zfit-favicon.png")) 70 | html_show_copyright = False 71 | html_show_sourcelink = False 72 | html_show_sphinx = False 73 | html_sourcelink_suffix = "" 74 | html_static_path = ["_static"] 75 | html_theme = "sphinx_book_theme" 76 | html_theme_options = { 77 | "repository_url": f"https://github.com/zfit/{repo_name}", 78 | "repository_branch": "main", 79 | "path_to_docs": "_website", 80 | "use_download_button": True, 81 | "use_edit_page_button": True, 82 | "use_issues_button": True, 83 | "use_repository_button": True, 84 | "launch_buttons": { 85 | "binderhub_url": "https://mybinder.org", 86 | "colab_url": "https://colab.research.google.com", 87 | "notebook_interface": "jupyterlab", 88 | "thebe": True, 89 | "thebelab": True, 90 | }, 91 | } 92 | html_title = "tutorials" 93 | panels_add_bootstrap_css = False # wider page width with sphinx-panels 94 | pygments_style = "sphinx" 95 | todo_include_todos = False 96 | 97 | # Cross-referencing configuration 98 | default_role = "py:obj" 99 | primary_domain = "py" 100 | # nitpicky = True # warn if cross-references are missing 101 | # nitpick_ignore = [ 102 | # ("py:class", "tensorflow.keras.losses.Loss"), 103 | # ("py:class", "tensorflow.python.keras.losses.Loss"), 104 | # ("py:obj", "Loss"), 105 | # ] 106 | 107 | # Settings for copybutton 108 | copybutton_prompt_is_regexp = True 109 | copybutton_prompt_text = r">>> |\.\.\. 
" # doctest 110 | 111 | # Settings for myst_nb 112 | nb_execution_timeout = -1 113 | nb_output_stderr = "remove" 114 | # Priority overrides for mime types (lower number = higher priority) 115 | nb_mime_priority_overrides = [ 116 | ("html", "application/vnd.jupyter.widget-view+json", 0), 117 | ("html", "application/javascript", 10), 118 | ("html", "text/html", 20), 119 | ("html", "image/svg+xml", 30), 120 | ("html", "image/png", 40), 121 | ("html", "image/jpeg", 50), 122 | ("html", "text/markdown", 60), 123 | ("html", "text/latex", 70), 124 | ("html", "text/plain", 80), 125 | ] 126 | 127 | nb_execution_mode = "cache" 128 | # jupyter_execute_notebooks = "force" 129 | nb_execution_excludepatterns = [ 130 | "B2KstLL.py", 131 | "*_website*", 132 | "kstmumu_tutorial.py", 133 | "API.ipynb", 134 | "Summary.ipynb", 135 | ] 136 | jupyter_cache_path = project_dir.joinpath("_website", ".jupiter_cache") 137 | # remove cache directory to clean if needed 138 | # shutil.rmtree(jupyter_cache_path, ignore_errors=True) 139 | jupyter_cache_path.mkdir(exist_ok=True) 140 | if nb_execution_mode == "cache": 141 | nb_execution_cache_path = str(jupyter_cache_path) 142 | 143 | # Settings for myst-parser 144 | myst_enable_extensions = [ 145 | "amsmath", 146 | "colon_fence", 147 | "dollarmath", 148 | "smartquotes", 149 | "html_image", 150 | ] 151 | myst_update_mathjax = False 152 | 153 | # Settings for Thebe cell output 154 | thebe_config = { 155 | "repository_url": html_theme_options["repository_url"], 156 | "repository_branch": html_theme_options["repository_branch"], 157 | } 158 | -------------------------------------------------------------------------------- /_website/tutorials/TensorFlow/01 - TensorFlow-Probability.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Probability with TensorFlow\n", 8 | "\n", 9 | "While TensorFlow offers some support for statistical inference, TensorFlow-Probability is very strong at this and provides MCMC methods, probability distributions and more." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import matplotlib.pyplot as plt\n", 19 | "import numpy as np\n", 20 | "import tensorflow as tf\n", 21 | "import tensorflow_probability as tfp\n", 22 | "import zfit\n", 23 | "from zfit import z" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Distributions\n", 31 | "\n", 32 | "There is a whole collection of different distributions to be found in TFP. They have a minimal and well designed interface, which is similar to the SciPy distributions." 
33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "tfd = tfp.distributions" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "cauchy = tfd.Cauchy(loc=1., scale=10.)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "sample = cauchy.sample(10)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "cauchy.prob(sample)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "### Mixtures of PDFs\n", 76 | "\n", 77 | "TensorFlow-Probability also supports creating mixtures of different distributions." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "mix = 0.3\n", 87 | "mix_gauss_cauchy = tfd.Mixture(\n", 88 | " cat=tfd.Categorical(probs=[mix, 1.-mix]),\n", 89 | " components=[\n", 90 | " cauchy,\n", 91 | " tfd.Normal(loc=+1., scale=0.5),\n", 92 | "])" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "sample_mixed = mix_gauss_cauchy.sample(10)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "mix_gauss_cauchy.prob(sample_mixed)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Joint distributions\n", 118 | "\n", 119 | "Furthermore, joint distributions of multiple variables are supported." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "joint = tfd.JointDistributionNamed(dict(\n", 129 | " c= tfd.Cauchy(loc=10., scale=1.),\n", 130 | " n= tfd.Normal(loc=0, scale=2.),\n", 131 | " m=lambda n, c: tfd.Normal(loc=n, scale=c),\n", 132 | "))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "sample_joint = joint.sample(10)\n", 142 | "sample_joint" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "joint.prob(sample_joint)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### How TFP compares to zfit\n", 159 | "\n", 160 | "TensorFlow-Probability offers a great choice of distributions to build a model. The flexibility in terms of vectorization and parametrization is larger than in zfit. However, they only provide models with analytically known CDFs and lack any numerical normalization or sampling methods. This excludes any more sophisticated model, convolutions and more.\n", 161 | "\n", 162 | "Internally, zfit simply wraps TFP distributions for certain implementations, such as the `Gauss`. There is also a standard wrapper, `WrapDistribution`, that allows to easily wrap any TFP distribution and use it in zfit." 
163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3 (ipykernel)", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.10.4" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 4 187 | } 188 | -------------------------------------------------------------------------------- /_website/tutorials/components/20 - Composite Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Composite Models\n", 8 | "\n", 9 | "Models can be created by using functors like `SumPDF`, `ProdPDF`.\n", 10 | "\n", 11 | "There are two ways to create such models, either with the class API or with simple Python syntax.\n", 12 | "\n", 13 | "## Sum PDF\n", 14 | "\n", 15 | "Lets compose a sum of two gaussians by first creating each gaussian and a fraction parameter." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import numpy as np\n", 25 | "import zfit" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "frac = zfit.Parameter(\"frac_gauss\", 0.5, 0, 1)\n", 35 | "\n", 36 | "obs1 = zfit.Space('obs1',-5, 5)\n", 37 | "\n", 38 | "mu1 = zfit.Parameter(\"mu1\", 1.)\n", 39 | "sigma1 = zfit.Parameter(\"sigma1\", 1.)\n", 40 | "gauss1 = zfit.pdf.Gauss(obs=obs1, mu=mu1, sigma=sigma1)\n", 41 | "\n", 42 | "mu2 = zfit.Parameter(\"mu2\", 1.)\n", 43 | "sigma2 = zfit.Parameter(\"sigma2\", 1.)\n", 44 | "gauss2 = zfit.pdf.Gauss(obs=obs1, mu=mu2, sigma=sigma2)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "The sum can be created like this:" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "sum_gauss = zfit.pdf.SumPDF(pdfs=[gauss1, gauss2], fracs=frac)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "print(sum_gauss.obs)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Hereby, the new pdf has the same observables as the daughter pdfs, as they coincide. If they do not, then they are combined (if there is no conflict with the limits). This can be useful to create higher dimensional pdfs." 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Product PDF\n", 89 | "\n", 90 | "Let's now create a 2D product of two gaussians. Again, we can choose between the Python syntax and the class API." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "obs2 = zfit.Space('obs2', -3, 7)\n", 100 | "mu3 = zfit.Parameter(\"mu3\", 1.)\n", 101 | "sigma3 = zfit.Parameter(\"sigma3\", 1.)\n", 102 | "gauss3 = zfit.pdf.Gauss(obs=obs2, mu=mu3, sigma=sigma3) # different obs than above." 
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "prod_gauss = zfit.pdf.ProductPDF(pdfs=[gauss1, gauss3])\n",
112 |     "prod_gauss_inverted_order = zfit.pdf.ProductPDF(pdfs=[gauss3, gauss1]) # notice the different order of the pdfs!"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "_It is also possible to use the following code, but it should only be used for simple cases and is not recommended for more than two pdfs, since this leads to inefficient, tree-like product structures_:\n",
120 |     "`prod_gauss = gauss1 * gauss3` # NOT RECOMMENDED FOR MORE THAN 2 PDFs!"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "The new pdf is now in two dimensions. The order of the observables follows the order of the pdfs given."
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": [
136 |     "print(\"product obs\", prod_gauss.obs)\n",
137 |     "print(\"inverted order product obs\", prod_gauss_inverted_order.obs)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "## Creating an extended PDF\n",
145 |     "\n",
146 |     "An extended PDF can be created using the `extended` argument in the initialization.\n",
147 |     " \n",
148 |     "Alternatively, an extended PDF from a non-extended PDF can be created with the `create_extended(yield_param)` method."
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": [
157 |     "yield1 = zfit.Parameter(\"yield_gauss1\", 100, 0, 1000)\n",
158 |     "gauss3_ext = zfit.pdf.Gauss(obs=obs2, mu=mu3, sigma=sigma3, extended=yield1)"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": [
167 |     "extended_gauss_method = gauss3.create_extended(yield1)"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": null,
173 |    "metadata": {},
174 |    "outputs": [],
175 |    "source": []
176 |   }
177 |  ],
178 |  "metadata": {
179 |   "kernelspec": {
180 |    "display_name": "Python 3 (ipykernel)",
181 |    "language": "python",
182 |    "name": "python3"
183 |   },
184 |   "language_info": {
185 |    "codemirror_mode": {
186 |     "name": "ipython",
187 |     "version": 3
188 |    },
189 |    "file_extension": ".py",
190 |    "mimetype": "text/x-python",
191 |    "name": "python",
192 |    "nbconvert_exporter": "python",
193 |    "pygments_lexer": "ipython3",
194 |    "version": "3.10.4"
195 |   }
196 |  },
197 |  "nbformat": 4,
198 |  "nbformat_minor": 4
199 | }
200 | 
--------------------------------------------------------------------------------
/_website/tutorials/components/80 - Toy Study.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Toy studies\n",
8 |     "\n",
9 |     "Having a model, it can be convenient to do sensitivity studies and checks of the fit by doing a \"toy study\": sampling from the model and fitting to the generated sample. The fitted values and the spread characterize whether the fit is biased or not. The difference from the \"actual\" value divided by the uncertainty (the pulls) should follow a standard Gaussian distribution."
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": null,
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import matplotlib.pyplot as plt\n",
19 |     "import numpy as np\n",
20 |     "import tensorflow as tf\n",
21 |     "import zfit\n",
22 |     "from tqdm.auto import tqdm\n",
23 |     "from zfit import z"
24 |    ]
25 |   },
26 |   {
27 |    "cell_type": "markdown",
28 |    "metadata": {},
29 |    "source": [
30 |     "We will build a simple model, just a Gaussian. But, given the well-defined workflow of zfit, `model` can be exchanged for _any_ complicated composition or custom model."
31 |    ]
32 |   },
33 |   {
34 |    "cell_type": "code",
35 |    "execution_count": null,
36 |    "metadata": {},
37 |    "outputs": [],
38 |    "source": [
39 |     "obs = zfit.Space('x', -5, 5)\n",
40 |     "\n",
41 |     "sigma = zfit.Parameter('sigma', 1, 0.1, 10)\n",
42 |     "mu = zfit.Parameter('mu', 0, -1, 1)\n",
43 |     "model = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma)"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "markdown",
48 |    "metadata": {},
49 |    "source": [
50 |     "Instead of using `sample` as before, we will first build our loss with a more efficient `Data`, a \"sampler\", created by `create_sampler`. Like `sample`, it has arguments for the limits and the number of samples, but it also supports `fixed_params`, which is true by default. This means that whenever this object is _resampled_, it will be resampled with the parameter values that it had when we created the sampler."
51 |    ]
52 |   },
53 |   {
54 |    "cell_type": "code",
55 |    "execution_count": null,
56 |    "metadata": {},
57 |    "outputs": [],
58 |    "source": [
59 |     "sampler = model.create_sampler(n=3000)"
60 |    ]
61 |   },
62 |   {
63 |    "cell_type": "markdown",
64 |    "metadata": {},
65 |    "source": [
66 |     "This takes a while, as the first resampling is happening now. But first, we build our whole chain, just using our sampler as `data`."
67 |    ]
68 |   },
69 |   {
70 |    "cell_type": "code",
71 |    "execution_count": null,
72 |    "metadata": {},
73 |    "outputs": [],
74 |    "source": [
75 |     "nll = zfit.loss.UnbinnedNLL(model, sampler)\n",
76 |     "\n",
77 |     "# this strategy does not raise an error with NaNs but returns a non-converged `FitResult`\n",
78 |     "from zfit.minimize import DefaultToyStrategy\n",
79 |     "\n",
80 |     "minimizer = zfit.minimize.Minuit(strategy=DefaultToyStrategy(), verbosity=0, tol=1e-3, use_minuit_grad=True)"
81 |    ]
82 |   },
83 |   {
84 |    "cell_type": "code",
85 |    "execution_count": null,
86 |    "metadata": {},
87 |    "outputs": [],
88 |    "source": [
89 |     "fit_results = []\n",
90 |     "ntoys = 20"
91 |    ]
92 |   },
93 |   {
94 |    "cell_type": "code",
95 |    "execution_count": null,
96 |    "metadata": {},
97 |    "outputs": [],
98 |    "source": [
99 |     "params = nll.get_params()\n",
100 |     "\n",
101 |     "with tqdm(total=ntoys) as bar:\n",
102 |     "\n",
103 |     "    while len(fit_results) < ntoys:\n",
104 |     "\n",
105 |     "        # Generate toys\n",
106 |     "        sampler.resample()  # this is where the sampling happens\n",
107 |     "\n",
108 |     "        # Randomise initial values. They can put the pdf in an unphysical region, making it negative at points.\n",
109 |     "        # This will produce NaNs in the log of the NLL. Therefore, we randomize until we get no NaNs anymore.\n",
110 |     "        for param in params:\n",
111 |     "            param.randomize()  # or smarter, use `set_value` for your own method\n",
112 |     "\n",
113 |     "# The following can be used if the loss may return NaNs, to test. Repeat in a while loop until it matches\n",
114 |     "# try:\n",
115 |     "#     is_nan = np.isnan(zfit.run(nll.value()))\n",
116 |     "# except tf.errors.InvalidArgumentError:  # NaNs produced, check_numerics raises this error\n",
117 |     "#     # print(\"nan error, try again\")  # try again\n",
118 |     "#     is_nan = True\n",
119 |     "# else:\n",
120 |     "#     break\n",
121 |     "\n",
122 |     "        # Minimise the NLL\n",
123 |     "        result = minimizer.minimize(nll)\n",
124 |     "\n",
125 |     "        if result.converged:\n",
126 |     "            # Calculate uncertainties\n",
127 |     "            result.hesse()\n",
128 |     "            fit_results.append(result)\n",
129 |     "            bar.update(1)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "print(fit_results[:10])"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "## Evaluate results\n",
146 |     "\n",
147 |     "From here on, we can use the fit_results to compare against the true value, make plots, etc."
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": null,
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": []
156 |   }
157 |  ],
158 |  "metadata": {
159 |   "kernelspec": {
160 |    "display_name": "Python 3 (ipykernel)",
161 |    "language": "python",
162 |    "name": "python3"
163 |   },
164 |   "language_info": {
165 |    "codemirror_mode": {
166 |     "name": "ipython",
167 |     "version": 3
168 |    },
169 |    "file_extension": ".py",
170 |    "mimetype": "text/x-python",
171 |    "name": "python",
172 |    "nbconvert_exporter": "python",
173 |    "pygments_lexer": "ipython3",
174 |    "version": "3.10.4"
175 |   }
176 |  },
177 |  "nbformat": 4,
178 |  "nbformat_minor": 4
179 | }
180 | 
--------------------------------------------------------------------------------
/_website/tutorials/components/60 - Custom PDF.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Creating your own pdf\n",
8 |     "\n",
9 |     "A core feature of zfit is the ability to create custom pdfs and functions in a simple and straightforward way.\n",
10 |     "\n",
11 |     "There are two main possibilities to create a custom pdf: an easier one for most use cases and an advanced one.\n",
12 |     "\n",
13 |     "## The simple way\n",
14 |     "\n",
15 |     "While the same works for functions, an example with a PDF is shown here.\n",
16 |     "\n"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": null,
22 |    "metadata": {},
23 |    "outputs": [],
24 |    "source": [
25 |     "import numpy as np\n",
26 |     "import zfit\n",
27 |     "from zfit import z"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "markdown",
32 |    "metadata": {},
33 |    "source": [
34 |     "The first way is the simplest and should only be used for trivial cases, i.e. if you're not familiar with Python classes (especially not with the `__init__` method)."
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "class MyGauss(zfit.pdf.ZPDF):\n", 44 | " _N_OBS = 1 # dimension, can be omitted\n", 45 | " _PARAMS = ['mean', 'std'] # the name of the parameters\n", 46 | "\n", 47 | " @zfit.supports()\n", 48 | " def _unnormalized_pdf(self, x, params):\n", 49 | " x0 = x[0] # using the 0th axis\n", 50 | " mean = params['mean']\n", 51 | " std = params['std']\n", 52 | " return z.exp(- ((x0 - mean) / std) ** 2)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Done. Now we can use our pdf already!\n", 60 | "\n", 61 | "The slightly more general way involves overwriting the `__init__` and gives you full flexibility: you can use custom parameters, preprocess them, etc.\n", 62 | "\n", 63 | "Here we inherit from `BasePDF`" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "class MyGauss(zfit.pdf.BasePDF):\n", 73 | "\n", 74 | " def __init__(self, mean, std, obs, extended=None, norm=None, name=None, label=None):\n", 75 | " params = {'mean': mean, # 'mean' is the name as it will be named in the PDF, mean is just the parameter to create the PDF\n", 76 | " 'std': std\n", 77 | " }\n", 78 | " super().__init__(obs=obs, params=params, extended=extended, norm=norm,\n", 79 | " name=name, label=label)\n", 80 | "\n", 81 | " @zfit.supports()\n", 82 | " def _unnormalized_pdf(self, x, params):\n", 83 | " x0 = x[0] # using the 0th axis\n", 84 | " mean = params['mean']\n", 85 | " std = params['std']\n", 86 | " return z.exp(- ((x0 - mean) / std) ** 2)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "obs = zfit.Space('obs1', -3, 6)\n", 96 | "\n", 97 | "data_np = np.random.random(size=1000)\n", 98 | "data = zfit.Data(data_np, obs=obs)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Create two parameters and an instance of your own pdf" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "mean = zfit.Parameter(\"mean\", 1.)\n", 115 | "std = zfit.Parameter(\"std\", 1.)\n", 116 | "my_gauss = MyGauss(obs=obs, mean=mean, std=std)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "probs = my_gauss.pdf(data)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "print(probs[:20])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "If we want to make sure it's a numpy array, we can use `zfit.run`" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "We could improve our PDF by registering an integral" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "def gauss_integral_from_any_to_any(limits, params, model):\n", 158 | " lower, upper = limits.v1.limits\n", 159 | " mean = params['mean']\n", 160 | " std = params['std']\n", 161 | " # write your integral here\n", 162 | " return 42. # dummy integral, must be a scalar!"
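, "\n", "\n", "# --- added sketch, not part of the original tutorial ---\n", "# For this Gauss, the analytic integral can be expressed with the error function;\n", "# a minimal sketch, assuming `tf.math.erf` from zfit's TensorFlow backend is available:\n", "import tensorflow as tf\n", "\n", "def gauss_integral_sketch(limits, params, model):\n", " # integral of exp(-((x - mean) / std) ** 2) over [lower, upper]\n", " lower, upper = limits.v1.limits\n", " mean = params['mean']\n", " std = params['std']\n", " return std * np.sqrt(np.pi) / 2 * (\n", " tf.math.erf((upper - mean) / std) - tf.math.erf((lower - mean) / std))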
163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "limits = zfit.Space(axes=0, lower=zfit.Space.ANY_LOWER, upper=zfit.Space.ANY_UPPER)\n", 172 | "MyGauss.register_analytic_integral(func=gauss_integral_from_any_to_any, limits=limits)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "More advanced custom PDFs are introduced in the guide on [custom PDFs](custom_pdfs.ipynb)." 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3 (ipykernel)", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.10.4" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 4 204 | } 205 | -------------------------------------------------------------------------------- /_website/tutorials/components/77 - Custom Minimizer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Building your own minimizer" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import annotations\n", 19 | "\n", 20 | "from collections.abc import Sequence\n", 21 | "\n", 22 | "import zfit\n", 23 | "from zfit.minimizers.interface import ZfitMinimizer" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "2", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "zfit.run.experimental_disable_param_update(True) # does not update parameters automatically after minimization" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "3", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "class ChainedMinimizer(zfit.minimize.BaseMinimizer):\n", 44 | " def __init__(self, minimizers: ZfitMinimizer | Sequence[ZfitMinimizer], tol=None, verbosity=0, name=None):\n", 45 | " if isinstance(minimizers, ZfitMinimizer):\n", 46 | " minimizers = [minimizers]\n", 47 | "\n", 48 | " self.minimizers = minimizers\n", 49 | " lasttol = minimizers[-1].tol\n", 50 | " if tol is None:\n", 51 | " tol = lasttol\n", 52 | " elif abs(tol - lasttol) > 1e-6:\n", 53 | " raise ValueError(\"The tolerance of the chained minimizer must be the same as the last minimizer.\")\n", 54 | " super().__init__(tol=tol, verbosity=verbosity, name=name)\n", 55 | "\n", 56 | " @zfit.minimize.minimize_supports(init=True)\n", 57 | " def _minimize(self, loss, params, init):\n", 58 | " result = init\n", 59 | " for minimizer in self.minimizers:\n", 60 | " result = minimizer.minimize(loss, params=params, init=result)\n", 61 | " if self.verbosity > 7:\n", 62 | " print(f\"Minimizer {minimizer} finished with result \\n{result}\")\n", 63 | " return result" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": "4", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "minimizer1 = zfit.minimize.Minuit(tol=10., mode=0)\n", 74 | "minimizer2 = zfit.minimize.ScipyTrustConstrV1(tol=1e-3)" 75 | ] 76 | }, 77 | { 78 | 
"cell_type": "code", 79 | "execution_count": null, 80 | "id": "5", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "minimizer = ChainedMinimizer([minimizer1, minimizer2], verbosity=8)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "6", 90 | "metadata": {}, 91 | "source": [ 92 | "Create a simple loss and minimize it with the chained minimizer." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "id": "7", 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "obs = zfit.Space('obs1', -10, 10)\n", 103 | "mu = zfit.Parameter('mu', 1., -1, 5)\n", 104 | "sigma = zfit.Parameter('sigma', 1., 0, 10)\n", 105 | "sigyield = zfit.Parameter('sigyield', 1000, 0, 10000)\n", 106 | "gauss = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma, extended=sigyield)\n", 107 | "\n", 108 | "lamb = zfit.Parameter('lambda', -0.1, -1, -0.01)\n", 109 | "bkgyield = zfit.Parameter('bkgyield', 1000, 0, 10000)\n", 110 | "exponential = zfit.pdf.Exponential(obs=obs, lambda_=lamb, extended=bkgyield)\n", 111 | "\n", 112 | "model = zfit.pdf.SumPDF([gauss, exponential])\n", 113 | "\n", 114 | "data = model.sample(n=5000, params={mu: 0.5, sigma: 1.2, lamb: -0.05, sigyield: 3000, bkgyield: 2000})\n", 115 | "\n", 116 | "loss = zfit.loss.ExtendedUnbinnedNLL(model=model, data=data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "8", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# result = minimizer.minimize(loss=loss)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "id": "9", 132 | "metadata": {}, 133 | "source": [ 134 | "# Implementing a custom algorithm" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "10", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "import zfit.z.numpy as znp\n", 145 | "from zfit.result import FitResult\n", 146 | "\n", 147 | "\n", 148 | "class GradientDescentMinimizer(zfit.minimize.BaseMinimizer):\n", 149 | " def __init__(self, scaling, tol=None, verbosity=0, strategy=None, criterion=None, maxiter=None, name=None):\n", 150 | " super().__init__(\n", 151 | " name=name,\n", 152 | " strategy=strategy,\n", 153 | " tol=tol,\n", 154 | " verbosity=verbosity,\n", 155 | " criterion=criterion,\n", 156 | " maxiter=maxiter\n", 157 | " )\n", 158 | " self.scaling = scaling\n", 159 | "\n", 160 | " @zfit.minimize.minimize_supports(init=False) # we could allow the previous result as additional information\n", 161 | " def _minimize(self, loss, params, init):\n", 162 | " criterion = self.create_criterion(loss, params) # this is to be checked for convergence\n", 163 | " evaluator = self.create_evaluator(loss, params) # takes into account the strategy, callbacks, maxiter, and so on. 
A wrapper around the loss\n", 164 | " paramvals = znp.asarray(params)\n", 165 | " i = 1\n", 166 | " while True:\n", 167 | " value, gradients = evaluator.value_gradient(paramvals)\n", 168 | " result = FitResult(loss=loss, params={p: v for p, v in zip(params, paramvals)}, minimizer=self, valid=False, converged=False, edm=None, fminopt=None,\n", 169 | " approx={'gradient': gradients}, criterion=criterion,\n", 170 | " )\n", 171 | " if criterion.converged(result=result):\n", 172 | " result = FitResult(loss=loss, params={p: v for p, v in zip(params, paramvals)}, minimizer=self, valid=True, converged=True, edm=None,\n", 173 | " fminopt=None, approx={'gradient': gradients}, criterion=criterion)\n", 174 | " if self.verbosity > 5:\n", 175 | " print(f\"Converged with value {value}, criterion {criterion.last_value}\")\n", 176 | " break\n", 177 | " if self.verbosity > 9:\n", 178 | " print(f\"Criterion: {criterion.last_value} Loss value: {value}, gradients: {gradients}\")\n", 179 | " paramvals -= self.scaling * gradients / i ** 0.1\n", 180 | " return result" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "id": "11", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "gsdminimizer = GradientDescentMinimizer(scaling=0.0001, tol=0.3, verbosity=10, maxiter=10) # limit maxiter, as it won't converge" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "id": "12", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "loss.hessian(loss.get_params())" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "13", 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "gsdresult = gsdminimizer.minimize(loss=loss)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "14", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 3 (ipykernel)", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.10.13" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 5 243 | } 244 | -------------------------------------------------------------------------------- /_website/tutorials/components/62 - Multidim Custom PDF.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Multidimensional PDFs\n", 8 | "\n", 9 | "This tutorial is about handling multiple dimensions when creating a custom PDF.\n", 10 | "\n", 11 | "The differences are marginal since the ordering is handled automatically. It is, on the other hand, crucial to understand the concept of a `Space`, most notably `obs` and `axes`.\n", 12 | "\n", 13 | "A user (someone who instantiates the PDF) only knows and handles observables. The relative order does not matter: if the data has observables a and b and a pdf has observables b and a, the data will be reordered automatically. Inside a PDF, on the other hand, we do not care at all about observables but only about the ordering of the data, the *axis*. 
So any data tensor we have, as well as limits for integration, normalization, etc., **inside** the PDF is order-based and uses *axes*.\n", 14 | "\n", 15 | "When passing the observables to the init of the PDF (as a user), each observable is automatically assigned to an axis corresponding to the order of the observable. The crucial point is therefore to communicate to the user which *axis* corresponds to what. The naming of the observables is completely up to the user, but the order of the observables depends on the pdf. Therefore, the correspondence of each axis to its meaning has to be stated in the docs." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import numpy as np\n", 25 | "import zfit\n", 26 | "import zfit.z.numpy as znp\n", 27 | "from zfit import z" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Axes, not obs\n", 35 | "\n", 36 | "Since we create a pdf here, we can now completely forget about observables. We can assume that all the data is axes-based (order-based). We simply need to write down what each axis means.\n", 37 | "\n", 38 | "An example pdf is implemented below. It calculates the length of a vector shifted by some number (dummy example)." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "class AbsVectorShifted(zfit.pdf.ZPDF):\n", 48 | " _N_OBS = 3 # dimension, can be omitted\n", 49 | " _PARAMS = ['xshift', 'yshift'] # the name of the parameters\n", 50 | "\n", 51 | " @zfit.supports(norm=False)\n", 52 | " def _pdf(self, x, norm, params):\n", 53 | " x0 = x[0]\n", 54 | " x1 = x[1]\n", 55 | " x2 = x[2]\n", 56 | " # alternatively, we could use the following line to get the same result\n", 57 | " # x0, x1, x2 = z.unstack_x(x) # returns a list with the columns: do x1, x2, x3 = z.unstack_x(x) for 3D\n", 58 | " xshift = params['xshift']\n", 59 | " yshift = params['yshift']\n", 60 | " x0 = x0 + xshift\n", 61 | " x1 = x1 + yshift\n", 62 | " return znp.sqrt(znp.square(x0) + x1 ** 2 + znp.power(x2, 2)) # dummy calculations, all are equivalent" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Done. Now we can use our pdf already!" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "xobs = zfit.Space('xobs', (-3, 3))\n", 79 | "yobs = zfit.Space('yobs', (-2, 2))\n", 80 | "zobs = zfit.Space('z', (-1, 1))\n", 81 | "obs = xobs * yobs * zobs\n", 82 | "\n", 83 | "data_np = np.random.random(size=(1000, 3))\n", 84 | "data = zfit.Data(data=data_np, obs=obs) # obs is automatically used as limits here."
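, "\n", "# Added illustration (not in the original): the observable order used to label the columns\n", "# may differ from the PDF's obs; zfit reorders the data automatically when evaluating.\n", "obs_permuted = yobs * xobs * zobs\n", "data_permuted = zfit.Data(data=data_np, obs=obs_permuted) # same array, columns labelled in a different order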
85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Create two parameters and an instance of your own pdf" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "xshift = zfit.Parameter(\"xshift\", 1.)\n", 101 | "yshift = zfit.Parameter(\"yshift\", 2.)\n", 102 | "abs_vector = AbsVectorShifted(obs=obs, xshift=xshift, yshift=yshift)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "probs = abs_vector.pdf(data)\n", 112 | "print(probs[:20])" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "We could improve our PDF by registering an integral. This requires a few steps:\n", 120 | " - define our integral as a function in python\n", 121 | " - define in which space our integral is valid, e.g. whether it is an integral over all axes or only a partial one, and whether any limit is valid or only special ones (e.g. from -inf to inf)\n", 122 | " - register the integral and say if it supports additional things (e.g. norm)\n", 123 | "\n", 124 | "Let's start defining the function. This takes, for an integral over all axes, three parameters:\n", 125 | " - limits: the actual limits the integral is over\n", 126 | " - params: the parameters of the model (which _may_ be needed)\n", 127 | " - model: the model (pdf/func) itself\n", 128 | "\n", 129 | "We need to calculate the integral and return (currently) a scalar." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "def abs_vector_integral_from_any_to_any(limits, params, model):\n", 139 | " lower, upper = limits.v1.limits\n", 140 | " # write your integral here\n", 141 | " return 42. # dummy integral, must be a scalar!" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Now let's define the limits. We want to allow an integral over the whole space in three dimensions; this may look cumbersome but is straightforward (and done only once):" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "limit0 = zfit.Space(axes=0, lower=zfit.Space.ANY_LOWER, upper=zfit.Space.ANY_UPPER)\n", 158 | "limit1 = zfit.Space(axes=1, lower=zfit.Space.ANY_LOWER, upper=zfit.Space.ANY_UPPER)\n", 159 | "limit2 = zfit.Space(axes=2, lower=zfit.Space.ANY_LOWER, upper=zfit.Space.ANY_UPPER)\n", 160 | "limits = limit0 * limit1 * limit2 # creates the 3D limits\n", 161 | "print(limits)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "Now we create our space and register the integral. In order to change the precedence of integrals (e.g. because some are very simple and return a single number, so these special cases should be considered first), a priority argument can be given. Also, if the integral supports multiple limits or norm-range calculation, this can be specified here. Otherwise, this is handled automatically and the integral never receives multiple limits or a norm range, respectively (that's why they don't appear in the API of the integral function)."
169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "AbsVectorShifted.register_analytic_integral(func=abs_vector_integral_from_any_to_any, limits=limits,\n", 178 | " priority=51,\n", 179 | " supports_norm=False)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "### n-dimensional" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "## Advanced Custom PDF\n", 194 | "\n", 195 | "Subclass BasePDF. The `_unnormalized_pdf` has to be overridden and, in addition, the `__init__`.\n", 196 | "\n", 197 | "Any of the public main methods (`pdf`, `integrate`, `partial_integrate` etc.) can **always** be overridden by implementing the function with a leading underscore, e.g. implement `_pdf` to directly control `pdf`; the API is the same as that of the public function. If, during the execution of your own method, it turns out to be a bad idea to have overridden the default method, throwing a `NotImplementedError` will restore the default behavior." 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# TOBEDONE" 207 | ] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python 3 (ipykernel)", 213 | "language": "python", 214 | "name": "python3" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.10.4" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 4 231 | } 232 | -------------------------------------------------------------------------------- /_website/tutorials/components/71 - Simple Loss.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Simple loss\n", 8 | "\n", 9 | "A simple loss provides an easy way to minimize an arbitrary function, in particular when more than just the function value is known, such as its gradient or Hessian. It does not require a model or data, but can be used with them." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import zfit\n", 20 | "\n", 21 | "zfit.run.experimental_disable_param_update(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Let's start with a simple example of a function to be minimized: a quadratic function with a minimum at (1, 2, 3)."
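, "\n", "\n", "As a quick added check: its gradient is $\\nabla f(x) = \\big(2(x_0 - 1),\\, 2(x_1 - 2),\\, 2(x_2 - 3)\\big)$ and its Hessian is twice the identity matrix, which is exactly what the `gradientfn` and `hessianfn` defined further below return.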
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "def optimizefn(x):\n", 38 | " return (x[0] - 1) ** 2 + (x[1] - 2) ** 2 + (x[2] - 3) ** 2\n", 39 | "\n", 40 | "\n", 41 | "def optimizefn2(x):\n", 42 | " return (x[0] - 1) ** 2 + (x[1] - 2) ** 2 + (x[2] - 3) ** 2" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "minimizer = zfit.minimize.Minuit()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Simply minimizing this function fails because an `errordef` attribute is needed: it defines the change in the function value that corresponds to one standard deviation (1 for a chi2-like cost, 0.5 for a negative log-likelihood)." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "tags": [ 66 | "raises-exception" 67 | ] 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "minimizer.minimize(optimizefn, params=[2.0, 2.2, 2.4])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "optimizefn.errordef = 1 # 1 for a chi2, 0.5 for a likelihood typically" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "result = minimizer.minimize(optimizefn, params=[2.0, 2.2, 2.4])\n", 90 | "print(result)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Extending the loss\n", 98 | "\n", 99 | "To add more knowledge to the loss, we can extend it with a gradient and Hessian using the `SimpleLoss` class." 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "def gradientfn(x):\n", 109 | " print(f\"gradientfn called with x={x}\")\n", 110 | " return 2 * (x[0] - 1), 2 * (x[1] - 2), 2 * (x[2] - 3)\n", 111 | "\n", 112 | "\n", 113 | "def hessianfn(x):\n", 114 | " print(f\"hessianfn called with x={x}\")\n", 115 | " return np.array([[2., 0, 0], [0, 2., 0], [0, 0, 2.]])\n", 116 | "\n", 117 | "\n", 118 | "params = [zfit.Parameter(f\"param_{i}\", 2.0 + i * 0.2) for i in range(3)]\n", 119 | "loss = zfit.loss.SimpleLoss(func=optimizefn, gradient=gradientfn, params=params, hessian=hessianfn)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "loss.gradient()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "loss.hessian()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "minimizer_grad = zfit.minimize.Minuit(gradient=\"zfit\")" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "result_grad = minimizer_grad.minimize(loss)\n", 156 | "print(result_grad)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "result.hesse(method='hesse_np', name=\"loss hesse\")\n", 166 | "print(result)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | 
"result_grad.hesse(name=\"iminuit hesse3\") # default uses iminuit, nothing printed" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "result_grad.hesse(method='hesse_np', name=\"loss hesse\") # uses provided hessian" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "result_grad" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "result_grad.errors(name=\"minos\")\n", 203 | "result_grad.errors(name=\"zfit\", method=\"zfit_errors\")\n", 204 | "result_grad" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "result.errors(name=\"minos\")\n", 214 | "result.errors(name=\"zfit\", method=\"zfit_errors\")\n", 215 | "result" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "result.hesse(name=\"minuit\")\n", 225 | "result" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "loss.hessian()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "np.linalg.inv(loss.hessian())" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "# Use optimizefn2 which doesn't have errordef set, so we can pass it as parameter\n", 253 | "optimizefn2.errordef = 0.5\n", 254 | "loss = zfit.loss.SimpleLoss(func=optimizefn2, gradient=gradientfn, params=params)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "loss.hessian()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "res = minimizer_grad.minimize(loss)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "res.hesse()\n", 282 | "res.hesse(method=\"hesse_np\", name=\"loss hesse\")\n", 283 | "res.errors(name=\"minos\")\n", 284 | "res.errors(name=\"zfit\", method=\"zfit_errors\")\n", 285 | "res" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "0.71 ** 2" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "minu = minimizer._minuit_minimizer" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "minu.errordef = 0.5" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "minu.hesse()" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "minu.covariance" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | 
"execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "p = list(result.params)[0]" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "p" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "result.loss.value(params={p: 2.0})" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [] 366 | } 367 | ], 368 | "metadata": { 369 | "kernelspec": { 370 | "display_name": "Python 3 (ipykernel)", 371 | "language": "python", 372 | "name": "python3" 373 | }, 374 | "language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.10.4" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 1 389 | } 390 | -------------------------------------------------------------------------------- /_website/tutorials/introduction/Quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "inputHidden": false, 7 | "outputHidden": false, 8 | "pycharm": { 9 | "name": "#%% md\n" 10 | } 11 | }, 12 | "source": [ 13 | "# Quickstart\n", 14 | "\n", 15 | "In this quick tutorial, we'll show the basic ideas on what you can do with `zfit`, without going into much detail or performing advanced tasks.\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "pycharm": { 23 | "name": "#%%\n" 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import matplotlib.pyplot as plt\n", 29 | "import mplhep\n", 30 | "import numpy as np\n", 31 | "import zfit\n", 32 | "import zfit.z.numpy as znp # numpy-like backend" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "inputHidden": false, 39 | "outputHidden": false, 40 | "pycharm": { 41 | "name": "#%% md\n" 42 | } 43 | }, 44 | "source": [ 45 | "## Create observables\n", 46 | "\n", 47 | "The observable space in which PDFs are defined is created with the `Space` class" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "inputHidden": false, 55 | "outputHidden": false, 56 | "pycharm": { 57 | "name": "#%%\n" 58 | } 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "obs = zfit.Space('x', -10, 10)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "inputHidden": false, 69 | "outputHidden": false, 70 | "pycharm": { 71 | "name": "#%% md\n" 72 | } 73 | }, 74 | "source": [ 75 | "## Create data\n", 76 | "\n", 77 | "We create some unbinned data using `numpy`. Other constructors, e.g. for `ROOT` files are also available." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "inputHidden": false, 85 | "outputHidden": false, 86 | "pycharm": { 87 | "name": "#%%\n" 88 | } 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "mu_true = 0\n", 93 | "sigma_true = 1\n", 94 | "\n", 95 | "data_np = np.random.normal(mu_true, sigma_true, size=10000)\n", 96 | "data = zfit.Data(data=data_np, obs=obs)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "inputHidden": false, 103 | "outputHidden": false, 104 | "pycharm": { 105 | "name": "#%% md\n" 106 | } 107 | }, 108 | "source": [ 109 | "## Create a PDF to fit\n", 110 | "\n", 111 | "Let's create a Gaussian PDF so we can fit the dataset. To do this, first we create the fit parameters, which follow a convention similar to `RooFit`:\n", 112 | "\n", 113 | "```\n", 114 | "zfit.Parameter(name, initial_value, lower_limit (optional), upper_limit (optional), other options)\n", 115 | "```" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "pycharm": { 123 | "name": "#%%\n" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "mu = zfit.Parameter(\"mu\", 2.4, -1., 5., step_size=0.001) # step_size is not mandatory but can be helpful\n", 129 | "sigma = zfit.Parameter(\"sigma\", 1.3, 0, 5., step_size=0.001) # it should be around the estimated uncertainty" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "inputHidden": false, 136 | "outputHidden": false, 137 | "pycharm": { 138 | "name": "#%% md\n" 139 | } 140 | }, 141 | "source": [ 142 | "Now we instantiate a Gaussian from the zfit PDF library (more on how to create your own PDFs later)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "pycharm": { 150 | "name": "#%%\n" 151 | } 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "gauss = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "gauss.plot.plotpdf()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": { 170 | "inputHidden": false, 171 | "outputHidden": false, 172 | "pycharm": { 173 | "name": "#%% md\n" 174 | } 175 | }, 176 | "source": [ 177 | "This pdf contains several useful methods, such as calculating a probability, calculating its integral, sampling etc." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "pycharm": { 185 | "name": "#%%\n" 186 | } 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "# Let's get some probabilities.\n", 191 | "consts = [-1, 0, 1]\n", 192 | "probs = gauss.pdf(consts)\n", 193 | "print(f\"x values: {consts}\\nresult: {probs}\")" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "inputHidden": false, 200 | "outputHidden": false, 201 | "pycharm": { 202 | "name": "#%% md\n" 203 | } 204 | }, 205 | "source": [ 206 | "## Fitting\n", 207 | "\n", 208 | "To fit, we need to take three steps: create the negative $\\log\\mathcal{L}$, instantiate a minimizer and then minimize the likelihood." 
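, "\n", "\n", "(Added note: for an unbinned dataset $\\{x_i\\}$ and a normalized PDF $f(x; \\theta)$, the quantity minimized is $-\\log\\mathcal{L}(\\theta) = -\\sum_i \\log f(x_i; \\theta)$.)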
209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "inputHidden": false, 216 | "outputHidden": false, 217 | "pycharm": { 218 | "name": "#%%\n" 219 | } 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "# Create the negative log likelihood\n", 224 | "\n", 225 | "nll = zfit.loss.UnbinnedNLL(model=gauss, data=data) # loss\n", 226 | "\n", 227 | "# Load and instantiate a minimizer\n", 228 | "minimizer = zfit.minimize.Minuit()\n", 229 | "result = minimizer.minimize(loss=nll)\n", 230 | "\n", 231 | "print(result)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": { 237 | "inputHidden": false, 238 | "outputHidden": false, 239 | "pycharm": { 240 | "name": "#%% md\n" 241 | } 242 | }, 243 | "source": [ 244 | "And we can plot the result to see how it went." 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "inputHidden": false, 252 | "outputHidden": false, 253 | "pycharm": { 254 | "name": "#%%\n" 255 | } 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "%matplotlib inline" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "inputHidden": false, 267 | "outputHidden": false, 268 | "pycharm": { 269 | "name": "#%%\n" 270 | } 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "n_bins = 50\n", 275 | "mplhep.histplot(data.to_binned(50))\n", 276 | "rescale = obs.v1.volume / n_bins * float(data.nevents)\n", 277 | "ax = gauss.plot.plotpdf(scale=rescale)\n", 278 | "\n", 279 | "# x = np.linspace(*obs.v1.limits, num=1000)\n", 280 | "# probs = gauss.pdf(x)\n", 281 | "# _ = plt.plot(x, rescale * probs)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "obs.v1.volume" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": { 296 | "inputHidden": false, 297 | "outputHidden": false, 298 | "pycharm": { 299 | "name": "#%% md\n" 300 | } 301 | }, 302 | "source": [ 303 | "The `FitResult` that we obtained contains information about the minimization and can now be used to calculate the errors" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "pycharm": { 311 | "name": "#%%\n" 312 | } 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "print(f\"Function result: {result.fmin}\")\n", 317 | "print(f\"Converged: {result.converged} and valid: {result.valid}\")\n", 318 | "print(result)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "# we still have access to everything\n", 328 | "result.loss.model[0]" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "pycharm": { 336 | "name": "#%%\n" 337 | } 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "hesse_errors = result.hesse()\n", 342 | "minos_errors = result.errors()" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "pycharm": { 350 | "name": "#%%\n" 351 | } 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "print(result)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Storing the result\n", 363 | "\n", 364 | "Everything is accessible; feel free to store it in your own format\n" 365 | ] 366 | }, 367 | {
368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "dumped = zfit.dill.dumps(result) # like pickle\n", 374 | "loaded = zfit.dill.loads(dumped)\n", 375 | "loadedpdf = loaded.loss.model[0]\n", 376 | "loadedpdf.plot.plotpdf()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "zfit.hs3.dumps(nll) # experimental, human-readable serialization" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [] 394 | } 395 | ], 396 | "metadata": { 397 | "kernel_info": { 398 | "name": "zfit" 399 | }, 400 | "kernelspec": { 401 | "display_name": "Python 3 (ipykernel)", 402 | "language": "python", 403 | "name": "python3" 404 | }, 405 | "language_info": { 406 | "codemirror_mode": { 407 | "name": "ipython", 408 | "version": 3 409 | }, 410 | "file_extension": ".py", 411 | "mimetype": "text/x-python", 412 | "name": "python", 413 | "nbconvert_exporter": "python", 414 | "pygments_lexer": "ipython3", 415 | "version": "3.9.20" 416 | }, 417 | "nteract": { 418 | "version": "0.12.3" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 4 423 | } 424 | -------------------------------------------------------------------------------- /_website/tutorials/components/30 - Binned models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# Binned fits\n", 12 | "\n", 13 | "Binned models and data can be created in two ways:\n", 14 | "- from an unbinned model to a binned model or an unbinned dataset to a binned dataset\n", 15 | "- directly from a binned object" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "jupyter": { 23 | "outputs_hidden": false 24 | }, 25 | "pycharm": { 26 | "name": "#%%\n" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import hist as hist\n", 32 | "import mplhep\n", 33 | "import numpy as np\n", 34 | "import zfit\n", 35 | "import zfit.z.numpy as znp" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "jupyter": { 43 | "outputs_hidden": false 44 | }, 45 | "pycharm": { 46 | "name": "#%%\n" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "from matplotlib import pyplot as plt\n", 52 | "\n", 53 | "normal_np = np.random.normal(loc=2., scale=3., size=10000)\n", 54 | "\n", 55 | "obs = zfit.Space(\"x\", -10, 10)\n", 56 | "\n", 57 | "mu = zfit.Parameter(\"mu\", 1., -4, 6)\n", 58 | "sigma = zfit.Parameter(\"sigma\", 1., 0.1, 10)\n", 59 | "model_nobin = zfit.pdf.Gauss(mu, sigma, obs)\n", 60 | "\n", 61 | "data_nobin = zfit.Data.from_numpy(obs, normal_np)\n", 62 | "\n", 63 | "loss_nobin = zfit.loss.UnbinnedNLL(model_nobin, data_nobin)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "jupyter": { 71 | "outputs_hidden": false 72 | }, 73 | "pycharm": { 74 | "name": "#%%\n" 75 | } 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# make binned\n", 80 | "binning = zfit.binned.RegularBinning(50, -8, 10, name=\"x\")\n", 81 | "obs_bin = zfit.Space(\"x\", binning=binning)\n", 82 | "\n", 83 | "data = data_nobin.to_binned(obs_bin)\n", 84 | "model = model_nobin.to_binned(obs_bin)\n", 85 | "loss = 
zfit.loss.BinnedNLL(model, data)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "pycharm": { 92 | "name": "#%% md\n" 93 | } 94 | }, 95 | "source": [ 96 | "## Minimization\n", 97 | "\n", 98 | "Both losses look the same to a minimizer, and from here on the whole minimization process is the same.\n", 99 | "\n", 100 | "The following is the same as in the simplest case." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "pycharm": { 108 | "name": "#%%\n" 109 | } 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "minimizer = zfit.minimize.Minuit()\n", 114 | "result = minimizer.minimize(loss)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "pycharm": { 122 | "name": "#%%\n" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "result.hesse()\n", 128 | "print(result)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": { 134 | "pycharm": { 135 | "name": "#%% md\n" 136 | } 137 | }, 138 | "source": [ 139 | "## Plotting the PDF\n", 140 | "\n", 141 | "Since the model and the data are both binned, they can both be converted to histograms and plotted.\n", 142 | "\n", 143 | "Using the `to_hist` method of the model and of the `BinnedData` respectively, both can be converted to a histogram." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "jupyter": { 151 | "outputs_hidden": false 152 | }, 153 | "pycharm": { 154 | "name": "#%%\n" 155 | } 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "model_hist = model.to_hist()\n", 160 | "\n", 161 | "plt.figure()\n", 162 | "mplhep.histplot(model_hist, density=1, label=\"model\")\n", 163 | "mplhep.histplot(data, density=1, label=\"data\")\n", 164 | "plt.legend()\n", 165 | "plt.title(\"After fit\")" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": { 171 | "pycharm": { 172 | "name": "#%% md\n" 173 | } 174 | }, 175 | "source": [ 176 | "## To and from histograms\n", 177 | "\n", 178 | "zfit interoperates with the Scikit-HEP histogram packages [hist](https://hist.readthedocs.io/en/latest/) and\n", 179 | "[boost-histogram](https://boost-histogram.readthedocs.io/en/latest/), most notably with the `Hist`\n", 180 | " (or `NamedHist` if the axes have names) class.\n", 181 | "\n", 182 | "We can create a `BinnedData` from a `(Named)Hist` and vice versa."
183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "jupyter": { 190 | "outputs_hidden": false 191 | }, 192 | "pycharm": { 193 | "name": "#%%\n" 194 | } 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "h = hist.Hist(hist.axis.Regular(bins=15, start=-8, stop=10, name=\"x\"))\n", 199 | "h.fill(x=normal_np)\n", 200 | "mplhep.histplot(h)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "jupyter": { 208 | "outputs_hidden": false 209 | }, 210 | "pycharm": { 211 | "name": "#%%\n" 212 | } 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "binned_data = zfit.data.BinnedData.from_hist(h)\n", 217 | "binned_data" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "jupyter": { 225 | "outputs_hidden": false 226 | }, 227 | "pycharm": { 228 | "name": "#%%\n" 229 | } 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "# convert back to hist\n", 234 | "h_back = binned_data.to_hist()\n", 235 | "\n", 236 | "plt.figure()\n", 237 | "mplhep.histplot(h, label=\"original\")\n", 238 | "mplhep.histplot(h_back, label=\"back\", alpha=0.5)\n", 239 | "plt.legend()" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": { 245 | "pycharm": { 246 | "name": "#%% md\n" 247 | } 248 | }, 249 | "source": [ 250 | "## Binned models from histograms\n", 251 | "\n", 252 | "With a binned dataset, we can directly create a model using `HistogramPDF`. In fact, we can even\n", 253 | "create the `HistogramPDF` directly from the histogram." 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "jupyter": { 261 | "outputs_hidden": false 262 | }, 263 | "pycharm": { 264 | "name": "#%%\n" 265 | } 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "histpdf = zfit.pdf.HistogramPDF(h)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": { 275 | "pycharm": { 276 | "name": "#%% md\n" 277 | } 278 | }, 279 | "source": [ 280 | "Like the previous models, this is a binned PDF, so we can:\n", 281 | "- use the `to_hist` method to get a `(Named)Hist` back.\n", 282 | "- use the `to_binneddata` method to get a `BinnedData` back.\n", 283 | "- use the `counts` method to get the `counts` of the histogram.\n", 284 | "- use the `rel_counts` method to get the `relative counts` of the histogram (a short sketch of both follows below).\n", 285 | "\n", 286 | "Furthermore, `HistogramPDF` also has the `pdf` and `ext_pdf` methods like an unbinned PDF. They return a\n", 287 | "`BinnedData` if a `BinnedData` is passed to them (where no evaluation is done on the data passed, just\n", 288 | "the axes are used). Both methods, `pdf` and `ext_pdf`, can also handle unbinned data."
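, "\n", "As a minimal sketch of the counts accessors listed above (using the `histpdf` from the previous cell):\n", "\n", "```python\n", "counts = histpdf.counts() # counts per bin\n", "rel_counts = histpdf.rel_counts() # per-bin fractions, summing to 1\n", "```"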
289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": { 295 | "jupyter": { 296 | "outputs_hidden": false 297 | }, 298 | "pycharm": { 299 | "name": "#%%\n" 300 | } 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "x = znp.linspace(-8, 10, 100)\n", 305 | "plt.plot(histpdf.pdf(x), 'x')" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": { 311 | "pycharm": { 312 | "name": "#%% md\n" 313 | } 314 | }, 315 | "source": [ 316 | "We can also go the other way around and produce a `Hist` from a `HistogramPDF`.\n", 317 | "There are two distinct ways to do this:\n", 318 | "- using the `to_hist` or `to_binneddata` method of the `HistogramPDF` to create a `Hist` or a `BinnedData`\n", 319 | " respectively that represents the exact shape of the PDF.\n", 320 | "- draw a sample from the histogram using the `sample` method. This will not result in an exact match to the\n", 321 | " PDFs shape but will have random fluctuations. This functionality can be used for example to perform\n", 322 | " toy studies." 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": { 329 | "jupyter": { 330 | "outputs_hidden": false 331 | }, 332 | "pycharm": { 333 | "name": "#%%\n" 334 | } 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "azimov_hist = model.to_hist()\n", 339 | "azimov_data = model.to_binneddata()\n", 340 | "sampled_data = model.sample(1000)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "jupyter": { 348 | "outputs_hidden": false 349 | }, 350 | "pycharm": { 351 | "name": "#%%\n" 352 | } 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "# The exact histogram from the PDF\n", 357 | "azimov_data" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "jupyter": { 365 | "outputs_hidden": false 366 | }, 367 | "pycharm": { 368 | "name": "#%%\n" 369 | } 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "# A sample from the histogram\n", 374 | "sampled_data" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "pycharm": { 382 | "name": "#%%\n" 383 | } 384 | }, 385 | "outputs": [], 386 | "source": [] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "pycharm": { 393 | "name": "#%%\n" 394 | } 395 | }, 396 | "outputs": [], 397 | "source": [] 398 | } 399 | ], 400 | "metadata": { 401 | "kernelspec": { 402 | "display_name": "Python 3 (ipykernel)", 403 | "language": "python", 404 | "name": "python3" 405 | }, 406 | "language_info": { 407 | "codemirror_mode": { 408 | "name": "ipython", 409 | "version": 3 410 | }, 411 | "file_extension": ".py", 412 | "mimetype": "text/x-python", 413 | "name": "python", 414 | "nbconvert_exporter": "python", 415 | "pygments_lexer": "ipython3", 416 | "version": "3.10.4" 417 | } 418 | }, 419 | "nbformat": 4, 420 | "nbformat_minor": 4 421 | } 422 | -------------------------------------------------------------------------------- /_website/tutorials/components/40 - Bayesian Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Bayesian Inference\n", 9 | "\n", 10 | "A concise introduction to modern Bayesian inference in zfit covering essential features:\n", 11 | "- **Prior specification**: Define prior 
beliefs about parameters before seeing data\n", 12 | "- **MCMC sampling**: Use Markov Chain Monte Carlo (emcee) to sample from posterior distributions\n", 13 | "- **Convergence diagnostics**: Monitor R̂ (Gelman-Rubin statistic) and ESS (Effective Sample Size)\n", 14 | "- **ArviZ integration**: Advanced diagnostics and visualization tools\n", 15 | "- **Posterior analysis**: Extract credible intervals, means, and covariances\n", 16 | "\n", 17 | "\n", 18 | "---" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "1", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from __future__ import annotations\n", 29 | "\n", 30 | "import os\n", 31 | "\n", 32 | "os.environ[\"ZFIT_DISABLE_TF_WARNINGS\"] = \"1\" # Suppress TensorFlow warnings\n", 33 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"-1\" # disable GPU\n", 34 | "\n", 35 | "import matplotlib.pyplot as plt\n", 36 | "import numpy as np\n", 37 | "import zfit\n", 38 | "\n", 39 | "np.random.seed(42)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "2", 45 | "metadata": {}, 46 | "source": [ 47 | "## Bayesian Analysis Fundamentals\n", 48 | "\n", 49 | "**Bayesian inference** uses Bayes' theorem to update beliefs about parameters given data:\n", 50 | "\n", 51 | "$$P(\\theta | data) = \\frac{P(data | \\theta) \\cdot P(\\theta)}{P(data)}$$\n", 52 | "\n", 53 | "Where:\n", 54 | "- **P(θ|data)**: Posterior - updated beliefs after seeing data\n", 55 | "- **P(data|θ)**: Likelihood - probability of observing data given parameters \n", 56 | "- **P(θ)**: Prior - initial beliefs about parameters before seeing data\n", 57 | "\n", 58 | "**Priors** encode domain knowledge or express ignorance. Unlike frequentist methods that treat parameters as fixed unknowns, Bayesian analysis treats them as random variables with probability distributions." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "3", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Available prior distributions in zfit\n", 69 | "uniform_prior = zfit.prior.Uniform(lower=0, upper=10)\n", 70 | "normal_prior = zfit.prior.Normal(mu=5.0, sigma=1.0)\n", 71 | "gamma_prior = zfit.prior.Gamma(alpha=2.0, beta=1.0)\n", 72 | "half_normal_prior = zfit.prior.HalfNormal(sigma=0.5)\n", 73 | "poisson_prior = zfit.prior.Poisson(lam=3.0)\n", 74 | "exponential_prior = zfit.prior.Exponential(lam=2.0)\n", 75 | "student_t_prior = zfit.prior.StudentT(ndof=3, mu=0.0, sigma=1.0)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "4", 81 | "metadata": {}, 82 | "source": [ 83 | "## 1. 
Model Setup with Priors\n", 84 | "\n", 85 | "Signal+background model with physics-motivated priors:\n", 86 | "- **μ**: Uniform around expected peak location \n", 87 | "- **σ**: HalfNormal (positive, favors smaller widths)\n", 88 | "- **λ**: Normal around typical decay rate\n", 89 | "- **Yields**: Normal based on expected counts\n", 90 | "\n", 91 | "Priors can be set during creation or modified later:" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "id": "5", 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# Setting and changing priors\n", 102 | "param = zfit.Parameter(\"demo\", 1.0, lower=0.0, upper=5.0)\n", 103 | "print(f\"Initial prior: {param.prior}\")\n", 104 | "param.set_prior(zfit.prior.Normal(mu=2.0, sigma=0.5))\n", 105 | "print(f\"Updated prior: {param.prior}\")\n", 106 | "param.set_prior(zfit.prior.Exponential(lam=1.0))\n", 107 | "print(f\"Exponential prior: {param.prior}\")\n", 108 | "param.set_prior(None) # Remove prior\n", 109 | "print(f\"Removed prior: {param.prior}\")" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "id": "6", 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# Define observable and parameters with priors\n", 120 | "obs = zfit.Space(\"mass\", 4.0, 6.0)\n", 121 | "\n", 122 | "# Signal parameters\n", 123 | "mu = zfit.Parameter(\"mu\", 5.1, 4.5, 5.5, prior=zfit.prior.Uniform(lower=4.8, upper=5.2))\n", 124 | "sigma = zfit.Parameter(\"sigma\", 0.2, 0.05, 0.3, prior=zfit.prior.HalfNormal(sigma=0.1))\n", 125 | "lambda_bkg = zfit.Parameter(\"lambda_bkg\", -1.2, -3.0, 0.0, prior=zfit.prior.Normal(mu=-1.0, sigma=0.5))\n", 126 | "\n", 127 | "# Yield parameters\n", 128 | "n_sig = zfit.Parameter(\"n_sig\", 900, 0, 5000, prior=zfit.prior.Normal(mu=1000, sigma=100))\n", 129 | "n_bkg = zfit.Parameter(\"n_bkg\", 600, 0, 2000, prior=zfit.prior.Normal(mu=500, sigma=50))\n", 130 | "\n", 131 | "# Create model\n", 132 | "signal = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma, extended=n_sig)\n", 133 | "background = zfit.pdf.Exponential(obs=obs, lambda_=lambda_bkg, extended=n_bkg)\n", 134 | "model = zfit.pdf.SumPDF([signal, background])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "7", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# Generate synthetic data from the model\n", 145 | "true_params = {mu: 5.0, sigma: 0.1, lambda_bkg: -1.0, n_sig: 1000, n_bkg: 500}\n", 146 | "data = model.sample(n=1500, params=true_params)\n", 147 | "data.to_binned(50).to_hist().plot(label=\"Data\", color=\"black\", histtype=\"step\")" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "8", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# Create loss function\n", 158 | "nll = zfit.loss.ExtendedUnbinnedNLL(model=model, data=data)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "id": "9", 164 | "metadata": {}, 165 | "source": [ 166 | "## 3. MCMC Sampling\n", 167 | "\n", 168 | "MCMC constructs a Markov chain to sample from the posterior. 
The emcee ensemble sampler uses multiple walkers for efficiency and affine invariance.\n", 169 | "\n", 170 | "**Key parameters:**\n", 171 | "- **nwalkers**: Ensemble size, typically ≥ 2× parameters\n", 172 | "- **n_warmup**: Burn-in steps to reach stationarity, 200-500 for simple models, more for complex ones\n", 173 | "- **n_samples**: Production samples, 1000+ for final results, 100-500 for testing" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "id": "10", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# Initialize MCMC sampler\n", 184 | "sampler = zfit.mcmc.EmceeSampler(nwalkers=32, verbosity=8) # 8 shows a progress bar" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "11", 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "# Sample from posterior\n", 195 | "posterior = sampler.sample(\n", 196 | " loss=nll,\n", 197 | " n_samples=500, # Reduced for tutorial speed\n", 198 | " n_warmup=200,\n", 199 | ")" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "id": "12", 205 | "metadata": {}, 206 | "source": [ 207 | "## 3. Results Analysis\n", 208 | "\n", 209 | "The posterior provides parameter estimates and convergence diagnostics:\n", 210 | "\n", 211 | "- **R̂**: Gelman-Rubin statistic, a convergence metric comparing within-chain to between-chain variance (≤ 1.1 indicates good convergence)\n", 212 | "- **ESS**: Effective sample size accounting for autocorrelation (higher = better sampling efficiency)\n", 213 | "- **Credible intervals**: The Bayesian analogue of confidence intervals\n", 214 | "- **Methods**: `mean()`, `std()`, `credible_interval()`, `get_samples()`" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "id": "13", 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "print(posterior)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "id": "14", 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "# Extract parameter estimates\n", 235 | "for param in model.get_params():\n", 236 | " mean_val = posterior.mean(param)\n", 237 | " std_val = posterior.std(param)\n", 238 | " print(f\"{param.name}: {mean_val:.4f} ± {std_val:.4f}\")\n", 239 | "\n", 240 | "print(\"\\n90% credible intervals:\")\n", 241 | "for param in model.get_params():\n", 242 | " lower, upper = posterior.credible_interval(param, alpha=0.1)\n", 243 | " print(f\"{param.name}: [{lower:.4f}, {upper:.4f}]\")" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "id": "15", 249 | "metadata": {}, 250 | "source": [ 251 | "## 4. Visualization\n", 252 | "\n", 253 | "Posterior plots show parameter uncertainties and compare to true values. 
Key insights:\n", 254 | "- **Width**: Parameter uncertainty\n", 255 | "- **Shape**: Non-Gaussian features\n", 256 | "- **Location**: How data updated the prior" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "id": "16", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "# Plot posterior distributions\n", 267 | "fig, axes = plt.subplots(2, 3, figsize=(12, 8))\n", 268 | "axes = axes.flatten()\n", 269 | "\n", 270 | "for i, param in enumerate(model.get_params()):\n", 271 | " if i < len(axes):\n", 272 | " samples = posterior.get_samples(param)\n", 273 | " axes[i].hist(samples, bins=30, alpha=0.7, density=True)\n", 274 | " axes[i].axvline(posterior.mean(param), color=\"red\", linestyle=\"--\", label=\"Mean\")\n", 275 | " axes[i].axvline(true_params[param], color=\"green\", linestyle=\"-\", label=\"True\")\n", 276 | " axes[i].set_title(f\"{param.name}\")\n", 277 | " axes[i].set_xlabel(\"Value\")\n", 278 | " axes[i].set_ylabel(\"Density\")\n", 279 | " axes[i].legend()\n", 280 | "\n", 281 | "# Remove empty subplot\n", 282 | "if len(model.get_params()) < len(axes):\n", 283 | " fig.delaxes(axes[-1])\n", 284 | "\n", 285 | "plt.tight_layout()\n", 286 | "plt.suptitle(\"Posterior Distributions\", y=1.02)\n", 287 | "plt.show()" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "id": "17", 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# ArviZ integration for advanced diagnostics\n", 298 | "import arviz as az\n", 299 | "\n", 300 | "# Convert to ArviZ InferenceData format\n", 301 | "idata = posterior.to_arviz()\n", 302 | "\n", 303 | "# Print summary with R-hat and ESS\n", 304 | "summary = az.summary(idata)\n", 305 | "print(summary)\n", 306 | "\n", 307 | "# Plot trace plots\n", 308 | "az.plot_trace(idata, compact=True)\n", 309 | "plt.tight_layout()\n", 310 | "plt.show()\n", 311 | "\n", 312 | "# Check R-hat values\n", 313 | "rhat = az.rhat(idata)\n", 314 | "print(\"\\nR-hat values (should be ≤ 1.1):\")\n", 315 | "for var in rhat.data_vars:\n", 316 | " print(f\"{var}: {float(rhat[var]):.3f}\")\n", 317 | "\n", 318 | "# Effective sample size\n", 319 | "ess = az.ess(idata)\n", 320 | "print(\"\\nEffective sample sizes:\")\n", 321 | "for var in ess.data_vars:\n", 322 | " print(f\"{var}: {float(ess[var]):.0f}\")" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "18", 328 | "metadata": {}, 329 | "source": [ 330 | "### After the fit is before the fit" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "id": "19", 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "# 1. 
Posterior to prior for hierarchical modeling\n", 341 | "mu_posterior_prior = posterior.as_prior(mu)\n", 342 | "print(f\"Created KDE prior from posterior: {mu_posterior_prior}\")" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "20", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "# Covariance matrix and correlations\n", 353 | "cov_matrix = posterior.covariance()\n", 354 | "param_names = [p.name for p in model.get_params()]\n", 355 | "stds = np.sqrt(np.diag(cov_matrix))\n", "corr_matrix = cov_matrix / np.outer(stds, stds) # convert the covariance matrix to a correlation matrix\n", 356 | "\n", 357 | "plt.figure(figsize=(8, 6))\n", 358 | "plt.imshow(corr_matrix, cmap=\"coolwarm\", vmin=-1, vmax=1)\n", 359 | "plt.colorbar(label=\"Correlation\")\n", 360 | "plt.xticks(range(len(param_names)), param_names, rotation=45)\n", 361 | "plt.yticks(range(len(param_names)), param_names)\n", 362 | "plt.title(\"Parameter Correlation Matrix\")\n", 363 | "for i in range(len(param_names)):\n", 364 | " for j in range(len(param_names)):\n", 365 | " plt.text(\n", 366 | " j,\n", 367 | " i,\n", 368 | " f\"{corr_matrix[i, j]:.2f}\",\n", 369 | " ha=\"center\",\n", 370 | " va=\"center\",\n", 371 | " color=\"white\" if abs(corr_matrix[i, j]) > 0.5 else \"black\",\n", 372 | " )\n", 373 | "plt.tight_layout()\n", 374 | "plt.show()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "id": "21", 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "# Context manager for setting parameters to posterior means, same as FitResult\n", 385 | "original_mu = mu.value()\n", 386 | "\n", 387 | "with posterior:\n", 388 | " posterior_mu = mu.value()\n", 389 | "\n", 390 | "print(f\"Original mu: {original_mu:.4f}\")\n", 391 | "print(f\"Posterior mean mu: {posterior_mu:.4f}\")\n", 392 | "print(f\"After context: {mu.value():.4f}\")" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "id": "22", 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [] 402 | } 403 | ], 404 | "metadata": {}, 405 | "nbformat": 4, 406 | "nbformat_minor": 5 407 | } 408 | -------------------------------------------------------------------------------- /_website/tutorials/components/90 - Serialization basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Serialization, loading and saving\n", 8 | "\n", 9 | "The long-term goal is to be able to save and load zfit objects, such as models, spaces, parameters, etc. 
This is not yet fully implemented, but some parts are already available, some stable, some more experimental.\n", 10 | "\n", 11 | "Overview:\n", 12 | "- Binary (pickle) loading and dumping of (frozen) `FitResult` is fully available\n", 13 | "- Human-readable serialization (also summarized under HS3) of\n", 14 | " - parameters and models is available, but not yet stable\n", 15 | " - losses and datasets are not yet available\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "jupyter": { 23 | "outputs_hidden": false 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import pathlib\n", 30 | "import pickle\n", 31 | "from pprint import pprint\n", 32 | "\n", 33 | "import mplhep\n", 34 | "import numpy as np\n", 35 | "import zfit\n", 36 | "import zfit.z.numpy as znp\n", 37 | "from matplotlib import pyplot as plt\n", 38 | "from zfit import z" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "jupyter": { 46 | "outputs_hidden": false 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "mu = zfit.Parameter(\"mu\", 1.2, -4, 5)\n", 52 | "sigma = zfit.Parameter(\"sigma\", 3, 0, 10)\n", 53 | "obs = zfit.Space(\"obs1\", -10, 20)\n", 54 | "model = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "jupyter": { 62 | "outputs_hidden": false 63 | } 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "data = model.sample(10000)\n", 68 | "loss = zfit.loss.UnbinnedNLL(model=model, data=data)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "jupyter": { 76 | "outputs_hidden": false 77 | } 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "minimizer = zfit.minimize.Minuit()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "jupyter": { 89 | "outputs_hidden": false 90 | } 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "x = np.linspace(*obs.v1.limits, 1000)\n", 95 | "mu.set_value(1.5)\n", 96 | "sigma.set_value(2)\n", 97 | "mplhep.histplot(data.to_binned(50), density=True, label=\"data\")\n", 98 | "plt.plot(x, model.pdf(x), label=\"model pre fit\")\n", 99 | "result = minimizer.minimize(loss)\n", 100 | "plt.plot(x, model.pdf(x), label=\"model post fit\")\n", 101 | "plt.legend()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "jupyter": { 109 | "outputs_hidden": false 110 | } 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "result.freeze()\n", 115 | "dumped_result = pickle.dumps(result)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "jupyter": { 123 | "outputs_hidden": false 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "loaded_result = pickle.loads(dumped_result)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "jupyter": { 136 | "outputs_hidden": false 137 | } 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "mu.set_value(0.42)\n", 142 | "print(f\"mu before: {mu.value()}\")\n", 143 | "zfit.param.set_values(params=model.get_params(), values=loaded_result)\n", 144 | "print(f\"mu after: {mu.value()}, set to result value: {loaded_result.params[mu]['value']}\")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | 
"jupyter": { 152 | "outputs_hidden": false 153 | } 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "## Human-readable serialization (HS3)\n", 163 | "\n", 164 | "**WARNING: this section is unstable and, apart from dumping for publishing on a \"if it works, great\" basis, everything else is recommended for power users only and will surely break in the future.**\n", 165 | "\n", 166 | "HS3 is the \"hep-statistics-serialization-standard\", that is currently being developed and aims to provide a human-readable serialization format for loading and dumping of the likelihood. It is not stable and neither is the implementation of it in zfit (which also doesn't follow it strictly for different reasons currently).\n", 167 | "\n", 168 | "We can either dump objects in the library directly, or create a complete dump to an HS3-like format." 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "model.to_dict()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "mu.to_dict()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "obs.to_dict()" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "### Recreate the object\n", 203 | "\n", 204 | "We can also recreate the object from the dictionary. As a simple example, let's do this for the model." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "gauss2 = model.from_dict(model.to_dict()) # effectively creates a copy (parameters are shared!)\n", 214 | "gauss2" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "This is a bit of cheating, since we could use the model itself to use the `from_dict` (or more generally, the `from_*` methods). More generally, in this case, we need to know the class of the object (currently) in order to convert it back (this is not the case for the HS3 dumping below)." 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "gauss3 = zfit.pdf.Gauss.from_dict(model.to_dict())" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Dumping and loading\n", 238 | "\n", 239 | "These representations can be converted to anything JSON/YAML like. In fact, the objects already offer out-of-the-box some conversion methods." 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "sigma.to_json()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "sigma.to_yaml()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "### Serializing large datasets\n", 265 | "\n", 266 | "We can also serialize data objects. However, binned data can be large (i.e. in the millions) and are theferore not suitable to be stored in plain text (which requires typically a factor of 10 more space). 
Instead, we can use the `to_asdf` method to store the data in a binary format. This converts any numpy array to binary while keeping just a reference in the human-readable part." 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "data.to_dict()" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "As we can see, naturally the whole data array is saved. Trying to convert this to JSON or YAML will fail, as these dumpers by default cannot handle numpy arrays (one could convert the numpy arrays to lists, but the space problem would remain)." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "try:\n", 292 | " data.to_json()\n", 293 | "except TypeError as error:\n", 294 | " print(error)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "Let's follow the advice!" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "data_asdf = data.to_asdf()\n", 311 | "data_asdf" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### ASDF format\n", 319 | "\n", 320 | "ASDF stands for [Advanced Scientific Data Format](https://asdf.readthedocs.io/en/latest/). It is a mixture of yaml and a binary format that can store arbitrary data, including numpy arrays, pandas dataframes, astropy tables, etc.\n", 321 | "\n", 322 | "Two members are convenient to know:\n", 323 | " - `tree`: returns the dict representation of the data\n", 324 | " - `write_to(path)`: writes the data to a file in `path`" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "data_asdf.tree" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "data_asdf.write_to(\"data.asdf\") # Will create a file in the current directory" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "We can inspect the file using the `head` command to print out the first 25 lines (out of a total of about 471!). As we can see, the beginning is a yaml representation of the data, while the end is a binary representation (which shows up as garbled characters). The file as a whole is not human-readable, but it can be loaded by any ASDF library." 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "!head -25 data.asdf" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "!wc -l data.asdf # the file is about 471 lines long, filled with binary" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "Loading can be done using the `asdf` library directly too." 
382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "import asdf\n", 391 | "\n", 392 | "with asdf.open(\"data.asdf\") as f:\n", 393 | " tree = f.tree\n", 394 | " data = zfit.Data.from_asdf(f)" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "data.value()" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "# cleanup of the file\n", 413 | "import pathlib\n", 414 | "\n", 415 | "pathlib.Path(\"data.asdf\").unlink()" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "## HS3 serialization\n", 423 | "\n", 424 | "To convert our objects into an HS3-like format, we can use the following functions.\n", 425 | "**The format is not yet stable and will change in the future.**\n", 426 | "\n", 427 | "It is therefore recommended on a try-it-out basis: if it works, great; if it errors, fine. Don't expect the dump to be loadable again in the future, but if it works, it is nice for publication.\n", 428 | "\n", 429 | "### Objects\n", 430 | "\n", 431 | "We can serialize the objects themselves: PDFs, spaces, etc. The difference from the `to_dict` serialization mentioned above is that the HS3 serialization is more verbose and contains more information, such as metadata and fields for other objects (e.g. the parameters of a PDF). It will also fill in some of the fields by extracting the information from the object." 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "zfit.hs3.dumps(model)" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "hs3obj = zfit.hs3.loads(zfit.hs3.dumps(model))" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "list(hs3obj['distributions'].values())" 459 | ] 460 | }, 461 | { 462 | "cell_type": "markdown", 463 | "metadata": {}, 464 | "source": [ 465 | "## Publishing\n", 466 | "\n", 467 | "While the format is constantly being improved, a likelihood created with this format can in principle be published, for example alongside a paper. If we want to omit the data and publish only the model, we can simply create an HS3 object from the pdf instead of the likelihood." 
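, "\n", "As a minimal sketch of persisting such a dump for publication (hedged: the file name `model_hs3.json` is just an example, and the `isinstance` branch covers the case that `dumps` returns a mapping rather than a string):\n", "\n", "```python\n", "import json\n", "import pathlib\n", "\n", "dumped = zfit.hs3.dumps(model)\n", "text = dumped if isinstance(dumped, str) else json.dumps(dumped, default=str) # make it writable as text\n", "pathlib.Path(\"model_hs3.json\").write_text(text)\n", "```"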
468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "hs3dumped = zfit.hs3.dumps(model)\n", 477 | "pprint(hs3dumped)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "hs3dumped = zfit.hs3.dumps(loss)\n", 487 | "pprint(hs3dumped)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "hs3dumped" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "zfit.hs3.loads(hs3dumped)" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [] 514 | } 515 | ], 516 | "metadata": { 517 | "kernelspec": { 518 | "display_name": "Python 3 (ipykernel)", 519 | "language": "python", 520 | "name": "python3" 521 | }, 522 | "language_info": { 523 | "codemirror_mode": { 524 | "name": "ipython", 525 | "version": 3 526 | }, 527 | "file_extension": ".py", 528 | "mimetype": "text/x-python", 529 | "name": "python", 530 | "nbconvert_exporter": "python", 531 | "pygments_lexer": "ipython3", 532 | "version": "3.10.4" 533 | } 534 | }, 535 | "nbformat": 4, 536 | "nbformat_minor": 4 537 | } 538 | -------------------------------------------------------------------------------- /_website/tutorials/components/61 - Custom Binned PDF.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Custom Binned PDFs\n", 8 | "\n", 9 | "A key feature of zfit is the ability to create custom PDFs and models. While unbinned PDFs operate on continuous data, binned PDFs work with histogrammed data where events are grouped into bins.\n", 10 | "\n", 11 | "In this tutorial, we will demonstrate how to create custom binned PDFs using two different approaches:\n", 12 | "1. **`_rel_counts` method**: For relative counts (normalized to 1)\n", 13 | "2. **`_counts` method**: For absolute counts (used in extended PDFs)\n", 14 | "\n", 15 | "## What are Binned PDFs?\n", 16 | "\n", 17 | "Binned PDFs in zfit work with discrete bins rather than continuous probability densities. 
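Instead of evaluating a density at arbitrary points, a binned model predicts the expected number of events (or the fraction of events) in each bin. 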
They are particularly useful for:\n", 18 | "- Template fitting (e.g., Monte Carlo templates)\n", 19 | "- Histogram-based analyses\n", 20 | "- Dealing with large datasets where binning improves computational efficiency\n", 21 | "- Modeling discrete processes or when continuous approximations break down" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import matplotlib.pyplot as plt\n", 31 | "import numpy as np\n", 32 | "import zfit\n", 33 | "import zfit.z.numpy as znp\n", 34 | "from zfit import z\n", 35 | "\n", 36 | "# Set up plotting\n", 37 | "plt.style.use('default')\n", 38 | "np.random.seed(42) # For reproducible examples" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Theory: `_rel_counts` vs `_counts`\n", 46 | "\n", 47 | "When creating custom binned PDFs in zfit, you need to implement one or both of these key methods:\n", 48 | "\n", 49 | "### `_rel_counts(self, x, params)`\n", 50 | "- **Purpose**: Returns the relative number of events in each bin\n", 51 | "- **Normalization**: Values sum to 1.0 (relative/normalized counts)\n", 52 | "- **Use case**: Standard (non-extended) binned PDFs\n", 53 | "- **Mathematical meaning**: Probability of finding an event in each bin\n", 54 | "- **Returns**: Tensor with shape matching the binning structure\n", 55 | "\n", 56 | "### `_counts(self, x, params)`\n", 57 | "- **Purpose**: Returns the absolute number of events in each bin \n", 58 | "- **Normalization**: Values sum to the total expected number of events\n", 59 | "- **Use case**: Extended binned PDFs where the total number of events is a parameter\n", 60 | "- **Mathematical meaning**: Expected number of events in each bin\n", 61 | "- **Returns**: Tensor with shape matching the binning structure\n", 62 | "\n", 63 | "### Important Note about Extended PDFs\n", 64 | "For extended PDFs that implement `_counts`, zfit automatically provides `rel_counts()` functionality. However, the behavior may depend on the specific zfit version and context. When working with extended PDFs, focus on implementing `_counts` correctly.\n", 65 | "\n", 66 | "Both methods should be decorated with `@zfit.supports()` to specify which features they support." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Example 1: Custom Binned PDF with `_rel_counts`\n", 74 | "\n", 75 | "Let's create a custom binned Gaussian PDF that implements the `_rel_counts` method. This will return normalized counts that sum to 1." 
76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "class CustomBinnedGaussian(zfit.pdf.BaseBinnedPDF):\n", 85 | " \"\"\"A custom binned Gaussian PDF using _rel_counts method.\"\"\"\n", 86 | " \n", 87 | " def __init__(self, mu, sigma, obs, name=None, label=None):\n", 88 | " # Define the parameters for our PDF\n", 89 | " params = {\n", 90 | " 'mu': mu, # mean parameter\n", 91 | " 'sigma': sigma # standard deviation parameter\n", 92 | " }\n", 93 | " \n", 94 | " # Call parent constructor\n", 95 | " super().__init__(obs=obs, params=params, name=name, label=label)\n", 96 | " \n", 97 | " @zfit.supports(norm=\"space\")\n", 98 | " def _rel_counts(self, x, params):\n", 99 | " \"\"\"\n", 100 | " Calculate the relative counts (normalized) for each bin.\n", 101 | " \n", 102 | " Args:\n", 103 | " x: Binned data or space (typically not used directly in binned PDFs)\n", 104 | " params: Dictionary containing the PDF parameters\n", 105 | " \n", 106 | " Returns:\n", 107 | " Tensor of relative counts that sum to 1.0\n", 108 | " \"\"\"\n", 109 | " mu = params['mu']\n", 110 | " sigma = params['sigma']\n", 111 | " \n", 112 | " # Get the bin centers from the observation space\n", 113 | " # For binned PDFs, we work with the binning structure\n", 114 | " obs_space = self.space\n", 115 | " binning = obs_space.binning\n", 116 | " bin_centers = binning.centers[0] # Get centers for first (and only) axis\n", 117 | " \n", 118 | " # Calculate Gaussian values at bin centers\n", 119 | " gaussian_values = znp.exp(-0.5 * ((bin_centers - mu) / sigma) ** 2)\n", 120 | " \n", 121 | " # Normalize to get relative counts (sum to 1)\n", 122 | " normalized_values = gaussian_values / znp.sum(gaussian_values)\n", 123 | " \n", 124 | " return normalized_values" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Testing the `_rel_counts` Custom PDF\n", 132 | "\n", 133 | "Let's create and test our custom binned Gaussian PDF:" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# Create binned observation space\n", 143 | "n_bins = 50\n", 144 | "binning = zfit.binned.RegularBinning(n_bins, -5, 5, name=\"x\")\n", 145 | "obs_binned = zfit.Space(\"x\", binning=binning)\n", 146 | "\n", 147 | "# Create parameters\n", 148 | "mu_param = zfit.Parameter(\"mu\", 0.5)\n", 149 | "sigma_param = zfit.Parameter(\"sigma\", 1.2)\n", 150 | "\n", 151 | "# Create our custom binned PDF\n", 152 | "custom_gauss = CustomBinnedGaussian(mu=mu_param, sigma=sigma_param, obs=obs_binned, \n", 153 | " name=\"CustomGaussian\")\n", 154 | "\n", 155 | "print(\"Created custom binned Gaussian PDF\")\n", 156 | "print(f\"Parameter values: μ = {mu_param.value():.2f}, σ = {sigma_param.value():.2f}\")\n", 157 | "\n", 158 | "# Test the rel_counts method\n", 159 | "rel_counts = custom_gauss.rel_counts(obs_binned)\n", 160 | "print(f\"Sum of relative counts: {znp.sum(rel_counts):.6f} (should be 1.0)\")\n", 161 | "print(f\"Shape of rel_counts: {rel_counts.shape}\")\n", 162 | "print(f\"First 5 rel_counts values: {rel_counts[:5]}\")" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Visualize the custom binned PDF\n", 172 | "fig, ax = plt.subplots(figsize=(10, 6))\n", 173 | "\n", 174 | "# Get bin centers for plotting\n", 175 | "bin_centers = 
obs_binned.binning.centers[0]\n", 176 | "rel_counts_values = rel_counts.numpy()\n", 177 | "\n", 178 | "# Plot as histogram\n", 179 | "ax.bar(bin_centers, rel_counts_values, width=0.18, alpha=0.7, \n", 180 | " label=f'Custom Binned Gaussian (μ={mu_param.value():.1f}, σ={sigma_param.value():.1f})',\n", 181 | " color='skyblue', edgecolor='navy')\n", 182 | "\n", 183 | "# Also plot the true continuous Gaussian for comparison\n", 184 | "x_continuous = np.linspace(-5, 5, 200)\n", 185 | "true_gaussian = np.exp(-0.5 * ((x_continuous - mu_param.value()) / sigma_param.value()) ** 2)\n", 186 | "true_gaussian = true_gaussian / np.sum(true_gaussian) * len(true_gaussian) / n_bins # Scale for comparison\n", 187 | "\n", 188 | "ax.plot(x_continuous, true_gaussian, 'r-', linewidth=2, \n", 189 | " label='True Continuous Gaussian (scaled)')\n", 190 | "\n", 191 | "ax.set_xlabel('x')\n", 192 | "ax.set_ylabel('Relative Counts')\n", 193 | "ax.set_title('Custom Binned PDF with _rel_counts Method')\n", 194 | "ax.legend()\n", 195 | "ax.grid(True, alpha=0.3)\n", 196 | "\n", 197 | "plt.tight_layout()\n", 198 | "plt.show()\n", 199 | "\n", 200 | "print(f\"Plotted {len(bin_centers)} bins with relative counts\")" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "## Example 2: Custom Extended Binned PDF with `_counts`\n", 208 | "\n", 209 | "Now let's create an extended binned PDF that implements the `_counts` method. This returns absolute counts (not normalized), making it suitable for extended maximum likelihood fits." 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "class CustomExtendedBinnedPoisson(zfit.pdf.BaseBinnedPDF):\n", 219 | " \"\"\"A custom extended binned Poisson-like PDF using _counts method.\"\"\"\n", 220 | " \n", 221 | " def __init__(self, rate, total_events, obs, name=None, label=None):\n", 222 | " # Define the parameters\n", 223 | " params = {\n", 224 | " 'rate': rate, # Rate parameter (like lambda in Poisson)\n", 225 | " 'total_events': total_events # Total number of events (extended parameter)\n", 226 | " }\n", 227 | " \n", 228 | " # For extended PDFs, we need to set extended=True\n", 229 | " super().__init__(obs=obs, params=params, extended=True, name=name, label=label)\n", 230 | " \n", 231 | " @zfit.supports(norm=\"space\") \n", 232 | " def _counts(self, x, params):\n", 233 | " \"\"\"\n", 234 | " Calculate the absolute counts for each bin.\n", 235 | " \n", 236 | " Args:\n", 237 | " x: Binned data or space \n", 238 | " params: Dictionary containing the PDF parameters\n", 239 | " \n", 240 | " Returns:\n", 241 | " Tensor of absolute counts (not normalized)\n", 242 | " \"\"\"\n", 243 | " rate = params['rate']\n", 244 | " total_events = params['total_events']\n", 245 | " \n", 246 | " # Get the bin centers from the observation space\n", 247 | " obs_space = self.space\n", 248 | " binning = obs_space.binning\n", 249 | " bin_centers = binning.centers[0]\n", 250 | " \n", 251 | " # Create a Poisson-like distribution\n", 252 | " # Using exponential decay as an example shape\n", 253 | " shape_values = znp.exp(-rate * znp.abs(bin_centers))\n", 254 | " \n", 255 | " # Scale by total events to get absolute counts\n", 256 | " # The shape should be normalized first, then scaled\n", 257 | " normalized_shape = shape_values / znp.sum(shape_values)\n", 258 | " absolute_counts = normalized_shape * total_events\n", 259 | " \n", 260 | " return absolute_counts" 261 | ] 262 | 
}, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "## Testing the `_counts` Custom PDF\n", 268 | "\n", 269 | "Let's create and test our extended binned PDF:" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# Create parameters for the extended PDF\n", 279 | "rate_param = zfit.Parameter(\"rate\", 0.3, 0.01, 1.0)\n", 280 | "total_events_param = zfit.Parameter(\"total_events\", 1000, 100, 5000)\n", 281 | "\n", 282 | "# Create our custom extended binned PDF\n", 283 | "extended_pdf = CustomExtendedBinnedPoisson(rate=rate_param, \n", 284 | " total_events=total_events_param, \n", 285 | " obs=obs_binned,\n", 286 | " name=\"ExtendedPoisson\")\n", 287 | "\n", 288 | "print(\"Created custom extended binned PDF\")\n", 289 | "print(f\"Parameter values: rate = {rate_param.value():.2f}, total_events = {total_events_param.value():.0f}\")\n", 290 | "\n", 291 | "# Test the counts method \n", 292 | "absolute_counts = extended_pdf.counts(obs_binned)\n", 293 | "print(f\"Sum of absolute counts: {znp.sum(absolute_counts):.1f} (should equal total_events)\")\n", 294 | "print(f\"Expected total events: {total_events_param.value():.0f}\")\n", 295 | "print(f\"Shape of counts: {absolute_counts.shape}\")\n", 296 | "print(f\"First 5 counts values: {absolute_counts[:5]}\")" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "# Visualize both custom PDFs\n", 306 | "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))\n", 307 | "\n", 308 | "# Plot 1: Compare relative counts \n", 309 | "ax1.bar(bin_centers, rel_counts.numpy(), width=0.15, alpha=0.7, \n", 310 | " label='Gaussian (_rel_counts)', color='skyblue', edgecolor='navy')\n", 311 | "\n", 312 | "# For comparison, get relative version of extended PDF\n", 313 | "extended_rel_counts = absolute_counts / znp.sum(absolute_counts)\n", 314 | "ax1.bar(bin_centers + 0.1, extended_rel_counts.numpy(), width=0.15, alpha=0.7,\n", 315 | " label='Extended Poisson (normalized)', color='lightcoral', edgecolor='darkred')\n", 316 | "\n", 317 | "ax1.set_xlabel('x')\n", 318 | "ax1.set_ylabel('Relative Counts')\n", 319 | "ax1.set_title('Comparison of Relative Counts')\n", 320 | "ax1.legend()\n", 321 | "ax1.grid(True, alpha=0.3)\n", 322 | "\n", 323 | "# Plot 2: Show absolute counts from extended PDF\n", 324 | "absolute_counts_values = absolute_counts.numpy()\n", 325 | "ax2.bar(bin_centers, absolute_counts_values, width=0.18, alpha=0.7,\n", 326 | " color='lightcoral', edgecolor='darkred', \n", 327 | " label=f'Extended PDF (_counts)\\nTotal: {znp.sum(absolute_counts):.0f}')\n", 328 | "\n", 329 | "ax2.set_xlabel('x')\n", 330 | "ax2.set_ylabel('Absolute Counts')\n", 331 | "ax2.set_title('Extended PDF Absolute Counts')\n", 332 | "ax2.legend()\n", 333 | "ax2.grid(True, alpha=0.3)\n", 334 | "\n", 335 | "plt.tight_layout()\n", 336 | "plt.show()\n", 337 | "\n", 338 | "print(\"Left plot: Relative counts comparison (both sum to 1)\")\n", 339 | "print(\"Right plot: Absolute counts from extended PDF (sum to total_events parameter)\")" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "## Summary\n", 347 | "\n", 348 | "In this tutorial, we covered how to create custom binned PDFs in zfit using two key methods:\n", 349 | "\n", 350 | "### Key Concepts Learned\n", 351 | "\n", 352 | "1. 
**`_rel_counts` Method**:\n", 353 | " - Returns normalized counts (sum to 1.0)\n", 354 | " - Used for standard binned PDFs\n", 355 | " - Ideal for shape-only analyses\n", 356 | "\n", 357 | "2. **`_counts` Method**: \n", 358 | " - Returns absolute counts (sum to total events)\n", 359 | " - Used for extended binned PDFs\n", 360 | " - Required when the total number of events is a fit parameter\n", 361 | "\n", 362 | "3. **Implementation Pattern**:\n", 363 | " - Inherit from `zfit.pdf.BaseBinnedPDF`\n", 364 | " - Define parameters in `__init__`\n", 365 | " - Implement one or both count methods with proper decorators\n", 366 | " - Access binning through `self.space.binning`\n", 367 | "\n", 368 | "### Examples Demonstrated\n", 369 | "\n", 370 | "- **Basic Custom Binned Gaussian** with `_rel_counts`\n", 371 | "- **Extended Poisson-like PDF** with `_counts` \n", 372 | "- **Visual comparisons** between different approaches\n", 373 | "\n", 374 | "### Best Practices\n", 375 | "\n", 376 | "- Always use `@zfit.supports(norm=\"space\")` decorators\n", 377 | "- Use `znp` (zfit numpy) for numerical operations \n", 378 | "- Ensure `_rel_counts` output sums to 1.0\n", 379 | "- Set `extended=True` when implementing `_counts`\n", 380 | "- Access bin information via `self.space.binning.centers[0]`\n", 381 | "\n", 382 | "Custom binned PDFs open up powerful possibilities for template-based analyses, Monte Carlo studies, and situations where binning provides computational or statistical advantages over unbinned approaches.\n", 383 | "\n", 384 | "For more advanced topics, see the [Custom Models guide](../guides/custom_models.ipynb) and [Binned Models tutorial](30%20-%20Binned%20models.ipynb)." 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "Python 3", 391 | "language": "python", 392 | "name": "python3" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "name": "python", 401 | "nbconvert_exporter": "python", 402 | "pygments_lexer": "ipython3", 403 | "version": "3.8.5" 404 | } 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 4 408 | } 409 | -------------------------------------------------------------------------------- /_website/tutorials/components/05 - Exploring the FitResult.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "# FitResult\n", 9 | "\n", 10 | "In this tutorial, we will explore the `FitResult` of zfit. Specifically, we will examine the error methods `hesse` and `errors`, as well as attributes like `info`, `valid`, etc. We will also provide an example with weighted data to demonstrate how `FitResult` works with weighted datasets.\n", 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "We will start out by creating a simple Gaussian model and sampling some data from it. We will then fit the data with the same model and explore the `FitResult`." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import numpy as np\n", 28 | "import zfit\n", 29 | "import zfit.z.numpy as znp" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "obs = zfit.Space('x', 0, 10)\n", 39 | "mu = zfit.Parameter('mu', 5, 0, 10)\n", 40 | "sigma = zfit.Parameter('sigma', 1, 0, 10)\n", 41 | "nsig = zfit.Parameter('nsig', 1000, 0, 10000)\n", 42 | "gauss = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma, extended=nsig)\n", 43 | "data = gauss.sample()\n", 44 | "print(f\"The sampled data (Poisson-fluctuated) has {data.nevents} events.\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "We use an extended likelihood to fit the data." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "nll = zfit.loss.ExtendedUnbinnedNLL(model=gauss, data=data)\n", 61 | "minimizer = zfit.minimize.Minuit()\n", 62 | "result = minimizer.minimize(nll)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Simply printing the result will give you a beautified overview of the fit result." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "print(result)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "## What happened\n", 86 | "\n", 87 | "First and foremost, the FitResult contains all the information about what happened with the minimization, most notably the `loss` that was minimized, the `minimizer` that was used, and the `params` that were fitted (the latter has a beautified presentation)." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "print(f\"\"\"\n", 97 | "loss: {result.loss}\n", 98 | "minimizer: {result.minimizer}\n", 99 | "params: {result.params}\n", 100 | "\"\"\")" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### params\n", 108 | "\n", 109 | "`params` contains all the information that was ever added to the parameters. This includes the output of uncertainty methods, limits and much more.\n", 110 | "The actual content looks like this:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "print(f\"params raw: {repr(result.params)}\")" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "The `FitResult` has a lot of attributes and methods. We will now explore some of them.\n", 127 | "\n", 128 | "\n", 129 | "All the displayed information can be accessed via the attributes of the `FitResult` object, namely:\n", 130 | "- valid: whether the fit converged and is in general valid\n", 131 | "- converged: whether the fit converged\n", 132 | "- param at limit: whether any parameter is at its limit (approximate, hard to estimate)\n", 133 | "- edm: estimated distance to minimum\n", 134 | "- fmin: the minimum of the function, i.e. 
the negative log likelihood\n", 135 | "- values: the parameter values at the minimum in an array-like object" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "print(f\"\"\"\n", 145 | "valid: {result.valid}\n", 146 | "converged: {result.converged}\n", 147 | "param at limit: {result.params_at_limit}\n", 148 | "edm: {result.edm}\n", 149 | "fmin: {result.fmin}\n", 150 | "optimal values: {result.values}\n", 151 | "\"\"\")" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Error methods\n", 159 | "\n", 160 | "There are two main ways to estimate the uncertainties: either using a profiling method that varies the parameters one by one and finds\n", 161 | "the point of 1 sigma (or the specified n sigma), resulting in asymmetric errors, or using a matrix inversion method that calculates\n", 162 | "an approximation of the former by using a second derivative matrix.\n", 163 | "\n", 164 | "The first method is called `errors` and the second `hesse`. Both methods are available in the `FitResult` object.\n", 165 | "\n", 166 | "### Pitfall: weights\n", 167 | "\n", 168 | "For weighted likelihoods, the `errors` method will not report the correct uncertainties. Instead, `hesse` should be used,\n", 169 | "as it will, by default, calculate the asymptotically correct approximations for weights, as we will see a few lines below.\n", 170 | "\n", 171 | "### Arguments\n", 172 | "\n", 173 | "Both methods take some common arguments:\n", 174 | "- `params`: the parameters to calculate the errors for. If `None`, all parameters will be used. (this can be expensive!)\n", 175 | "- `name`: the name of the new result. If `None`, the name will be chosen automatically.\n", 176 | "- `cl`: the confidence level for the errors. The default is 0.68, which corresponds to 1 sigma.\n", 177 | "- `method`: the method to use. The default is `None`, which will use the default method of the uncertainty estimator." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "errors, new_result = result.errors(name=\"errors\")\n", 187 | "print(f\"New result: {new_result}\")\n", 188 | "print(result)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "The uncertainties are added to the fit result. The `new_result` is usually `None`, but in case a new minimum was found, it will be returned\n", 196 | "as the new result. In this case, the old result will be rendered invalid.\n", 197 | "\n", 198 | "There are currently two implementations, the minos method from `iminuit` (as `minuit_minos`) and a completely independent implementation\n", 199 | "(`zfit_errors`)." 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "### More information\n", 207 | "\n", 208 | "To find more information about the uncertainty estimation, the return value can be inspected. This information is, however, also automatically added to the result\n", 209 | "for each parameter. Looking again at the raw `params` attribute, we find that all the information is there:\n", 210 | "\n", 211 | "_Note: this part is still work in progress, and future plans are to standardize these attributes as well. 
Any ideas or inputs are very welcome!_" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "print(f\"params raw: {repr(result.params)}\")" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "errors2, _ = result.errors(name=\"zfit_unc\", method=\"zfit_errors\")\n", 230 | "print(result)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "As we see, they both agree well. We can also change the confidence level to 0.95, which corresponds to 2 sigma, and recalculate the errors." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "errors3, _ = result.errors(name=\"zfit_2sigma\", method=\"zfit_errors\", cl=0.95)\n", 247 | "print(result)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "### Hesse\n", 255 | "\n", 256 | "The hesse method approximates the errors by calculating the second derivative matrix of the function and inverting it.\n", 257 | "As for `errors`, there are two implementations, one from `iminuit` (`minuit_hesse`) and one from `zfit` (`hesse_np`).\n", 258 | "\n", 259 | "Additionally, `hesse` has a third option, `approx`: this is the approximation of the hessian estimated by the minimizer\n", 260 | "during the minimization procedure. This, however, *can* be `None`! Also, the accuracy can be low, especially if the\n", 261 | "fit converged rapidly." 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "hesse = result.hesse(name=\"h minuit\", method=\"minuit_hesse\", cl=0.95) # can also take the cl argument\n", 271 | "hesse2 = result.hesse(name=\"h zfit\", method=\"hesse_np\")\n", 272 | "hesse3 = result.hesse(name=\"h approx\", method=\"approx\")\n", 273 | "print(result)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "Internally, zfit by default uses a numerical approximation of the hessian, which is usually sufficient and good for one-time use.\n", 281 | "However, if you want to use the hessian for multiple fits, it is recommended to force it to use the exact gradient provided by the\n", 282 | "backend. To make sure one or the other is used, you can set `zfit.run.set_autograd_mode(False)` or `zfit.run.set_autograd_mode(True)`." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "with zfit.run.set_autograd_mode(True):\n", 292 | " hesse4 = result.hesse(name=\"h autograd\", method=\"hesse_np\")\n", 293 | "print(result)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "## Weighted uncertainties\n", 301 | "\n", 302 | "A weighted likelihood is technically not a likelihood anymore, and the errors are not calculated correctly. However, the hesse method\n", 303 | "can be corrected for weights, which is done automatically as soon as the dataset is weighted. 
The method for corrections can be specified using the `weightcorr` argument.\n", 304 | "There are two methods to calculate the weighted uncertainties:\n", 305 | " - `\"asymptotic\"` (default): the `asymptotically correct` yet computationally expensive method described in [Parameter uncertainties in weighted unbinned maximum likelihood fits](https://link.springer.com/article/10.1140/epjc/s10052-022-10254-8).\n", 306 | " - `\"sumw2\"`: the `effective size` method, which scales the covariance matrix by the effective sample size of the dataset. This method is computationally significantly cheaper but can be less accurate.\n", 307 | "\n", 308 | "To disable the corrections, set `weightcorr=False`.\n", 309 | "\n", 310 | "The `\"asymptotic\"` correction involves the calculation of the Jacobian with respect to each event, which can be expensive to compute. Again, zfit offers the choice between the\n", 311 | "autograd and the numerical Jacobian." 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "weighted_data = zfit.Data.from_tensor(obs=obs, tensor=data.value(), weights=znp.random.uniform(0.1, 5, size=(data.nevents,)))\n", 321 | "weighted_nll = zfit.loss.UnbinnedNLL(model=gauss, data=weighted_data)\n", 322 | "weighted_result = minimizer.minimize(weighted_nll)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "weighted_result.errors(name=\"errors\")" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "with zfit.run.set_autograd_mode(True):\n", 348 | " weighted_result.hesse(name=\"hesse autograd asy\", weightcorr=\"asymptotic\")\n", 349 | " weighted_result.hesse(name=\"hesse autograd np asy\", method=\"hesse_np\", weightcorr=\"asymptotic\")" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "with zfit.run.set_autograd_mode(False):\n", 359 | " weighted_result.hesse(name=\"hesse numeric asy\") # weightcorr=\"asymptotic\" is default\n", 360 | " weighted_result.hesse(name=\"hesse numeric np asy\", method=\"hesse_np\")" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "print(weighted_result) # FIXME: the errors are not correct for the nsig" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "As we can see, the errors are underestimated for the nuisance parameters using the minos method, while the hesse method is correct.\n", 377 | "\n", 378 | "The `hesse` method can also be used with the `\"sumw2\"` correction, which is computationally much cheaper, or without any correction at all." 
379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [ 387 | "weighted_result.hesse(name=\"hesse autograd sumw2\", weightcorr=\"sumw2\")\n", 388 | "weighted_result.hesse(name=\"hesse numeric no corr\", weightcorr=False)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "print(weighted_result)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "### Standardized minimizer information\n", 405 | "\n", 406 | "Some of the minimizers collect information about the loss during the minimization process, such as an approximation of the hessian, inverse hessian, gradient etc. They can be retrieved via `approx`; note, however, that they can be `None`.\n", 407 | "\n", 408 | "`hessian` and `inv_hessian` have an `invert` argument: if True and only one of the two is available, the other one will be inverted to obtain the requested one.\n" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "print(f\"Approx gradient: {result.approx.gradient()}\") # gradient approx not available in iminuit\n", 418 | "print(f\"Approx hessian (no invert): {result.approx.hessian(invert=False)}\") # hessian approximation is also not available\n", 419 | "print(f\"Approx inverse hessian: {result.approx.inv_hessian(invert=False)}\") # inv_hessian is available\n", 420 | "print(f\"Approx hessian (can invert): {result.approx.hessian(invert=True)}\") # allowing the invert now inverts the inv_hessian" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "### info\n", 433 | "The information returned by the minimizer. CAREFUL! This is a dictionary and can be different for different minimizers. The standardized keys can always be accessed in other ways, such as the approximations of the hessian, the covariance matrix etc." 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "result.info.keys()" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "This can be helpful if underlying information from a specific minimizer should be retrieved. For example, the `original` key contains the original result from the minimizer, while \"minuit\" is the actual `iminuit` minimizer that was used." 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "result.info.get(\"original\", f\"Not available for the minimizer: {result.minimizer}\")" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "result.info.get(\"minuit\", \"Not available; was iminuit not used in the minimization?\")" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "### Finding problems\n", 480 | "\n", 481 | "If the fit failed for some reason, `valid` may be False. To find the actual reason, `message` should contain human-readable information about what went wrong. 
If everything went well, the message will be empty." 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "result.message" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3 (ipykernel)", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.10.4" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 1 522 | } 523 | -------------------------------------------------------------------------------- /_website/tutorials/introduction/upgrade_guide_020.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | "# Upgrade guide to 0.20\n", 10 | "\n", 11 | "With version 0.20, zfit prepares for a more stable and user-friendly interface. This guide will help you to upgrade your code to the new version and demonstrate the most significant changes. It is meant for people who are already familiar with zfit and want to upgrade their code.\n", 12 | "\n", 13 | "**Not all changes are yet reflected everywhere in the docs; [help with the docs is welcome](https://github.com/zfit/zfit/issues/556)**, as is [adding a more polished PDF](https://github.com/zfit/zfit/issues/512). (See [all issues with contributions wanted](https://github.com/zfit/zfit/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc) or [reach out](https://github.com/zfit/zfit#contact) to us on Mattermost, Gitter, GitHub, or e-mail if you're interested **in becoming a contributor**, from beginners to advanced)." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "1", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "# standard imports\n", 25 | "import zfit\n", 26 | "import zfit.z.numpy as znp # use this \"numpy-like\" for mathematical operations\n", 27 | "import zfit_physics as zphys # physics module, with new physics-inspired PDFs\n", 28 | "from zfit import z" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "2", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "@z.function\n", 39 | "def maximum(x, y):\n", 40 | " return znp.maximum(x, y)\n", 41 | "\n", 42 | "\n", 43 | "# example usage of the numpy-like backend, use it if possible\n", 44 | "print(f\"sqrt with np:{np.sqrt(maximum(1., 2.))}, with z:{znp.sqrt(maximum(1., 2.))}\")\n", 45 | "print(f\"vectorized: {np.sqrt(maximum(znp.array([1., 2.]), znp.array([3., 4.])))}\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "3", 51 | "metadata": {}, 52 | "source": [ 53 | "## Parameters\n", 54 | "\n", 55 | "The biggest news concerns parameters: the `NameAlreadyTakenError` is gone (!). Multiple parameters with the same name can now be created and co-exist. The only restriction: they must not be used within the same PDF/loss etc., as this would lead to ambiguities (a sketch of the failure mode follows below)." 
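, "\n", "As a quick illustration (a sketch only: the exact error type, and whether it is raised at construction or at use, may vary between zfit versions):\n", "\n", "```python\n", "p_a = zfit.Parameter(\"shared\", 1, 0, 10)\n", "p_b = zfit.Parameter(\"shared\", 2, 0, 10)\n", "try:\n", "    zfit.pdf.Gauss(mu=p_a, sigma=p_b, obs=zfit.Space(\"x\", -1, 1)) # two parameters named \"shared\" in one PDF\n", "except Exception as error:\n", "    print(f\"ambiguous parameters rejected: {error}\")\n", "```"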
56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "4", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "param1 = zfit.Parameter(\"param1\", 1, 0, 10)\n", 66 | "param1too = zfit.Parameter(\"param1\", 2, 0, 10)\n", 67 | "# no error!" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "5", 73 | "metadata": {}, 74 | "source": [ 75 | "## Labels\n", 76 | "\n", 77 | "Many objects, including parameters, can now have a label. This label is purely human-readable and can be used for plotting, printing, etc. It can contain arbitrary characters.\n", 78 | "\n", 79 | "The `name` of objects still exists and will in a future version probably be used for identification purposes (in conjunction with serialization). Therefore, use `label` for human-readable names and avoid `name` for that purpose." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "id": "6", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "param1again = zfit.Parameter(\"param1\", 3, 0, 10, label=r\"$param_1$ [GeV$^2$]\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "7", 95 | "metadata": {}, 96 | "source": [ 97 | "## Space\n", 98 | "\n", 99 | "As explained [in the GitHub discussion thread](https://github.com/zfit/zfit/discussions/533), there are major improvements and changes.\n", 100 | "- multispaces (adding two spaces, having disjoint observables) are now deprecated and will be removed. The new `TruncatedPDF` supports multiple spaces as limits and achieves a similar, if not better, functionality.\n", 101 | "- the return type of `Space.limit` will be changed in the future. For forward compatibility, use `Space.v1.limit` (or similar methods) instead of `Space.limit`. The old one is still available via `Space.v0.limit`.\n", 102 | "- there are new ways of creating spaces\n", 103 | "\n", 104 | "More [examples on how to use the new spaces](https://github.com/zfit/zfit/blob/develop/examples/spaces.py)."
105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "8", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "obs1 = zfit.Space(\"obs1\", -1, 1) # no tuple needed anymore\n", 115 | "obs2 = zfit.Space(\"obs2\", lower=-1, upper=1, label=\"observable two\")\n", 116 | "\n", 117 | "# create a space with multiple observables\n", 118 | "obs12 = obs1 * obs2" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "9", 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# limits are now as one would naively expect, and area has been renamed to volume (some are tensors, but that doesn't matter: they behave like numpy arrays)\n", 129 | "print(f\"old: limits={obs12.v0.limits}\")\n", 130 | "print(f\"new: limits={obs12.v1.limits}\") # we have just 1D arrays" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "id": "10", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# this allows, for example, for a more intuitive way\n", 141 | "np.linspace(*obs12.v1.limits, num=7)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "id": "11", 147 | "metadata": {}, 148 | "source": [ 149 | "## Data\n", 150 | "\n", 151 | "Data handling has been significantly simplified and streamlined.\n", 152 | "- most places directly accept `numpy` arrays, tensors or a `pandas DataFrame` as input; using a `Data` object is not necessary anymore (but it is convenient, as it cuts the data to the space)\n", 153 | "- `Data` objects can now be created without the specific constructors (e.g. `zfit.Data.from_pandas`), but directly with the data. The constructors are still available for convenience and for more options.\n", 154 | "- `Data` objects are now stateless and instead offer `with`-methods to change the data, for example `with_obs`, `with_weights` (can be `None` to have no weights), etc.\n", 155 | "- The `SamplerData` has been overhauled and now has a more public API, including an `update_data` method that allows changing the data without creating a new object and without relying on a `create_sampler` method from a PDF.\n", 156 | "- `zfit.data.concat` has been added to concatenate data objects, similar to `pd.concat`." 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "id": "12", 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "data1_obs1 = zfit.Data(np.random.uniform(0, 1, 1000), obs=obs1)\n", 167 | "data1_obs2 = zfit.Data(np.random.uniform(0, 1, 1000), obs=obs2, label=\"My favourite $x$\")\n", 168 | "data2_obs1 = zfit.Data(np.random.normal(0, 1, 1000), obs=obs1)\n", 169 | "\n", 170 | "# similar to pd.concat\n", 171 | "data_obs12 = zfit.data.concat([data1_obs1, data1_obs2], axis=\"columns\")\n", 172 | "data_obs1 = zfit.data.concat([data1_obs1, data2_obs1], axis=\"index\")" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "13", 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "# data can be accessed with \"obs\" directly\n", 183 | "data_obs12[\"obs1\"] # returns a numpy array shape (n,)\n", 184 | "data_obs12[[\"obs2\", \"obs1\"]] # returns a numpy array shape (n, 2)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "14", 190 | "metadata": {}, 191 | "source": [ 192 | "## Binning\n", 193 | "\n", 194 | "Using a binned space now has the effect of creating a binned version. This happens for `Data` and `PDF` objects."
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "id": "15", 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "obs1_binned = obs1.with_binning(12)\n", 205 | "data_binned = zfit.Data(np.random.normal(0, 0.2, 1000), obs=obs1_binned)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "id": "16", 211 | "metadata": {}, 212 | "source": [ 213 | "## PDFs\n", 214 | "\n", 215 | "- there is a plethora of new PDFs, mostly covering physics-inspired use cases. Amongst the interesting ones are the `GeneralizedCB`, a more general version of the `DoubleCB` that should be preferred in the future. A Voigt profile is available, as are Bernstein polynomials, QGauss, GaussExpTail, etc., and [zfit-physics](https://zfit.readthedocs.io/en/latest/user_api/zfit.pdf.html#physics-pdfs) adds HEP-specific PDFs such as `CMSShape`, `Cruijff`, `Novosibirsk` and more.\n", 216 | "- the `TruncatedPDF` has been added to allow for a more flexible way of truncating a PDF. Any PDF can be converted to a truncated version using `to_truncated` (which, by default, truncates to the limits of the space).\n", 217 | "- PDFs have a new `plot` method that allows for quick plotting of the PDF (it takes an \"obs\" argument that allows simply projecting it!). This is still experimental and may change; the main purpose is to allow for a quick check of the PDF in interactive environments. The function is fully compatible with matplotlib and takes an `ax` argument; it also passes any keyword arguments through to the plotting function." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "17", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# all the new PDFs\n", 228 | "print(zfit.pdf.__all__)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "18", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "print(zphys.pdf.__all__)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "19", 245 | "metadata": { 246 | "jupyter": { 247 | "is_executing": true 248 | } 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "# create a PDF\n", 253 | "pdf = zfit.pdf.Gauss(\n", 254 | " mu=zfit.Parameter(\"mu\", 1.2), sigma=param1again, obs=obs1, extended=1000\n", 255 | ") # using an extended PDF, the truncated pdf automatically rescales\n", 256 | "pdf.plot.plotpdf() # quick plot\n", 257 | "# truncate it\n", 258 | "pdf_truncated = pdf.to_truncated(limits=(-0.7, 0.1))\n", 259 | "pdf_truncated.plot.plotpdf()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "20", 266 | "metadata": { 267 | "jupyter": { 268 | "is_executing": true 269 | } 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "# a truncated PDF can also have multiple limits, replacing the MultiSpace concept\n", 274 | "range1 = zfit.Space('obs1', -0.9, -0.1, label=\"lower range\") # an excellent use-case of a label\n", 275 | "range2 = zfit.Space('obs1', 0.3, 0.5, label=\"upper range\")\n", 276 | "pdf_disjoint = pdf.to_truncated(limits=(range1, range2))\n", 277 | "pdf_disjoint.plot.plotpdf()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "21", 284 | "metadata": { 285 | "jupyter": { 286 | "is_executing": true 287 | } 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "# binned pdfs from space work like the data\n", 292 | "gauss_binned = zfit.pdf.Gauss(mu=zfit.Parameter(\"mu\", 
2.5), sigma=param1again, obs=obs1_binned, extended=1000)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "id": "22", 299 | "metadata": { 300 | "jupyter": { 301 | "is_executing": true 302 | } 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "# as mentioned before, PDFs can be evaluated directly on numpy arrays or pandas DataFrames\n", 307 | "pdf.pdf(data_obs1.to_pandas())" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "id": "23", 313 | "metadata": {}, 314 | "source": [ 315 | "## Loss and minimizer\n", 316 | "\n", 317 | "They stay mostly the same (apart from improvements behind the scenes and bugfixes).\n", 318 | "Losses now directly take the data and the model, without the need for a `Data` object (if the data is already cut to the space).\n", 319 | "\n", 320 | "To use the automatic gradient, set `gradient=\"zfit\"` in the minimizer. This can speed up the minimization for larger fits.\n", 321 | "\n", 322 | "### Updated params\n", 323 | "\n", 324 | "The minimizer currently updates the parameter default values after each minimization. To keep this behavior, add an `update_params()` call after the minimization.\n", 325 | "\n", 326 | "(Experimentally, the update can be disabled with `zfit.run.experimental_disable_param_update(True)`; this will probably be the default in the future. Be aware that using this experimental feature most likely breaks current scripts. Feedback on this new feature is highly welcome!)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "id": "24", 333 | "metadata": { 334 | "jupyter": { 335 | "is_executing": true 336 | } 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "loss = zfit.loss.ExtendedUnbinnedNLL(model=pdf, data=data2_obs1.to_pandas())\n", 341 | "minimizer = zfit.minimize.Minuit(\n", 342 | " gradient=\"zfit\"\n", 343 | ") # to use the automatic gradient -> can fail, but can speed up the minimization" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "id": "25", 350 | "metadata": { 351 | "jupyter": { 352 | "is_executing": true 353 | } 354 | }, 355 | "outputs": [], 356 | "source": [ 357 | "result = minimizer.minimize(loss).update_params()\n", 358 | "pdf.plot.plotpdf(full=False) # plot only the pdf, no labels" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "id": "26", 364 | "metadata": {}, 365 | "source": [ 366 | "## Result\n", 367 | "\n", 368 | "The result is now more useful for setting parameters: the `update_params` method can be used to set the parameters to the minimum, as seen above.\n", 369 | "\n", 370 | "The result can also be used as a context manager to temporarily set the parameters to the minimum and restore them afterwards.\n" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "27", 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "param1again.set_value(1.5)\n", 381 | "with result:\n", 382 | " print(f\"param1 set temporarily to {param1again}\")\n", 383 | "print(f\"param1 is now {param1again} again\")" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "28", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# or to set the parameters to the minimum\n", 394 | "zfit.param.set_values(result) # also supports a dict of {param: value}!\n", 395 | "print(param1again)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "id": "29", 401 | "metadata": {}, 402 | 
"source": [ 403 | "## Serialization\n", 404 | "\n", 405 | "The result can now be pickled like any object in zfit!\n", 406 | "(this was not possible before, only after calling `freeze` on the result)\n", 407 | " \n", 408 | "This works directly using `dill` (a library that extends `pickle`), but can fail if the garbage collector is not run. Therefore, zfit provides a slightly modified `dill` that can work as a drop-in replacement.\n", 409 | "\n", 410 | "The objects can be saved and loaded again and used as before. Ideal to store the minimum of a fit and use it later for statistical treatments, plotting, etc." 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "id": "30", 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "result_serialized = zfit.dill.dumps(result)\n", 421 | "result_deserialized = zfit.dill.loads(result_serialized)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "id": "31", 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "# the result can be used as before\n", 432 | "result_deserialized.hesse() # the default name is now \"hesse\" and not \"minuit_hesse\"\n", 433 | "result_deserialized.errors() # the default name is now \"errors\" and not \"minuit_minos\"\n", 434 | "print(result_deserialized)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "id": "32", 440 | "metadata": {}, 441 | "source": [ 442 | "## Parameters as arguments\n", 443 | "\n", 444 | "The values of the parameters can now be directly used as arguments in functions of PDFs/loss. Methods in the pdf also take the parameters as arguments.\n", 445 | "\n", 446 | "The name of the argument has to match the name of the parameter given in initialization (or can also be the parameter itself)." 
447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "id": "33", 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "from matplotlib import pyplot as plt\n", 457 | "\n", 458 | "x = znp.linspace(*obs1.v1.limits, 1000)\n", 459 | "plt.plot(x, pdf.pdf(x, params={\"param1\": 1.5}), label=\"param1=1.5\")\n", 460 | "plt.plot(x, pdf.pdf(x, params={param1again: 2.5}), label=\"param1=2.5\")\n", 461 | "plt.legend()" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "id": "34", 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | "import tqdm\n", 472 | "\n", 473 | "# similar for the loss\n", 474 | "param1dict = result_deserialized.params[param1again]\n", 475 | "param1min = param1dict[\"value\"]\n", 476 | "lower, upper = param1dict[\"errors\"][\"lower\"], param1dict[\"errors\"][\"upper\"]\n", 477 | "x = np.linspace(param1min + 2 * lower, param1min + 2 * upper, 50)\n", 478 | "y = []\n", 479 | "param1again.floating = False # not minimized\n", 480 | "for x_i in tqdm.tqdm(x):\n", 481 | " param1again.set_value(x_i)\n", 482 | " minimizer.minimize(loss).update_params() # set nuisance parameters to minimum\n", 483 | " y.append(loss.value())\n", 484 | "plt.plot(x, y)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "id": "35", 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "# a result can also be used as an argument for PDFs or, here, for losses\n", 495 | "loss_before = loss.value()\n", 496 | "loss_min = loss.value(params=result_deserialized) # evaluate at minimum\n", 497 | "print(f\"loss before (random from before): {loss_before:.7} minimum value: {loss_min:.7} vs {result_deserialized.fmin:.7}\")" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "id": "36", 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "# creating a PDF also looks different; here we use the names of the parametrization and the axes (integers)\n", 508 | "\n", 509 | "\n", 510 | "class MyGauss2D(zfit.pdf.ZPDF):\n", 511 | " _PARAMS = (\"mu\", \"sigma\")\n", 512 | " _N_OBS = 2\n", 513 | "\n", 514 | " @zfit.supports() # this allows the params magic\n", 515 | " def _unnormalized_pdf(self, x, params):\n", 516 | " x0 = x[0] # this means \"axis 0\"\n", 517 | " x1 = x[1] # this means \"axis 1\"\n", 518 | " mu = params[\"mu\"]\n", 519 | " sigma = params[\"sigma\"]\n", 520 | " return znp.exp(-0.5 * ((x0 - mu) / sigma) ** 2) * x1 # fake, just for demonstration" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "id": "37", 527 | "metadata": {}, 528 | "outputs": [], 529 | "source": [ 530 | "gauss2D = MyGauss2D(mu=0.8, sigma=param1again, obs=obs12, label=\"2D Gaussian$^2$\")\n", 531 | "gauss2D.plot.plotpdf(obs=\"obs1\") # we can project the 2D pdf to 1D" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "id": "38", 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [] 541 | } 542 | ], 543 | "metadata": { 544 | "kernelspec": { 545 | "display_name": "Python 3", 546 | "language": "python", 547 | "name": "python3" 548 | }, 549 | "language_info": { 550 | "codemirror_mode": { 551 | "name": "ipython", 552 | "version": 2 553 | }, 554 | "file_extension": ".py", 555 | "mimetype": "text/x-python", 556 | "name": "python", 557 | "nbconvert_exporter": "python", 558 | "pygments_lexer": "ipython2", 559 | "version": "2.7.6" 560 | } 561 | }, 562 | "nbformat": 4, 563 | 
"nbformat_minor": 5 564 | } 565 | -------------------------------------------------------------------------------- /_website/tutorials/components/33 - Binned fits.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# zfit binned\n", 8 | "\n", 9 | "There are two main ways of looking at \"binned fits\"\n", 10 | "- Either an analytic shape that could be fit unbinned but is fit to binned data *because of the datasize* (typical LHCb, Belle II,...)\n", 11 | "- stacking template histograms from simulation to provide the shape and fit to binned data (typical done in CMS, ATLAS, some LHCb,...)\n", 12 | "\n", 13 | "Some templated fits with uniform binning, no analytic components and specific morphing and constraints fit into the HistFactory model, implemented in [pyhf](https://github.com/scikit-hep/pyhf).\n", 14 | "These fits make a large portion of CMS and ATLAS analyses.\n", 15 | "\n", 16 | "zfit can, in principle, reproduce them too. However, it's comparably inefficient, a lot of code and slow. The main purpose is to support *anything that is beyond HistFactory*." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib.pyplot as plt\n", 26 | "import mplhep\n", 27 | "import numpy as np\n", 28 | "import zfit\n", 29 | "import zfit.z.numpy as znp # numpy-like backend interface\n", 30 | "\n", 31 | "zfit.settings.set_seed(43)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Binned parts\n", 39 | "\n", 40 | "zfit introduces binned equivalents to unbinned components and transformations from one to the other.\n", 41 | "For example:\n", 42 | "- `SumPDF` -> `BinnedSumPDF`\n", 43 | "- `Data` -> `BinnedData`\n", 44 | "- `UnbinnedNLL` -> `BinnedNLL`\n", 45 | "\n", 46 | "There are converters and new, histogram specific PDFs and methods." 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## From unbinned to binned\n", 54 | "\n", 55 | "Let's start with an example, namely a simple, unbinned fit that we want to perform binned." 
56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "normal_np = np.random.normal(loc=2., scale=1.3, size=10000)\n", 65 | "\n", 66 | "obs = zfit.Space(\"x\", -10, 10)\n", 67 | "\n", 68 | "mu = zfit.Parameter(\"mu\", 1., -4, 6)\n", 69 | "sigma = zfit.Parameter(\"sigma\", 1., 0.1, 10)\n", 70 | "model_nobin = zfit.pdf.Gauss(mu, sigma, obs)\n", 71 | "\n", 72 | "data_nobin = zfit.Data(obs=obs, data=normal_np)\n", 73 | "\n", 74 | "loss_nobin = zfit.loss.UnbinnedNLL(model_nobin, data_nobin)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "minimizer = zfit.minimize.Minuit()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# make binned\n", 93 | "nbins = 50\n", 94 | "data = data_nobin.to_binned(nbins)\n", 95 | "model = model_nobin.to_binned(data.space)\n", 96 | "\n", 97 | "# we can create a binned NLL\n", 98 | "loss = zfit.loss.BinnedNLL(model, data)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "result = minimizer.minimize(loss)\n", 108 | "print(result)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "result.hesse(name=\"hesse\")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "result.errors(name=\"errors\")" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "print(result)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## Binned parts in detail\n", 143 | "\n", 144 | "`to_binned` creates a binned (and `to_unbinned` an unbinned) version of objects. It takes a binned Space, a binning, or (as above) an integer (in which case a uniform binning is created).\n", 145 | "\n", 146 | "This implicitly creates a new, binned space." 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "obs_binned_auto = data.space\n", 156 | "print(obs_binned_auto)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "print(f\"is_binned: {obs_binned_auto.is_binned}, binned obs binning: {obs_binned_auto.binning}\")\n", 166 | "print(f\"is_binned: {obs.is_binned}, unbinned obs binning: {obs.binning}\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Explicit conversion\n", 174 | "\n", 175 | "We can explicitly convert spaces, data and models to binned parts.\n", 176 | "\n", 177 | "Either a number of bins (for a uniform binning) or an explicit binning can be given."
178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "obs_binned = obs.with_binning(nbins)\n", 187 | "print(obs_binned)\n", 188 | "\n", 189 | "# or we can create binnings (same as boost-histogram/hist)\n", 190 | "binning_regular = zfit.binned.RegularBinning(nbins, -10, 10, name='x')\n", 191 | "binning_variable = zfit.binned.VariableBinning([-10, -6, -1, -0.1, 0.4, 3, 10], name='x')" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Since a binning contains all the information needed to create a Space, a binning can be used to define a space directly." 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "obs_binned_variable = zfit.Space(binning=binning_variable)\n", 208 | "print(obs_binned_variable, obs_binned_variable.binning)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "## Converting data, models" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "data_nobin.to_binned(obs_binned_variable)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "model_nobin.to_binned(obs_binned_variable)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "# Compatibility with UHI\n", 241 | "\n", 242 | "zfit keeps compatibility with the Universal Histogram Interface (UHI) and libraries that implement it (boost-histogram, hist).\n", 243 | "- `BinnedData` directly adheres to UHI (and has a `to_hist` attribute)\n", 244 | "- `BinnedPDF` has a `to_binneddata` and a `to_hist` attribute\n", 245 | "\n", 246 | "Where a `BinnedData` object is expected, a (named) UHI object is also possible. The same goes for the binning axes." 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "h = model.to_hist()\n", 256 | "h_scaled = h * 10_000" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "## BinnedData\n", 264 | "\n", 265 | "Binned data has `counts`, `values` and `variances` attributes, as well as a `binning` (aliased with `axes`)." 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "data.values()" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## BinnedPDF\n", 282 | "\n", 283 | "A binned PDF has the same methods as its unbinned counterparts, namely `pdf`, `integrate` (and their `ext_*` variants) and `sample`, which can respond to binned as well as unbinned data.\n", 284 | "\n", 285 | "Additionally, there are two more methods, namely\n", 286 | "- `counts` returns the absolute counts, as for a histogram. Like `ext_pdf` and `ext_integrate`, this only works if the PDF is extended.\n", 287 | "- `rel_counts` returns relative counts, like a histogram, but with the sum normalized to 1\n", 288 | "\n", 289 | "\n", 290 | "### Note on Counts vs Density\n", 291 | "\n", 292 | "Counts are the *integrated* density, i.e. they differ by a factor `bin_width`. 
For regular binning, this is \"just\" a constant factor, as it's the same for all bins,\n", 293 | "but for variable binning, this changes \"the shape\" of the histogram." 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "binned_sample = model.sample(n=1_000)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "## Plotting made easy\n", 310 | "\n", 311 | "This makes plotting with `mplhep` a lot easier, also for unbinned models." 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "plt.title(\"Counts plot\")\n", 321 | "mplhep.histplot(data, label=\"data\")\n", 322 | "mplhep.histplot(model.to_hist() * [data.nevents],\n", 323 | " label=\"model\") # scaling up since model is not extended, i.e. has no yield\n", 324 | "plt.legend()" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "plt.title(\"Counts plot\")\n", 334 | "mplhep.histplot(binned_sample, label=\"sampled data\")\n", 335 | "mplhep.histplot(model.to_hist() * [binned_sample.nevents],\n", 336 | " label=\"model\") # scaling up since model is not extended, i.e. has no yield\n", 337 | "plt.legend()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "# or using unbinned data points, we can do a density plot\n", 347 | "plt.title(\"Density plot\")\n", 348 | "mplhep.histplot(data.to_hist(), density=True, label=\"data\")\n", 349 | "x = znp.linspace(-10, 10, 200)\n", 350 | "plt.plot(x, model.pdf(x), label=\"model\")\n", 351 | "plt.legend()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "## Binned loss functions\n", 359 | "\n", 360 | "We used the `BinnedNLL` above, but zfit offers more, namely an extended version and a `BinnedChi2` (least squares)."
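,
"\n",
"\n",
"As a minimal sketch using the `model`, `data` and `minimizer` from above (assumptions: `BinnedChi2` and `ExtendedBinnedNLL` follow the same `(model, data)` signature as `BinnedNLL`, and the extended loss needs a model with a yield):\n",
"\n",
"```python\n",
"# least-squares alternative; note that empty bins can be problematic for a chi2\n",
"chi2 = zfit.loss.BinnedChi2(model, data)\n",
"\n",
"# the extended counterpart requires an extended model, i.e. one with a yield\n",
"n_events = zfit.Parameter(\"n_events\", 10_000, 0, 100_000)\n",
"model_ext = model_nobin.create_extended(n_events).to_binned(data.space)\n",
"nll_ext = zfit.loss.ExtendedBinnedNLL(model_ext, data)\n",
"result_ext = minimizer.minimize(nll_ext)\n",
"```"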
361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "print(zfit.loss.__all__)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "# Fitting using histograms\n", 377 | "\n", 378 | "There are a few new PDFs that are specific to histogram-like shapes, such as morphing interpolation and shape variations.\n", 379 | "\n", 380 | "Most simply, a `HistogramPDF` wraps a histogram and acts as a PDF.\n", 381 | "\n", 382 | "By default, these histograms are automatically extended (this can be overruled using the `extended` argument)." 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "histpdf = zfit.pdf.HistogramPDF(h_scaled) # fixed yield\n", 392 | "print(np.sum(histpdf.counts()))" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "sig_yield = zfit.Parameter('sig_yield', 4_000, 0, 100_000)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "histpdf = zfit.pdf.HistogramPDF(h, extended=sig_yield)\n", 411 | "print(np.sum(histpdf.counts()))" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "### Modifiers\n", 419 | "\n", 420 | "We may want to add modifiers, i.e. scale each bin by a value. `BinwiseScaleModifier` offers this functionality.\n", 421 | "\n", 422 | "Note, however, that these are *just free parameters* and not constrained in any way; this needs to be done manually." 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "histpdf.space.binning.size" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "sig_pdf = zfit.pdf.BinwiseScaleModifier(histpdf,\n", 441 | " modifiers=True) # or we could give a list of parameters matching each bin\n", 442 | "modifiers = sig_pdf.params" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "# modifiers = {f'modifier_{i}': zfit.Parameter(f'modifier_{i}', 1, 0, 10) for i in range(histpdf.space.binning.size[0])}\n", 452 | "# histpdf_scaled = zfit.pdf.BinwiseScaleModifier(histpdf, modifiers=modifiers)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "modifiers" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "sig_pdf.get_yield()" 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": {}, 476 | "source": [ 477 | "## Morphing\n", 478 | "\n", 479 | "Let's create a background from simulation. Let's assume we have a parameter in the simulation that we're unsure about.\n", 480 | "\n", 481 | "A commonly used technique is \"morphing\": creating multiple templates and interpolating between them. 
Typically, they are created at +1 and -1 sigma of the\n", 482 | "nuisance parameter (however, zfit allows arbitrary values and as many templates as desired)." 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "bkg_hist = zfit.Data(np.random.exponential(scale=20, size=100_000) - 10, obs=obs_binned)\n", 492 | "\n", 493 | "# creating templates, different ways of going about it\n", 494 | "# 1. create unbinned data and convert it to binned\n", 495 | "bkg_m1_unbinned = zfit.Data(obs=obs, data=np.random.exponential(scale=35, size=100_000) - 10)\n", 496 | "bkg_hist_m1 = bkg_m1_unbinned.to_binned(obs_binned)\n", 497 | "\n", 498 | "# 2. directly create binned data by using the binned observables\n", 499 | "bkg_hist_m05 = zfit.Data(obs=obs_binned, data=np.random.exponential(scale=26, size=100_000) - 10)\n", 500 | "\n", 501 | "# 3. use the `from_numpy` method that has more specific options than just `Data`\n", 502 | "bkg_hist_p1 = zfit.data.from_numpy(obs=obs_binned, array=np.random.exponential(scale=17, size=100_000) - 10)\n", 503 | "\n", 504 | "# put them into a dict that maps the modifier value to the histogram\n", 505 | "bkg_hists = {-1: bkg_hist_m1, -0.5: bkg_hist_m05, 0: bkg_hist, 1: bkg_hist_p1}\n", 506 | "bkg_histpdfs = {k: zfit.pdf.HistogramPDF(v) for k, v in bkg_hists.items()}\n", 507 | "mplhep.histplot(list(bkg_hists.values()), label=list(bkg_hists.keys()))\n", 508 | "plt.legend();" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "alpha = zfit.Parameter(\"alpha\", 0, -3, 3)\n", 518 | "bkg_yield = zfit.Parameter(\"bkg_yield\", 15_000)" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "bkg_pdf = zfit.pdf.SplineMorphingPDF(alpha, bkg_histpdfs, extended=bkg_yield)" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [ 536 | "with alpha.set_value(-0.6): # we can change this value to play around\n", 537 | " mplhep.histplot(bkg_pdf.to_hist())" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [ 546 | "# bkg_pdf = zfit.pdf.HistogramPDF(bkg_hist, extended=bkg_yield) # alternative: skip the spline for simplicity" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "model = zfit.pdf.BinnedSumPDF([sig_pdf, bkg_pdf])\n", 556 | "model.to_hist()" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "mods_signal = {m: np.random.normal(1.0, scale=0.14) for m in modifiers.values()}\n", 566 | "mods_bkg = {alpha: 0.1}\n", 567 | "\n", 568 | "bkghist = bkg_pdf.sample(n=10_000, params=mods_bkg).to_hist()\n", 569 | "sighist = sig_pdf.sample(1000, params=mods_signal).to_hist()\n", 570 | "data = bkghist + sighist\n", 571 | "data" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "modifier_constraints = zfit.constraint.GaussianConstraint(params=list(modifiers.values()), observation=np.ones(len(modifiers)),\n", 581 | " uncertainty=0.1 * np.ones(len(modifiers)))\n", 582 | 
"alpha_constraint = zfit.constraint.GaussianConstraint(alpha, 0, sigma=1)" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [ 591 | "loss_binned = zfit.loss.ExtendedBinnedNLL(model, data, constraints=[modifier_constraints, alpha_constraint])" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "result = minimizer.minimize(loss_binned)" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": null, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "print(result)" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": {}, 616 | "outputs": [], 617 | "source": [ 618 | "mplhep.histplot(model.to_hist(), label='model')\n", 619 | "mplhep.histplot(data, label='data')\n", 620 | "plt.legend()" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": null, 626 | "metadata": {}, 627 | "outputs": [], 628 | "source": [ 629 | "print(sig_pdf.get_yield())" 630 | ] 631 | }, 632 | { 633 | "cell_type": "markdown", 634 | "metadata": {}, 635 | "source": [ 636 | "## Binned to unbinned\n", 637 | "\n", 638 | "We can convert a histogram directly to an unbinned PDF with `to_unbinned` or smooth it out by interpolating with splines.\n" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "unbinned_spline = zfit.pdf.SplinePDF(sig_pdf, label=\"splined model\")" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "# plt.plot(x, unbinned_spline.pdf(x))\n", 657 | "mplhep.histplot(sig_pdf.to_hist(), density=True, label='binned model')\n", 658 | "unbinned_spline.plot.plotpdf(extended=False) # extended=False means plot the PDF density, not scaled to yield" 659 | ] 660 | }, 661 | { 662 | "cell_type": "markdown", 663 | "metadata": {}, 664 | "source": [ 665 | "# hepstats\n", 666 | "\n", 667 | "As before, we can now use hepstats to do further statistical treatment (supports binned PDFs).\n", 668 | "\n", 669 | "More tutorials on hepstats can be found [in the zfit guides](https://zfit-tutorials.readthedocs.io/en/latest/tutorials/guides/README.html) or in the [hepstats tutorials](https://mybinder.org/v2/gh/scikit-hep/hepstats/master)\n" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "metadata": {}, 676 | "outputs": [], 677 | "source": [] 678 | } 679 | ], 680 | "metadata": { 681 | "kernelspec": { 682 | "display_name": "Python 3 (ipykernel)", 683 | "language": "python", 684 | "name": "python3" 685 | }, 686 | "language_info": { 687 | "codemirror_mode": { 688 | "name": "ipython", 689 | "version": 3 690 | }, 691 | "file_extension": ".py", 692 | "mimetype": "text/x-python", 693 | "name": "python", 694 | "nbconvert_exporter": "python", 695 | "pygments_lexer": "ipython3", 696 | "version": "3.10.4" 697 | }, 698 | "vscode": { 699 | "interpreter": { 700 | "hash": "35582a6a3ca7193893daa07e79c86f9b031e623ca33bcb273b52ae17295e8545" 701 | } 702 | } 703 | }, 704 | "nbformat": 4, 705 | "nbformat_minor": 4 706 | } 707 | --------------------------------------------------------------------------------