├── tests ├── __init__.py ├── test_helpers.py ├── test_get_zarr.py ├── test_data_reduction.py ├── test_lone_hit_integration.py ├── test_exhaust_plugin.py ├── test_config.py ├── test_statistics.py ├── test_peak_properties.py ├── test_peak_merging.py ├── test_pulse_processing.py ├── test_inline_plugin.py ├── test_down_chunk_plugin.py ├── test_saving.py ├── test_cut_plugin.py ├── test_sort.py ├── test_overlap_plugin.py ├── test_fixed_plugin_cache.py ├── test_mailbox.py └── test_peak_splitting.py ├── docs ├── source │ ├── __init__.py │ ├── developer │ │ ├── contributing.md │ │ ├── overlap_window.jpg │ │ ├── release.rst │ │ ├── documentation.rst │ │ ├── corrections.rst │ │ ├── pipeline.rst │ │ ├── storage.rst │ │ ├── overlaps.rst │ │ └── parallel.rst │ ├── reference │ │ ├── strax.storage.rst │ │ ├── strax.processing.rst │ │ └── strax.rst │ ├── basics │ │ └── setup.rst │ ├── build_release_notes.py │ ├── index.rst │ └── advanced │ │ ├── out_of_core.rst │ │ ├── chunking.rst │ │ ├── fuzzy_for.rst │ │ ├── superrun.rst │ │ └── recompression.rst ├── make_docs.sh ├── pull_request_template.md └── make.bat ├── strax ├── processing │ ├── __init__.py │ ├── data_reduction.py │ ├── statistics.py │ └── peak_properties.py ├── scripts │ ├── __init__.py │ └── rechunker.py ├── storage │ ├── __init__.py │ └── zipfiles.py ├── processor.py ├── plugins │ ├── __init__.py │ ├── exhaust_plugin.py │ ├── merge_only_plugin.py │ ├── down_chunking_plugin.py │ ├── cut_plugin.py │ ├── loop_plugin.py │ ├── overlap_window_plugin.py │ └── parrallel_source_plugin.py ├── processors │ ├── __init__.py │ ├── base.py │ └── single_thread.py ├── __init__.py ├── sort_enforcement.py └── io.py ├── MANIFEST.in ├── pytest.ini ├── .git-blame-ignore-revs ├── .coveragerc ├── .bumpversion.cfg ├── .readthedocs.yml ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md ├── dependabot.yml └── workflows │ ├── pypi_install.yml │ ├── test_install.yml │ └── pytest.yml ├── setup.cfg ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── CONTRIBUTING.md ├── README.md ├── .pylintrc ├── pyproject.toml └── CODE-OF-CONDUCT.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /strax/processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /strax/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /strax/storage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.md 3 | -------------------------------------------------------------------------------- /docs/source/developer/contributing.md: -------------------------------------------------------------------------------- 1 | ../../../CONTRIBUTING.md -------------------------------------------------------------------------------- /pytest.ini: 
-------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore::numba.NumbaExperimentalFeatureWarning 4 | -------------------------------------------------------------------------------- /docs/source/developer/overlap_window.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AxFoundation/strax/HEAD/docs/source/developer/overlap_window.jpg -------------------------------------------------------------------------------- /strax/processor.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # Legacy import, used in a single place in straxen. 3 | from .processors.threaded_mailbox import SHMExecutor 4 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | 42ad25e0e6834c82f415149b206b49bf0cf5654f 2 | 5c4a277fbb155529dc5cf06435f0d7419977d7dd 3 | 8e431b2f5827d3f9c69088bd68b432dc6d4d4769 4 | -------------------------------------------------------------------------------- /docs/make_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | make clean 3 | rm -r source/reference 4 | sphinx-apidoc -o source/reference ../strax 5 | rm source/reference/modules.rst 6 | make html 7 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [report] 3 | # Regexes for lines to exclude from consideration 4 | exclude_lines = 5 | if __name__ == .__main__.: 6 | raise 7 | 8 | ignore_errors = True 9 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 2.2.1 3 | files = strax/__init__.py docs/source/conf.py 4 | commit = True 5 | tag = True 6 | 7 | [bumpversion:file:pyproject.toml] 8 | search = version = "{current_version}" 9 | replace = version = "{new_version}" 10 | -------------------------------------------------------------------------------- /strax/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .plugin import * 2 | from .cut_plugin import * 3 | from .loop_plugin import * 4 | from .merge_only_plugin import * 5 | from .overlap_window_plugin import * 6 | from .parrallel_source_plugin import * 7 | from .down_chunking_plugin import * 8 | from .exhaust_plugin import * 9 | -------------------------------------------------------------------------------- /docs/source/developer/release.rst: -------------------------------------------------------------------------------- 1 | Release procedure 2 | ================== 3 | 4 | - Update personal fork & local master to Axfoundation fork 5 | - Edit and commit HISTORY.md 6 | - bumpversion patch (or minor/major, as appropriate) 7 | - Push to personal and AxFoundation fork, with --tags 8 | - fast-foward and push AxFoundation/stable 9 | - Add release info on release page of github website 10 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | 3 | # Required 4 | 
version: 2 5 | 6 | sphinx: 7 | configuration: docs/source/conf.py 8 | 9 | build: 10 | os: ubuntu-22.04 11 | apt_packages: 12 | - graphviz 13 | tools: 14 | python: "3.10" 15 | 16 | python: 17 | install: 18 | - method: pip 19 | path: . 20 | extra_requirements: 21 | - docs 22 | 23 | formats: 24 | - htmlzip 25 | - epub 26 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # https://www.aleksandrhovhannisyan.com/blog/crlf-vs-lf-normalizing-line-endings-in-git/#a-simple-gitattributes-config 2 | # We'll let Git's auto-detection algorithm infer if a file is text. If it is, 3 | # enforce LF line endings regardless of OS or git configurations. 4 | * text=auto eol=lf 5 | 6 | # Isolate binary files in case the auto-detection algorithm fails and 7 | # marks them as text files (which could brick them). 8 | *.{png,jpg,jpeg,gif,webp,woff,woff2} binary 9 | -------------------------------------------------------------------------------- /strax/processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .threaded_mailbox import * 3 | from .single_thread import * 4 | 5 | # This is redundant with the star-imports above, but some flake8 6 | # versions require this 7 | from .threaded_mailbox import ThreadedMailboxProcessor 8 | from .single_thread import SingleThreadProcessor 9 | 10 | PROCESSORS = { 11 | "default": SingleThreadProcessor, 12 | "threaded_mailbox": ThreadedMailboxProcessor, 13 | "single_thread": SingleThreadProcessor, 14 | } 15 | -------------------------------------------------------------------------------- /docs/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **What is the problem / what does the code in this PR do** 2 | 3 | **Can you briefly describe how it works?** 4 | 5 | **Can you give a minimal working example (or illustrate with a figure)?** 6 | 7 | Please include the following if applicable: 8 | - Update the docstring(s) 9 | - Update the documentation 10 | - Tests to check the (new) code is working as desired. 11 | - Does it solve one of the open issues on github? 12 | 13 | Please make sure that all automated tests have passed before asking for a review (you can save the PR as a draft otherwise). 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Insert the MWE of how to reproduce the error 15 | ```python 16 | YOUR CODE GOES HERE 17 | ``` 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 
24 | 25 | **Versions** 26 | Please add the versions of strax and any related packages 27 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Set update schedule for GitHub Actions to check they are up to date 2 | # If one of the github actions is out of date, dependabot will open a 3 | # PR to update the version of that action 4 | 5 | version: 2 6 | updates: 7 | # Maintain the requirements in the github actions 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | # Check for updates to GitHub Actions every month 12 | interval: "monthly" 13 | # Maintain the requirements in the requirements folder 14 | - package-ecosystem: "pip" 15 | directory: "/" 16 | schedule: 17 | # Check for updates to requirements every month 18 | interval: "monthly" 19 | open-pull-requests-limit: 15 20 | -------------------------------------------------------------------------------- /strax/plugins/exhaust_plugin.py: -------------------------------------------------------------------------------- 1 | from .plugin import Plugin 2 | 3 | 4 | class ExhaustPlugin(Plugin): 5 | """Plugin that exhausts all chunks when fetching data.""" 6 | 7 | def _fetch_chunk(self, d, iters, check_end_not_before=None): 8 | while super()._fetch_chunk(d, iters, check_end_not_before=check_end_not_before): 9 | pass 10 | return False 11 | 12 | def do_compute(self, chunk_i=None, **kwargs): 13 | if chunk_i != self.first_chunk: 14 | raise RuntimeError( 15 | f"{self.__class__.__name__} is an ExhaustPlugin. " 16 | "It should read all chunks together and process them together." 17 | ) 18 | return super().do_compute(chunk_i=chunk_i, **kwargs) 19 | -------------------------------------------------------------------------------- /tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | from strax import testutils 2 | from hypothesis import given 3 | import strax 4 | 5 | 6 | @given(testutils.sorted_bounds()) 7 | def test_sorted_bounds(bs): 8 | assert is_sorted(bs) 9 | 10 | 11 | @given(testutils.sorted_bounds(disjoint=True)) 12 | def test_disjoint_bounds(bs): 13 | assert is_sorted(bs) 14 | assert is_disjoint(bs) 15 | 16 | 17 | @given(testutils.disjoint_sorted_intervals) 18 | def test_dsi(intvs): 19 | bs = list(zip(intvs["time"].tolist(), strax.endtime(intvs).tolist())) 20 | assert is_sorted(bs) 21 | assert is_disjoint(bs) 22 | 23 | 24 | def is_sorted(bs): 25 | return bs == sorted(bs) 26 | 27 | 28 | def is_disjoint(bs): 29 | return all([bs[i][1] <= bs[i + 1][0] for i in range(len(bs) - 1)]) 30 | -------------------------------------------------------------------------------- /tests/test_get_zarr.py: -------------------------------------------------------------------------------- 1 | import strax 2 | from strax.testutils import Records, Peaks, run_id 3 | import tempfile 4 | import numpy as np 5 | 6 | 7 | def test_get_zarr(): 8 | """Get a context for the tests below.""" 9 | with tempfile.TemporaryDirectory() as temp_dir: 10 | context = strax.Context( 11 | storage=strax.DataDirectory(temp_dir, deep_scan=True), 12 | register=[Records, Peaks], 13 | use_per_run_defaults=True, 14 | ) 15 | records = context.get_array(run_id, "records") 16 | peaks = context.get_array(run_id, "peaks") 17 | zgrp = context.get_zarr(run_id, ("records", "peaks"), storage="memory://") 18 | 19 | assert np.all(zgrp.records["time"] == records["time"]) 20 | assert
np.all(zgrp.peaks["time"] == peaks["time"]) 21 | -------------------------------------------------------------------------------- /docs/source/developer/documentation.rst: -------------------------------------------------------------------------------- 1 | Writing documentation 2 | ====================== 3 | 4 | To write documentation, please refer to the existing pages for examples. To add new pages: 5 | - Add a new ``.rst`` file in the basics/advanced/developer folder within ./docs. 6 | - Add a link to the file in ``docs/index.rst``. 7 | - Run ``bash make_docs.sh``. This runs sphinx locally, which allows you to 8 | preview whether the results look as desired. Several modules need to be 9 | installed in order to run this script. 10 | - Add the new ``.rst`` file and ``index.rst`` to git. 11 | 12 | Updating ``docs/reference`` 13 | --------------------------- 14 | The ``docs/reference`` folder is only updated by ``bash make_docs.sh``. 15 | In case modules are added/removed, one needs to rerun this script and commit 16 | the changes to the files in ``docs/reference``. 17 | -------------------------------------------------------------------------------- /strax/processors/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as ty 3 | 4 | import strax 5 | 6 | export, __all__ = strax.exporter() 7 | 8 | 9 | @export 10 | class ProcessorComponents(ty.NamedTuple): 11 | """Specification to assemble a processor.""" 12 | 13 | plugins: ty.Dict[str, strax.Plugin] 14 | loaders: ty.Dict[str, ty.Callable] 15 | # Required for inline ParallelSource plugin. 16 | loader_plugins: ty.Dict[str, strax.Plugin] 17 | savers: ty.Dict[str, ty.List[strax.Saver]] 18 | targets: ty.Tuple[str] 19 | 20 | 21 | @export 22 | class BaseProcessor: 23 | components: ProcessorComponents 24 | 25 | def __init__(self, components: ProcessorComponents, **kwargs): 26 | self.log = logging.getLogger(self.__class__.__name__) 27 | self.components = components 28 | 29 | def iter(self): 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [mypy] 5 | disable_error_code = attr-defined, name-defined, union-attr 6 | 7 | [flake8] 8 | # Set maximum width of the line to 100 9 | max-line-length = 100 10 | 11 | # E203 whitespace before ':' 12 | # E402 module level import not at top of file 13 | # E501 line too long 14 | # E731 do not assign a lambda expression, use a def 15 | # F541 f-string is missing placeholders 16 | # F401 imported but unused 17 | # F403 unable to detect undefined names 18 | # F405 name may be undefined, or defined from star imports 19 | # W503 line break before binary operator 20 | 21 | ignore = E203, E731, F541, W503 22 | 23 | per-file-ignores = 24 | strax/*__init__.py: F401, F403 25 | strax/plugin.py: F401 26 | strax/processing/general.py: E402 27 | tests/*: F403, F405 28 | tests/plugins/test_plugins.py: F401 29 | docs/source/build_datastructure_doc.py: E501 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Jupyter 2 | .ipynb_checkpoints 3 | 4 | # Data (temporary?)
5 | *.json 6 | *.bz2 7 | *.zstd 8 | *.npy 9 | *.blosc 10 | *.h5 11 | strax_data 12 | from_fake_daq 13 | from_eb 14 | from_eb_finished 15 | resource_cache 16 | raw 17 | reduced_raw 18 | processed 19 | temp_processed 20 | custom_data 21 | test_input_data 22 | *.zip 23 | 24 | # cProfile output 25 | *.prof 26 | 27 | # Python cache 28 | *.py[cod] 29 | __pycache__ 30 | 31 | # Testing caches 32 | .pytest_cache 33 | *pytestdebug.log 34 | .hypothesis 35 | 36 | # Packages 37 | .eggs 38 | *.egg 39 | *.egg-info 40 | dist 41 | build 42 | eggs 43 | parts 44 | var 45 | sdist 46 | develop-eggs 47 | .installed.cfg 48 | lib 49 | lib64 50 | 51 | # Sphinx 52 | docs/_build 53 | docs/source/developer/contributing.rst 54 | docs/source/reference/release_notes.rst 55 | 56 | # Pycharm 57 | .idea 58 | 59 | # coverage 60 | .coverage 61 | 62 | # DS_Store 63 | .DS_Store 64 | -------------------------------------------------------------------------------- /.github/workflows/pypi_install.yml: -------------------------------------------------------------------------------- 1 | # Pipy upload strax after a release (or manually). 2 | # Mostly based on https://github.com/marketplace/actions/pypi-publish 3 | name: Pipy 4 | 5 | on: 6 | workflow_dispatch: 7 | release: 8 | types: [created] 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Setup steps 14 | - name: Setup python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.10" 18 | - name: Checkout repo 19 | uses: actions/checkout@v4 20 | - name: Install dependencies 21 | run: pip install build 22 | - name: Build package 23 | run: python -m build 24 | # Do the publish 25 | - name: Publish a Python distribution to PyPI 26 | uses: pypa/gh-action-pypi-publish@master 27 | with: 28 | user: ${{ secrets.pipy_token }} 29 | password: ${{ secrets.pypi_password }} 30 | -------------------------------------------------------------------------------- /.github/workflows/test_install.yml: -------------------------------------------------------------------------------- 1 | # Test if we can actually install strax by installing 2 | name: Installation test 3 | 4 | on: 5 | workflow_dispatch: 6 | release: 7 | types: [created] 8 | pull_request: 9 | branches: 10 | - master 11 | - stable 12 | push: 13 | branches: 14 | - master 15 | 16 | jobs: 17 | build: 18 | name: "py${{ matrix.python-version }}" 19 | runs-on: ubuntu-latest 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: ["3.10", "3.11"] 24 | steps: 25 | - name: Setup python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Checkout repo 30 | uses: actions/checkout@v4 31 | - name: Install strax 32 | run: pip install . 33 | - name: Test import 34 | run: python -c "import strax; print(strax.__version__)" 35 | - name: goodbye 36 | run: echo goodbye 37 | -------------------------------------------------------------------------------- /docs/source/reference/strax.storage.rst: -------------------------------------------------------------------------------- 1 | strax.storage package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | strax.storage.common module 8 | --------------------------- 9 | 10 | .. automodule:: strax.storage.common 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | strax.storage.files module 16 | -------------------------- 17 | 18 | .. 
automodule:: strax.storage.files 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | strax.storage.mongo module 24 | -------------------------- 25 | 26 | .. automodule:: strax.storage.mongo 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | strax.storage.zipfiles module 32 | ----------------------------- 33 | 34 | .. automodule:: strax.storage.zipfiles 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: strax.storage 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/source/basics/setup.rst: -------------------------------------------------------------------------------- 1 | Setting up strax 2 | ================ 3 | 4 | To install the latest stable version (from PyPI), run `pip install strax`. 5 | Dependencies should install automatically: 6 | numpy, pandas, numba, two compression libraries (blosc and zstd) 7 | and a few miscellaneous pure-python packages. Strax requires python >= 3.10. 8 | 9 | If you want to try out strax on XENON1T data, you're probably better off installing strax's XENON bindings, `straxen <https://github.com/XENONnT/straxen>`_. Strax will be automatically installed along with straxen. 10 | 11 | You might want to install some dependencies (such as numpy and numba) via conda rather than pip, but it's up to you. 12 | 13 | You can also clone the repository, then set up a developer installation with `python setup.py develop`. 14 | 15 | If you experience problems during installation, try installing 16 | exactly the same versions of the dependencies as used in the GitHub Actions integrated testing. 17 | Clone the repository, then do `pip install -r strax/extra_requirements/requirements-tests.txt`.
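To check that the installation worked, you can import strax and print its version; the automated installation test in this repository performs the same check::

    import strax
    print(strax.__version__)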
18 | -------------------------------------------------------------------------------- /strax/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | __version__ = "2.2.1" 3 | 4 | # Glue the package together 5 | # See https://www.youtube.com/watch?v=0oTh1CXRaQ0 if this confuses you 6 | # The order of subpackes is not invariant, since we use strax.xxx inside strax 7 | from .sort_enforcement import * 8 | from .utils import * 9 | from .chunk import * 10 | from .dtypes import * 11 | from strax.processing.general import * 12 | 13 | from .storage.common import * 14 | from .storage.files import * 15 | from .storage.file_rechunker import * 16 | from .storage.mongo import * 17 | from .storage.zipfiles import * 18 | 19 | from .config import * 20 | from .plugins import * 21 | 22 | from .mailbox import * 23 | from .processor import * 24 | from .processors import * 25 | from .context import * 26 | from .run_selection import * 27 | 28 | from .io import * 29 | 30 | from strax.processing.data_reduction import * 31 | from strax.processing.pulse_processing import * 32 | from strax.processing.peak_building import * 33 | from strax.processing.peak_merging import * 34 | from strax.processing.peak_splitting import * 35 | from strax.processing.peak_properties import * 36 | from strax.processing.hitlets import * 37 | from strax.processing.statistics import * 38 | -------------------------------------------------------------------------------- /strax/plugins/merge_only_plugin.py: -------------------------------------------------------------------------------- 1 | import strax 2 | from .plugin import Plugin, SaveWhen 3 | 4 | export, __all__ = strax.exporter() 5 | 6 | 7 | ## 8 | # "Plugins" for internal use 9 | # These do not actually do computations, but do other tasks 10 | # for which posing as a plugin is helpful. 11 | # Do not subclass unless you know what you are doing.. 
12 | ## 13 | 14 | 15 | @export 16 | class MergeOnlyPlugin(Plugin): 17 | """Plugin that merges data from its dependencies.""" 18 | 19 | save_when = SaveWhen.EXPLICIT 20 | 21 | def infer_dtype(self): 22 | deps_by_kind = self.dependencies_by_kind() 23 | if len(deps_by_kind) != 1: 24 | raise ValueError( 25 | "MergeOnlyPlugins can only merge data of the same kind, but got multiple kinds: " 26 | + str(deps_by_kind) 27 | ) 28 | 29 | return strax.merged_dtype( 30 | [ 31 | self.deps[d].dtype_for(d) 32 | # Sorting is needed here to match what strax.Chunk does in merging 33 | for d in sorted(self.depends_on) 34 | ] 35 | ) 36 | 37 | def compute(self, **kwargs): 38 | return kwargs[list(kwargs.keys())[0]] 39 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | 12 | - repo: https://github.com/psf/black 13 | rev: 24.10.0 14 | hooks: 15 | - id: black 16 | args: [--safe, --line-length=100, --preview] 17 | language_version: python3 18 | 19 | - repo: https://github.com/pycqa/docformatter 20 | rev: v1.7.6 21 | hooks: 22 | - id: docformatter 23 | additional_dependencies: [tomli] 24 | args: [--config, pyproject.toml] 25 | 26 | - repo: https://github.com/pre-commit/mirrors-mypy 27 | rev: v1.11.2 28 | hooks: 29 | - id: mypy 30 | additional_dependencies: [ 31 | types-PyYAML, types-tqdm, types-pytz, 32 | types-requests, types-setuptools, 33 | ] 34 | 35 | - repo: https://github.com/pycqa/flake8 36 | rev: 7.1.1 37 | hooks: 38 | - id: flake8 39 | 40 | ci: 41 | autoupdate_schedule: weekly 42 | -------------------------------------------------------------------------------- /docs/source/build_release_notes.py: -------------------------------------------------------------------------------- 1 | from m2r import convert 2 | import os 3 | 4 | header = """ 5 | Release notes 6 | ============== 7 | 8 | """ 9 | 10 | 11 | def convert_release_notes(): 12 | """Convert the release notes to an RST page with links to PRs.""" 13 | this_dir = os.path.dirname(os.path.realpath(__file__)) 14 | notes = os.path.join(this_dir, "..", "..", "HISTORY.md") 15 | with open(notes, "r") as f: 16 | notes = f.read() 17 | rst = convert(notes) 18 | with_ref = "" 19 | for line in rst.split("\n"): 20 | # Get URL for PR 21 | if "#" in line: 22 | pr_number = line.split("#")[1] 23 | while len(pr_number): 24 | try: 25 | pr_number = int(pr_number) 26 | break 27 | except ValueError: 28 | # Too many tailing characters to be an int 29 | pr_number = pr_number[:-1] 30 | if pr_number: 31 | line = line.replace( 32 | f"#{pr_number}", 33 | f"`#{pr_number} `_", 34 | ) 35 | with_ref += line + "\n" 36 | target = os.path.join(this_dir, "reference", "release_notes.rst") 37 | 38 | with open(target, "w") as f: 39 | f.write(header + with_ref) 40 | 41 | 42 | if __name__ == "__main__": 43 | convert_release_notes() 44 | -------------------------------------------------------------------------------- /strax/sort_enforcement.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba.extending import register_jitable 3 | 4 | # Define error message as a constant 5 
| UNSTABLE_SORT_MESSAGE = ( 6 | "quicksort and heapsort are not allowed due to non-deterministic behavior.\n" 7 | "Please use mergesort for deterministic sorting behavior." 8 | ) 9 | 10 | 11 | # Define custom exception for sorting errors 12 | class SortingError(Exception): 13 | pass 14 | 15 | 16 | def stable_sort(arr, kind="mergesort", **kwargs): 17 | """Stable sort function using mergesort, w/o numba optimization. 18 | 19 | Args: 20 | arr: numpy array to sort 21 | kind: sorting algorithm to use (only 'mergesort' is allowed) 22 | 23 | Returns: 24 | Sorted array using mergesort algorithm 25 | 26 | """ 27 | if kind != "mergesort": 28 | raise SortingError(UNSTABLE_SORT_MESSAGE) 29 | return np.sort(arr, kind="mergesort", **kwargs) 30 | 31 | 32 | @register_jitable 33 | def stable_argsort(arr, kind="mergesort"): 34 | """Numba-optimized stable argsort function using mergesort. 35 | 36 | Args: 37 | arr: numpy array to sort 38 | kind: sorting algorithm to use (only 'mergesort' is allowed) 39 | 40 | Returns: 41 | Indices that would sort the array using mergesort algorithm 42 | 43 | """ 44 | if kind != "mergesort": 45 | raise SortingError(UNSTABLE_SORT_MESSAGE) 46 | return np.argsort(arr, kind="mergesort") 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018-2023, strax developers. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /tests/test_data_reduction.py: -------------------------------------------------------------------------------- 1 | from hypothesis import given, settings 2 | 3 | from strax.testutils import * 4 | 5 | 6 | # TODO: test with multiple fake pulses and dt != 1 7 | @settings(deadline=None) 8 | @given(single_fake_pulse) 9 | def test_cut_outside_hits(records): 10 | hits = strax.find_hits(records, min_amplitude=1) 11 | 12 | # Set all record waveforms to 1 (still and 0 out of bounds) 13 | for r in records: 14 | r["data"] = 0 15 | r["data"][: r["length"]] = 1 16 | assert np.all(np.in1d(r["data"], [0, 1])) 17 | 18 | left_extension = 2 19 | right_extension = 3 20 | 21 | records_out = strax.cut_outside_hits( 22 | records, hits, left_extension=left_extension, right_extension=right_extension 23 | ) 24 | 25 | assert len(records_out) == len(records) 26 | if len(records) == 0: 27 | return 28 | 29 | # All fields except data are unchanged 30 | for x in records.dtype.names: 31 | if x == "data": 32 | continue 33 | if x == "reduction_level": 34 | np.testing.assert_array_equal( 35 | records_out[x], 36 | np.ones(len(records), dtype=np.int16) * strax.ReductionLevel.HITS_ONLY, 37 | ) 38 | else: 39 | np.testing.assert_array_equal(records_out[x], records[x], err_msg=f"Field {x} mangled!") 40 | 41 | records = records_out 42 | 43 | # Super-laborious dumb check 44 | for r in records: 45 | for i, w in enumerate(r["data"][: r["length"]]): 46 | t = r["time"] + i * r["dt"] 47 | for h in hits: 48 | if h["time"] - left_extension <= t < strax.endtime(h) + right_extension: 49 | assert w == 1, f"Position {i} should be preserved" 50 | break 51 | else: 52 | assert w == 0, f"Position {i} should be cut" 53 | -------------------------------------------------------------------------------- /tests/test_lone_hit_integration.py: -------------------------------------------------------------------------------- 1 | from strax.testutils import several_fake_records 2 | import numpy as np 3 | from hypothesis import given, settings 4 | import hypothesis.strategies as st 5 | 6 | import strax 7 | 8 | 9 | @settings(deadline=None) 10 | @given( 11 | several_fake_records, 12 | st.integers(min_value=0, max_value=100), 13 | st.integers(min_value=0, max_value=100), 14 | ) 15 | def test_lone_hits_integration_bounds(records, left_extension, right_extension): 16 | """Loops over hits and tests if integration bounds overlap.""" 17 | n_channel = 0 18 | if len(records): 19 | n_channel = records["channel"].max() + 1 20 | 21 | hits = strax.find_hits(records, np.ones(n_channel)) 22 | 23 | strax.find_hit_integration_bounds( 24 | hits, 25 | np.zeros(0, dtype=strax.time_dt_fields), 26 | records, 27 | (left_extension, right_extension), 28 | n_channel, 29 | allow_bounds_beyond_records=False, 30 | ) 31 | _test_overlap(hits) 32 | 33 | hits["left_integration"] = 0 34 | hits["right_integration"] = 0 35 | 36 | strax.find_hit_integration_bounds( 37 | hits, 38 | np.zeros(0, dtype=strax.time_dt_fields), 39 | records, 40 | (left_extension, right_extension), 41 | n_channel, 42 | allow_bounds_beyond_records=True, 43 | ) 44 | _test_overlap(hits) 45 | 46 | 47 | def _test_overlap(hits): 48 | tester = np.zeros(len(hits), dtype=strax.time_fields) 49 | tester["time"] = hits["time"] - (hits["left_integration"] - hits["left"]) * hits["dt"] 50 | tester["endtime"] = hits["time"] + (hits["right_integration"] - hits["left"]) * hits["dt"] 51 | 52 | for ch in np.unique(hits["channel"]): 53 | mask = 
hits["channel"] == ch 54 | test_ch = np.all((tester[mask]["endtime"][:-1] - tester[mask]["time"][1:]) <= 0) 55 | assert np.all(test_ch), "Hits overlap!" 56 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contribution guidelines 2 | 3 | You're welcome to contribute to strax! 4 | 5 | Currently, many features are still in significant flux, and the documentation is still very basic. Until more people start getting involved in development, we're probably not even following our own advice below... 6 | 7 | ### Please fork 8 | Please work in a fork, then submit pull requests. 9 | Only maintainers sometimes work in branches if there is a good reason for it. 10 | 11 | ### No large files 12 | Avoid committing large (> 100 kB) files. We'd like to keep the repository no more than a few MB. 13 | 14 | For example, do not commit jupyter notebooks with high-resolution plots (clear the output first), or long configuration files, or binary test data. 15 | 16 | While it's possible to rewrite history to remove large files, this is a bit of work and messes with the repository's consistency. Once data has gone to master it's especially difficult, then there's a risk of others merging the files back in later unless they cooperate in the history-rewriting. 17 | 18 | This is one reason to prefer forks over branches; if you commit a huge file by mistake it's just in your fork. 19 | 20 | ### Code style 21 | Of course, please write nice and clean code :-) 22 | 23 | PEP8-compatibility is great (you can test with flake8) but not as important as other good coding habits such as avoiding duplication. See e.g. the [famous beyond PEP8 talk](https://www.youtube.com/watch?v=wf-BqAjZb8M). 24 | 25 | In particular, don't go into code someone else is maintaining to "PEP8-ify" it (or worse, use some automatic styling tool) 26 | 27 | Other style guidelines (docstrings etc.) are yet to be determined. 28 | 29 | ### Pull requests 30 | When accepting pull requests, preferrably squash as it attributes all the commits to one single pull request. One might consider merging the pull request without squashing if it's a few commits that mostly outline discrete steps of an implementation which seem worth keeping. 
31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # strax 2 | Streaming analysis for xenon experiments 3 | 4 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1340632.svg)](https://doi.org/10.5281/zenodo.1340632) 5 | [![Readthedocs Badge](https://readthedocs.org/projects/strax/badge/?version=latest)](https://strax.readthedocs.io/en/latest/?badge=latest) 6 | [![Coverage Status](https://coveralls.io/repos/github/AxFoundation/strax/badge.svg?branch=master)](https://coveralls.io/github/AxFoundation/strax?branch=master) 7 | [![tests](https://github.com/AxFoundation/strax/actions/workflows/pytest.yml/badge.svg?branch=master)](https://github.com/AxFoundation/strax/actions/workflows/pytest.yml) 8 | [![CodeFactor](https://www.codefactor.io/repository/github/axfoundation/strax/badge)](https://www.codefactor.io/repository/github/axfoundation/strax) 9 | [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/AxFoundation/strax/master.svg)](https://results.pre-commit.ci/latest/github/AxFoundation/strax/master) 10 | 11 | [![PyPI version shields.io](https://img.shields.io/pypi/v/strax.svg)](https://pypi.python.org/pypi/strax/) 12 | [![Python Versions](https://img.shields.io/pypi/pyversions/strax.svg)](https://pypi.python.org/pypi/strax) 13 | [![PyPI downloads](https://img.shields.io/pypi/dm/strax.svg)](https://pypistats.org/packages/strax) 14 | [![Join the chat at https://gitter.im/AxFoundation/strax](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/AxFoundation/strax?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 15 | 16 | 17 | Strax is an analysis framework for pulse-only digitization data, specialized for live data reduction at speeds of 50-100 MB(raw) / core / sec. For more information, please see the [strax documentation](https://strax.readthedocs.io). 18 | 19 | Strax' primary aim is to support noble liquid TPC dark matter searches, such as XENONnT. The XENON-specific algorithms live in the separate package [straxen](https://github.com/XENONnT/straxen). If you want to try out strax, you probably want to start there. This package only contains the core framework and basic algorithms any TPCs would want to use. -------------------------------------------------------------------------------- /docs/source/developer/corrections.rst: -------------------------------------------------------------------------------- 1 | Corrections 2 | =========== 3 | 4 | Overview 5 | --------- 6 | Corrections is a centralized interface to store, query, and retrieve information about detector effects (corrections); this information can be used during the event-building process to remove (correct) such effects for a given data type. The information is stored in MongoDB as a collection in ``pandas.DataFrame()`` format with a ``pandas.DatetimeIndex()``, which allows tracking of time-dependent information, since detector conditions often change over time. Corrections also adds the functionality to differentiate between ONLINE and OFFLINE versioning: ONLINE corrections are used during online processing and, therefore, changes in the past are not allowed, while OFFLINE versions are meant for re-processing, where changes in the past are allowed.
Below we explain the key features of the corrections class: 7 | 8 | * ``read``: Retrieve the entire collection as a ``pandas.DataFrame()``; 9 | * ``read_at``: Retrieve part of a collection based on a time period (indexes), with a limit on the number of rows (documents). Using indexes greatly reduces the number of documents MongoDB needs to scan, so this is a faster method for querying specific information; 10 | * ``write``: Store (save) the entire collection as a ``pandas.DataFrame()`` in the DB; 11 | * ``interpolate``: Data in any DB is often limited, so interpolation is needed when trying to retrieve information at a given time (DateTime). The user has the option to use pandas interpolation methods; see, e.g. `link `_. 12 | 13 | 14 | Finally, a few remarks regarding modifications of a collection (``pandas.DataFrame()``). By convention, the user should provide dates (index) in UTC format. In addition, the user has the flexibility to modify or add rows (documents) in any ``pandas.DataFrame()`` (collection), with the only requirement that changes in the past are allowed for OFFLINE values only; for instance, the user may later want to add a new date (DateTime index) or fill in non-physical values (NaNs). 15 | -------------------------------------------------------------------------------- /strax/plugins/down_chunking_plugin.py: -------------------------------------------------------------------------------- 1 | from typing import Generator 2 | 3 | import strax 4 | from .plugin import Plugin 5 | 6 | export, __all__ = strax.exporter() 7 | 8 | 9 | ## 10 | # Plugin which allows the use of yield in a plugin's compute method. 11 | # Allows to chunk down output before storing to disk. 12 | # Only works if multiprocessing is omitted. 13 | ## 14 | 15 | 16 | @export 17 | class DownChunkingPlugin(Plugin): 18 | """Plugin whose compute method yields output, allowing it to be chunked down before saving.""" 19 | 20 | parallel = False 21 | 22 | def __init__(self): 23 | super().__init__() 24 | 25 | if self.parallel: 26 | raise NotImplementedError( 27 | f'Plugin "{self.__class__.__name__}" is a DownChunkingPlugin which ' 28 | "currently does not support parallel processing." 29 | ) 30 | 31 | def _iter_compute(self, chunk_i, **inputs_merged): 32 | return self.do_compute(chunk_i=chunk_i, **inputs_merged) 33 | 34 | def _fix_output(self, result, start, end, superrun, subruns, _dtype=None): 35 | """Wrapper around _fix_output to support the return of iterators.""" 36 | if not isinstance(result, Generator): 37 | raise ValueError( 38 | f"Plugin {self.__class__.__name__} should return a generator in compute method." 39 | ) 40 | 41 | for _result in result: 42 | if isinstance(_result, dict): 43 | values = _result.values() 44 | else: 45 | if self.multi_output: 46 | raise ValueError( 47 | f"{self.__class__.__name__} is multi-output and should " 48 | "provide a generator of dict output." 49 | ) 50 | values = [_result] 51 | if not all(isinstance(v, strax.Chunk) for v in values): 52 | raise ValueError( 53 | f"Plugin {self.__class__.__name__} should yield (dict of) " 54 | "strax.Chunk in compute method." 55 | ) 56 | yield self.superrun_transformation(_result, superrun, subruns) 57 | -------------------------------------------------------------------------------- /docs/source/reference/strax.processing.rst: -------------------------------------------------------------------------------- 1 | strax.processing package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | strax.processing.data\_reduction module 8 | --------------------------------------- 9 | 10 | ..
automodule:: strax.processing.data_reduction 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | strax.processing.general module 16 | ------------------------------- 17 | 18 | .. automodule:: strax.processing.general 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | strax.processing.hitlets module 24 | ------------------------------- 25 | 26 | .. automodule:: strax.processing.hitlets 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | strax.processing.peak\_building module 32 | -------------------------------------- 33 | 34 | .. automodule:: strax.processing.peak_building 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | strax.processing.peak\_merging module 40 | ------------------------------------- 41 | 42 | .. automodule:: strax.processing.peak_merging 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | strax.processing.peak\_properties module 48 | ---------------------------------------- 49 | 50 | .. automodule:: strax.processing.peak_properties 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | strax.processing.peak\_splitting module 56 | --------------------------------------- 57 | 58 | .. automodule:: strax.processing.peak_splitting 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | strax.processing.pulse\_processing module 64 | ----------------------------------------- 65 | 66 | .. automodule:: strax.processing.pulse_processing 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | strax.processing.statistics module 72 | ---------------------------------- 73 | 74 | .. automodule:: strax.processing.statistics 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | Module contents 80 | --------------- 81 | 82 | .. 
automodule:: strax.processing 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | # Jelle: CodeFactor has a whitelist of pylint messages 3 | # I removed: 4 | # - cyclic-import (we use this all the time in strax, see __init__.py) 5 | # - no-else-return (I think this makes sense for symmetric conditions, see https://dmerej.info/blog/post/else-after-return-yea-or-nay/) 6 | # - len-as-condition (if you do 'if data' on a numpy array it will crash) 7 | # - unnecessary-pass (sometimes pass makes the code more readable) 8 | disable=all 9 | enable=assert-on-tuple,astroid-error,bad-except-order,bad-inline-option,bad-option-value,bad-reversed-sequence,bare-except,binary-op-exception,boolean-datetime,catching-non-exception,cell-var-from-loop,confusing-with-statement,consider-merging-isinstance,consider-using-enumerate,consider-using-ternary,continue-in-finally,deprecated-pragma,django-not-available,duplicate-except,duplicate-key,eval-used,exec-used,expression-not-assigned,fatal,file-ignored,fixme,global-at-module-level,global-statement,global-variable-not-assigned,global-variable-undefined,http-response-with-content-type-json,http-response-with-json-dumps,invalid-all-object,invalid-characters-in-docstring,literal-comparison,locally-disabled,locally-enabled,lost-exception,lowercase-l-suffix,misplaced-bare-raise,missing-final-newline,missing-kwoa,mixed-line-endings,model-has-unicode,model-missing-unicode,model-no-explicit-unicode,model-unicode-not-callable,multiple-imports,multiple-statements,new-db-field-with-default,no-else-raise,non-ascii-bytes-literals,nonexistent-operator,not-an-iterable,not-in-loop,notimplemented-raised,overlapping-except,parse-error,pointless-statement,pointless-string-statement,raising-bad-type,raising-non-exception,raw-checker-failed,redefine-in-handler,redefined-argument-from-local,redefined-builtin,redundant-content-type-for-json-response,reimported,relative-import,return-outside-function,simplifiable-if-statement,singleton-comparison,syntax-error,trailing-comma-tuple,trailing-newlines,unbalanced-tuple-unpacking,undefined-all-variable,undefined-loop-variable,unexpected-line-ending-format,unidiomatic-typecheck,unnecessary-lambda,unnecessary-semicolon,unneeded-not,unpacking-non-sequence,unreachable,unrecognized-inline-option,used-before-assignment,useless-else-on-loop,using-constant-test,wildcard-import,yield-outside-function,useless-return 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool] 2 | [tool.poetry] 3 | name = "strax" 4 | version = "2.2.1" 5 | description = "Streaming analysis for xenon TPCs" 6 | readme = "README.md" 7 | authors = [ 8 | "strax developers", 9 | ] 10 | classifiers = [ 11 | "Development Status :: 5 - Production/Stable", 12 | "License :: OSI Approved :: BSD License", 13 | "Natural Language :: English", 14 | "Programming Language :: Python :: 3.10", 15 | "Programming Language :: Python :: 3.11", 16 | "Programming Language :: Python :: 3.12", 17 | "Intended Audience :: Science/Research", 18 | "Programming Language :: Python :: Implementation :: CPython", 19 | "Topic :: Scientific/Engineering :: Physics", 20 | ] 21 | repository = "https://github.com/AxFoundation/strax" 22 | 23 | [tool.poetry.scripts] 24 | 
rechunker = "strax.scripts.rechunker:main" 25 | 26 | [tool.poetry.dependencies] 27 | python = ">=3.10,<3.13" 28 | blosc = "*" 29 | click = "*" 30 | deepdiff = "*" 31 | dill = "*" 32 | fsspec = "*" 33 | immutabledict = "*" 34 | lz4 = "*" 35 | numba = ">=0.43.1" 36 | numexpr = "*" 37 | numpy = ">=1.18.5" 38 | numcodecs = "<0.16.0" 39 | packaging = "*" 40 | pandas = "*" 41 | psutil = "*" 42 | pymongo = "*" 43 | scipy = "*" 44 | tqdm = ">=4.46.0" 45 | zarr = "<3.0.0" 46 | zstd = "*" 47 | zstandard = "*" 48 | sphinx = { version = "*", optional = true } 49 | sphinx_rtd_theme = { version = "*", optional = true } 50 | nbsphinx = { version = "*", optional = true } 51 | recommonmark = { version = "*", optional = true } 52 | graphviz = { version = "*", optional = true } 53 | m2r = { version = "*", optional = true } 54 | mistune = { version = "0.8.4", optional = true } 55 | urllib3 = { version = "2.2.2", optional = true } 56 | lxml_html_clean = { version = "*", optional = true } 57 | 58 | [tool.poetry.extras] 59 | docs = [ 60 | "sphinx", 61 | "sphinx_rtd_theme", 62 | "nbsphinx", 63 | "recommonmark", 64 | "graphviz", 65 | "m2r", 66 | "mistune", 67 | "urllib3", 68 | "lxml_html_clean", 69 | ] 70 | 71 | [build-system] 72 | requires = ["poetry-core>=1.0.8", "setuptools>=61.0"] 73 | build-backend = "poetry.core.masonry.api" 74 | 75 | [tool.black] 76 | line-length = 100 77 | preview = true 78 | 79 | [tool.docformatter] 80 | recursive = true 81 | in-place = true 82 | style = "sphinx" 83 | wrap-summaries = 100 84 | wrap-descriptions = 100 85 | blank = true 86 | -------------------------------------------------------------------------------- /docs/source/reference/strax.rst: -------------------------------------------------------------------------------- 1 | strax package 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | strax.processing 11 | strax.storage 12 | 13 | Submodules 14 | ---------- 15 | 16 | strax.chunk module 17 | ------------------ 18 | 19 | .. automodule:: strax.chunk 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | strax.config module 25 | ------------------- 26 | 27 | .. automodule:: strax.config 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | strax.context module 33 | -------------------- 34 | 35 | .. automodule:: strax.context 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | strax.corrections module 41 | ------------------------ 42 | 43 | .. automodule:: strax.corrections 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | strax.dtypes module 49 | ------------------- 50 | 51 | .. automodule:: strax.dtypes 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | strax.io module 57 | --------------- 58 | 59 | .. automodule:: strax.io 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | strax.mailbox module 65 | -------------------- 66 | 67 | .. automodule:: strax.mailbox 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | 72 | strax.plugin module 73 | ------------------- 74 | 75 | .. automodule:: strax.plugin 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | 80 | strax.processor module 81 | ---------------------- 82 | 83 | .. automodule:: strax.processor 84 | :members: 85 | :undoc-members: 86 | :show-inheritance: 87 | 88 | strax.run\_selection module 89 | --------------------------- 90 | 91 | .. 
automodule:: strax.run_selection 92 | :members: 93 | :undoc-members: 94 | :show-inheritance: 95 | 96 | strax.testutils module 97 | ---------------------- 98 | 99 | .. automodule:: strax.testutils 100 | :members: 101 | :undoc-members: 102 | :show-inheritance: 103 | 104 | strax.utils module 105 | ------------------ 106 | 107 | .. automodule:: strax.utils 108 | :members: 109 | :undoc-members: 110 | :show-inheritance: 111 | 112 | Module contents 113 | --------------- 114 | 115 | .. automodule:: strax 116 | :members: 117 | :undoc-members: 118 | :show-inheritance: 119 | -------------------------------------------------------------------------------- /docs/source/developer/pipeline.rst: -------------------------------------------------------------------------------- 1 | Pipeline 2 | ========= 3 | 4 | This describes how strax chains computations from multiple plugins together in a pipeline. 5 | 6 | In python, pipeline components can offer two semantics. In **pull-semantics**, usually implemented with generators, somebody calls ``next`` to pull output, and ``StopIteration`` signals nothing more is coming. In **push-semantics**, usually implemented with coroutines, input is pushed in with a ``send`` method. If cleanup is required, a ``close`` method must be invoked. These can be chained together to make pipelines. Either can also be implemented with custom classes instead of standard python generators/coroutines. (A short illustration of both semantics is given at the end of this page.) 7 | 8 | Strax primarily uses pull-semantics: 9 | * Loaders are plain iterators; 10 | * Plugins iterate over inputs, and expect their results to be iterated over; 11 | * Savers use both semantics. Usually they iterate over their input. However, during multiprocessing, savers have their inputs sent into them, and must be explicitly closed. 12 | 13 | Mailboxes 14 | ---------- 15 | Strax could not be built by just chaining iterators or coroutines. 16 | * Pipelines can have multiple inputs and outputs, which generally come at different speeds; we cannot simply push on or pull from one endpoint. 17 | * For parallelization, we must run the same computation on several chunks at a time, then gather the results. 18 | 19 | The *mailbox* class provides the additional machinery that handles this. During processing, each data type has a mailbox. 20 | A data type's mailbox iterates over the results of the plugin or loader that produces it. It also provides an iterator to each plugin that needs it as an input. 21 | 22 | The input iterators given to the plugins must be somewhat magical. If we call ``next``, but the input is not yet available, we must pause (and do something else) until it is. 23 | To enable this suspending, strax runs each plugin in a separate thread. (We could use a framework like ``asyncio`` instead if we wanted to run our own scheduler; for now we just use the OS's scheduler.) 24 | 25 | The threads in strax are thus motivated by `concurrency `_, not parallelism. As a bonus, they do allow different plugins to run simultaneously. The benefit is limited by python's global interpreter lock, but this does not affect IO or computations in numpy and numba. 26 | 27 | 28 | 29 | Exception propagation 30 | ------------------------ 31 | 32 | TODO: document MailboxKilled etc.
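Illustration: pull versus push semantics
----------------------------------------

The snippet below is a minimal, self-contained sketch of the two semantics described at the top of this page, using plain Python generators and coroutines. It is illustrative only: the names (``loader``, ``plugin``, ``saver``) are hypothetical stand-ins, not strax's actual classes, which add mailboxes, chunking and error handling on top of this pattern. ::

    # Pull-semantics: a consumer calls next() on a chain of generators.
    def loader():
        yield from [1, 2, 3]              # e.g. chunks read from storage

    def plugin(source):
        for chunk in source:              # iterate over the input...
            yield chunk * 10              # ...and expect the result to be iterated over

    assert list(plugin(loader())) == [10, 20, 30]

    # Push-semantics: a producer sends data into a coroutine and closes it when done.
    def saver():
        received = []
        try:
            while True:
                received.append((yield))  # wait for data to be pushed in
        except GeneratorExit:
            print("closed after receiving", received)  # cleanup when close() is called

    s = saver()
    next(s)                               # prime the coroutine
    for chunk in plugin(loader()):
        s.send(chunk)
    s.close()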
33 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | Strax 3 | ====== 4 | 5 | Github page: https://github.com/AxFoundation/strax 6 | 7 | Strax is an analysis framework for pulse-only digitization data, 8 | specialized for live data processing at speeds of 50-100 MB(raw) / core / sec. 9 | 10 | For comparison, this is more than 100x faster than the XENON1T processor `pax `_, 11 | and does not require a preprocessing stage ('eventbuilder'). 12 | It achieves this due to using `numpy `_ `structured arrays `_ internally, 13 | which are supported by the amazing just-in-time compiler `numba `_. 14 | 15 | Strax is primarily developed for the XENONnT experiment, although the configuration and specific algorithms for XENONnT are hosted at ``_. You can find its documentation `here `_. 16 | 17 | You might also find these presentations useful: 18 | 19 | * `Talk on strax at the first XENONnT software telecon (May 2018) `_ 20 | * `Talk on strax for DAQ experts (May 2018) `_ 21 | 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | :caption: Setup and basics 26 | 27 | basics/setup 28 | basics/overview 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | :caption: Advanced usage 33 | 34 | advanced/plugin_dev 35 | advanced/config 36 | advanced/chunking 37 | advanced/superrun 38 | advanced/out_of_core 39 | advanced/recompression 40 | advanced/fuzzy_for 41 | 42 | .. toctree:: 43 | :maxdepth: 1 44 | :caption: Developer documentation 45 | 46 | developer/pipeline 47 | developer/parallel 48 | developer/overlaps 49 | developer/storage 50 | developer/contributing 51 | developer/documentation 52 | developer/release 53 | 54 | The above pages describe how strax's processing framework works under the hood, and explains some implementation choices. It's meant for people who want to do core development on strax; users or even plugin developers should not need it. 55 | 56 | .. toctree:: 57 | :maxdepth: 1 58 | :caption: API Reference 59 | 60 | reference/strax 61 | 62 | .. 
toctree:: 63 | :maxdepth: 2 64 | :caption: Release notes 65 | 66 | reference/release_notes 67 | 68 | 69 | * :ref:`genindex` 70 | * :ref:`modindex` 71 | -------------------------------------------------------------------------------- /tests/test_exhaust_plugin.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | import numpy as np 3 | import strax 4 | from strax import Plugin, ExhaustPlugin 5 | 6 | 7 | @strax.takes_config( 8 | strax.Option(name="n_chunks", default=10), 9 | strax.Option(name="n_items", default=10), 10 | ) 11 | class ToExhaust(Plugin): 12 | depends_on: Tuple = tuple() 13 | provides: str = "to_exhaust" 14 | 15 | dtype = strax.time_fields 16 | 17 | source_done = False 18 | 19 | def compute(self, chunk_i): 20 | data = np.empty(self.config["n_items"], dtype=self.dtype) 21 | data["time"] = np.arange(self.config["n_items"]) + chunk_i * self.config["n_items"] 22 | data["endtime"] = data["time"] 23 | 24 | if chunk_i == self.config["n_chunks"] - 1: 25 | self.source_done = True 26 | 27 | return self.chunk( 28 | data=data, 29 | start=int(data[0]["time"]), 30 | end=int(strax.endtime(data[-1])) + 1, # to make sure that data is continuous 31 | ) 32 | 33 | def source_finished(self): 34 | return self.source_done 35 | 36 | def is_ready(self, chunk_i): 37 | if "ready" not in self.__dict__: 38 | self.ready = False 39 | self.ready ^= True # Flip 40 | return self.ready 41 | 42 | 43 | @strax.takes_config( 44 | strax.Option(name="n_chunks", default=10), 45 | strax.Option(name="n_items", default=10), 46 | ) 47 | class Exhausted(ExhaustPlugin): 48 | depends_on: str = "to_exhaust" 49 | provides: str = "exhausted" 50 | 51 | dtype = strax.time_fields 52 | 53 | def compute(self, to_exhaust): 54 | return to_exhaust 55 | 56 | def _fetch_chunk(self, d, iters, check_end_not_before=None): 57 | flag = self.input_buffer[d] is None # only check if we have not read anything yet 58 | super()._fetch_chunk(d, iters, check_end_not_before=check_end_not_before) 59 | if flag and (len(self.input_buffer[d]) != self.config["n_chunks"] * self.config["n_items"]): 60 | raise RuntimeError("Exhausted plugin did not read all chunks!") 61 | return False 62 | 63 | 64 | def test_exhaust_plugin(): 65 | """Test the ExhaustPlugin, about whether it can really exhaust the data or not.""" 66 | st = strax.Context(storage=[]) 67 | st.register((ToExhaust, Exhausted)) 68 | st.storage = [ 69 | strax.DataDirectory( 70 | "./strax_data", 71 | provide_run_metadata=True, 72 | ) 73 | ] 74 | run_id = "000000" 75 | st.make(run_id, "to_exhaust") 76 | st.get_array(run_id, "exhausted") 77 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import strax 2 | import numpy as np 3 | import tempfile 4 | import unittest 5 | import hypothesis 6 | from hypothesis import given 7 | 8 | 9 | @strax.takes_config( 10 | strax.Option(name="int_option", type=int, default=42), 11 | strax.Option(name="str_option", type=str, default="forty_two"), 12 | strax.Config(name="mixed", type=int, default=42), 13 | ) 14 | class DummyPlugin(strax.Plugin): 15 | depends_on = () 16 | provides = ("dummy_data",) 17 | dtype = strax.dtypes.time_fields + [ 18 | (("Some data description", "some_data_name"), np.int32), 19 | ] 20 | 21 | int_config = strax.Config(type=int, default=42) 22 | str_config = strax.Config(type=str, default="forty_two") 23 | 24 | 25 | class 
TestPluginConfig(unittest.TestCase): 26 | @staticmethod 27 | def get_plugin(config): 28 | with tempfile.TemporaryDirectory() as temp_dir: 29 | context = strax.Context( 30 | storage=strax.DataDirectory(temp_dir, deep_scan=True), 31 | config=config, 32 | register=[DummyPlugin], 33 | use_per_run_defaults=True, 34 | ) 35 | 36 | return context.get_single_plugin("321", "dummy_data") 37 | 38 | def test_config_defaults(self): 39 | p = self.get_plugin({}) 40 | assert p.int_config == p.int_option == 42 41 | assert p.str_option == p.str_config == "forty_two" 42 | 43 | @given( 44 | hypothesis.strategies.integers(), 45 | hypothesis.strategies.text(), 46 | ) 47 | def test_config_attr_access(self, int_value, str_value): 48 | config = { 49 | "int_config": int_value, 50 | "str_config": str_value, 51 | "int_option": int_value, 52 | "str_option": str_value, 53 | } 54 | p = self.get_plugin(config) 55 | 56 | assert p.int_config == p.int_option == int_value 57 | assert p.str_option == p.str_config == str_value 58 | 59 | @given( 60 | hypothesis.strategies.integers(), 61 | hypothesis.strategies.text(), 62 | ) 63 | def test_config_dict_access(self, int_value, str_value): 64 | """Test backward compatibility.""" 65 | config = { 66 | "int_config": int_value, 67 | "str_config": str_value, 68 | "int_option": int_value, 69 | "str_option": str_value, 70 | } 71 | 72 | p = self.get_plugin(config) 73 | assert p.config["int_config"] == p.config["int_option"] == int_value 74 | assert p.config["str_config"] == p.config["str_option"] == str_value 75 | 76 | def test_config_backward_compatibility(self): 77 | p = self.get_plugin({}) 78 | assert p.mixed == 42 79 | -------------------------------------------------------------------------------- /docs/source/developer/storage.rst: -------------------------------------------------------------------------------- 1 | Storage 2 | ======== 3 | 4 | Overview 5 | --------- 6 | Players in strax's storage system take on one of three roles: 7 | * ``StorageFrontend``: Find data locations, and communicate this to one or more ``StorageBackend`` instances; 8 | * ``StorageBackend``: load pieces of data, and create instances of ``Saver``; 9 | * ``Saver``: save pieces of data to a specific location. 10 | 11 | As an example, a ``StorageFrontend`` could talk to a database that tracks which data is stored where. 12 | A ``StorageBackend`` then retrieves data from local disks, while another might retrieve it remotely using SSH or other transfer systems. 13 | The front-end decides which backend is appropriate for a given request. Finally, a ``Savers`` guides the process of writing a particular 14 | piece of data to disk or databases (potentially from multiple cores), compressing and rechunking as needed. 15 | 16 | To implement a new way of storing and/or tracking data, you must implement (subclass) all or some of these classes. 17 | This means subclassing them and overriding a few specific methods 18 | (called 'abstract methods' because they ``raise NotImplementedError`` if they are not overridden). 19 | 20 | Keys 21 | ----- 22 | In strax, a piece of data is identified by a *DataKey*, consisting of three components: 23 | * The run id 24 | * The data type 25 | * The complete *lineage* of the data. 
This includes, for the data type itself, and all types it depends on (and their dependencies, and so forth): 26 | * The plugin class name that produced the data; 27 | * The version string of the plugin; 28 | * The values of all configuration options the plugin took (whether they were explicitly specified or left as default). 29 | 30 | When you ask for data using ``Context.get_xxx``, the context will produce a key like this, and pass it to the ``StorageFrontend``. 31 | It then looks for a filename or database collection name that matches this key -- something a ``StorageBackend`` understands. which is therefore generically called a *backend key*. 32 | The matching between DataKey and backend key can be done very strictly, or more loosely, depending on how the context is configured. 33 | This way you can choose to be completely sure about what data you get, or be more flexible and load whatever is available. 34 | TODO: ref context documentation. 35 | 36 | 37 | Run-level metadata 38 | ------------------- 39 | Metadata can be associated with a run, but no particular data type. The ``StorageFrontend`` must take care of saving and loading these. 40 | 41 | Such run-level metadata can be crucial in providing run-dependent default setting for configuration options, for example, calibrated quantities necessary 42 | for data processing (e.g. electron lifetime and PMT gains). 43 | -------------------------------------------------------------------------------- /tests/test_statistics.py: -------------------------------------------------------------------------------- 1 | import strax 2 | import numpy as np 3 | from scipy.stats import norm 4 | from strax.processing.hitlets import highest_density_region_width 5 | 6 | 7 | def test_highest_density_region(): 8 | """Unity test for highest density regions.""" 9 | # Some distribution: 10 | distribution = np.array([0, 0, 3, 4, 2, 0, 1]) 11 | # Truth dict always stores fraction desired, intervals: 12 | truth_dict = {0.2: [[2, 4]], 0.7: [[2, 5], [6, 7]]} 13 | _test_highest_density_region(distribution, truth_dict) 14 | 15 | # Distribution with an offset: 16 | distribution = np.array([0, 0, 3, 4, 2, 0, 1]) + 2 17 | truth_dict = {0.2: [[2, 5]], 0.7: [[0, len(distribution)]]} 18 | _test_highest_density_region(distribution, truth_dict) 19 | 20 | 21 | def _test_highest_density_region(distribution, truth_dict): 22 | intervals, heights = strax.highest_density_region( 23 | distribution, 24 | np.array(list(truth_dict.keys())), 25 | only_upper_part=True, 26 | ) 27 | for fraction_ind, (key, values) in enumerate(truth_dict.items()): 28 | for ind_interval, interval in enumerate(values): 29 | int_found = intervals[fraction_ind, :, ind_interval] 30 | mes = ( 31 | f"Have not found the correct edges for a fraction of {key}% found {int_found}, but" 32 | f" expected {interval}" 33 | ) 34 | assert np.all(int_found == interval), mes 35 | 36 | 37 | def test_too_small_buffer(): 38 | """Unit test to check whether a too small buffer leads to np.nans.""" 39 | distribution = np.ones(1000) 40 | distribution[::4] = 0 41 | indicies, _ = strax.highest_density_region( 42 | distribution, 43 | np.array([0.5]), 44 | only_upper_part=True, 45 | ) 46 | assert np.all(indicies == -1) 47 | 48 | width = highest_density_region_width( 49 | distribution, fractions_desired=np.array([0.5]), _buffer_size=10 50 | ) 51 | assert np.all(np.isnan(width)) 52 | 53 | 54 | def test_true_hdr(): 55 | """Tests if highest density region returns for a normal distribution the expected -1/1 56 | boundaries for 68.27% coverage. 
57 | 58 | We are not using a very high precision here, to reduce the total test time. 59 | 60 | """ 61 | x = np.arange(-5, 5, 10**-4) 62 | data = norm.pdf(x) 63 | data /= np.sum(data) 64 | index, _ = strax.highest_density_region(data, fractions_desired=np.array([0.6827])) 65 | a_index = index[0, 0, 0] 66 | b_index = index[0, 1, 0] 67 | area = np.sum(data[a_index : (b_index - 1)]) 68 | 69 | assert np.isclose(area, 0.6827, rtol=10**-4), (area, 0.6827) 70 | assert np.isclose(x[a_index], -1, rtol=10**-3), (x[a_index], -1) 71 | assert np.isclose(x[b_index - 1], 1, rtol=10**-3), (x[b_index - 1], 1) 72 | -------------------------------------------------------------------------------- /docs/source/developer/overlaps.rst: -------------------------------------------------------------------------------- 1 | Chunk boundary handling 2 | ======================== 3 | 4 | Many algorithms need to look back and ahead in the data. For example, we want to group nearby PMT pulses together into peaks. 5 | Or, to ensure non-overlapping events, we want to group triggers with others that occur just before or ahead. 6 | 7 | During online processing, however, not all data is available. How can you look ahead if the data has not been acquired? 8 | Even during offline processing, you may not be able to keep all data of a given type for a run in RAM. 9 | 10 | Overlap window plugins 11 | ----------------------- 12 | 13 | Strax includes the `OverlapWindowPlugin` to deal with this case. To use it, specify a **window size**: a maximum duration over which the algorithm needs to look back or ahead. Then write your algorithm as if there are no chunk breaks -- everything will be taken care of behind the scenes. 14 | 15 | The illustration below shows how `OverlapWindowPlugin` works. Imagine this is an event finder plugin, which finds events (green and red dashes) in a stream of peaks (continuous blue line). 16 | 17 | .. image:: overlap_window.jpg 18 | 19 | * Outputs too close to the end of a chunk are discarded, except for the last chunk. When a chunk arrives, it is generally not known to be the last, so we keep this data around internally and emit it once we get word there will be no more chunks. 20 | * Inputs close to the end of a chunk (pink region) are cached, and added to the input for the next chunk. Note we must cache *two* windows of input: to interpret data at (end - one window) we need all data from (end - two windows) to (end). 21 | * For the next chunk, outputs fully in the region between (end - window) and (end) of the previous chunk are discarded. These are invalid, and moreover, we sent out the valid outputs for that range during the previous chunks. 22 | 23 | Note from the figure that outputs straddling (end - window) are initially discarded; they are recomputed during the next chunk. 24 | 25 | If the endtimes of two objects are separated by more than a window size, they must be guaranteed to not influence each other. If your algorithm does not have this guarantee, you cannot use `OverlapWindowPlugin` and must implement a custom solution. Make sure your window is large enough so this guarantee holds even if the objects themselves have a relevant length. 26 | 27 | Chunk breaks and the DAQ reader 28 | -------------------------------- 29 | 30 | Low-level datastreams are too large to be routed through a single core. Instead, each CPU sees only a chunk of data. However, the `OverlapWindowPlugin` will not work here: because it keeps state (the cached input and the temporarily cached output), it cannot be parallelized.
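To make the `OverlapWindowPlugin` usage described above more concrete, here is a minimal sketch of a subclass. The plugin name, data types and window value are invented for illustration, and the sketch only assumes that a subclass provides a window size via ``get_window_size`` plus a regular ``compute`` method:

.. code-block:: python

    import numpy as np
    import strax

    class NearbyPeakCount(strax.OverlapWindowPlugin):
        """Hypothetical example: count other peaks within 1 us of each peak."""

        depends_on = "peaks"
        provides = "nearby_peak_count"
        dtype = strax.time_fields + [
            (("Number of other peaks within the window", "n_nearby"), np.int32)
        ]
        window_ns = 1_000

        def get_window_size(self):
            # Maximum duration (in ns) the algorithm looks back or ahead
            return self.window_ns

        def compute(self, peaks):
            result = np.zeros(len(peaks), dtype=self.dtype)
            result["time"] = peaks["time"]
            result["endtime"] = strax.endtime(peaks)
            for i, p in enumerate(peaks):
                nearby = np.abs(peaks["time"] - p["time"]) <= self.window_ns
                result["n_nearby"][i] = nearby.sum() - 1  # exclude the peak itself
            return result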
31 | 32 | For the low-level datastream, we take a different approach 33 | 34 | TODO: document pre- and post-chunk stuff here. 35 | -------------------------------------------------------------------------------- /tests/test_peak_properties.py: -------------------------------------------------------------------------------- 1 | import strax 2 | import numpy as np 3 | from hypothesis import given, strategies, settings 4 | 5 | 6 | def get_filled_peaks(peak_length, data_length, n_widths): 7 | dtype = [ 8 | (("Start time since unix epoch [ns]", "time"), np.int64), 9 | (("dt in ns", "dt"), np.int64), 10 | (("length of p", "length"), np.int16), 11 | (("area of p", "area"), np.float64), 12 | (("data of p", "data"), (np.float64, data_length)), 13 | ] 14 | if n_widths is not None: 15 | dtype += [ 16 | (("center_time of p", "center_time"), np.int64), 17 | (("median_time of p", "median_time"), np.float64), 18 | (("width of p", "width"), (np.float64, n_widths)), 19 | ( 20 | ("area_decile_from_midpoint of p", "area_decile_from_midpoint"), 21 | (np.float64, n_widths), 22 | ), 23 | ] 24 | peaks = np.zeros(peak_length, dtype=dtype) 25 | dt = 1 26 | peaks["time"] = np.arange(peak_length) * dt 27 | peaks["dt"] = dt 28 | 29 | # Fill the peaks with random length data 30 | for p in peaks: 31 | length = np.random.randint(1, data_length) 32 | p["length"] = length 33 | wf = np.random.random(size=length) 34 | p["data"][:length] = wf 35 | if len(peaks): 36 | # Compute sum area 37 | peaks["area"] = np.sum(peaks["data"], axis=1) 38 | return peaks 39 | 40 | 41 | @settings(max_examples=100, deadline=None) 42 | @given( 43 | # number of peaks 44 | strategies.integers(min_value=0, max_value=20), 45 | # length of the data field in the peaks 46 | strategies.integers(min_value=2, max_value=20), 47 | ) 48 | def test_index_of_fraction(peak_length, data_length): 49 | """Test strax.index_of_fraction.""" 50 | peaks = get_filled_peaks(peak_length, data_length, n_widths=None) 51 | 52 | fraction_desired = np.random.random(size=peak_length) 53 | res = strax.index_of_fraction(peaks, fraction_desired) 54 | assert len(res) == len(peaks), "Lost peaks" 55 | if len(peaks): 56 | assert np.max(res) <= data_length, "Index returned out of bound" 57 | 58 | 59 | @settings(max_examples=100, deadline=None) 60 | @given( 61 | # number of peaks 62 | strategies.integers(min_value=0, max_value=20), 63 | # length of the data field in the peaks 64 | strategies.integers(min_value=2, max_value=20), 65 | # Number of widths to compute 66 | strategies.integers(min_value=2, max_value=10), 67 | ) 68 | def test_compute_center_time_widths(peak_length, data_length, n_widths): 69 | """Test strax.compute_properties.""" 70 | peaks = get_filled_peaks(peak_length, data_length, n_widths) 71 | 72 | # Make a copy of peaks to test that they don't remain the same later 73 | pre_peaks = peaks.copy() 74 | strax.compute_properties(peaks) 75 | 76 | assert len(pre_peaks) == len(peaks), "Lost peaks" 77 | if np.sum(peaks["area"] > 0) > 10: 78 | mess = ( 79 | "Highly unlikely that from at least 10 positive area peaks " 80 | "none were able to compute the width" 81 | ) 82 | assert np.any(peaks["width"] != pre_peaks["width"]), mess 83 | -------------------------------------------------------------------------------- /tests/test_peak_merging.py: -------------------------------------------------------------------------------- 1 | import hypothesis 2 | import numpy as np 3 | 4 | import strax 5 | from strax.testutils import disjoint_sorted_intervals, fake_hits 6 | 7 | 8 | 
@hypothesis.given(disjoint_sorted_intervals, disjoint_sorted_intervals) 9 | @hypothesis.settings(max_examples=1000, deadline=None) 10 | def test_replace_merged(intervals, merge_instructions): 11 | # First we have to create some merged intervals. 12 | # We abuse the interval generation mechanism to create 'merge_instructions' 13 | # i.e. something to tell us which indices of intervals must be merged 14 | # together. 15 | 16 | merged_itvs = [] 17 | to_remove = [] 18 | for x in merge_instructions: 19 | start, end_inclusive = x["time"], x["time"] + x["length"] - 1 20 | if end_inclusive == start or end_inclusive >= len(intervals): 21 | # Pointless / invalid merge instruction 22 | continue 23 | to_remove.extend(list(range(start, end_inclusive + 1))) 24 | new = np.zeros(1, strax.interval_dtype)[0] 25 | new["time"] = intervals[start]["time"] 26 | new["length"] = strax.endtime(intervals[end_inclusive]) - new["time"] 27 | new["dt"] = 1 28 | merged_itvs.append(new) 29 | removed_itvs = [] 30 | kept_itvs = [] 31 | for i, itv in enumerate(intervals): 32 | if i in to_remove: 33 | removed_itvs.append(itv) 34 | else: 35 | kept_itvs.append(itv) 36 | 37 | kept_itvs = np.array(kept_itvs) 38 | merged_itvs = np.array(merged_itvs) 39 | 40 | result = strax.replace_merged(intervals, merged_itvs) 41 | assert len(result) == len(merged_itvs) + len(kept_itvs) 42 | assert np.all(np.diff(result["time"]) > 0), "Not sorted" 43 | assert np.all(result["time"][1:] - strax.endtime(result)[:-1] >= 0), "Overlap" 44 | for x in kept_itvs: 45 | assert x in result, "Removed too many" 46 | for x in merged_itvs: 47 | assert x in result, "Didn't put in merged" 48 | for x in result: 49 | assert np.isin(x, merged_itvs) or np.isin(x, kept_itvs), "Invented itv" 50 | 51 | 52 | @hypothesis.given( 53 | fake_hits, 54 | hypothesis.strategies.integers(min_value=0, max_value=int(1e18)), 55 | hypothesis.strategies.integers(min_value=0, max_value=100), 56 | hypothesis.strategies.integers(min_value=1, max_value=2), 57 | ) 58 | @hypothesis.settings(deadline=None) 59 | def test_add_lone_hits(hits, time_offset, peak_length, dt): 60 | peak = np.zeros(1, dtype=strax.peak_dtype()) 61 | peak["time"] = time_offset 62 | hits["time"] += time_offset 63 | peak["length"] = peak_length 64 | hits["area"] = 1 65 | peak["dt"] = dt 66 | 67 | to_pe = np.ones(10000) 68 | strax.add_lone_hits(peak, hits, to_pe) 69 | 70 | if not peak_length: 71 | assert peak["area"] == 0 72 | assert peak["data"].sum() == 0 73 | return 74 | 75 | split_hits = strax.split_by_containment(hits, peak)[0] 76 | dummy_peak = np.zeros(peak_length) 77 | 78 | for h in split_hits: 79 | dummy_peak[(h["time"] - time_offset) // dt] += h["area"] 80 | peak = peak[0] 81 | assert peak["area"] == np.sum(split_hits["area"]) 82 | assert np.all(peak["data"][:peak_length] == dummy_peak) 83 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | # Test strax on each PR 2 | name: tests 3 | 4 | # Trigger this code when a new release is published 5 | on: 6 | workflow_dispatch: 7 | release: 8 | types: [ created ] 9 | pull_request: 10 | branches: 11 | - master 12 | - stable 13 | push: 14 | branches: 15 | - master 16 | 17 | jobs: 18 | test: 19 | name: "${{ matrix.test }}_py${{ matrix.python-version }}" 20 | runs-on: ubuntu-latest 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | python-version: ["3.10", "3.11"] 25 | test: ["coveralls", "pytest"] 26 | # Installation on 
py3.10 is rather slow at the moment 27 | exclude: 28 | - python-version: "3.11" 29 | test: coveralls 30 | steps: 31 | - name: Checkout repo 32 | uses: actions/checkout@v4 33 | - name: Setup python 34 | uses: actions/setup-python@v5 35 | with: 36 | python-version: ${{ matrix.python-version }} 37 | - name: Install dependencies 38 | run: sudo apt-get install -y graphviz 39 | - name: Install requirements for Python 3.10 40 | if: matrix.python-version == '3.10' 41 | run: pip install git+https://github.com/XENONnT/base_environment.git@el7.2025.01.3 --force-reinstall 42 | - name: Install requirements for Python 3.11 43 | if: matrix.python-version == '3.11' 44 | run: pip install git+https://github.com/XENONnT/base_environment.git --force-reinstall 45 | - name: Install strax 46 | run: pip install . 47 | - name: Start MongoDB 48 | uses: supercharge/mongodb-github-action@1.11.0 49 | with: 50 | mongodb-version: 4.2 51 | # Perform coveralls (if coverage is set to True) or pytest 52 | - name: Test package 53 | if: matrix.test == 'pytest' 54 | env: 55 | TEST_MONGO_URI: 'mongodb://localhost:27017/' 56 | run: | 57 | pytest -v --durations 0 58 | - name: Coverage run 59 | if: matrix.test == 'coveralls' 60 | env: 61 | NUMBA_DISABLE_JIT: 1 62 | TEST_MONGO_URI: 'mongodb://localhost:27017/' 63 | run: | 64 | coverage run --source=strax -m pytest --durations 0 -v 65 | - name: Coverage run - backward compatibility check with straxen 66 | if: matrix.test == 'coveralls' 67 | env: 68 | NUMBA_DISABLE_JIT: 1 69 | TEST_MONGO_URI: 'mongodb://localhost:27017/' 70 | run: | 71 | echo "clone straxen" 72 | straxen_dir="../straxen/" 73 | git clone --single-branch --branch master https://github.com/XENONnT/straxen.git $straxen_dir 74 | bash $straxen_dir/.github/scripts/create_pre_apply_function.sh $HOME 75 | pip install -e $straxen_dir # Reinstall since tests might reflect new code. 76 | echo "Run straxen tests" 77 | coverage run --append --source=strax -m pytest $straxen_dir 78 | coverage report 79 | - name: Coveralls upload 80 | if: matrix.test == 'coveralls' 81 | env: 82 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 83 | run: | 84 | coverage report 85 | coveralls --service=github 86 | - name: goodbye 87 | run: echo goodbye 88 | -------------------------------------------------------------------------------- /tests/test_pulse_processing.py: -------------------------------------------------------------------------------- 1 | from strax.testutils import single_fake_pulse 2 | 3 | import numpy as np 4 | from hypothesis import given, settings 5 | from scipy.ndimage import convolve1d 6 | 7 | import strax 8 | 9 | 10 | def _find_hits(r): 11 | # Test pulses have dt=1 and time=0 12 | # hm, maybe this doesn't test everything? 
13 | 14 | hits = strax.find_hits(r, min_amplitude=1) 15 | 16 | # dt = 1, so: 17 | np.testing.assert_equal(hits["time"], hits["left"]) 18 | 19 | # NB: exclusive right bound, no + 1 here 20 | np.testing.assert_equal(hits["length"], hits["right"] - hits["left"]) 21 | 22 | # Check hits are properly integrated 23 | for h in hits: 24 | q = r[h["record_i"]] 25 | assert q["data"][h["left"] : h["right"]].sum() == h["area"] 26 | 27 | return list(zip(hits["left"], hits["right"])) 28 | 29 | 30 | def test_find_hits(): 31 | """Tests the hitfinder with simple example pulses.""" 32 | for w, should_find_intervals in [ 33 | ([], []), 34 | ([1], [(0, 1)]), 35 | ([1, 0], [(0, 1)]), 36 | ([1, 0, 1], [(0, 1), (2, 3)]), 37 | ([1, 0, 1, 0], [(0, 1), (2, 3)]), 38 | ([1, 0, 1, 0, 1], [(0, 1), (2, 3), (4, 5)]), 39 | ([0, 1, 2, 0, 4, -1, 60, 700, -4], [(1, 3), (4, 5), (6, 8)]), 40 | ([1, 1, 2, 0, 4, -1, 60, 700, -4], [(0, 3), (4, 5), (6, 8)]), 41 | ([1, 0, 2, 3, 4, -1, 60, 700, -4], [(0, 1), (2, 5), (6, 8)]), 42 | ([1, 0, 2, 3, 4, -1, 60, 700, 800], [(0, 1), (2, 5), (6, 9)]), 43 | ([0, 0, 2, 3, 4, -1, 60, 700, 800], [(2, 5), (6, 9)]), 44 | ]: 45 | records = np.zeros(1, strax.record_dtype(9)) 46 | records[0]["data"][: len(w)] = w 47 | records["dt"] = 1 48 | records["length"] = 9 49 | 50 | results = _find_hits(records) 51 | assert len(results) == len(should_find_intervals) 52 | assert results == should_find_intervals 53 | 54 | 55 | @settings(deadline=None) 56 | @given(single_fake_pulse) 57 | def test_find_hits_randomize(records): 58 | """Tests the hitfinder with whatever hypothesis can throw at it (well, pulse only takes (0, 1), 59 | and we only test a single pulse at a time)""" 60 | results = _find_hits(records) 61 | w = records[0]["data"] 62 | 63 | # Check for false positives 64 | for ll, rr in results: 65 | assert np.all(w[ll:rr] == 1) 66 | 67 | # Check for false negatives 68 | for i in range(len(results) - 1): 69 | l_ = results[i][1] 70 | r_ = results[i + 1][0] 71 | assert not np.any(w[l_:r_] == 1) 72 | 73 | 74 | def test_filter_waveforms(): 75 | """Test that filter_records gives the same output as a simple convolution applied to the 76 | original pulse (before splitting into records)""" 77 | wv = np.random.randn(300) 78 | ir = np.random.randn(41) 79 | ir[10] += 10 # Because it crashes for max at edges 80 | origin = np.argmax(ir) - (len(ir) // 2) 81 | wv_after = convolve1d(wv, ir, mode="constant", origin=origin) 82 | 83 | wvs = wv.reshape(3, 100) 84 | wvs = strax.filter_waveforms( 85 | wvs, 86 | ir, 87 | prev_r=np.array([strax.NO_RECORD_LINK, 0, 1]), 88 | next_r=np.array([1, 2, strax.NO_RECORD_LINK]), 89 | ) 90 | wv_after_2 = np.reshape(wvs, -1) 91 | 92 | assert np.abs(wv_after - wv_after_2).sum() < 1e-9 93 | -------------------------------------------------------------------------------- /docs/source/advanced/out_of_core.rst: -------------------------------------------------------------------------------- 1 | Out of core computation 2 | ======================= 3 | 4 | Overview and motivation 5 | ------------------------ 6 | Many times analyses involve performing some computation not implemented by a plugin (e.g. plotting) 7 | that require loading more data than can fit into memory, 8 | these type of tasks are commonly reffered to as out-of-core computations. 9 | Out-of-core algorithms usually involve a few repeating steps: 10 | 11 | 1. chunk the dataset into managable sizes 12 | 2. load the data chunk by chunk 13 | 3. perform some computation on each chunk 14 | 4. 
save a summary of the results for each chunk 15 | 5. perform some combination of the per-chunk results into a final result. 16 | 17 | While it is of course possible to implement these operations yourself, it can be tedious and repetitive, and the code becomes tightly coupled to the specific calculation being performed. 18 | A better approach is to use abstractions of commonly performed operations that use out-of-core algorithms under the hood to get the same result as if the operations were performed on the entire dataset. 19 | Code written using these abstractions can then run on in-memory and out-of-core datasets alike. 20 | More importantly, the implementations of these algorithms can be written once, packaged, and then used by all. 21 | 22 | Data chunking 23 | ------------- 24 | The zarr package provides an abstraction of the data-access API of numpy arrays for chunked and compressed data stored in memory or on disk. 25 | zarr provides an array abstraction with identical behavior to a numpy array when accessing data, but where the underlying data is actually a collection of (optionally) compressed chunks. 26 | The strax context provides a convenience method for loading data directly into zarr arrays. 27 | 28 | .. code-block:: python 29 | 30 | import strax 31 | 32 | context = strax.Context(**CONTEXT_KWARGS) 33 | 34 | # you can pass the same arguments you pass to context.get_array() 35 | zgrp = context.get_zarr(RUN_IDs, DATA_TYPES, **GET_ARRAY_KWARGS) 36 | 37 | # the zarr group contains multiple arrays, one for each data type 38 | z = zgrp.data_type 39 | 40 | # individual arrays are also accessible via the __getitem__ interface 41 | z = zgrp['data_type'] 42 | 43 | # numpy-like data access, abstracting away the underlying 44 | # data reading, which may include reading multiple chunks from disk/memory 45 | # and decompressing and concatenating them to return an in-memory numpy array 46 | z[:100] 47 | 48 | 49 | Data processing 50 | --------------- 51 | The dask package provides abstractions for most of the numpy and pandas APIs. 52 | The dask.Array and dask.DataFrame objects implement their respective APIs 53 | using fully distributed algorithms, only loading a fraction of the total data into memory 54 | at any given moment for a given computing partition (thread/process/HPC-job). 55 | 56 | .. code-block:: python 57 | 58 | import dask.array as da 59 | 60 | # easily convert to the dask.Array abstraction for processing 61 | darr = da.from_zarr(z) 62 | 63 | # it's recommended to rechunk to sizes more appropriate for processing; 64 | # note that rechunk returns a new array. See the dask documentation for details. 65 | darr = darr.rechunk(CHUNK_SIZE) 66 | 67 | # you can also convert the dask.Array abstraction 68 | # to a dask.DataFrame abstraction if you need the pandas API 69 | ddf = darr.to_dask_dataframe() 70 | -------------------------------------------------------------------------------- /CODE-OF-CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation.
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | -------------------------------------------------------------------------------- /tests/test_inline_plugin.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from unittest import TestCase 3 | import immutabledict 4 | import strax 5 | from strax.testutils import Records, Peaks, PeakClassification, run_id 6 | 7 | 8 | class ParallelPeaks(Peaks): 9 | parallel = "process" 10 | 11 | 12 | class ParallelPeakClassification(PeakClassification): 13 | parallel = "process" 14 | save_when = {k: strax.SaveWhen.EXPLICIT for k in PeakClassification.provides} 15 | save_when["lone_hits"] = strax.SaveWhen.ALWAYS 16 | save_when = immutabledict.immutabledict(save_when) 17 | 18 | 19 | class ParallelEnds(strax.Plugin): 20 | """The most stupid plugin to make sure that we depend on _some_ of the output of 21 | ParallelPeakClassification.""" 22 | 23 | parallel = "process" 24 | provides = "parallel_ends" 25 | depends_on = "peak_classification" 26 | dtype = strax.time_fields 27 | 28 | def compute(self, peaks): 29 | return {"time": peaks["time"], "endtime": strax.endtime(peaks)} 30 | 31 | 32 | class TestInline(TestCase): 33 | store_at = "./.test_inline" 34 | 35 | def setUp(self) -> None: 36 | st = strax.context.Context( 37 | allow_multiprocess=True, 38 | allow_lazy=False, 39 | max_messages=4, 40 | timeout=60, 41 | config=dict(bonus_area=9), 42 | ) 43 | st.storage = [strax.DataDirectory(self.store_at)] 44 | for p in [Records, ParallelPeaks, ParallelPeakClassification, ParallelEnds]: 45 | st.register(p) 46 | self.st = st 47 | assert not any(st.is_stored(run_id, t) for t in st._plugin_class_registry.keys()) 48 | 49 | def tearDown(self) -> None: 50 | shutil.rmtree(self.store_at) 51 | 52 | def test_inline(self, **make_kwargs): 53 | st = self.st 54 | targets = ("records", "parallel_ends") 55 | st.make( 56 | run_id, 57 | targets, 58 | allow_multiple=True, 59 | processor="threaded_mailbox", 60 | **make_kwargs, 61 | ) 62 | for target in list(st._plugin_class_registry.keys()): 63 | should_be_stored = st.get_save_when(target) == strax.SaveWhen.ALWAYS 64 | if target in targets and not should_be_stored: 65 | # redundant check but just in case someone ever changes 66 | # this test the records test plugin 67 | should_be_stored = st.get_save_when(target) == strax.SaveWhen.TARGET 68 | assert st.is_stored(run_id, target) == should_be_stored 69 | 70 | def test_inline_with_multi_processing(self, **make_kwargs): 71 | self.test_inline(max_workers=2, **make_kwargs) 72 | 73 | def test_inline_with_temp_config(self, **make_kwargs): 74 | self.test_inline_with_multi_processing(config=dict(secret_time_offset=10), **make_kwargs) 75 | 76 | def test_inline_bare(self, n_chunks=3): 77 | """Get the plugin from a bare processor and run in this thread.""" 78 | st = self.st 79 | st.set_config(dict(n_chunks=n_chunks)) 80 | targets = list(st._plugin_class_registry.keys()) 81 | components = st.get_components(run_id, targets=targets) 82 | parallel_components = strax.ParallelSourcePlugin.inline_plugins( 83 | components, start_from="records", log=st.log 84 | ) 85 | parallel_plugin = parallel_components.plugins["parallel_ends"] 86 | for chunk_i in range(n_chunks): 87 | assert len(parallel_plugin.do_compute(chunk_i=chunk_i)) 88 | 
-------------------------------------------------------------------------------- /tests/test_down_chunk_plugin.py: -------------------------------------------------------------------------------- 1 | from immutabledict import immutabledict 2 | from strax.testutils import RecordsWithTimeStructure, DownSampleRecords, run_id 3 | import strax 4 | import numpy as np 5 | 6 | import os 7 | import tempfile 8 | import shutil 9 | import uuid 10 | import unittest 11 | 12 | 13 | class TestContext(unittest.TestCase): 14 | """Tests for DownChunkPlugin class.""" 15 | 16 | def setUp(self): 17 | """Make temp folder to write data to.""" 18 | temp_folder = uuid.uuid4().hex 19 | self.tempdir = os.path.join(tempfile.gettempdir(), temp_folder) 20 | assert not os.path.exists(self.tempdir) 21 | 22 | def tearDown(self): 23 | if os.path.exists(self.tempdir): 24 | shutil.rmtree(self.tempdir) 25 | 26 | def test_down_chunking(self): 27 | st = self.get_context() 28 | st.register(RecordsWithTimeStructure) 29 | st.register(DownSampleRecords) 30 | 31 | st.make(run_id, "records") 32 | st.make(run_id, "records_down_chunked") 33 | 34 | chunks_records = st.get_metadata(run_id, "records")["chunks"] 35 | chunks_records_down_chunked = st.get_metadata(run_id, "records_down_chunked")["chunks"] 36 | 37 | _chunks_are_downsampled = len(chunks_records) * 2 == len(chunks_records_down_chunked) 38 | assert _chunks_are_downsampled 39 | 40 | _chunks_are_continues = np.all( 41 | [ 42 | chunks_records_down_chunked[i]["end"] == chunks_records_down_chunked[i + 1]["start"] 43 | for i in range(len(chunks_records_down_chunked) - 1) 44 | ] 45 | ) 46 | assert _chunks_are_continues 47 | 48 | def test_down_chunking_multi_processing(self): 49 | st = self.get_context(allow_multiprocess=True) 50 | st.register(RecordsWithTimeStructure) 51 | st.register(DownSampleRecords) 52 | 53 | st.make(run_id, "records", max_workers=1) 54 | 55 | class TestMultiProcessing(DownSampleRecords): 56 | parallel = True 57 | 58 | st.register(TestMultiProcessing) 59 | with self.assertRaises(NotImplementedError): 60 | st.make(run_id, "records_down_chunked", max_workers=2) 61 | 62 | def test_down_chunking_multi_output(self): 63 | st = self.get_context(allow_multiprocess=True) 64 | st.register(RecordsWithTimeStructure) 65 | st.register(DownSampleRecords) 66 | 67 | st.make(run_id, "records", max_workers=1) 68 | 69 | class TestMultiOutput(DownSampleRecords): 70 | provides = ("records_down_chunked", "records_down_chunked_copy") 71 | data_kind = immutabledict(zip(provides, provides)) 72 | 73 | def infer_dtype(self): 74 | return {p: DownSampleRecords.dtype for p in self.provides} 75 | 76 | def compute(self, records, start, end): 77 | for r in super().compute(records, start, end): 78 | yield r 79 | 80 | st.register(TestMultiOutput) 81 | with self.assertRaises(ValueError): 82 | st.make(run_id, "records_down_chunked", max_workers=2) 83 | 84 | def get_context(self, **kwargs): 85 | """Simple context to run tests.""" 86 | st = strax.Context(storage=self.get_mock_sf(), check_available=("records",), **kwargs) 87 | return st 88 | 89 | def get_mock_sf(self): 90 | mock_rundb = [{"name": "0", strax.RUN_DEFAULTS_KEY: dict(base_area=43)}] 91 | sf = strax.DataDirectory(path=self.tempdir, deep_scan=True, provide_run_metadata=True) 92 | for d in mock_rundb: 93 | sf.write_run_metadata(d["name"], d) 94 | return sf 95 | -------------------------------------------------------------------------------- /tests/test_saving.py: -------------------------------------------------------------------------------- 1 | 
import unittest 2 | import strax 3 | from strax.testutils import Records, Peaks 4 | import os 5 | import tempfile 6 | 7 | from strax import RUN_METADATA_PATTERN 8 | 9 | 10 | class TestPerRunDefaults(unittest.TestCase): 11 | """Test the saving behavior of the context.""" 12 | 13 | def setUp(self): 14 | self.test_run_id = "0" 15 | self.target = "records" 16 | self.tempdir = tempfile.TemporaryDirectory() 17 | self.path = self.tempdir.name 18 | self.st = strax.Context( 19 | use_per_run_defaults=True, register=[Records], storage=[strax.DataDirectory(self.path)] 20 | ) 21 | assert not self.st.is_stored(self.test_run_id, self.target) 22 | 23 | def tearDown(self): 24 | self.tempdir.cleanup() 25 | 26 | def test_savewhen_never(self, **kwargs): 27 | self.set_save_when("NEVER") 28 | self.st.make(self.test_run_id, self.target, **kwargs) 29 | assert not self.is_stored() 30 | 31 | def test_savewhen_never_with_save(self): 32 | should_fail_with_save = self.test_savewhen_never 33 | self.assertRaises(ValueError, should_fail_with_save, save=self.target) 34 | 35 | def test_savewhen_explict_without_save(self): 36 | self.set_save_when("EXPLICIT") 37 | self.st.make(self.test_run_id, self.target) 38 | assert not self.is_stored() 39 | 40 | def test_savewhen_explict_with_save(self): 41 | self.set_save_when("EXPLICIT") 42 | self.st.make(self.test_run_id, self.target, save=self.target) 43 | assert self.is_stored() 44 | 45 | def test_savewhen_target(self): 46 | self.set_save_when("TARGET") 47 | self.st.make(self.test_run_id, self.target) 48 | assert self.is_stored() 49 | 50 | def test_savewhen_always(self): 51 | self.set_save_when("ALWAYS") 52 | self.st.make(self.test_run_id, self.target) 53 | assert self.is_stored() 54 | 55 | def is_stored(self): 56 | return self.st.is_stored(self.test_run_id, self.target) 57 | 58 | def set_save_when(self, mode: str): 59 | if not hasattr(strax.SaveWhen, mode.upper()): 60 | raise ValueError(f"No such saving mode {mode}") 61 | save_mode = getattr(strax.SaveWhen, mode.upper()) 62 | self.st._plugin_class_registry[self.target].save_when = save_mode 63 | 64 | def test_raise_corruption(self): 65 | self.set_save_when("ALWAYS") 66 | self.st.make(self.test_run_id, self.target) 67 | assert self.is_stored() 68 | storage = self.st.storage[0] 69 | data_key = self.st.key_for(self.test_run_id, self.target) 70 | data_path = os.path.join(storage.path, str(data_key)) 71 | assert os.path.exists(data_path) 72 | metadata = storage.backends[0].get_metadata(data_path) 73 | assert isinstance(metadata, dict) 74 | 75 | # copied from FileSytemBackend (maybe abstractify the method separately?) 
76 | prefix = strax.dirname_to_prefix(data_path) 77 | metadata_json = RUN_METADATA_PATTERN % prefix 78 | md_path = os.path.join(data_path, metadata_json) 79 | assert os.path.exists(md_path) 80 | 81 | # Corrupt the metadata (making it non-JSON parsable) 82 | md_file = open(md_path, "a") 83 | # Append 'hello' at the end of file 84 | md_file.write("Adding a non-JSON line to the file to corrupt the metadata") 85 | # Close the file 86 | md_file.close() 87 | 88 | # Now we should get an error since the metadata data is corrupted 89 | with self.assertRaises(strax.DataCorrupted): 90 | self.st.get_array(self.test_run_id, self.target) 91 | 92 | # Also test the error is raised if be build a target that depends on corrupted data 93 | self.st.register(Peaks) 94 | with self.assertRaises(strax.DataCorrupted): 95 | self.st.get_array(self.test_run_id, "peaks") 96 | 97 | # Cleanup if someone wants to re-use this self.st 98 | del self.st._plugin_class_registry["peaks"] 99 | -------------------------------------------------------------------------------- /docs/source/developer/parallel.rst: -------------------------------------------------------------------------------- 1 | Parallelization 2 | ================ 3 | 4 | Strax can process data at 50-100 raw-MB /sec single core, which is not enough for live online processing at high DAQ rates. We must thus parallelize at least some of the signal processing. 5 | 6 | Not all plugins can be parallelized. For example, we cannot assign event numbers (0, 1, 2, ...) in parallel if we want unique numbers that increment without gaps. We also cannot save to a single file in parallel. 7 | 8 | Multithreading 9 | --------------- 10 | To get parallelization, plugins can defer computations to a pool of **threads** or **processes**. If they do, they yield futures to the output mailbox instead of the actual results (numpy arrays). The mailbox awaits the futures and ensures each consumer gets the results in order. 11 | 12 | A plugin indicates to strax it is paralellizable by setting its ``parallel`` attribute to True. This usually causes strax to outsource computations to a pool of threads. Every chunk will result in a call to the thread pool. This has little overhead, though the performance gain is limited by the global interpreter lock. If the computation is in pure python, there is no benefit; however, numpy and numba code can benefit significantly (until the pure-python overhead around it becomes the limiting factor, at high numbers of cores). 13 | 14 | Loaders use multithreading by default, since their work is eminently parallelizable: they just load some data and decompress it (using low-level compressors that happily release the GIL). Savers that rechunk the data (e.g. to achieve more sysadmin-friendly filesizes) are not parallelizable. Savers that do not rechunk use multithreading just like loaders. 15 | 16 | 17 | Multiprocessing 18 | ---------------- 19 | 20 | Strax can also use multiprocessing for parallelization. This is useful to free pure-python computations from the shackles of the GIL. Low-level plugins deal with a massive data flow, so parallelizing theircomputations in separate processes is very inefficient due to data transfer overhead. Thread parallelization works fine (since the algorithms are implemented in numba) until you reach ~10 cores, when the GIL becomes binding due to pure-python overhead. 21 | 22 | You can set the ``parallel`` attribute to ``process``, to suggest strax should use a process pool instead of a thread pool. 
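For example, a minimal sketch might look as follows (the plugin and its data types are invented for illustration; only the ``parallel`` attribute, the ``allow_multiprocess`` context option and the ``max_workers`` argument come from the text, and a plugin providing ``records`` is assumed to be registered as well):

.. code-block:: python

    import strax

    class SomeHeavyPlugin(strax.Plugin):
        """Hypothetical plugin whose compute is worth running in a process pool."""

        parallel = "process"  # or True, to only allow thread-level parallelization
        depends_on = "records"
        provides = "some_heavy_output"
        dtype = strax.time_fields

        def compute(self, records):
            # Placeholder computation
            return dict(time=records["time"], endtime=strax.endtime(records))

    # Assumes a plugin providing 'records' is also registered in this context
    st = strax.Context(allow_multiprocess=True, register=[SomeHeavyPlugin])
    # Multiprocessing is only used when max_workers > 1:
    arr = st.get_array("some_run", "some_heavy_output", max_workers=4)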
This is often not a good idea: multiprocessing incurs overhead from (1) forking the strax process and (2) pickling and unpickling the results in the child and parent processes. Strax will still not use multiprocessing at all unless you: 23 | - Set the allow_multiprocess context option to True, 24 | - Set max_workers to a value higher than 1 in the get_xxx call. 25 | 26 | During multiprocessing, computations of chunks from ``parallel='process'`` plugins will be outsourced to a process pool. Additionally, to avoid data transfer overhead, strax attempts to gather as many savers, dependencies, and savers of dependencies of a ``parallel='process'`` plugin to "inline" them: their computations are set to happen immedately after the main plugin's computation in the same process. This is achieved behind the scenes by replacing the plugin with a container-like plugin called ParallelSourcePlugin. Only parallelizable plugins and savers that do not rechunk will be inlined. 27 | 28 | Since savers can become inlined, they should work even if they are forked. That implies they cannot keep state, and must store metadata for each chunk in their backend as it arrives. For example, the FileStore backend produces a json file with metadata for each chunk. When the saver is closed, all the json files are read in and concatenated. A saver that can't do this should set `allow_fork = False`. 29 | 30 | 31 | Multi-run parallelization: a different problem 32 | ------------------------------------------------ 33 | 34 | Paralellizing quick (re)processing of many runs is a different problem altogether. It is easier in one way: since runs are assumed to be independent, we can simply process each run on a single core, and use our multiple cores to process multiple runs. However, it is harder in another: the large volume of desired result data may exceed available RAM. We can use Dask dataframes for this. Probably we can just copy/reuse the code in hax. 35 | -------------------------------------------------------------------------------- /tests/test_cut_plugin.py: -------------------------------------------------------------------------------- 1 | from strax import testutils 2 | import strax 3 | import numpy as np 4 | from hypothesis import given, strategies, example, settings 5 | 6 | # Initialize. We test both dt time-fields and time time-field 7 | _dtype_name = "var" 8 | _cut_dtype = ("variable 0", _dtype_name) 9 | full_dt_dtype = [(_cut_dtype, np.float64)] + strax.time_dt_fields 10 | full_time_dtype = [(_cut_dtype, np.float64)] + strax.time_fields 11 | 12 | 13 | def get_some_array(disjoint=True): 14 | # Either 0 or 1 15 | take_dt = np.random.choice(2) 16 | 17 | # Stolen from testutils.bounds_to_intervals 18 | def bounds_to_intervals(bs, dt=1): 19 | x = np.zeros(len(bs), dtype=full_dt_dtype if take_dt else full_time_dtype) 20 | x["time"] = [x[0] for x in bs] 21 | # Remember: exclusive right bound... 
22 | if take_dt: 23 | x["length"] = [x[1] - x[0] for x in bs] 24 | x["dt"] = 1 25 | else: 26 | x["endtime"] = x["time"] + ([x[1] - x[0] for x in bs]) * dt 27 | return x 28 | 29 | # Randomly input either of full_dt_dtype or full_time_dtype 30 | sorted_intervals = testutils.sorted_bounds(disjoint=disjoint).map(bounds_to_intervals) 31 | return sorted_intervals 32 | 33 | 34 | @given( 35 | get_some_array().filter(lambda x: len(x) >= 0), strategies.integers(min_value=-10, max_value=10) 36 | ) 37 | @settings(deadline=None) 38 | # Examples for readability 39 | @example( 40 | input_peaks=np.array( 41 | [(-11, 0, 1), (0, 1, 3), (-5, 3, 5), (11, 5, 7), (7, 7, 9)], 42 | dtype=[(_cut_dtype, np.float64)] + strax.time_fields, 43 | ), 44 | cut_threshold=5, 45 | ) 46 | @example( 47 | input_peaks=np.array( 48 | [(0, 0, 1, 1), (1, 1, 1, 1), (5, 2, 2, 1), (11, 4, 2, 4)], 49 | dtype=[(_cut_dtype, np.int16)] + strax.time_dt_fields, 50 | ), 51 | cut_threshold=-1, 52 | ) 53 | def test_cut_plugin(input_peaks, cut_threshold): 54 | # Just one chunk will do 55 | chunks = [input_peaks] 56 | _dtype = input_peaks.dtype 57 | 58 | class ToBeCut(strax.Plugin): 59 | """Data to be cut with strax.CutPlugin.""" 60 | 61 | depends_on = tuple() 62 | dtype = _dtype 63 | provides = "to_be_cut" 64 | data_kind = "to_be_cut" # match with depends_on below 65 | 66 | def compute(self, chunk_i): 67 | data = chunks[chunk_i] 68 | return self.chunk( 69 | data=data, 70 | start=(int(data[0]["time"]) if len(data) else np.arange(len(chunks))[chunk_i]), 71 | end=( 72 | int(strax.endtime(data[-1])) 73 | if len(data) 74 | else np.arange(1, len(chunks) + 1)[chunk_i] 75 | ), 76 | ) 77 | 78 | # Hack to make peak output stop after a few chunks 79 | def is_ready(self, chunk_i): 80 | return chunk_i < len(chunks) 81 | 82 | def source_finished(self): 83 | return True 84 | 85 | class CutSomething(strax.CutPlugin): 86 | """Minimal working example of CutPlugin.""" 87 | 88 | depends_on = ("to_be_cut",) 89 | 90 | def cut_by(self, to_be_cut): 91 | return to_be_cut[_dtype_name] > cut_threshold 92 | 93 | st = strax.Context(storage=[]) 94 | st.register(ToBeCut) 95 | st.register(CutSomething) 96 | 97 | result = st.get_array(run_id="some_run", targets=strax.camel_to_snake(CutSomething.__name__)) 98 | correct_answer = np.sum(input_peaks[_dtype_name] > cut_threshold) 99 | assert len(result) == len(input_peaks), "WTF??" 100 | assert correct_answer == np.sum( 101 | result["cut_something"] 102 | ), "Cut plugin does not give boolean arrays correctly" 103 | 104 | if len(input_peaks): 105 | assert ( 106 | strax.endtime(input_peaks).max() == strax.endtime(result).max() 107 | ), "last end time got scrambled" 108 | assert np.all(input_peaks["time"] == result["time"]), "(start) times got scrambled" 109 | assert np.all( 110 | strax.endtime(input_peaks) == strax.endtime(result) 111 | ), "Some end times got scrambled" 112 | -------------------------------------------------------------------------------- /docs/source/advanced/chunking.rst: -------------------------------------------------------------------------------- 1 | Strax data model 2 | ================= 3 | 4 | Data type and kind 5 | ------------------- 6 | 7 | All data lives in *data types*, such as `raw_records` or `peak_basics`. Each of these has a fixed numpy datatype. 8 | 9 | If a single row of two data types refers to the same physical / logical thing, such as an event or a peak, we say those data types have the same `data_kind`. 10 | 11 | 12 | The Laws of Chunking 13 | --------------------- 14 | You shall obey them. 
15 | 16 | 1. Each data row corresponds to a time interval. Time and (endtime or (dt and length)) are mandatory fields in all datatypes. 17 | 2. Strax handles data in chunks. A chunk is also an interval (containing rows of data which are individually intervals). 18 | 3. Suppose you have a chunk of some datatype reaching from [t0, t1), then: 19 | 20 | a. It contains all and only data that starts >= t0 and ends <= t1; 21 | b. All data outside the chunk ends <= t0, or starts >= t1. (Remember intervals are half-open; the boundary cases are not ambiguous.) 22 | c. In particular, every data row lies completely in one chunk. No data whatsoever lies partially in more than one chunk. This means chunks cannot be split at arbitrary times. 23 | 24 | 4. Zero-duration data rows are not allowed. Zero-duration chunks are allowed, but they cannot contain data. 25 | 26 | 27 | Incoming data 28 | ------------- 29 | From the perspective of a plugin, all incoming data is time-synchronized and merged by kind. Specifically: 30 | 31 | * Data of the same kind is merged into a single array. If you depend on `events`, `peaks` and `peak_basics`, you will get two arrays: `events` and `peaks`. The second will be the merged array of `peaks` and `peak_basics`. 32 | * Data of different kinds are synchronized by time. Strax will fetch a chunk of the first kind (`events`), then fetch as much as needed from the second kind (`peaks`) until you have all peaks that end before or at exactly the same time as the last event. 33 | 34 | This example is a bit odd: when loading data of multiple kinds that are contained in each other, e.g. events and peaks, you very often want to use a `LoopPlugin` rather than a straight-up Plugin. 35 | 36 | Outgoing data 37 | ------------- 38 | Plugins can chunk their output as they wish, including withholding some data until the next chunk is sent out. Of course this requires keeping state, which means you cannot parallelize: see the chunk boundary handling section later in this documentation. 39 | 40 | Savers, too, are free to chunk their data as they like; for example, to create files of convenient sizes. This affects the chunks you get when loading or reprocessing data. If you don't want this, e.g. if the next plugin in line assumes a particular kind of chunking you want to preserve, set the attribute `rechunk_on_save = False`. 41 | 42 | In cases where rechunking is permitted, a plugin can also specify a desired minimum uncompressed chunk size via the `chunk_target_size` attribute, with 200 MB as the default value. Chunks are concatenated until this desired size is exceeded, or all chunks have been combined, whereupon the data is compressed and written to disk. 43 | 44 | 45 | Sorted output requirement 46 | -------------------------- 47 | Strax requires that all output is sorted by time inside chunks. 48 | 49 | Additionally, most or all plugins will assume that incoming data is time-ordered between chunks. That is, a subsequent chunk should not contain any data that starts before an item from a previous chunk ends. Strax data must either consist of disjoint things, or, if there are overlaps, chunk boundaries must fall in places where gaps exist. 50 | 51 | It would be much harder to code an algorithm if you did not know when you have seen all input before a certain time. Essentially you would have to wait until the end of the run before you could process any data, which goes against the idea of processing your data as a stream.
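To make the interval requirement from the chunking laws above concrete, here is a small sketch (the extra field name is made up) of the two allowed ways to specify the mandatory time information:

.. code-block:: python

    import numpy as np
    import strax

    # Datatype with an explicit endtime field:
    dtype_with_endtime = strax.time_fields + [(("Some quantity", "x"), np.float32)]

    # Datatype with dt and length instead of endtime:
    dtype_with_dt = strax.time_dt_fields + [(("Some quantity", "x"), np.float32)]

    rows = np.zeros(3, dtype=dtype_with_dt)
    rows["time"] = [0, 10, 20]   # ns since unix epoch
    rows["dt"] = 2               # ns per sample
    rows["length"] = 5           # samples, so each row spans 10 ns

    # strax.endtime works with either convention;
    # here it evaluates time + dt * length:
    print(strax.endtime(rows))   # expected: [10 20 30]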
52 | 53 | If your plugin removes or adds items from the original incoming array, it must output a different *data kind*. For example, during the initial data reduction steps, we remove items from 'raw_records' to make a new data kind 'records'. Here we change data kind, even though the fields in the output data type are identical to the fields in the input data type. 54 | -------------------------------------------------------------------------------- /docs/source/advanced/fuzzy_for.rst: -------------------------------------------------------------------------------- 1 | Fuzzy for functionality 2 | ======================= 3 | Since strax tracks lineages, updates to low level plugins may change the 4 | availability of high level data. When a low level plugin is changed (for example 5 | the version of a plugin is incremented), strax will recognize that the data corresponding 6 | to the plugin whereof the version is changed is not stored (since only the 7 | previous version is stored). This safeguards that the data that the user is loading 8 | is always consistent with the context. 9 | 10 | **This functionality can partially be disabled using fuzzy-for settings. This should 11 | only be done temporarily or for quick checks as strax is not anymore checking if 12 | the entire ancestry of the requested and the delivered data is consistent.** 13 | 14 | When to use 15 | ----------- 16 | There are situations where the above robustness of the context is not what the user 17 | wants. Such situations can be if a user is developing a new plugin on the master 18 | branch, when the master branch has some changes in the lower level plugins. 19 | The user in this case cannot easily check if the plugin works on data, as no data 20 | is available in the context of the master branch. In this case, the user might want 21 | to tell the context to just load whatever data is available, ignoring changes in 22 | a specific plugin. Another example would be if a dataset was simulated with specific 23 | instructions and a user wants to quickly look at the data in the simulated dataset 24 | without having to manually check which context was used for simulating this data 25 | (of course, the best way to solve this would be to open the metadata that is stored 26 | with the simulation files and construct the context from those options). 27 | 28 | How to use 29 | ---------- 30 | There are two ways of ignoring the lineage. Both are set in the context config 31 | (see context.context_config): 32 | - ``fuzzy_for_options`` a tuple of options to specify that each option with a 33 | name in the tuple can be ignored 34 | - ``fuzzy_for`` a tuple of data-types to ignore. 35 | 36 | In the example below, we will use setting the ``fuzzy_for`` option. We will use 37 | the online context from `straxen `_ to illustrate 38 | how the options are set. 39 | 40 | 41 | .. code-block:: python 42 | 43 | import straxen 44 | # Use a context that can load data from a datatype 'peak-basics' 45 | st = straxen.contexts.xenonnt_online() 46 | run_id, target = '022880', 'peak_basics' 47 | 48 | # Check if the data is stored for this run and datatype 49 | print(f'{run_id} {target} is stored: {st.is_stored(run_id, target)}') 50 | 51 | # Now let's mimic the situation wherein the version of the plugin that provides 52 | # peak basics has changed (it has a different version). 
We will do so by changing 53 | # the version of the plugin below 54 | PeakBasics = st._plugin_class_registry[target] 55 | PeakBasics.__version__ = 'does_not_exist' 56 | print(f'{run_id} {target} is stored: {st.is_stored(run_id, target)}') 57 | 58 | # The print statement will tell us the data is not stored. To load the data 59 | # from the default version of PeakBasics we will use the fuzzy-for option: 60 | st.context_config['fuzzy_for'] = (target,) 61 | print(f'{run_id} {target} is stored: {st.is_stored(run_id, target)}') 62 | 63 | The block above prints: 64 | 65 | .. code-block:: bash 66 | 67 | 022880 peak_basics is stored: True 68 | 022880 peak_basics is stored: False 69 | 022880 peak_basics is stored: True 70 | 71 | Is it advisable / safe to use? 72 | ------------------------------ 73 | For running production analyses, one should never base results on a context where 74 | fuzziness is enabled. 75 | 76 | For quick tests, it is safe to use. If new data is made based on a fuzzy context, 77 | it is not stored, to prevent the creation of data files with unreproducible 78 | results. 79 | 80 | Additionally (depending on the StorageFrontend), loading data with fuzzy options 81 | will generally be much slower. For example, the most commonly used StorageFrontend, 82 | the DataDirectory, scans all folders within its parent directory and filters the 83 | metadata in search of a folder with a lineage compatible with the fuzzy-for 84 | options. 85 | -------------------------------------------------------------------------------- /strax/scripts/rechunker.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import argparse 3 | 4 | import strax 5 | import pandas as pd 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description="Rechunker for FileSytemBackend. Interfaces with strax.rechunker. " 11 | "Please see the documentation of strax.rechunker for more information: " 12 | "https://github.com/AxFoundation/strax/blob/31c114c5f8329e53289d5127fb2125e71c3d6aae/strax/storage/files.py#L371" # noqa 13 | ) 14 | parser.add_argument( 15 | "--source", 16 | type=str, 17 | help="Target directory to rechunk, should be a folder in a " 18 | "strax.DataDirectory (one datatype)", 19 | ) 20 | parser.add_argument( 21 | "--dest", 22 | "--destination", 23 | default=None, 24 | dest="dest", 25 | type=str, 26 | help="Where to store rechunked data. If nothing is specified, replace the source.", 27 | ) 28 | parser.add_argument( 29 | "--compressor", 30 | choices=list(strax.io.COMPRESSORS.keys()), 31 | help="Recompress using one of these compressors. 
If nothing specified, " 32 | "use the same compressor as the source", 33 | ) 34 | parser.add_argument( 35 | "--rechunk", default=True, choices=[True, False], type=bool, help="rechunk the data" 36 | ) 37 | parser.add_argument( 38 | "--target_size_mb", 39 | "--target-size-mb", 40 | dest="target_size_mb", 41 | type=int, 42 | default=strax.DEFAULT_CHUNK_SIZE_MB, 43 | help="Target size MB (uncompressed) of the rechunked data", 44 | ) 45 | parser.add_argument( 46 | "--write_stats_to", 47 | "--write-stats-to", 48 | dest="write_stats_to", 49 | type=str, 50 | default=None, 51 | help="Write some information to this file (csv format)", 52 | ) 53 | parser.add_argument( 54 | "--parallel", 55 | type=str, 56 | default="False", 57 | choices=["False", "True", "thread", "process"], 58 | help="Parallelize using threadpool or processpool", 59 | ) 60 | parser.add_argument( 61 | "--max_workers", type=int, default=4, help="Max workers if parallel is specified" 62 | ) 63 | parser.add_argument("--profile_memory", action="store_true", help="Profile memory usage") 64 | args = parser.parse_args() 65 | return args 66 | 67 | 68 | def main(): 69 | args = parse_args() 70 | if args.profile_memory: 71 | from memory_profiler import memory_usage 72 | import time 73 | 74 | start = time.time() 75 | mem = memory_usage(proc=(rechunk, (args,))) 76 | print(f"Memory profiler says peak RAM usage was: {max(mem):.1f} MB") 77 | print(f"Took {time.time() - start:.1f} s = {(time.time() - start) / 3600:.2f} h ") 78 | print("Bye, bye") 79 | else: 80 | rechunk(args) 81 | 82 | 83 | def rechunk(args): 84 | source_mb = strax.utils.dir_size_mb(args.source) 85 | report = strax.rechunker( 86 | source_directory=args.source, 87 | dest_directory=args.dest, 88 | replace=args.dest is None, 89 | compressor=args.compressor, 90 | target_size_mb=args.target_size_mb, 91 | rechunk=args.rechunk, 92 | parallel={"False": False, "True": True}.get(args.parallel, args.parallel), 93 | max_workers=args.max_workers, 94 | ) 95 | if args.dest is not None: 96 | recompressed_mb = strax.utils.dir_size_mb(report.get("dest_directory", args.dest)) 97 | else: 98 | recompressed_mb = strax.utils.dir_size_mb(args.source) 99 | report.update(dict(source_mb=source_mb, dest_mb=recompressed_mb)) 100 | if args.write_stats_to: 101 | if os.path.exists(args.write_stats_to): 102 | df = pd.read_csv(args.write_stats_to) 103 | else: 104 | df = pd.DataFrame() 105 | df_new = pd.concat([df, pd.DataFrame({k: [v] for k, v in report.items()})]) 106 | df_new.to_csv(args.write_stats_to, index=False) 107 | 108 | print(f"Re-compressed {args.source}") 109 | for k, v in report.items(): 110 | print(f"\t{k:16}\t{v}") 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /strax/processors/single_thread.py: -------------------------------------------------------------------------------- 1 | import typing as ty 2 | 3 | from .base import BaseProcessor, ProcessorComponents 4 | from .post_office import PostOffice, Spy 5 | 6 | 7 | import strax 8 | 9 | export, __all__ = strax.exporter() 10 | 11 | 12 | @export 13 | class SingleThreadProcessor(BaseProcessor): 14 | def __init__( 15 | self, components: ProcessorComponents, allow_rechunk=True, is_superrun=False, **kwargs 16 | ): 17 | super().__init__(components, allow_rechunk=allow_rechunk, is_superrun=is_superrun, **kwargs) 18 | 19 | self.log.debug("Processor components are: " + str(components)) 20 | 21 | # Do not use executors: work in one thread in one process 22 | 
self.process_executor = self.thread_executor = None 23 | 24 | self.post_office = PostOffice() 25 | 26 | for d, loader in components.loaders.items(): 27 | assert d not in components.plugins 28 | self.post_office.register_producer(loader(executor=self.thread_executor), topic=d) 29 | 30 | plugins_seen: ty.List[strax.Plugin] = [] 31 | for d, p in components.plugins.items(): 32 | # Multi-output plugins are listed multiple times in components.plugins; 33 | # ensure we only process each plugin once. 34 | if p in plugins_seen: 35 | continue 36 | plugins_seen.append(p) 37 | 38 | # Some data_types might be already saved and can be loaded; 39 | # remove them from the list of provides 40 | self.post_office.register_producer( 41 | p.iter(iters={dep: self.post_office.get_iter(dep, d) for dep in p.depends_on}), 42 | topic=strax.to_str_tuple(p.provides), 43 | registered=tuple(components.loaders), 44 | ) 45 | 46 | dtypes_built = {d: p for p in components.plugins.values() for d in p.provides} 47 | for d, savers in components.savers.items(): 48 | for saver in savers: 49 | if d in dtypes_built: 50 | rechunk = dtypes_built[d].can_rechunk(d) and allow_rechunk 51 | else: 52 | rechunk = is_superrun and allow_rechunk 53 | 54 | self.post_office.register_spy(SaverSpy(saver, rechunk=rechunk), topic=d) 55 | 56 | def iter(self): 57 | target = self.components.targets[0] 58 | final_generator = self.post_office.get_iter(topic=target, reader="FINAL") 59 | 60 | self.log.debug(f"Yielding {target}") 61 | 62 | try: 63 | yield from final_generator 64 | 65 | except Exception: 66 | # Exception in one of the producers. Close savers (they will record 67 | # the exception from sys.exc_info()) then reraise. 68 | self.log.fatal(f"Exception during processing, closing savers and reraising") 69 | self.post_office.kill_spies() 70 | raise 71 | 72 | except GeneratorExit: 73 | self.log.fatal( 74 | "Exception in code that called the processor: detected " 75 | "GeneratorExit from python shutting down. " 76 | "Closing savers and exiting." 77 | ) 78 | # Strax savers look at sys.exc_info(). Having only "GeneratorExit" 79 | # there is unhelpful.. 
this should set it to something better: 80 | try: 81 | raise RuntimeError("Exception in caller, see log for details") 82 | except RuntimeError: 83 | self.post_office.kill_spies() 84 | 85 | self.log.debug("Processing finished") 86 | 87 | 88 | class SaverSpy(Spy): 89 | """Spy that saves messages to a saver.""" 90 | 91 | def __init__(self, saver, rechunk=False): 92 | self.saver = saver 93 | self.rechunker = strax.Rechunker(rechunk, self.saver.md["run_id"]) 94 | self.chunk_number = 0 95 | 96 | def receive(self, chunk): 97 | self._save_chunk(self.rechunker.receive(chunk)) 98 | 99 | def _save_chunk(self, chunks): 100 | for chunk in chunks: 101 | if chunk is None: 102 | continue 103 | self.saver.save(chunk, self.chunk_number) 104 | self.chunk_number += 1 105 | 106 | def close(self): 107 | self._save_chunk(self.rechunker.flush()) 108 | self.saver.close() 109 | -------------------------------------------------------------------------------- /tests/test_sort.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import warnings 4 | from hypothesis import given, strategies 5 | from hypothesis.extra.numpy import arrays, integer_dtypes 6 | from strax.sort_enforcement import SortingError, stable_sort, stable_argsort 7 | 8 | 9 | class TestSortEnforcement(unittest.TestCase): 10 | @given(arrays(integer_dtypes(), strategies.integers(1, 100))) 11 | def test_explicit_stable_sort(self, arr): 12 | """Test explicit stable_sort function with generated arrays.""" 13 | with warnings.catch_warnings(): 14 | warnings.simplefilter("error") # Turn warnings into errors 15 | sorted_arr = stable_sort(arr) 16 | np.testing.assert_array_equal(sorted_arr, np.sort(arr, kind="mergesort")) 17 | # Verify the array is actually sorted 18 | self.assertTrue(np.all(sorted_arr[:-1] <= sorted_arr[1:])) 19 | 20 | @given(arrays(integer_dtypes(), strategies.integers(1, 100))) 21 | def test_explicit_stable_argsort(self, arr): 22 | """Test explicit stable_argsort function with generated arrays.""" 23 | with warnings.catch_warnings(): 24 | warnings.simplefilter("error") # Turn warnings into errors 25 | sorted_indices = stable_argsort(arr) 26 | np.testing.assert_array_equal(sorted_indices, np.argsort(arr, kind="mergesort")) 27 | # Verify the indices actually sort the array 28 | sorted_arr = arr[sorted_indices] 29 | self.assertTrue(np.all(sorted_arr[:-1] <= sorted_arr[1:])) 30 | 31 | @given( 32 | arrays(integer_dtypes(), strategies.integers(1, 100)), 33 | strategies.sampled_from(["quicksort", "heapsort"]), 34 | ) 35 | def test_wrapped_quicksort_rejection(self, arr, sort_kind): 36 | """Test that quicksort and heapsort raise errors in wrapped functions.""" 37 | with self.assertRaises(SortingError): 38 | stable_sort(arr, kind=sort_kind) 39 | with self.assertRaises(SortingError): 40 | stable_argsort(arr, kind=sort_kind) 41 | 42 | @given(arrays(integer_dtypes(), strategies.integers(1, 100))) 43 | def test_original_numpy_unaffected(self, arr): 44 | """Test that original numpy sort functions still work with quicksort.""" 45 | try: 46 | quicksort_result = np.sort(arr, kind="quicksort") 47 | self.assertTrue(np.all(quicksort_result[:-1] <= quicksort_result[1:])) 48 | 49 | quicksort_indices = np.argsort(arr, kind="quicksort") 50 | sorted_arr = arr[quicksort_indices] 51 | self.assertTrue(np.all(sorted_arr[:-1] <= sorted_arr[1:])) 52 | except Exception as e: 53 | self.fail(f"numpy sort with quicksort raised an unexpected exception: {e}") 54 | 55 | @given( 56 | strategies.lists( 57 | 
strategies.tuples( 58 | strategies.integers(1, 10), # num field 59 | strategies.text(min_size=1, max_size=1), # letter field 60 | ), 61 | min_size=1, 62 | max_size=100, 63 | ) 64 | ) 65 | def test_sort_stability(self, data): 66 | """Test that wrapped sorting is stable using generated structured arrays.""" 67 | # Convert list of tuples to structured array 68 | arr = np.array(data, dtype=[("num", int), ("letter", "U1")]) 69 | 70 | # First sort by letter to establish initial order 71 | arr_by_letter = stable_sort(arr, order="letter") 72 | # Then sort by number - if sort is stable, items with same number 73 | # should maintain their relative order from the letter sort 74 | final_sort = stable_sort(arr_by_letter, order="num") 75 | 76 | # Verify sorting works correctly 77 | for i in range(len(final_sort) - 1): 78 | # Check primary sort key (number) 79 | self.assertTrue( 80 | final_sort[i]["num"] <= final_sort[i + 1]["num"], 81 | f"Primary sort failed: {final_sort[i]} should come before {final_sort[i + 1]}", 82 | ) 83 | 84 | # If numbers are equal, check that letter order is preserved 85 | if final_sort[i]["num"] == final_sort[i + 1]["num"]: 86 | self.assertTrue( 87 | final_sort[i]["letter"] <= final_sort[i + 1]["letter"], 88 | f"Stability violated: for equal numbers {final_sort[i]['num']}, " 89 | f"letter {final_sort[i]['letter']} should come " 90 | f"before or equal to {final_sort[i + 1]['letter']}", 91 | ) 92 | 93 | 94 | if __name__ == "__main__": 95 | unittest.main(verbosity=2) 96 | -------------------------------------------------------------------------------- /tests/test_overlap_plugin.py: -------------------------------------------------------------------------------- 1 | from strax import testutils 2 | 3 | import numpy as np 4 | 5 | from hypothesis import given, strategies, example, settings 6 | 7 | import strax 8 | 9 | 10 | @given( 11 | testutils.disjoint_sorted_intervals.filter(lambda x: len(x) > 0), 12 | strategies.integers(min_value=0, max_value=3), 13 | ) 14 | @settings(deadline=None) 15 | # Examples that trigger issue #49 16 | @example( 17 | input_peaks=np.array([(0, 1, 1, 0), (1, 10, 1, 0), (11, 1, 1, 0)], dtype=strax.interval_dtype), 18 | split_i=2, 19 | ) 20 | @example( 21 | input_peaks=np.array( 22 | [(0, 1, 1, 0), (1, 1, 1, 0), (2, 9, 1, 0), (11, 1, 1, 0)], dtype=strax.interval_dtype 23 | ), 24 | split_i=3, 25 | ) 26 | # Other example that caused failures at some point 27 | @example( 28 | input_peaks=np.array([(0, 1, 1, 0), (7, 6, 1, 0), (13, 1, 1, 0)], dtype=strax.interval_dtype), 29 | split_i=2, 30 | ) 31 | def test_overlap_plugin(input_peaks, split_i): 32 | """Counting the number of nearby peaks should not depend on how peaks are chunked.""" 33 | chunks = np.split(input_peaks, [split_i]) 34 | chunks = [c for c in chunks if not len(c) == 0] 35 | 36 | class Peaks(strax.Plugin): 37 | depends_on = tuple() 38 | dtype = strax.interval_dtype 39 | 40 | def compute(self, chunk_i): 41 | data = chunks[chunk_i] 42 | return self.chunk( 43 | data=data, start=int(data[0]["time"]), end=int(strax.endtime(data[-1])) 44 | ) 45 | 46 | # Hack to make peak output stop after a few chunks 47 | def is_ready(self, chunk_i): 48 | return chunk_i < len(chunks) 49 | 50 | def source_finished(self): 51 | return True 52 | 53 | window = 10 54 | 55 | # Note we must apply this to endtime, not time, since 56 | # peaks straddling the overlap threshold are assigned to the NEXT window. 57 | # If we used time it would fail on examples with peaks larger than window. 
58 | # In real life, the window should simply be chosen large enough that this 59 | # is not an issue. 60 | def count_in_window(ts, w=window): 61 | # Terribly inefficient algorithm... 62 | result = np.zeros(len(ts), dtype=np.int16) 63 | for i, t in enumerate(ts): 64 | result[i] = ((ts < t + w) & (ts > t - w)).sum() 65 | return result 66 | 67 | class WithinWindow(strax.OverlapWindowPlugin): 68 | depends_on = ("peaks",) 69 | dtype = [("n_within_window", np.int16)] + strax.time_fields 70 | 71 | def get_window_size(self): 72 | return window 73 | 74 | def compute(self, peaks): 75 | return dict( 76 | n_within_window=count_in_window(strax.endtime(peaks)), 77 | time=peaks["time"][:1], 78 | endtime=strax.endtime(peaks)[-1:], 79 | ) 80 | 81 | class MultipleWithinWindow(WithinWindow): 82 | provides = ("within_window", "multiple_within_window") 83 | data_kind = dict( 84 | within_window="within_window", multiple_within_window="multiple_within_window" 85 | ) 86 | dtype = dict( 87 | within_window=[("n_within_window", np.int16)] + strax.time_fields, 88 | multiple_within_window=[("window_length", np.int16)] + strax.time_fields, 89 | ) 90 | 91 | def compute(self, peaks): 92 | within_window = dict( 93 | n_within_window=count_in_window(strax.endtime(peaks)), 94 | time=peaks["time"][:1], 95 | endtime=strax.endtime(peaks)[-1:], 96 | ) 97 | multiple_within_window = dict( 98 | window_length=peaks["length"], 99 | time=peaks["time"], 100 | endtime=strax.endtime(peaks), 101 | ) 102 | return dict( 103 | within_window=within_window, 104 | multiple_within_window=multiple_within_window, 105 | ) 106 | 107 | st = strax.Context(storage=[]) 108 | st.register(Peaks) 109 | for plugin in (WithinWindow, MultipleWithinWindow): 110 | st.register(plugin) 111 | 112 | result = st.get_array(run_id="some_run", targets="within_window") 113 | expected = count_in_window(strax.endtime(input_peaks)) 114 | 115 | assert len(expected) == len(input_peaks), "WTF??" 116 | assert isinstance(result, np.ndarray), "Did not get an array" 117 | assert len(result) == len(expected), "Result has wrong length" 118 | np.testing.assert_equal(result["n_within_window"], expected, "Counting went wrong") 119 | -------------------------------------------------------------------------------- /strax/processing/data_reduction.py: -------------------------------------------------------------------------------- 1 | """Functions to perform in-place pulse-level data reduction.""" 2 | 3 | import numpy as np 4 | import numba 5 | from enum import IntEnum 6 | 7 | import strax 8 | from strax.processing.pulse_processing import NO_RECORD_LINK, record_links 9 | 10 | export, __all__ = strax.exporter() 11 | 12 | 13 | @export 14 | class ReductionLevel(IntEnum): 15 | """Identifies what type of data reduction has been used on a record.""" 16 | 17 | # Record not modified 18 | NO_REDUCTION = 0 19 | # Samples near pulse start/end were removed 20 | BASELINE_CUT = 1 21 | # Samples far from a threshold excursion were removed 22 | HITS_ONLY = 2 23 | # The record has been replaced with a simpler waveform 24 | WAVEFORM_REPLACED = 3 25 | # The raw waveform has been deleted, only metadata survives 26 | METADATA_ONLY = 4 27 | 28 | 29 | @export 30 | @numba.njit(nogil=True, cache=True) 31 | def cut_baseline(records, n_before=48, n_after=30): 32 | """Replace first n_before and last n_after samples of pulses by 0.""" 33 | # records.data.shape[1] gives a numba error (file issue?) 
34 | if not len(records): 35 | return 36 | samples_per_record = len(records[0]["data"]) 37 | 38 | for d_i, d in enumerate(records): 39 | if d.record_i == 0: 40 | d.data[:n_before] = 0 41 | 42 | clear_from = d.pulse_length - n_after 43 | clear_from -= d.record_i.astype(np.int32) * samples_per_record 44 | clear_from = max(0, clear_from) 45 | if clear_from < samples_per_record: 46 | d.data[clear_from:] = 0 47 | d["reduction_level"] = ReductionLevel.BASELINE_CUT 48 | 49 | 50 | @export 51 | def cut_outside_hits(records, hits, left_extension=2, right_extension=15): 52 | """Return records with waveforms zeroed if not within left_extension or right_extension of hits. 53 | These extensions properly account for breaking of pulses into records. 54 | 55 | If you pass an incomplete (e.g. cut) set of records, we will not save data around hits found in 56 | the removed records, even if this stretches into records that you did pass. 57 | 58 | """ 59 | if not len(records): 60 | return records 61 | 62 | # Create a copy of records with blanked data 63 | # Even a simple records.copy() is mightily slow in numba, 64 | # and assignments to struct arrays seem troublesome. 65 | # The obvious solution: 66 | # new_recs = records.copy() 67 | # new_recs['data'] = 0 68 | # is quite slow. 69 | # Replacing the last = with *= gives a factor 2 speed boost. 70 | # But ~40% faster still is this: 71 | meta_fields = [x for x in records.dtype.names if x not in ["data", "reduction_level"]] 72 | 73 | new_recs = np.zeros(len(records), dtype=records.dtype) 74 | new_recs[meta_fields] = records[meta_fields] 75 | new_recs["reduction_level"] = ReductionLevel.HITS_ONLY 76 | 77 | _cut_outside_hits(records, hits, new_recs, left_extension, right_extension) 78 | 79 | return new_recs 80 | 81 | 82 | @numba.njit(nogil=True, cache=True) 83 | def _cut_outside_hits(records, hits, new_recs, left_extension=2, right_extension=15): 84 | if not len(records): 85 | return 86 | samples_per_record = len(records[0]["data"]) 87 | 88 | previous_record, next_record = record_links(records) 89 | 90 | for hit_i, h in enumerate(hits): 91 | rec_i = h["record_i"] 92 | r = records[rec_i] 93 | 94 | # Indices to keep, with 0 at the start of this record 95 | start_keep = h["left"] - left_extension 96 | end_keep = h["right"] + right_extension 97 | 98 | # Indices of samples to keep in this record 99 | (a, b), _ = strax.overlap_indices(0, r["length"], start_keep, end_keep - start_keep) 100 | new_recs[rec_i]["data"][a:b] = records[rec_i]["data"][a:b] 101 | 102 | # Keep samples in previous record, if there was one 103 | if start_keep < 0: 104 | prev_ri = previous_record[rec_i] 105 | if prev_ri != NO_RECORD_LINK: 106 | # Note start_keep is negative, so this keeps the 107 | # last few samples of the previous record 108 | a_prev = start_keep 109 | new_recs[prev_ri]["data"][a_prev:] = records[prev_ri]["data"][a_prev:] 110 | 111 | # Same for the next record, if there is one 112 | if end_keep > samples_per_record: 113 | next_ri = next_record[rec_i] 114 | if next_ri != NO_RECORD_LINK: 115 | b_next = end_keep - samples_per_record 116 | new_recs[next_ri]["data"][:b_next] = records[next_ri]["data"][:b_next] 117 | -------------------------------------------------------------------------------- /tests/test_fixed_plugin_cache.py: -------------------------------------------------------------------------------- 1 | from strax.testutils import Records, Peaks 2 | import strax 3 | import unittest 4 | import numpy as np 5 | 6 | 7 | class ChannelIsRunidRecords(Records): 8 | """Set the channel 
field equal to the run_id.""" 9 | 10 | def compute(self, chunk_i): 11 | res = super().compute(chunk_i) 12 | res.data["channel"][:] = int(self.run_id) 13 | return res 14 | 15 | 16 | class MaxChannelPeaks(Peaks): 17 | def infer_dtype(self): 18 | # We are going to check later that the infer_dtype is always called. 19 | dtype = strax.peak_dtype() + [(("PMT with median most records", "max_pmt"), np.int16)] 20 | self.dtype_is_set = True 21 | return dtype 22 | 23 | def compute(self, records): 24 | assert np.all(records["channel"] == int(self.run_id)) 25 | res = super().compute(records) 26 | res["max_pmt"] = records["channel"].mean() 27 | return res 28 | 29 | 30 | class TestContextFixedPluginCache(unittest.TestCase): 31 | """Test the _fixed_plugin_cache of a context.""" 32 | 33 | def test_load_runs(self, n_runs=3, config_update=None, **kwargs): 34 | """Try loading data for n_runs to make sure that we are.""" 35 | run_ids = [str(r) for r in range(n_runs)] 36 | st = self.get_context(use_per_run_defaults=False) 37 | if config_update is not None: 38 | st.set_context_config(config_update) 39 | data = st.get_array(run_ids, "records", **kwargs) 40 | run_id_channel_diff = data["run_id"].astype(np.int64) - data["channel"] 41 | assert np.all(run_id_channel_diff == 0) 42 | 43 | # To be sure also double check Peaks as self.deps of the Plugin 44 | # class should be correctly taken care of by the context. 45 | peaks_data = st.get_array(run_ids, "peaks") 46 | run_id_max_pmt_diff = peaks_data["max_pmt"] - peaks_data["run_id"].astype(np.int64) 47 | assert np.all(run_id_max_pmt_diff == 0) 48 | 49 | def test_get_plugin(self, n_runs=3): 50 | run_ids = [str(r) for r in range(n_runs)] 51 | st = self.get_context(use_per_run_defaults=False) 52 | plugins_seen = [] 53 | for run in run_ids: 54 | p = st.get_single_plugin(run, "records") 55 | plugins_seen.append(p) 56 | assert p.run_id == run 57 | 58 | # If we passed around a reference instead of a copy of the 59 | # plugin, this would be a problem. 60 | for r_i, run in enumerate(run_ids): 61 | assert plugins_seen[r_i].run_id == run 62 | 63 | def test_load_runs_multicore(self): 64 | """Load the runs. 
65 | 66 | If the references are mixed up the results are inconsistent 67 | 68 | """ 69 | multicore_config = dict( 70 | allow_lazy=False, 71 | timeout=60, 72 | allow_multiprocess=True, 73 | ) 74 | self.test_load_runs(n_runs=10, config_update=multicore_config, max_workers=10) 75 | 76 | def test_cache_changes(self): 77 | """ 78 | Test that the _fixed_plugin_cache changes if we: 79 | - Change the config 80 | - Change the version of a plugin 81 | 82 | """ 83 | st = self.get_context(use_per_run_defaults=False) 84 | 85 | # Compute the key/hash under which we will store the plugins 86 | first_key = st._context_hash() 87 | assert first_key is not None 88 | 89 | # Change the config triggers a new key 90 | st.set_config({"bla": 1}) 91 | second_key = st._context_hash() 92 | 93 | # Change the version of a plugin triggers a new key 94 | st._plugin_class_registry["records"].__version__ = -1 95 | third_key = st._context_hash() 96 | 97 | assert first_key != second_key != third_key 98 | 99 | def test_set_dtype(self): 100 | st = self.get_context(use_per_run_defaults=False) 101 | 102 | # Compute the key/hash under which we will store the plugins 103 | st.key_for("0", "peaks") 104 | assert st._fixed_plugin_cache[st._context_hash()]["peaks"].dtype_is_set 105 | 106 | # Now recreate for a new run 107 | st.key_for("1", "peaks") 108 | assert st._fixed_plugin_cache[st._context_hash()]["peaks"].dtype_is_set 109 | 110 | @staticmethod 111 | def get_context(use_per_run_defaults: bool): 112 | """Get simple context.""" 113 | st = strax.Context( 114 | storage=[], register=(ChannelIsRunidRecords, MaxChannelPeaks), config=dict(bonus_area=1) 115 | ) 116 | st.set_context_config({"use_per_run_defaults": use_per_run_defaults}) 117 | return st 118 | -------------------------------------------------------------------------------- /strax/storage/zipfiles.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import os.path as osp 4 | import shutil 5 | import zipfile 6 | 7 | import strax 8 | from .files import RUN_METADATA_PATTERN 9 | 10 | export, __all__ = strax.exporter() 11 | 12 | 13 | @export 14 | class ZipDirectory(strax.StorageFrontend): 15 | """ZipFile-based storage frontend for strax. 16 | 17 | All data for one run is assumed to be in a single zip file .zip, with the same 18 | file/directory structure as created by FileStore. 19 | 20 | We cannot write zip files directly (this would result in concurrency hell), instead these zip 21 | files are made by zipping stuff from FileSytemBackend. 
22 | 23 | """ 24 | 25 | storage_typ = strax.StorageType.COMPRESSED 26 | 27 | def __init__(self, path=".", *args, readonly=True, **kwargs): 28 | if not readonly: 29 | raise NotImplementedError("Zipfiles are currently read-only") 30 | super().__init__(*args, readonly=readonly, **kwargs) 31 | self.backends = [ZipFileBackend()] 32 | self.path = path 33 | if not osp.exists(path): 34 | os.makedirs(path) 35 | 36 | def _find(self, key, write, allow_incomplete, fuzzy_for, fuzzy_for_options): 37 | assert not write 38 | 39 | # Check exact match / write case 40 | bk = self._backend_key(key) 41 | with zipfile.ZipFile(self._zipname(key)) as zp: 42 | try: 43 | dirname = str(key) 44 | prefix = strax.dirname_to_prefix(dirname) 45 | zp.getinfo(f"{dirname}/{RUN_METADATA_PATTERN % prefix}") 46 | return bk 47 | except KeyError: 48 | pass 49 | 50 | if not len(fuzzy_for) and not len(fuzzy_for_options): 51 | raise strax.DataNotAvailable 52 | 53 | raise NotImplementedError("Fuzzy matching within zipfiles not yet implemented") 54 | 55 | def run_metadata(self, run_id): 56 | with zipfile.ZipFile(self._zipname(run_id)) as zp: 57 | try: 58 | with zp.open(RUN_METADATA_PATTERN % run_id) as f: 59 | return json.loads(f.read()) 60 | except KeyError: 61 | raise strax.RunMetadataNotAvailable 62 | 63 | def write_run_metadata(self, run_id, metadata): 64 | raise NotImplementedError("Zipfiles cannot write") 65 | 66 | def remove(self, key): 67 | raise NotImplementedError("Zipfiles cannot write") 68 | 69 | def _set_write_complete(self, key): 70 | raise NotImplementedError("Zipfiles cannot write") 71 | 72 | def _backend_key(self, key): 73 | return (self.backends[0].__class__.__name__, (self._zipname(key), str(key))) 74 | 75 | def _zipname(self, key): 76 | zipname = osp.join(self.path, key.run_id + ".zip") 77 | # Since we're never writing, this check can be here 78 | # is this a bad idea? 79 | if not osp.exists(zipname): 80 | raise strax.DataNotAvailable 81 | return zipname 82 | 83 | @staticmethod 84 | def zip_dir(input_dir, output_zipfile, delete=False): 85 | """Zips subdirectories of input_dir to output_zipfile (without compression). 86 | 87 | Travels into subdirectories, but not sub-subdirectories. Skips any other files in directory. 
88 | :param delete: If True, delete original directories 89 | 90 | """ 91 | with zipfile.ZipFile(output_zipfile, mode="w") as zp: 92 | for dirn in os.listdir(input_dir): 93 | full_dirn = os.path.join(input_dir, dirn) 94 | if not osp.isdir(full_dirn): 95 | continue 96 | for fn in os.listdir(full_dirn): 97 | zp.write(os.path.join(full_dirn, fn), arcname=os.path.join(dirn, fn)) 98 | if delete: 99 | shutil.rmtree(full_dirn) 100 | 101 | 102 | @export 103 | class ZipFileBackend(strax.StorageBackend): 104 | def _read_chunk(self, zipn_and_dirn, chunk_info, dtype, compressor): 105 | zipn, dirn = zipn_and_dirn 106 | with zipfile.ZipFile(zipn) as zp: 107 | with zp.open(dirn + "/" + chunk_info["filename"]) as f: 108 | return strax.load_file(f, dtype=dtype, compressor=compressor) 109 | 110 | def _get_metadata(self, zipn_and_dirn): 111 | zipn, dirn = zipn_and_dirn 112 | with zipfile.ZipFile(zipn) as zp: 113 | prefix = strax.dirname_to_prefix(dirn) 114 | with zp.open(f"{dirn}/{RUN_METADATA_PATTERN % prefix}") as f: 115 | return json.loads(f.read()) 116 | 117 | def saver(self, *args, **kwargs): 118 | raise NotImplementedError("Zipfiles cannot write") 119 | -------------------------------------------------------------------------------- /strax/plugins/cut_plugin.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import numpy as np 3 | import strax 4 | from .plugin import Plugin, SaveWhen 5 | from .merge_only_plugin import MergeOnlyPlugin 6 | 7 | export, __all__ = strax.exporter() 8 | 9 | 10 | @export 11 | class CutPlugin(Plugin): 12 | """Generate a plugin that provides a boolean for a given cut specified by 'cut_by'.""" 13 | 14 | save_when = SaveWhen.TARGET 15 | 16 | def __init__(self): 17 | super().__init__() 18 | 19 | compute_pars = list(inspect.signature(self.cut_by).parameters.keys()) 20 | if "chunk_i" in compute_pars: 21 | self.compute_takes_chunk_i = True 22 | del compute_pars[compute_pars.index("chunk_i")] 23 | if "start" in compute_pars: 24 | if "end" not in compute_pars: 25 | raise ValueError(f"Compute of {self} takes start, so it should also take end.") 26 | self.compute_takes_start_end = True 27 | del compute_pars[compute_pars.index("start")] 28 | del compute_pars[compute_pars.index("end")] 29 | self.compute_pars = compute_pars 30 | 31 | _name = strax.camel_to_snake(self.__class__.__name__) 32 | if not hasattr(self, "provides"): 33 | self.provides = _name 34 | if not hasattr(self, "cut_name"): 35 | self.cut_name = _name 36 | if not hasattr(self, "cut_description"): 37 | _description = _name 38 | if "cut_" not in _description: 39 | _description = "Cut by " + _description 40 | else: 41 | _description = " ".join(_description.split("_")) 42 | self.cut_description = _description 43 | 44 | def infer_dtype(self): 45 | dtype = [(self.cut_name, bool, self.cut_description)] 46 | # Alternatively one could use time_dt_fields for low level plugins. 
47 | dtype = strax.time_fields + dtype 48 | return dtype 49 | 50 | def compute(self, **kwargs): 51 | if hasattr(self, "cut_by"): 52 | cut_by = self.cut_by 53 | else: 54 | raise NotImplementedError(f"{self.cut_name} does not have attribute 'cut_by'") 55 | 56 | # Take shape of the first data_type like in strax.plugin 57 | buff = list(kwargs.values())[0] 58 | 59 | # Generate result buffer 60 | r = np.zeros(len(buff), self.dtype) 61 | r["time"] = buff["time"] 62 | r["endtime"] = strax.endtime(buff) 63 | r[self.cut_name] = cut_by(**kwargs) 64 | return r 65 | 66 | def cut_by(self, **kwargs): 67 | # This should be provided by the user making a CutPlugin 68 | raise NotImplementedError() 69 | 70 | 71 | @export 72 | class CutList(MergeOnlyPlugin): 73 | """Base class that merges all existing cuts into a single array which can be loaded by the 74 | analysts.""" 75 | 76 | __version__ = "0.0.0" 77 | 78 | save_when = SaveWhen.TARGET 79 | cuts = () 80 | # need to declare depends_on here to satisfy strax 81 | # https://github.com/AxFoundation/strax/blob/df18c9cef38ea1cee9737d56b1bea078ebb246a9/strax/plugin.py#L99 82 | depends_on = () 83 | _depends_on = () 84 | 85 | def infer_dtype(self): 86 | dtype = super().infer_dtype() 87 | dtype += [ 88 | ( 89 | ( 90 | f"Boolean AND of all cuts in {self.accumulated_cuts_string}", 91 | self.accumulated_cuts_string, 92 | ), 93 | bool, 94 | ) 95 | ] 96 | return dtype 97 | 98 | def compute(self, **kwargs): 99 | cuts = super().compute(**kwargs) 100 | cuts_joint = np.zeros(len(cuts), self.dtype) 101 | strax.copy_to_buffer( 102 | cuts, cuts_joint, f"_copy_cuts_{strax.deterministic_hash(self.depends_on)}" 103 | ) 104 | cuts_joint[self.accumulated_cuts_string] = get_accumulated_bool(cuts) 105 | return cuts_joint 106 | 107 | @property # type: ignore 108 | def depends_on(self): # noqa 109 | if not len(self._depends_on): 110 | deps = [] 111 | for c in self.cuts: 112 | deps.extend(strax.to_str_tuple(c.provides)) 113 | self._depends_on = tuple(deps) 114 | return self._depends_on 115 | 116 | @depends_on.setter 117 | def depends_on(self, str_or_tuple): 118 | self._depends_on = strax.to_str_tuple(str_or_tuple) 119 | 120 | 121 | @export 122 | def get_accumulated_bool(array): 123 | """Computes accumulated boolean over all cuts. 124 | 125 | :param array: Array containing merged cuts. 
126 | 127 | """ 128 | fields = array.dtype.names 129 | fields = np.array([f for f in fields if f not in ("time", "endtime")]) 130 | 131 | res = np.ones(len(array), bool) 132 | for field in fields: 133 | res &= array[field] 134 | return res 135 | -------------------------------------------------------------------------------- /tests/test_mailbox.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import threading 3 | import time 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | import strax 9 | 10 | SHORT_TIMEOUT = 0.1 11 | LONG_TIMEOUT = 5 * SHORT_TIMEOUT 12 | 13 | 14 | def reader(source, reader_sleeps=0, name=""): 15 | result = [] 16 | for x in source: 17 | print(f"Reader {name} got {x}, sleeping for {reader_sleeps}") 18 | time.sleep(reader_sleeps) 19 | print(f"Reader {name} awoke") 20 | result.append(x) 21 | return result 22 | 23 | 24 | def mailbox_tester( 25 | messages, 26 | numbers=None, 27 | lazy=False, 28 | reader_sleeps=0.0, 29 | max_messages=100, 30 | expected_result=None, 31 | timeout=SHORT_TIMEOUT, 32 | result_timeout=LONG_TIMEOUT, 33 | ): 34 | if numbers is None: 35 | numbers = np.arange(len(messages)) 36 | if expected_result is None: 37 | messages = np.asarray(messages) 38 | expected_result = messages[strax.stable_argsort(numbers)] 39 | 40 | mb = strax.Mailbox(max_messages=max_messages, timeout=timeout, lazy=lazy) 41 | 42 | n_readers = 2 43 | 44 | with concurrent.futures.ThreadPoolExecutor() as tp: 45 | futures = [ 46 | tp.submit(reader, source=mb.subscribe(), reader_sleeps=reader_sleeps) 47 | for _ in range(n_readers) 48 | ] 49 | 50 | for i, _ in enumerate(messages): 51 | mb.send(messages[i], msg_number=numbers[i]) 52 | print(f"Sent message {i}. Now {len(mb._mailbox)} ms in mailbox.") 53 | 54 | mb.close() 55 | 56 | # Results must be equal 57 | for f in futures: 58 | np.testing.assert_equal(f.result(timeout=result_timeout), expected_result) 59 | 60 | 61 | def test_highlevel(): 62 | """Test highlevel mailbox API.""" 63 | for lazy in [False, True]: 64 | n_threads_start = len(threading.enumerate()) 65 | print(f"Lazy mode: {lazy}") 66 | 67 | mb = strax.Mailbox(lazy=lazy) 68 | mb.add_sender(iter(list(range(10)))) 69 | 70 | def test_reader(source): 71 | test_reader.got = r = [] 72 | for s in source: 73 | r.append(s) 74 | 75 | mb.add_reader(test_reader) 76 | mb.start() 77 | time.sleep(SHORT_TIMEOUT) 78 | assert hasattr(test_reader, "got") 79 | assert test_reader.got == list(range(10)) 80 | mb.cleanup() 81 | threads = [f"{t.name} is dead: {True ^ t.is_alive()}" for t in threading.enumerate()] 82 | assert ( 83 | len(threads) == n_threads_start 84 | ), f"Not all threads died. \n Threads running are:{threads}" 85 | 86 | 87 | def test_result_timeout(): 88 | """Test that our mailbox tester actually times out. 
89 | 90 | (if not, the other tests might hang indefinitely if something is broken) 91 | 92 | """ 93 | with pytest.raises(concurrent.futures.TimeoutError): 94 | mailbox_tester([0, 1], numbers=[1, 2], timeout=2 * LONG_TIMEOUT) 95 | 96 | 97 | def test_read_timeout(): 98 | """Subscribers time out if we cannot read for too long.""" 99 | with pytest.raises(strax.MailboxReadTimeout): 100 | mailbox_tester([0, 1], numbers=[1, 2]) 101 | 102 | 103 | def test_write_timeout(): 104 | """Writers time out if we cannot write for too long.""" 105 | with pytest.raises(strax.MailboxFullTimeout): 106 | mailbox_tester([0, 1, 2, 3, 4], max_messages=1, reader_sleeps=LONG_TIMEOUT) 107 | 108 | 109 | def test_reversed(): 110 | """Mailbox sorts messages properly.""" 111 | mailbox_tester(np.arange(10), numbers=np.arange(10)[::-1]) 112 | 113 | 114 | def test_deadlock_regression(): 115 | """A reader thread may start after the first message is processed.""" 116 | # Test cannot run in lazy mode, cannot send without active subscriber 117 | mb = strax.Mailbox(timeout=SHORT_TIMEOUT) 118 | mb.send(0) 119 | 120 | readers = [ 121 | threading.Thread(target=reader, kwargs=dict(source=mb.subscribe(), name=str(i))) 122 | for i in range(2) 123 | ] 124 | readers[0].start() 125 | time.sleep(SHORT_TIMEOUT) 126 | 127 | readers[1].start() 128 | mb.send(1) 129 | mb.close() 130 | 131 | for t in readers: 132 | t.join(SHORT_TIMEOUT) 133 | assert not t.is_alive() 134 | 135 | 136 | def test_close_protection(): 137 | """Cannot send messages to a closed mailbox.""" 138 | mb = strax.Mailbox() 139 | mb.close() 140 | with pytest.raises(strax.MailBoxAlreadyClosed): 141 | mb.send(0) 142 | 143 | 144 | def test_valid_msg_number(): 145 | """Message numbers are non-negative integers.""" 146 | mb = strax.Mailbox() 147 | with pytest.raises(strax.InvalidMessageNumber): 148 | mb.send(0, msg_number=-1) 149 | with pytest.raises(strax.InvalidMessageNumber): 150 | mb.send(0, msg_number="???") 151 | 152 | 153 | # Task for in the next test, must be global since we're using ProcessPool 154 | # (which must pickle) 155 | def _task(i): 156 | time.sleep(SHORT_TIMEOUT) 157 | return i 158 | 159 | 160 | def test_futures(): 161 | """Mailbox awaits futures before passing them to readers.""" 162 | # Timeouts are longer for this example, 163 | # since they involve creating subprocesses. 164 | exc = concurrent.futures.ProcessPoolExecutor() 165 | futures = [exc.submit(_task, i) for i in range(3)] 166 | mailbox_tester( 167 | futures, 168 | expected_result=[0, 1, 2], 169 | result_timeout=5 * LONG_TIMEOUT, 170 | timeout=5 * LONG_TIMEOUT, 171 | ) 172 | -------------------------------------------------------------------------------- /docs/source/advanced/superrun.rst: -------------------------------------------------------------------------------- 1 | Superruns 2 | ========= 3 | 4 | Overview and motivation 5 | ------------------------ 6 | A superrun is a run defined by (parts of) other runs, which are called 'subruns'. 7 | Superrun names start with an underscore. Regular run names cannot start with an underscore. 8 | 9 | Strax builds data for a superrun by loading (and potentially building) each of the subruns, then 10 | slicing and concatenating them as necessary. In addition superruns can be stored to disk as a 11 | rechunked representation of its subruns. This currently only works for static lineages e.g. without 12 | default-by-run_id settings. Stored superruns have the advantage that loading data is much faster 13 | and different data_types of the same kind can be combined. 
14 | 15 | Superruns are useful to track common groupings of data. For example: 16 | 17 | * 'Minimum bias' runs, consisting only of low-energy events, events passing some cuts, DM-candidates, PMT flashes, or other things of interest. The low-level data of these is much smaller than that of all the full runs, and can be brought to a local analysis facility, enabling on-site low-level waveform watching. 18 | * Grouping similar runs. For example, shifters might group good runs from a week of calibration data with some source under a single name, e.g. ``_kr_feb2019``. 19 | 20 | 21 | Superruns can be built from other superruns. Thus, _sr1_v0.2 could be built from 22 | _background_january, _background_february, etc. 23 | 24 | Defining superruns and making data: 25 | ----------------------------------- 26 | Use the `define_run` context method to define a new superrun. Currently, superruns can only be 27 | defined from a list of run_ids: 28 | 29 | 30 | .. code-block:: python 31 | 32 | st.define_run('_awesome_superrun', ['123', '124']) 33 | 34 | 35 | From a dictionary of time range tuples. The times must be 64-bit integer UTC timestamps since the unix epoch: 36 | 37 | .. code-block:: python 38 | 39 | st.define_run('_awesome_superrun', { 40 | '123': [(start, stop), (start, stop), ...], 41 | '124': [(start, stop), (start, stop), ...],}) 42 | 43 | From a dataframe (or record array) with strax data: 44 | 45 | 46 | .. code-block:: python 47 | 48 | st.define_run('_awesome_superrun', events_df) 49 | st.define_run('_awesome_superrun', events_df, from_run='123') 50 | 51 | In this case, the run will be made of the time ranges that correspond exactly to ``events_df``. If ``events_df`` already has a ``run_id`` field (e.g. because it consists of data from multiple runs), you do not need to pass `from_run`; it will be read off from the data. 52 | 53 | It is up to the storage frontend to process your request for defining a run. As a normal user, you 54 | generally only have permissions to create a new run in the `DataDirectory` (local files) storage 55 | frontend, where runs are recorded in json files. 56 | 57 | Making superrun data is as easy as creating any other data. Once a superrun is defined, we can make, 58 | for example, event_info via: 59 | 60 | 61 | .. code-block:: python 62 | 63 | st.make('_awesome_superrun', 'event_info') 64 | 65 | For bookkeeping, each stored superrun chunk contains information about its constituents in a field 66 | called subruns, e.g.: 67 | 68 | 69 | .. code-block:: python 70 | 71 | {'0': {'end': 10, 'start': 0}, 72 | '1': {'end': 30, 'start': 20}, 73 | '2': {'end': 50, 'start': 40}} 74 | 75 | Here the keys represent the subrun_ids, and start/end are the start and end of the corresponding 76 | first/last chunk included in the superrun chunk. The same information can also be found in the 77 | metadata of the individual chunks: 78 | 79 | .. code-block:: python 80 | 81 | {'chunk_i': 0, 82 | 'end': 50, 83 | 'filename': 'records-j3nd2fjbiq-000000', 84 | 'filesize': 2343, 85 | 'first_endtime': 1, 86 | 'first_time': 0, 87 | 'last_endtime': 50, 88 | 'last_time': 49, 89 | 'n': 300, 90 | 'nbytes': 77100, 91 | 'run_id': '_superrun_test', 92 | 'start': 0, 93 | 'subruns': {'0': {'end': 10, 'start': 0}, 94 | '1': {'end': 30, 'start': 20}, 95 | '2': {'end': 50, 'start': 40}}} 96 | 97 | After creating the data, we can load the superrun as we are used to and combine it with other data_types 98 | of the same kind too.
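For example (a hedged sketch: ``peak_basics`` and ``peak_positions`` are assumed, straxen-style data_type names of the same kind; they are not defined by strax itself):

.. code-block:: python

    # A superrun is loaded like any other run
    df = st.get_df('_awesome_superrun', 'event_info')

    # Several data_types of the same kind can be requested together;
    # strax merges them into a single array
    peaks = st.get_array('_awesome_superrun', ('peak_basics', 'peak_positions'))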
99 | 100 | To work more easily with superruns all chunks have also the properties `chunk.is_superun` as well as 101 | `chunk.first_subrun` and `chunk.last_subrun`. 102 | 103 | If you wish to make/store a superrun you have to specify the context option: 104 | 105 | 106 | .. code-block:: python 107 | 108 | st.set_context_config({'write_superruns': True}) 109 | 110 | 111 | Superruns follow the same saving rules (SaveWhen.TARGET, SaveWhen.EXPLICIT or SaveWhen.ALWAYS) as regular runs. 112 | 113 | How superruns work 114 | -------------------- 115 | 116 | As mentioned above, strax builds data for superruns by slicing data of the subruns. Thus, peaks 117 | from a superrun come from the peaks of the subruns, which are built from their own records as usual. 118 | 119 | Defaults for settings can be runid-dependent in strax, although this is not preferred any longer. 120 | If an option specifies ``default_per_run=[(run, setting), (run2, setting2)]``, then runs in between 121 | run and run2 will use setting, and runs after run2 ``setting2``. Superruns store a deterministic hash 122 | of this ``default_per_run`` specification for tracking purposes. 123 | 124 | You cannot currently go directly from the superrun's records to the superrun's peaks. This would be 125 | tricky to implement, since (1) (2) even with the same settings, many plugins choose to do something 126 | different depending on the run_id. For example, in straxen the gain model is specified by a file, 127 | but which gains from the file are actually used is dependent on the runid. 128 | 129 | Thus, superruns won't help build data faster, but they will speed up loading data after it has been 130 | built. This is important, because strax' overhead for loading a run is larger than hax, due to its 131 | version and option tracking (this is only true if per-run-default options are allowed). 132 | -------------------------------------------------------------------------------- /strax/plugins/loop_plugin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import strax 3 | from .plugin import Plugin 4 | from immutabledict import immutabledict 5 | from warnings import warn 6 | 7 | export, __all__ = strax.exporter() 8 | 9 | 10 | @export 11 | class LoopPlugin(Plugin): 12 | """Plugin that disguises multi-kind data-iteration by an event loop.""" 13 | 14 | # time_selection: Kind of time selection to apply: 15 | # - touching: select things that (partially) overlap with the range. 16 | # NB! Use this option with care since if e.g. two events are 17 | # adjacent, touching windows might return ambiguous results as peaks 18 | # may be touching both events. 19 | # The number of samples to be desired to overlapped can be set by 20 | # self.touching_window. Otherwise 0 is assumed (see strax.touching_windows) 21 | # - fully_contained: (default) select things fully contained in the range 22 | time_selection = "fully_contained" 23 | 24 | def compute(self, **kwargs): 25 | # If not otherwise specified, data kind to loop over 26 | # is that of the first dependency (e.g. events) 27 | # Can't be in __init__: deps not initialized then 28 | if hasattr(self, "loop_over"): 29 | loop_over = self.loop_over 30 | else: 31 | loop_over = self.deps[self.depends_on[0]].data_kind 32 | if not isinstance(loop_over, str): 33 | raise TypeError('Please add "loop_over = " to your plugin definition') 34 | 35 | # Group into lists of things (e.g. peaks) 36 | # contained in the base things (e.g. 
events) 37 | base = kwargs[loop_over] 38 | if len(base) > 1: 39 | assert np.all(base[1:]["time"] >= strax.endtime(base[:-1])), f"{base}s overlap" 40 | 41 | for k, things in kwargs.items(): 42 | # Check for sorting 43 | difs = np.diff(things["time"]) 44 | if difs.min(initial=0) < 0: 45 | i_bad = np.argmin(difs) 46 | examples = things[i_bad - 1 : i_bad + 3] 47 | t0 = examples["time"].min() 48 | raise ValueError( 49 | f"Expected {k} to be sorted, but found " 50 | + str([(x["time"] - t0, strax.endtime(x) - t0) for x in examples]) 51 | ) 52 | 53 | if k != loop_over: 54 | if self.time_selection == "fully_contained": 55 | r = strax.split_by_containment(things, base) 56 | elif self.time_selection == "touching": 57 | # Experimental feature that should be handled with care: 58 | # github.com/AxFoundation/strax/pull/424 59 | warn( 60 | f"{self.__class__.__name__} has a touching time " 61 | "selection. This may lead to ambiguous results as two " 62 | f"{loop_over}'s may contain the same {k}, thereby a " 63 | f"given {k} can be included multiple times." 64 | ) 65 | window = 0 66 | if hasattr(self, "touching_window"): 67 | window = self.touching_window 68 | r = strax.split_touching_windows(things, base, window=window) 69 | else: 70 | raise RuntimeError("Unknown time_selection") 71 | if len(r) != len(base): 72 | raise RuntimeError(f"Split {k} into {len(r)}, should be {len(base)}!") 73 | kwargs[k] = r 74 | 75 | if self.multi_output: 76 | # This is the a-typical case. Most of the time you just have 77 | # one output. Just doing the same as below but this time we 78 | # need to create a dict for the outputs. 79 | # NB: both outputs will need to have the same length as the 80 | # base! 81 | results = {k: np.zeros(len(base), dtype=self.dtype[k]) for k in self.provides} 82 | deps_by_kind = self.dependencies_by_kind() 83 | 84 | for i, base_chunk in enumerate(base): 85 | res = self.compute_loop( 86 | base_chunk, **{k: kwargs[k][i] for k in deps_by_kind if k != loop_over} 87 | ) 88 | if not isinstance(res, (dict, immutabledict)): 89 | raise AttributeError("Please provide result in compute loop as dict") 90 | # Convert from dict to array row: 91 | for provides, r in res.items(): 92 | for k, v in r.items(): 93 | if np.shape(v) != np.shape(results[provides][i][k]): 94 | # Make sure that the buffer length as 95 | # defined by the base matches the output of 96 | # the compute argument. 97 | raise ValueError( 98 | f"{provides} returned an improper length array " 99 | f"that is not equal to the {loop_over} " 100 | "data-kind! Are you sure a LoopPlugin is the " 101 | "right Plugin for your application?" 102 | ) 103 | results[provides][i][k] = v 104 | else: 105 | # Normally you end up here were we are going to loop over 106 | # base and add the results to the right format. 
107 | results = np.zeros(len(base), dtype=self.dtype) 108 | deps_by_kind = self.dependencies_by_kind() 109 | 110 | for i, base_chunk in enumerate(base): 111 | r = self.compute_loop( 112 | base_chunk, **{k: kwargs[k][i] for k in deps_by_kind if k != loop_over} 113 | ) 114 | if not isinstance(r, (dict, immutabledict)): 115 | raise AttributeError("Please provide result in compute loop as dict") 116 | # Convert from dict to array row: 117 | for k, v in r.items(): 118 | results[i][k] = v 119 | return results 120 | 121 | def compute_loop(self, *args, **kwargs): 122 | raise NotImplementedError 123 | -------------------------------------------------------------------------------- /strax/processing/statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numba 3 | 4 | 5 | import strax 6 | from strax.sort_enforcement import stable_argsort, stable_sort 7 | 8 | export, __all__ = strax.exporter() 9 | 10 | 11 | @export 12 | @numba.njit(nogil=True, cache=True) 13 | def _compute_hdr_core(data, fractions_desired, only_upper_part=False, _buffer_size=10): 14 | """Core computation for highest density region initialization.""" 15 | fi = 0 16 | res = np.zeros((len(fractions_desired), 2, _buffer_size), dtype=np.int32) 17 | res_amp = np.zeros(len(fractions_desired), dtype=np.float32) 18 | 19 | area_tot = np.sum(data) 20 | if area_tot <= 0: 21 | raise ValueError( 22 | "Highest density regions are not defined for distributions " 23 | "with a total probability of less-equal 0." 24 | ) 25 | 26 | max_to_min = stable_argsort(data)[::-1] 27 | return max_to_min, area_tot, res, res_amp, fi 28 | 29 | 30 | @export 31 | @numba.njit(nogil=True, cache=True) 32 | def _process_intervals_numba(ind, gaps, fi, res, g0, _buffer_size): 33 | """Process intervals using numba. 34 | 35 | Args: 36 | ind: Sorted indices 37 | gaps: Gap indices 38 | fi: Current fraction index 39 | res: Result buffer 40 | g0: Start index 41 | _buffer_size: Maximum number of intervals 42 | 43 | Returns: 44 | tuple: (fi + 1, res) Updated fraction index and result buffer 45 | 46 | """ 47 | if len(gaps) > _buffer_size: 48 | res[fi, 0, :] = -1 49 | res[fi, 1, :] = -1 50 | return fi + 1, res 51 | 52 | g_ind = -1 53 | for g_ind, g in enumerate(gaps): 54 | interval = ind[g0:g] 55 | res[fi, 0, g_ind] = interval[0] 56 | res[fi, 1, g_ind] = interval[-1] + 1 57 | g0 = g 58 | 59 | interval = ind[g0:] 60 | res[fi, 0, g_ind + 1] = interval[0] 61 | res[fi, 1, g_ind + 1] = interval[-1] + 1 62 | return fi + 1, res 63 | 64 | 65 | @export 66 | @numba.njit(nogil=True, cache=True) 67 | def _compute_fraction_seen(data, max_to_min, j, lowest_sample_seen, area_tot, only_upper_part): 68 | """Compute fraction seen (numba-compilable part). 
69 | 70 | Args: 71 | data: Input distribution 72 | max_to_min: Sorted indices from max to min 73 | j: Current index 74 | lowest_sample_seen: Current lowest sample 75 | area_tot: Total area 76 | only_upper_part: If True, only compute area between max and current height 77 | 78 | Returns: 79 | tuple: (fraction_seen, sorted_data_max_to_j, actual_lowest) 80 | 81 | """ 82 | lowest_sample_seen *= int(only_upper_part) 83 | sorted_data_max_to_j = data[max_to_min[:j]] 84 | return ( 85 | np.sum(sorted_data_max_to_j - lowest_sample_seen) / area_tot, 86 | sorted_data_max_to_j, 87 | lowest_sample_seen, 88 | ) 89 | 90 | 91 | @export 92 | @numba.njit(nogil=True, cache=True) 93 | def _compute_true_height(sorted_data_sum, j, g, lowest_sample_seen): 94 | """Compute true height (numba-compilable part). 95 | 96 | Args: 97 | sorted_data_sum: Sum of sorted data 98 | j: Current index 99 | g: Fraction ratio 100 | lowest_sample_seen: Current lowest sample 101 | 102 | Returns: 103 | float: True height value 104 | 105 | """ 106 | return (1 - g) * sorted_data_sum / j + g * lowest_sample_seen 107 | 108 | 109 | @export 110 | def highest_density_region(data, fractions_desired, only_upper_part=False, _buffer_size=10): 111 | """Compute highest density region for a given sampled distribution. 112 | 113 | This function splits only the stable sort operation into Python, keeping all other 114 | computations numba-accelerated for maximum performance. 115 | 116 | Args: 117 | data: Sampled distribution 118 | fractions_desired: Area/probability for which HDR should be computed 119 | only_upper_part: If True, only compute area between max and current height 120 | _buffer_size: Size of result buffer (max number of allowed intervals) 121 | 122 | Returns: 123 | tuple: (res, res_amp) where res contains interval indices and res_amp contains 124 | amplitudes for desired fractions 125 | 126 | """ 127 | # Initialize using numba 128 | max_to_min, area_tot, res, res_amp, fi = _compute_hdr_core( 129 | data, fractions_desired, only_upper_part, _buffer_size 130 | ) 131 | 132 | lowest_sample_seen = np.inf 133 | for j in range(1, len(data)): 134 | if lowest_sample_seen == data[max_to_min[j]]: 135 | continue 136 | 137 | lowest_sample_seen = data[max_to_min[j]] 138 | 139 | # Compute fraction seen (numba) 140 | fraction_seen, sorted_data_max_to_j, actual_lowest = _compute_fraction_seen( 141 | data, max_to_min, j, lowest_sample_seen, area_tot, only_upper_part 142 | ) 143 | 144 | m = fractions_desired[fi:] <= fraction_seen 145 | if not np.any(m): 146 | continue 147 | 148 | for fraction_desired in fractions_desired[fi : fi + np.sum(m)]: 149 | g = fraction_desired / fraction_seen 150 | # Compute true height (numba) 151 | true_height = _compute_true_height(np.sum(sorted_data_max_to_j), j, g, actual_lowest) 152 | res_amp[fi] = true_height 153 | 154 | # Only stable_sort in Python mode 155 | with numba.objmode(ind="int64[:]"): 156 | ind = stable_sort(max_to_min[:j]) 157 | 158 | # Rest stays in numba mode 159 | gaps = np.arange(1, len(ind) + 1) 160 | diff = ind[1:] - ind[:-1] 161 | gaps = gaps[:-1][diff > 1] 162 | 163 | # Process intervals with numba 164 | fi, res = _process_intervals_numba(ind, gaps, fi, res, 0, _buffer_size) 165 | 166 | if fi == len(fractions_desired): 167 | return res, res_amp 168 | 169 | # Handle remaining fractions (in numba) 170 | res[fi:, 0, 0] = 0 171 | res[fi:, 1, 0] = len(data) 172 | for ind, fraction_desired in enumerate(fractions_desired[fi:]): 173 | res_amp[fi + ind] = (1 - fraction_desired) * np.sum(data) / len(data) 174 | 175 
| return res, res_amp 176 | -------------------------------------------------------------------------------- /strax/processing/peak_properties.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numba 3 | 4 | import strax 5 | 6 | export, __all__ = strax.exporter() 7 | 8 | 9 | @export 10 | @numba.njit(cache=True, nogil=True) 11 | def index_of_fraction(peaks, fractions_desired): 12 | """Return the (fractional) indices at which the peaks reach fractions_desired of their area. 13 | 14 | :param peaks: strax peak(let)s or other data-bearing dtype 15 | :param fractions_desired: array of floats between 0 and 1 16 | :return: (len(peaks), len(fractions_desired)) array of floats 17 | 18 | """ 19 | results = np.zeros((len(peaks), len(fractions_desired)), dtype=np.float32) 20 | 21 | for p_i, p in enumerate(peaks): 22 | if p["area"] <= 0: 23 | continue # TODO: These occur a lot. Investigate! 24 | compute_index_of_fraction(p, fractions_desired, results[p_i]) 25 | return results 26 | 27 | 28 | @export 29 | @numba.njit(nogil=True, cache=True) 30 | def compute_index_of_fraction(peak, fractions_desired, result): 31 | """Store the (fractional) indices at which peak reaches fractions_desired of their area in 32 | result. 33 | 34 | :param peak: single strax peak(let) or other data-bearing dtype 35 | :param fractions_desired: array of floats between 0 and 1 36 | :return: len(fractions_desired) array of floats 37 | 38 | """ 39 | area_tot = peak["area"] 40 | fraction_seen = 0 41 | current_fraction_index = 0 42 | needed_fraction = fractions_desired[current_fraction_index] 43 | for i, x in enumerate(peak["data"][: peak["length"]]): 44 | # How much of the area is in this sample? 45 | fraction_this_sample = x / area_tot 46 | 47 | # Are we passing any desired fractions in this sample? 48 | while fraction_seen + fraction_this_sample >= needed_fraction: 49 | area_needed = area_tot * (needed_fraction - fraction_seen) 50 | if x != 0: 51 | result[current_fraction_index] = i + area_needed / x 52 | else: 53 | result[current_fraction_index] = i 54 | 55 | # Advance to the next fraction 56 | current_fraction_index += 1 57 | if current_fraction_index > len(fractions_desired) - 1: 58 | break 59 | needed_fraction = fractions_desired[current_fraction_index] 60 | 61 | if current_fraction_index > len(fractions_desired) - 1: 62 | break 63 | 64 | # Add this sample's area to the area seen 65 | fraction_seen += fraction_this_sample 66 | 67 | if needed_fraction == 1: 68 | # Sometimes floating-point errors prevent the full area 69 | # from being reached before the waveform ends 70 | result[-1] = peak["length"] 71 | 72 | 73 | @export 74 | def compute_widths(peaks): 75 | """Compute widths in ns at desired area fractions for peaks. 76 | 77 | :param peaks: single strax peak(let) or other data-bearing dtype 78 | 79 | """ 80 | 81 | desired_widths = np.linspace(0, 1, len(peaks[0]["width"])) 82 | # 0% are width is 0 by definition, and it messes up the calculation below 83 | desired_widths = desired_widths[1:] 84 | 85 | # Which area fractions do we need times for? 
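    # For a width spanning a fraction w of the area (symmetric around the median),
    # we need the times at which area fractions 0.5 - w/2 and 0.5 + w/2 are reached.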
86 | desired_fr = np.concatenate([0.5 - desired_widths / 2, 0.5 + desired_widths / 2]) 87 | 88 | # We lose the 50% fraction with this operation, let's add it back 89 | desired_fr = strax.stable_sort(np.unique(np.append(desired_fr, [0.5]))) 90 | 91 | fr_times = index_of_fraction(peaks, desired_fr) 92 | fr_times *= peaks["dt"].reshape(-1, 1) 93 | 94 | i = len(desired_fr) // 2 95 | median_time = fr_times[:, i] 96 | width = fr_times[:, i:] - fr_times[:, ::-1][:, i:] 97 | area_decile_from_midpoint = fr_times[:, ::2] - fr_times[:, i].reshape(-1, 1) 98 | return median_time, width, area_decile_from_midpoint 99 | 100 | 101 | @numba.njit(cache=True, nogil=True) 102 | def compute_center_time(peaks): 103 | """Compute the center time of the peaks. 104 | 105 | :param peaks: single strax peak(let) or other data-bearing dtype 106 | 107 | """ 108 | center_time = np.zeros(len(peaks), dtype=np.int64) 109 | for p_i, p in enumerate(peaks): 110 | data = p["data"][: p["length"]] 111 | if data.sum() == 0.0: 112 | # Zero-area peaks have centertime at startime 113 | center_time[p_i] = p["time"] 114 | continue 115 | t = np.average(np.arange(p["length"]), weights=data) 116 | center_time[p_i] = (t + 1 / 2) * p["dt"] 117 | center_time[p_i] += p["time"] # converting from float to int, implicit floor 118 | center_time = np.clip(center_time, peaks["time"], strax.endtime(peaks)) 119 | return center_time 120 | 121 | 122 | @export 123 | @numba.njit(cache=True, nogil=True) 124 | def compute_area_fraction_top(peaks, n_top_channels): 125 | """Compute the area fraction top for peaks.""" 126 | area_fraction_top = np.zeros(len(peaks), dtype=np.float32) 127 | for peak_i in range(len(peaks)): 128 | p = peaks[peak_i] 129 | area_top = p["area_per_channel"][:n_top_channels].sum() 130 | # Non-positive-area peaks get NaN AFT 131 | if p["area"] > 0: 132 | area_fraction_top[peak_i] = area_top / p["area"] 133 | else: 134 | area_fraction_top[peak_i] = np.nan 135 | return area_fraction_top 136 | 137 | 138 | @export 139 | def compute_properties(peaks, n_top_channels=0, select_peaks_indices=None): 140 | """Compute properties: median_time, width, area_decile_from_midpoint, 141 | center_time, and area_fraction_top for peaks. 
142 | 143 | :param peaks: single strax peak(let) or other data-bearing dtype 144 | :param select_peaks_indices: array of integers informing which peaks to compute default to None 145 | in which case compute for all peaks 146 | 147 | """ 148 | if not len(peaks) or (select_peaks_indices is not None and not len(select_peaks_indices)): 149 | return 150 | 151 | if select_peaks_indices is None: 152 | select_peaks_indices = slice(None) 153 | 154 | median_time, width, area_decile_from_midpoint = compute_widths(peaks[select_peaks_indices]) 155 | peaks["median_time"][select_peaks_indices] = median_time 156 | peaks["width"][select_peaks_indices] = width 157 | peaks["area_decile_from_midpoint"][select_peaks_indices] = area_decile_from_midpoint 158 | 159 | center_time = compute_center_time(peaks[select_peaks_indices]) 160 | peaks["center_time"][select_peaks_indices] = center_time 161 | 162 | if n_top_channels > 0: 163 | area_fraction_top = compute_area_fraction_top(peaks[select_peaks_indices], n_top_channels) 164 | peaks["area_fraction_top"][select_peaks_indices] = area_fraction_top 165 | -------------------------------------------------------------------------------- /tests/test_peak_splitting.py: -------------------------------------------------------------------------------- 1 | import strax 2 | import numpy as np 3 | from hypothesis import given, settings, strategies 4 | 5 | 6 | def get_int_array(min_value=0, max_value=1, min_size=0, max_size=20) -> strategies.lists: 7 | """Get array with ints. 8 | 9 | :param min_value: min value of items in array 10 | :param max_value: max value of items in array 11 | :param min_size: min number of samples in array 12 | :param max_size: max number of samples in array 13 | :return: strategies.lists of integers of specified format 14 | 15 | """ 16 | return strategies.lists( 17 | strategies.integers(min_value=min_value, max_value=max_value), 18 | min_size=min_size, 19 | max_size=max_size, 20 | ) 21 | 22 | 23 | def get_float_array(min_value=0, max_value=1, min_size=0, max_size=20): 24 | """Get array with floats. 25 | 26 | :param min_value: min value of items in array 27 | :param max_value: max value of items in array 28 | :param min_size: min number of samples in array 29 | :param max_size: max number of samples in array 30 | :return: strategies.lists of floats of specified format 31 | 32 | """ 33 | return strategies.lists( 34 | strategies.floats(min_value=min_value, max_value=max_value), 35 | min_size=min_size, 36 | max_size=max_size, 37 | ) 38 | 39 | 40 | @given( 41 | get_float_array(), 42 | get_int_array(max_value=100), 43 | get_float_array(min_size=20, max_size=150, max_value=100), 44 | ) 45 | @settings(deadline=None) 46 | def test_local_minimum(min_heights, min_ratios, w): 47 | """See _test_splitter_inner.""" 48 | _test_splitter_inner(min_heights, min_ratios, w, "natural_breaks") 49 | 50 | 51 | @given( 52 | get_float_array(), 53 | get_int_array(max_value=100), 54 | get_float_array(min_size=20, max_size=150, max_value=100), 55 | ) 56 | @settings(deadline=None) 57 | def test_natural_breaks(min_heights, min_ratios, w): 58 | """See _test_splitter_inner.""" 59 | _test_splitter_inner(min_heights, min_ratios, w, "local_minimum") 60 | 61 | 62 | def _test_splitter_inner(min_heights, min_ratios, waveform, splitter): 63 | """Test the specified splitting algorithm. 
64 | 
65 |     :param min_heights: list of the minimum heights of the peaks to have a split
66 |     :param min_ratios: list of the ratios of the peaks to have a split
67 |     :param waveform: list of waveform samples (will be converted to an array)
68 |     :param splitter: either 'local_minimum' or 'natural_breaks'
69 | 
70 |     """
71 |     test_splitter = {
72 |         "local_minimum": strax.processing.peak_splitting.LocalMinimumSplitter(),
73 |         "natural_breaks": strax.processing.peak_splitting.NaturalBreaksSplitter(),
74 |     }.get(splitter, None)
75 |     print(f"Testing {splitter}")
76 |     if test_splitter is None:
77 |         raise NotImplementedError(f"Unknown splitter {splitter}")
78 | 
79 |     NO_MORE_SPLITS = strax.processing.peak_splitting.NO_MORE_SPLITS
80 | 
81 |     # mimic a peak
82 |     waveform = np.array(waveform)
83 | 
84 |     for min_height, min_ratio in zip(min_heights, min_ratios):
85 |         # Split according to the different splitters
86 |         if splitter == "local_minimum":
87 |             my_splits = test_splitter.find_split_points(
88 |                 waveform, dt=None, peak_i=None, min_height=min_height, min_ratio=min_ratio
89 |             )
90 |         elif splitter == "natural_breaks":
91 |             # Use min-height here as threshold (>1 meaningless)
92 |             threshold = np.array([min_height])
93 |             my_splits = test_splitter.find_split_points(
94 |                 waveform,
95 |                 dt=1,
96 |                 peak_i=np.int64(0),
97 |                 threshold=threshold,
98 |                 normalize=0,
99 |                 split_low=0,
100 |                 filter_wing_width=0,
101 |             )
102 | 
103 |         my_splits = np.array(list(my_splits))
104 | 
105 |         assert len(my_splits) >= 1
106 |         # get left and right from found splits
107 |         split_checks = [(int(split - 1), int(split + 1), int(split)) for split in my_splits[:, 0]]
108 | 
109 |         # discard last two split-entries if they exist
110 |         # they are len(waveform) and NO_MORE_SPLITS --> nothing to test
111 |         split_checks = split_checks[:-2]
112 | 
113 |         # This check does not have to hold for the natural breaks
114 |         # algorithm, as it uses a moving average
115 |         if splitter == "local_minimum":
116 |             # check that the samples left and right of the split index are larger or equal
117 |             for left, right, split in split_checks:
118 |                 assert waveform[left] >= waveform[split]
119 |                 assert waveform[right] >= waveform[split]
120 | 
121 |         assert len(my_splits) <= int(len(waveform) / 2) + 1
122 |         assert min(my_splits[:, 0]) == NO_MORE_SPLITS
123 |         assert my_splits[-1, 0] == NO_MORE_SPLITS
124 | 
125 | 
126 | def test_splitter_outer():
127 |     data = [0, 2, 2, 0, 2, 2, 1]
128 |     records = np.zeros(1, dtype=strax.record_dtype(len(data)))
129 |     records["dt"] = 1
130 |     records["data"] = data
131 |     records["length"] = len(data)
132 |     records["pulse_length"] = len(data)
133 |     to_pe = np.ones(10)
134 | 
135 |     hits = strax.find_hits(records, np.ones(1))
136 |     hits["left_integration"] = hits["left"]
137 |     hits["right_integration"] = hits["right"]
138 |     peaks = np.zeros(1, dtype=strax.peak_dtype())
139 |     hitlets = np.zeros(1, dtype=strax.hitlet_with_data_dtype(10))
140 |     for data_type in (peaks, hitlets):
141 |         data_type["dt"] = 1
142 |         data_type["data"][0, : len(data)] = data
143 |         data_type["length"] = len(data)
144 | 
145 |     rlinks = strax.record_links(records)
146 |     peaks = strax.split_peaks(
147 |         peaks,
148 |         hits,
149 |         records,
150 |         rlinks,
151 |         to_pe,
152 |         algorithm="local_minimum",
153 |         data_type="peaks",
154 |         min_height=1,
155 |         min_ratio=0,
156 |     )
157 | 
158 |     hitlets = strax.split_peaks(
159 |         hitlets,
160 |         hits,
161 |         records,
162 |         rlinks,
163 |         to_pe,
164 |         algorithm="local_minimum",
165 |         data_type="hitlets",
166 |         min_height=1,
167 |         min_ratio=0,
168 |     )
169 | 
170 |     for name, data_type in
zip(("peaks", "hitlets"), (peaks, hitlets)): 171 | data = data_type[0]["data"][: data_type[0]["length"]] 172 | assert np.all( 173 | data == [0, 2, 2] 174 | ), f"Wrong split for {name}, got {data}, expected {[0, 2, 2]}." 175 | data = data_type[1]["data"][: data_type[1]["length"]] 176 | assert np.all( 177 | data == [0, 2, 2, 1] 178 | ), f"Wrong split for {name}, got {data}, expected {[0, 2, 2, 1]}." 179 | -------------------------------------------------------------------------------- /strax/plugins/overlap_window_plugin.py: -------------------------------------------------------------------------------- 1 | import strax 2 | from .plugin import Plugin 3 | 4 | export, __all__ = strax.exporter() 5 | 6 | 7 | @export 8 | class OverlapWindowPlugin(Plugin): 9 | """Plugin whose computation depends on having its inputs extend a certain window on both sides. 10 | 11 | Current implementation assumes: 12 | - All inputs are sorted by *endtime*. Since everything in strax is sorted 13 | by time, this only works for disjoint intervals such as peaks or events, 14 | but NOT records! 15 | - You must read time info for your data kind, or create a new data kind. 16 | 17 | """ 18 | 19 | parallel = False 20 | max_trials = 10 21 | 22 | def __init__(self): 23 | super().__init__() 24 | self.cached_input = {} 25 | self.init_cached_results() 26 | self.sent_until = 0 27 | if self.clean_chunk_after_compute: 28 | raise ValueError( 29 | "OverlapWindowPlugin cannot clean chunks after compute because you need them later." 30 | ) 31 | # This guy can have a logger, it's not parallelized anyway 32 | 33 | def get_window_size(self): 34 | """Return the required window size in nanoseconds.""" 35 | raise NotImplementedError 36 | 37 | def _get_window_size(self): 38 | window_size = self.get_window_size() 39 | if isinstance(window_size, (int, float)): 40 | return window_size, window_size 41 | elif isinstance(window_size, (list, tuple)) and len(window_size) == 2: 42 | if window_size[0] < 0 or window_size[1] < 0: 43 | raise ValueError("Window size elements must be non-negative") 44 | return window_size 45 | else: 46 | raise ValueError( 47 | "Window size must be an integer(float) or a tuple of two integer(float)s" 48 | ) 49 | 50 | def init_cached_results(self): 51 | if self.multi_output: 52 | self.cached_results = {} 53 | else: 54 | self.cached_results = None 55 | 56 | def iter(self, iters, executor=None): 57 | yield from super().iter(iters, executor=executor) 58 | 59 | # Yield final results, kept at bay in fear of a new chunk 60 | yield self.cached_results 61 | 62 | def do_compute(self, chunk_i=None, **kwargs): 63 | if not len(kwargs): 64 | raise RuntimeError("OverlapWindowPlugin must have a dependency") 65 | 66 | # Add cached inputs to compute arguments 67 | for data_kind, chunk in kwargs.items(): 68 | if len(self.cached_input): 69 | kwargs[data_kind] = strax.Chunk.concatenate( 70 | [self.cached_input[data_kind], chunk], self.allow_superrun 71 | ) 72 | 73 | # When does this batch of inputs end? 74 | ends = [c.end for c in kwargs.values()] 75 | if not len(set(ends)) == 1: 76 | raise RuntimeError(f"OverlapWindowPlugin got incongruent inputs: {kwargs}") 77 | end = ends[0] 78 | 79 | window_size = self._get_window_size() 80 | # When can we no longer trust our results? 
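        # Results closer than one (right) window to the chunk end may still change
        # once input from the next chunk arrives, so they are cached instead of sent.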
81 |         # Take slightly larger windows for safety: it is very easy for me
82 |         # (or the user) to have made an off-by-one error
83 |         invalid_beyond = int(end - 2 * window_size[1] - 1)
84 | 
85 |         # Compute new results
86 |         result = super().do_compute(chunk_i=chunk_i, **kwargs)
87 | 
88 |         # Throw away results we already sent out.
89 |         # No error is raised here even though allow_early_split=False,
90 |         # because result.split(t=invalid_beyond, allow_early_split=True) tunes
91 |         # sent_until such that it does not overlap with result and
92 |         # sent_until <= invalid_beyond
93 |         if self.multi_output:
94 |             # when multi_output=True, the result is a dict
95 |             for data_type in result:
96 |                 result[data_type] = result[data_type].split(
97 |                     t=self.sent_until, allow_early_split=False
98 |                 )[1]
99 |         else:
100 |             result = result.split(t=self.sent_until, allow_early_split=False)[1]
101 | 
102 |         # Prepare to send out valid results, cache the rest
103 |         # Do not modify result anymore after these lines
104 |         # Note result.end <= invalid_beyond, with equality if there are no overlaps
105 |         if self.multi_output:
106 |             prev_split = self.cache_beyond(result, invalid_beyond, self.cached_results)
107 |             for data_type in result:
108 |                 result[data_type], self.cached_results[data_type] = result[data_type].split(
109 |                     t=prev_split, allow_early_split=True
110 |                 )
111 |             if len(set([c.start for c in self.cached_results.values()])) != 1:
112 |                 raise ValueError("Output start time inconsistency has not been resolved?")
113 |             self.sent_until = prev_split
114 |         else:
115 |             result, self.cached_results = result.split(t=invalid_beyond, allow_early_split=True)
116 |             self.sent_until = self.cached_results.start
117 | 
118 |         # Cache a necessary amount of input for next time
119 |         # Again, take a bit of overkill for good measure
120 |         # cache_inputs_beyond is smaller than sent_until
121 |         cache_inputs_beyond = int(self.sent_until - 2 * window_size[0] - 1)
122 | 
123 |         # Cache inputs, make sure that the chunks start at the same time to
124 |         # prevent issues in input buffers later on
125 |         self.cache_beyond(kwargs, cache_inputs_beyond, self.cached_input)
126 |         return result
127 | 
128 |     def cache_beyond(self, io, prev_split, cached):
129 |         original_prev_split = prev_split
130 |         for try_counter in range(self.max_trials):
131 |             for data, chunk in io.items():
132 |                 # data here can be either a data_kind or a data_type
133 |                 # do not temporarily modify result here because it will be used later;
134 |                 # keep its original value!
135 |                 cached[data] = chunk.split(t=prev_split, allow_early_split=True)[1]
136 |                 prev_split = cached[data].start
137 |             unique_starts = set([c.start for c in cached.values()])
138 |             if len(unique_starts) == 1:
139 |                 self.log.debug(
140 |                     f"Success after {try_counter}. "
141 |                     f"Extra time is {original_prev_split - prev_split} ns"
142 |                 )
143 |                 break
144 |             else:
145 |                 self.log.debug(
146 |                     f"Inconsistent start times of the cached chunks {io} after"
147 |                     f" {try_counter}/{self.max_trials} passes."
148 | ) 149 | else: 150 | raise ValueError( 151 | f"Buffer start time inconsistency cannot be resolved after {self.max_trials} tries" 152 | ) 153 | return prev_split 154 | -------------------------------------------------------------------------------- /strax/io.py: -------------------------------------------------------------------------------- 1 | """Read/write numpy arrays to/from compressed files or file-like objects.""" 2 | 3 | import os 4 | import bz2 5 | import json 6 | 7 | import numpy as np 8 | import blosc 9 | import zstd 10 | import zstandard 11 | import lz4.frame as lz4 12 | from ast import literal_eval 13 | 14 | import strax 15 | from strax import RUN_METADATA_PATTERN 16 | 17 | export, __all__ = strax.exporter() 18 | __all__.extend(["DECOMPRESS_BUFFER_SIZE"]) 19 | 20 | DECOMPRESS_BUFFER_SIZE = 64 * 1024 * 1024 # 64 MB 21 | 22 | # use tqdm as loaded in utils (from tqdm.notebook when in a jupyter env) 23 | tqdm = strax.utils.tqdm 24 | 25 | blosc.set_releasegil(True) 26 | blosc.set_nthreads(1) 27 | 28 | 29 | def _bz2_decompress(f, buffer_size=DECOMPRESS_BUFFER_SIZE): 30 | decompressor = bz2.BZ2Decompressor() 31 | data = bytearray() # Efficient mutable storage 32 | for d in iter(lambda: f.read(buffer_size), b""): 33 | data.extend(decompressor.decompress(d)) 34 | return data 35 | 36 | 37 | # zstd's default compression level is 3: 38 | # https://github.com/sergey-dryabzhinsky/python-zstd/blob/eba9e633e0bc0e9c9762c985d0433e08405fd097/src/python-zstd.h#L53 39 | # we also need to constraint the number of worker threads to 1 40 | # https://github.com/sergey-dryabzhinsky/python-zstd/blob/eba9e633e0bc0e9c9762c985d0433e08405fd097/src/python-zstd.h#L98 41 | _zstd_compress = lambda data: zstd.compress(data, 3, 1) 42 | 43 | 44 | def _zstd_decompress(f, chunk_size=64 * 1024 * 1024): 45 | decompressor = zstandard.ZstdDecompressor().decompressobj() 46 | data = bytearray() # Efficient mutable storage 47 | for d in iter(lambda: f.read(chunk_size), b""): 48 | data.extend(decompressor.decompress(d)) 49 | return data 50 | 51 | 52 | def _blosc_compress(data): 53 | if data.nbytes >= blosc.MAX_BUFFERSIZE: 54 | raise ValueError("Blosc's input buffer cannot exceed ~2 GB") 55 | return blosc.compress(data, shuffle=False) 56 | 57 | 58 | def _blosc_decompress(f): 59 | data = f.read() 60 | data = blosc.decompress(data) 61 | return data 62 | 63 | 64 | def _lz4_decompress(f, buffer_size=DECOMPRESS_BUFFER_SIZE): 65 | decompressor = lz4.LZ4FrameDecompressor() 66 | data = bytearray() # Efficient mutable storage 67 | for d in iter(lambda: f.read(buffer_size), b""): 68 | data.extend(decompressor.decompress(d)) 69 | return data 70 | 71 | 72 | COMPRESSORS = dict( 73 | bz2=dict(compress=bz2.compress, decompress=bz2.decompress, _decompress=_bz2_decompress), 74 | zstd=dict(compress=_zstd_compress, decompress=zstd.decompress, _decompress=_zstd_decompress), 75 | blosc=dict( 76 | compress=_blosc_compress, decompress=blosc.decompress, _decompress=_blosc_decompress 77 | ), 78 | lz4=dict(compress=lz4.compress, decompress=lz4.decompress, _decompress=_lz4_decompress), 79 | ) 80 | 81 | 82 | @export 83 | def load_file(f, compressor, dtype): 84 | """Read and return data from file. 85 | 86 | :param f: file name or handle to read from 87 | :param compressor: compressor to use for decompressing. If not passed, will try to load it from 88 | json metadata file. 
89 | :param dtype: numpy dtype of data to load 90 | 91 | """ 92 | if isinstance(f, str): 93 | with open(f, mode="rb") as write_file: 94 | return _load_file(write_file, compressor, dtype) 95 | else: 96 | return _load_file(f, compressor, dtype) 97 | 98 | 99 | def _load_file(f, compressor, dtype): 100 | try: 101 | data = COMPRESSORS[compressor]["_decompress"](f) 102 | if not len(data): 103 | return np.zeros(0, dtype=dtype) 104 | try: 105 | return np.frombuffer(data, dtype=dtype) 106 | except ValueError as e: 107 | raise ValueError(f"ValueError while loading data with dtype =\n\t{dtype}") from e 108 | 109 | except Exception: 110 | raise strax.DataCorrupted( 111 | f"Fatal Error while reading file {f}: " + strax.utils.formatted_exception() 112 | ) 113 | 114 | 115 | @export 116 | def save_file(f, data, compressor="zstd"): 117 | """Save data to file and return number of bytes written. 118 | 119 | :param f: file name or handle to save to 120 | :param data: data (numpy array) to save 121 | :param compressor: compressor to use 122 | 123 | """ 124 | if isinstance(f, str): 125 | final_fn = f 126 | temp_fn = f + "_temp" 127 | with open(temp_fn, mode="wb") as write_file: 128 | result = _save_file(write_file, data, compressor) 129 | os.rename(temp_fn, final_fn) 130 | return result 131 | else: 132 | return _save_file(f, data, compressor) 133 | 134 | 135 | def _save_file(f, data, compressor="zstd"): 136 | assert isinstance(data, np.ndarray), "Please pass a numpy array" 137 | d_comp = COMPRESSORS[compressor]["compress"](data) 138 | f.write(d_comp) 139 | return len(d_comp) 140 | 141 | 142 | @export 143 | def dry_load_files(dirname, chunk_numbers=None, disable=False, **kwargs): 144 | prefix = strax.storage.files.dirname_to_prefix(dirname) 145 | metadata_json = RUN_METADATA_PATTERN % prefix 146 | md_path = os.path.join(dirname, metadata_json) 147 | 148 | with open(md_path, mode="r") as f: 149 | metadata = json.loads(f.read()) 150 | 151 | dtype = literal_eval(metadata["dtype"]) 152 | 153 | def load_chunk(chunk_info): 154 | if chunk_info["n"] != 0: 155 | data = load_file( 156 | os.path.join(dirname, f"{prefix}-{chunk_info['chunk_i']:06d}"), 157 | metadata["compressor"], 158 | dtype, 159 | ) 160 | if len(data) != chunk_info["n"]: 161 | raise ValueError( 162 | f"Chunk {chunk_info['chunk_i']:06d} has {len(data)} " 163 | f"items, but metadata says {chunk_info['n']}." 164 | ) 165 | else: 166 | data = np.empty(0, dtype) 167 | return data 168 | 169 | # Load all chunks if chunk_numbers is None, otherwise load the specified chunk 170 | if chunk_numbers is None: 171 | chunk_numbers = list(range(len(metadata["chunks"]))) 172 | else: 173 | if not isinstance(chunk_numbers, (int, list, tuple)): 174 | raise ValueError( 175 | f"Chunk number must be int, list, or tuple, not {type(chunk_numbers)}." 
176 |             )
177 |         chunk_numbers = (
178 |             chunk_numbers if isinstance(chunk_numbers, (list, tuple)) else [chunk_numbers]
179 |         )
180 |         if max(chunk_numbers) >= len(metadata["chunks"]):
181 |             raise ValueError(f"Chunk {max(chunk_numbers):06d} does not exist in {dirname}.")
182 | 
183 |     results = []
184 |     for c in tqdm(chunk_numbers, disable=disable):
185 |         chunk_info = metadata["chunks"][c]
186 |         x = load_chunk(chunk_info)
187 |         x = strax.apply_selection(x, **kwargs)
188 |         results.append(x)
189 | 
190 |     # No need to hstack if only one chunk is loaded
191 |     if len(results) == 1:
192 |         results = results[0]
193 |     else:
194 |         results = np.hstack(results)
195 |     return results if len(results) else np.empty(0, dtype)
196 | 
--------------------------------------------------------------------------------
/docs/source/advanced/recompression.rst:
--------------------------------------------------------------------------------
1 | Recompressing & moving data
2 | ===========================
3 | There are two options for recompressing data:
4 | - via the context :py:func:`context.copy_to_frontend`
5 | - via a dedicated script ``rechunker`` that only works for filesystem backends and works outside the context.
6 | 
7 | In order to recompress data with another compression algorithm the
8 | :py:func:`context.copy_to_frontend` function can be used.
9 | The function works on a per run_id-, per datatype- basis. In the example
10 | below, records data is copied to a second frontend.
11 | 
12 | 
13 | .. code-block:: python
14 | 
15 |     import strax
16 |     import os
17 |     # Naturally, these plugins (Records and Peaks) only serve as examples
18 |     # and are best replaced by a fully constructed context
19 |     from strax.testutils import Records, Peaks, run_id
20 | 
21 |     # Initialize context (st):
22 |     st = strax.Context(register=[Records, Peaks])
23 | 
24 |     # Initialize frontends
25 |     storage_frontend_A = strax.DataDirectory('./folder_A')
26 |     storage_frontend_B = strax.DataDirectory('./folder_B',
27 |                                              readonly=True)
28 |     st.storage = [storage_frontend_A,
29 |                   storage_frontend_B]
30 | 
31 |     # In this example, we will only consider records
32 |     target = "records"
33 | 
34 |     print(f'Are records stored?\n{st.is_stored(run_id, target)}')
35 | 
36 |     # Make the data (stores to every frontend available)
37 |     st.get_array(run_id, 'records')
38 | 
39 |     for sf in st.storage:
40 |         print(f'{target} stored in\n\t{sf}?\n\t{st._is_stored_in_sf(run_id, target, sf)}')
41 | 
42 | Which prints:
43 | 
44 | .. code-block:: rst
45 | 
46 |     Are records stored?
47 |     False
48 |     records stored in
49 |     strax.storage.files.DataDirectory, path: ./folder_A?
50 |     True
51 |     records stored in
52 |     strax.storage.files.DataDirectory, readonly: True, path: ./folder_B?
53 |     False
54 | 
55 | Copy
56 | ____
57 | In the example above, `storage_frontend_B` was readonly; therefore,
58 | no data was stored there when the records were created.
59 | Below, we will copy the data from `storage_frontend_A` to
60 | `storage_frontend_B`.
61 | 
62 | .. code-block:: python
63 | 
64 |     # First set storage_frontend_B to readonly=False such that we can copy
65 |     # data there
66 |     storage_frontend_B.readonly = False
67 | 
68 |     # In the st.storage-list, storage_frontend_B is index 1
69 |     index_frontend_B = 1
70 |     st.copy_to_frontend(run_id, target,
71 |                         target_frontend_id=index_frontend_B)
72 | 
73 |     for sf in [storage_frontend_A, storage_frontend_B]:
74 |         print(f'{target} stored in\n\t{sf}?\n\t{st._is_stored_in_sf(run_id, target, sf)}')
75 | 
76 | 
77 | Which prints the following (so we can see that the copy to `folder_B`
78 | was successful):
79 | 
80 | .. code-block:: rst
81 | 
82 |     records stored in
83 |     strax.storage.files.DataDirectory, path: ./folder_A?
84 |     True
85 |     records stored in
86 |     strax.storage.files.DataDirectory, path: ./folder_B?
87 |     True
88 | 
89 | Copy and recompress
90 | ___________________
91 | Now, with a third storage frontend, we will recompress the data to
92 | reduce the size on disk.
93 | 
94 | .. code-block:: python
95 | 
96 |     # Recompression with a different compressor
97 |     # See strax.io.COMPRESSORS for more compressors
98 |     target_compressor = 'bz2'
99 | 
100 |     # Add the extra storage frontend
101 |     index_frontend_C = 2
102 |     storage_frontend_C = strax.DataDirectory('./folder_C')
103 |     st.storage.append(storage_frontend_C)
104 | 
105 |     # Copy and recompress
106 |     st.copy_to_frontend(run_id, target,
107 |                         target_frontend_id=index_frontend_C,
108 |                         target_compressor=target_compressor)
109 | 
110 |     for sf in st.storage:
111 |         first_chunk = os.path.join(sf.path,
112 |                                    '0-records-sqcyyhsfpv',
113 |                                    'records-sqcyyhsfpv-000000')
114 |         print(f'In {sf.path}, the first chunk is {os.path.getsize(first_chunk)} kB')
115 | 
116 | Which outputs:
117 | 
118 | .. code-block:: rst
119 | 
120 |     In ./folder_A, the first chunk is 275 kB
121 |     In ./folder_B, the first chunk is 275 kB
122 |     In ./folder_C, the first chunk is 65 kB
123 | 
124 | From the output we can see that the first chunk in folder_C is much
125 | smaller than in folder_A/folder_B. This comes
126 | from the fact that `bz2` compresses the data much more than the default
127 | compressor `blosc`.
128 | 
129 | How does this work?
130 | ___________________
131 | Strax knows from the metadata stored with the data which compressor
132 | the data was written with. It is possible to use a different
133 | compressor when re-writing the data to disk (as was done for
134 | folder_C in the example above).
135 | 
136 | 
137 | 
138 | As such, for further use, it does not matter if the data is coming from
139 | either of folders folder_A-folder_C as the metadata will tell strax
140 | which compressor to use. Different compressors may have different
141 | performance for loading/writing data.
142 | 
143 | Rechunker script
144 | ================
145 | From strax v1.2.2 onwards, a ``rechunker`` script is automatically installed with strax.
146 | It can be used to re-write data in the ``FileSystem`` backend.
147 | 
148 | 
149 | For example:
150 | 
151 | .. code-block:: bash
152 | 
153 |     rechunker --source 009104-raw_records_aqmon-rfzvpzj4mf --compressor zstd
154 | 
155 | will output:
156 | 
157 | 
158 | .. 
code-block:: rst 159 | 160 | Will write to /tmp/tmpoj0xpr78 and make sub-folder 009104-raw_records_aqmon-rfzvpzj4mf 161 | Rechunking 009104-raw_records_aqmon-rfzvpzj4mf to /tmp/tmpoj0xpr78/009104-raw_records_aqmon-rfzvpzj4mf 162 | move /tmp/tmpoj0xpr78/009104-raw_records_aqmon-rfzvpzj4mf to 009104-raw_records_aqmon-rfzvpzj4mf 163 | Re-compressed 009104-raw_records_aqmon-rfzvpzj4mf 164 | backend_key 009104-raw_records_aqmon-rfzvpzj4mf 165 | load_time 0.4088103771209717 166 | write_time 0.07699322700500488 167 | uncompressed_mb 1.178276 168 | source_compressor zstd 169 | dest_compressor zstd 170 | source_mb 0.349217 171 | dest_mb 0.349218 172 | 173 | Using script to profile write/read rates for compressors 174 | -------------------------------------------------------- 175 | This script can easily be used to profile different compressors: 176 | 177 | .. code-block:: bash 178 | 179 | for COMPRESSOR in zstd bz2 lz4 blosc zstd; \ 180 | do echo $COMPRESSOR; \ 181 | rechunker \ 182 | --source 009104-raw_records-rfzvpzj4mf \ 183 | --write_stats_to test.csv \ 184 | --compressor $COMPRESSOR; \ 185 | done 186 | 187 | We can check the output in python using: 188 | 189 | .. code-block:: python 190 | 191 | >>> import pandas as pd 192 | >>> df = pd.read_csv('test.csv') 193 | >>> df['read_mbs'] = df['uncompressed_mb']/df['load_time'] 194 | >>> df['write_mbs'] = df['uncompressed_mb']/df['write_time'] 195 | >>> print(df[['source_compressor', 'read_mbs', 'dest_compressor', 'write_mbs']].to_string()) 196 | source_compressor read_mbs dest_compressor write_mbs 197 | 0 zstd 313.922890 zstd 298.429123 198 | 1 zstd 284.530054 bz2 8.932259 199 | 2 bz2 20.289876 lz4 228.932498 200 | 3 lz4 372.491150 blosc 433.494794 201 | 4 blosc 725.154966 zstd 215.765177 202 | -------------------------------------------------------------------------------- /strax/plugins/parrallel_source_plugin.py: -------------------------------------------------------------------------------- 1 | import strax 2 | from .plugin import Plugin 3 | 4 | export, __all__ = strax.exporter() 5 | 6 | 7 | @export 8 | class ParallelSourcePlugin(Plugin): 9 | """An plugin that inlines the computations of other plugins and the saving of their results. 10 | 11 | This evades data transfer (pickling and/or memory copy) penalties while multiprocessing. 12 | 13 | """ 14 | 15 | parallel = "process" 16 | # should we set this here? 17 | input_timeout = 300 18 | 19 | @classmethod 20 | def inline_plugins(cls, components, start_from, log): 21 | plugins = components.plugins.copy() 22 | loader_plugins = components.loader_plugins.copy() 23 | log.debug(f"Try to inline plugins starting from {start_from}") 24 | 25 | sub_plugins = {start_from: plugins[start_from]} 26 | del plugins[start_from] 27 | 28 | # Gather all plugins that do not rechunk and which branch out as a 29 | # simple tree from the input plugin. 30 | # We'll run these all together in one process. 31 | while True: 32 | # Scan for plugins we can inline 33 | for p in plugins.values(): 34 | if p.parallel and all([d in sub_plugins for d in p.depends_on]): 35 | for d in p.provides: 36 | sub_plugins[d] = p 37 | if d in plugins: 38 | del plugins[d] 39 | # Rescan 40 | break 41 | else: 42 | # No more plugins we can inline 43 | break 44 | log.debug(f"Trying to inline the following sub-plugins: {sub_plugins}") 45 | if len(set(list(sub_plugins.values()))) == 1: 46 | # Just one plugin to inline: no use 47 | log.debug("Just one plugin to inline: skipping") 48 | return components 49 | 50 | # Which data types should we output? 
Three cases follow. 51 | outputs_to_send = set() 52 | 53 | # Case 1. Requested as a final target 54 | for p in sub_plugins.values(): 55 | outputs_to_send.update(set(components.targets).intersection(set(p.provides))) 56 | # Case 2. Requested by a plugin we did not inline 57 | for d, p in plugins.items(): 58 | outputs_to_send.update(set(p.depends_on)) 59 | outputs_to_send &= sub_plugins.keys() 60 | 61 | # Inline savers that do not require rechunking 62 | savers = components.savers 63 | sub_savers = dict() 64 | for p in sub_plugins.values(): 65 | for d in p.provides: 66 | if d not in savers: 67 | continue 68 | if p.can_rechunk(d): 69 | # Case 3. has a saver we can't inline 70 | outputs_to_send.add(d) 71 | continue 72 | 73 | remaining_savers = [] 74 | for s_i, s in enumerate(savers[d]): 75 | if not s.allow_fork: 76 | # Case 3 again, cannot inline saver 77 | outputs_to_send.add(d) 78 | remaining_savers.append(s) 79 | continue 80 | if d not in sub_savers: 81 | sub_savers[d] = [] 82 | s.is_forked = True 83 | sub_savers[d].append(s) 84 | savers[d] = remaining_savers 85 | 86 | if not len(savers[d]): 87 | del savers[d] 88 | 89 | p = cls(depends_on=sub_plugins[start_from].depends_on) 90 | p.run_id = sub_plugins[start_from]._run_id 91 | p.sub_plugins = sub_plugins 92 | assert len(outputs_to_send) 93 | p.provides = tuple(outputs_to_send) 94 | p.sub_savers = sub_savers 95 | p.start_from = start_from 96 | if p.multi_output: 97 | p.dtype = {} 98 | for d in outputs_to_send: 99 | if d in p.sub_plugins: 100 | p.dtype[d] = p.sub_plugins[d].dtype_for(d) 101 | else: 102 | log.debug(f"Finding plugin that provides {d}") 103 | # Need to do some more work to get the plugin that 104 | # provides this data-type. 105 | for sp in p.sub_plugins.values(): 106 | if d in sp.provides: 107 | log.debug(f"{sp} provides {d}") 108 | p.dtype[d] = sp.dtype_for(d) 109 | break 110 | else: 111 | to_send = list(outputs_to_send)[0] 112 | p.dtype = p.sub_plugins[to_send].dtype_for(to_send) 113 | for d in p.provides: 114 | plugins[d] = p 115 | 116 | log.debug(f"Trying to find plugins for dependencies: {p.depends_on}") 117 | 118 | p.deps = { 119 | d: plugins[d] if plugins.get(d, None) else loader_plugins[d] for d in p.depends_on 120 | } 121 | 122 | log.debug(f"Inlined plugins: {p.sub_plugins}.Inlined savers: {p.sub_savers}") 123 | 124 | return strax.ProcessorComponents( 125 | plugins, components.loaders, components.loader_plugins, savers, components.targets 126 | ) 127 | 128 | def __init__(self, depends_on): 129 | self.depends_on = depends_on 130 | super().__init__() 131 | 132 | def source_finished(self): 133 | return self.sub_plugins[self.start_from].source_finished() 134 | 135 | def is_ready(self, chunk_i): 136 | return self.sub_plugins[self.start_from].is_ready(chunk_i) 137 | 138 | def do_compute(self, chunk_i=None, **kwargs): 139 | results = kwargs 140 | 141 | # Run the different plugin computations 142 | while True: 143 | for output_name, p in self.sub_plugins.items(): 144 | if output_name in results: 145 | continue 146 | if any([d not in results for d in p.depends_on]): 147 | continue 148 | compute_kwargs = dict(chunk_i=chunk_i) 149 | 150 | for kind, d_of_kind in p.dependencies_by_kind().items(): 151 | compute_kwargs[kind] = strax.Chunk.merge([results[d] for d in d_of_kind]) 152 | 153 | # Store compute result(s) 154 | r = p.do_compute(**compute_kwargs) 155 | if p.multi_output: 156 | for d in r: 157 | results[d] = r[d] 158 | else: 159 | results[output_name] = r 160 | 161 | # Rescan plugins to see if we can compute anything more 162 | 
break 163 | 164 | else: 165 | # Nothing further to compute 166 | break 167 | for d in self.provides: 168 | assert d in results, f"Output {d} missing!" 169 | 170 | # Save anything we can through the inlined savers 171 | for d, savers in self.sub_savers.items(): 172 | for s in savers: 173 | s.save(chunk=results[d], chunk_i=chunk_i) 174 | 175 | # Remove results we do not need to send 176 | for d in list(results.keys()): 177 | if d not in self.provides: 178 | del results[d] 179 | 180 | if self.multi_output: 181 | for k in self.provides: 182 | assert k in results 183 | assert isinstance(results[k], strax.Chunk) 184 | r0 = results[k] 185 | else: 186 | results = r0 = results[self.provides[0]] 187 | assert isinstance(r0, strax.Chunk) 188 | 189 | return self._fix_output( 190 | results, start=r0.start, end=r0.end, superrun=r0.superrun, subruns=r0.subruns 191 | ) 192 | 193 | def cleanup(self, wait_for): 194 | print(f"{self.__class__.__name__} terminated. Waiting for {len(wait_for)} pending futures.") 195 | for savers in self.sub_savers.values(): 196 | for s in savers: 197 | s.close(wait_for=wait_for) 198 | super().cleanup(wait_for) 199 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\strax.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\strax.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | --------------------------------------------------------------------------------