├── npstreams ├── tests │ ├── __init__.py │ ├── data │ │ ├── test_data1.npy │ │ ├── test_data2.npy │ │ └── test_data3.npy │ ├── test_array_utils.py │ ├── test_stacking.py │ ├── test_array_stream.py │ ├── test_linalg.py │ ├── test_parallel.py │ ├── test_flow.py │ ├── test_cuda.py │ ├── test_iter_utils.py │ ├── test_reduce.py │ ├── test_numerics.py │ └── test_stats.py ├── __init__.py ├── stacking.py ├── array_utils.py ├── array_stream.py ├── flow.py ├── linalg.py ├── parallel.py ├── iter_utils.py ├── cuda.py ├── benchmarks.py ├── numerics.py ├── reduce.py └── stats.py ├── docs ├── whatsnew.rst ├── references.txt ├── recipes.rst ├── control_flow.rst ├── cuda.rst ├── installation.rst ├── conventions.rst ├── api.rst ├── index.rst ├── conf.py └── making_your_own.rst ├── MANIFEST.in ├── RELEASE-CHECKLIST.rst ├── .readthedocs.yml ├── .gitattributes ├── release-description.py ├── LICENSE.txt ├── CHANGELOG.rst ├── .gitignore ├── pyproject.toml ├── .github └── workflows │ └── ci.yml └── README.md /npstreams/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/whatsnew.rst: -------------------------------------------------------------------------------- 1 | What's new 2 | ========== 3 | 4 | .. include:: ../CHANGELOG.rst -------------------------------------------------------------------------------- /npstreams/tests/data/test_data1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LaurentRDC/npstreams/HEAD/npstreams/tests/data/test_data1.npy -------------------------------------------------------------------------------- /npstreams/tests/data/test_data2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LaurentRDC/npstreams/HEAD/npstreams/tests/data/test_data2.npy -------------------------------------------------------------------------------- /npstreams/tests/data/test_data3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LaurentRDC/npstreams/HEAD/npstreams/tests/data/test_data3.npy -------------------------------------------------------------------------------- /docs/references.txt: -------------------------------------------------------------------------------- 1 | .. _Numpy: http://www.numpy.org 2 | .. _Scipy: https://www.scipy.org 3 | .. _PyCUDA: https://documen.tician.de/pycuda/ -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.rst 2 | include LICENSE.txt 3 | include README.md 4 | 5 | recursive-include npstreams/tests/data * 6 | 7 | recursive-exclude docs * 8 | 9 | global-exclude *.py[cod] __pycache__ *.so *.dylib -------------------------------------------------------------------------------- /RELEASE-CHECKLIST.rst: -------------------------------------------------------------------------------- 1 | Release checklist 2 | ----------------- 3 | 4 | To create a release, simply create a tag that starts with 'v' (e.g. 'v2.0.0'):: 5 | 6 | git tag -a "v2.0.0" 7 | git push origin "v2.0.0" 8 | 9 | The package will be automatically tested, released on GitHub and uploaded to PyPI. 
-------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | sphinx: 8 | configuration: docs/conf.py 9 | 10 | build: 11 | os: ubuntu-22.04 12 | tools: 13 | python: "3.10" 14 | 15 | python: 16 | install: 17 | - method: pip 18 | path: . 19 | extra_requirements: 20 | - development -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /release-description.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extract the changes from the last release 3 | """ 4 | 5 | import sys 6 | 7 | if __name__ == "__main__": 8 | filename = sys.argv[1] 9 | 10 | with open(filename, mode="r") as f: 11 | 12 | # Look for the first second-level title 13 | for line in f: 14 | if line.startswith("Release"): 15 | break 16 | 17 | print(line, end="") 18 | for line in f: 19 | if not line.startswith("Release"): 20 | print(line, end="") 21 | else: 22 | # Exit gracefully 23 | sys.exit(0) 24 | # There was a problem: Exit with error 25 | sys.exit(-1) 26 | -------------------------------------------------------------------------------- /npstreams/tests/test_array_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from npstreams import nan_to_num 3 | 4 | 5 | def test_nan_to_num_generic(): 6 | """Test that NaNs are replaced with a fill value""" 7 | with np.errstate(divide="ignore", invalid="ignore"): 8 | vals = nan_to_num(np.array([0]) / 0.0, fill_value=14) 9 | assert vals[0] == 14 10 | 11 | 12 | def test_nan_to_num_integer(): 13 | """Test that nan_to_num on integers does nothing""" 14 | vals = nan_to_num(1) 15 | assert vals == 1 16 | vals = nan_to_num([1]) 17 | assert np.allclose(vals, np.array([1])) 18 | 19 | 20 | def test_nan_to_num_complex_good(): 21 | """Test nan_to_num on complex input""" 22 | vals = nan_to_num(1 + 1j) 23 | assert vals == 1 + 1j 24 | -------------------------------------------------------------------------------- /npstreams/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __author__ = "Laurent P. 
René de Cotret" 3 | __email__ = "laurent.decotret@outlook.com" 4 | __license__ = "BSD" 5 | __version__ = "1.7.0" 6 | 7 | from .benchmarks import benchmark 8 | from .array_stream import array_stream, ArrayStream 9 | from .array_utils import nan_to_num 10 | from .linalg import idot, itensordot, ieinsum, iinner 11 | from .parallel import pmap, pmap_unordered, preduce 12 | from .flow import ipipe, iload, pload 13 | from .iter_utils import ( 14 | cyclic, 15 | last, 16 | chunked, 17 | multilinspace, 18 | linspace, 19 | peek, 20 | itercopy, 21 | primed, 22 | length_hint, 23 | ) 24 | from .reduce import ireduce_ufunc, preduce_ufunc, reduce_ufunc 25 | from .stacking import stack 26 | from .stats import ( 27 | iaverage, 28 | average, 29 | imean, 30 | mean, 31 | istd, 32 | std, 33 | ivar, 34 | var, 35 | isem, 36 | sem, 37 | average_and_var, 38 | ihistogram, 39 | ) 40 | from .numerics import isum, sum, iprod, prod, isub, iall, iany, imax, imin 41 | -------------------------------------------------------------------------------- /npstreams/tests/test_stacking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from npstreams import stack 6 | import pytest 7 | 8 | 9 | def test_stack_against_numpy_stack(): 10 | """Test against numpy.stack for axis = -1""" 11 | stream = [np.random.random((15, 7, 2, 1)) for _ in range(10)] 12 | 13 | dense = np.stack(stream, axis=-1) 14 | from_stack = stack(stream, axis=-1) 15 | assert np.allclose(dense, from_stack) 16 | 17 | 18 | def test_stack_on_single_array(): 19 | """Test that npstreams.stack works with a single array""" 20 | arr = np.random.random((16, 16)) 21 | stacked = stack(arr) 22 | assert np.allclose(arr[..., np.newaxis], stacked) 23 | 24 | 25 | @pytest.mark.parametrize("axis", range(4)) 26 | def test_stack_against_numpy_concatenate(axis): 27 | """Test against numpy.concatenate for existing axes""" 28 | stream = [np.random.random((15, 7, 2, 1)) for _ in range(10)] 29 | 30 | dense = np.concatenate(stream, axis=axis) 31 | from_stack = stack(stream, axis=axis) 32 | assert np.allclose(dense, from_stack) 33 | -------------------------------------------------------------------------------- /docs/recipes.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _recipes: 4 | 5 | ******* 6 | Recipes 7 | ******* 8 | 9 | Single-pass mean and error calculation 10 | -------------------------------------- 11 | 12 | Here is a snippet for a function that computes a mean 13 | and standard error in the mean (SEM) in a single pass:: 14 | 15 | from npstreams import imean, isem, array_stream, itercopy 16 | 17 | # The `array_stream` decorator ensures that the elements of 18 | # the iterable `arrays` will be converted to ndarrays if possible 19 | # This decorator is not required. 20 | @array_stream 21 | def mean_and_error(arrays, axis = -1): 22 | """ Yields (mean, error) pairs from a stream of arrays """ 23 | # itercopy creates a copy of the original stream 24 | # The elements are only generated once, and then fed 25 | # to those two copies; much more efficient than 26 | # creating two streams from scratch. 
27 | arrays_for_mean, arrays_for_sem = itercopy(arrays) 28 | 29 | means = imean(arrays_for_mean, axis = axis) 30 | errors = isem(arrays_for_sem, axis = axis) 31 | 32 | yield from zip(means, errors) -------------------------------------------------------------------------------- /npstreams/stacking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Stacking arrays from a stream 4 | ----------------------------- 5 | """ 6 | from collections.abc import Sized 7 | from functools import partial 8 | 9 | import numpy as np 10 | 11 | from .array_stream import array_stream 12 | 13 | 14 | @array_stream 15 | def stack(arrays, axis=-1): 16 | """ 17 | Stack of all arrays from a stream. Generalization of numpy.stack 18 | and numpy.concatenate. 19 | 20 | Parameters 21 | ---------- 22 | arrays : iterable 23 | Stream of NumPy arrays. Arrays must have shapes that broadcast together. 24 | axis : int, optional 25 | Stacking direction. If ``axis = -1``, arrays are stacked along a 26 | new dimension. 27 | 28 | Returns 29 | ------- 30 | stacked : ndarray 31 | Cumulative stacked array. 32 | """ 33 | # Shortcut : if axis == -1, this is exactly what ArrayStream.__array__ does 34 | if axis == -1: 35 | return np.array(arrays) 36 | 37 | # TODO: Shortcut if we already know the stream length 38 | # Note : we are guaranteed that `arrays` is a stream of arrays 39 | # at worst a tuple (arr,) 40 | # Use npstreams.length_hint 41 | arrays = iter(arrays) 42 | first = next(arrays) 43 | stack = np.array(first, copy=True) 44 | 45 | for array in arrays: 46 | stack = np.concatenate([stack, array], axis=axis) 47 | 48 | return stack 49 | -------------------------------------------------------------------------------- /docs/control_flow.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _control_flow: 4 | 5 | ************ 6 | Control Flow 7 | ************ 8 | 9 | .. currentmodule:: npstreams 10 | 11 | ========================= 12 | Streaming array pipelines 13 | ========================= 14 | 15 | Before reducing your stream of arrays (e.g. averaging them together), you may want to 16 | transform them. This can be done with the :func:`ipipe` function: 17 | 18 | .. autofunction:: ipipe 19 | :noindex: 20 | 21 | Imagine we have the following pipeline, in which we want to process images in some iterable :data:`arrays` 22 | as follows: 23 | 24 | * Remove negative pixel intensity values; 25 | * Adjust the gamma value of images (from Scikit-image's :mod:`exposure` module); 26 | * Average the result together. 27 | 28 | The following lines will do the trick:: 29 | 30 | from functools import partial 31 | from npstreams import ipipe, iaverage, last 32 | from skimage.exposure import adjust_gamma 33 | 34 | def remove_negative(arr): 35 | arr[arr < 0] = 0 36 | return arr 37 | 38 | pipeline = ipipe(adjust_gamma, remove_negative, arrays) 39 | avgs = last(iaverage(pipeline)) 40 | 41 | If the pipeline is computationally intensive, we can also pipe arrays in parallel using the 42 | keyword-only ``processes``:: 43 | 44 | pipeline = ipipe(adjust_gamma, remove_negative, arrays, processes = 4) # 4 cores will be used 45 | avgs = last(iaverage(pipeline)) 46 | 47 | Since :func:`ipipe` uses :func:`pmap` under the hood, we can also use all available cores 48 | by passing ``processes = None``. 
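49 | 50 | Pipelines can also start directly from files on disk via :func:`iload`. The following is a minimal sketch (the glob pattern and image-reading function are illustrative), reusing ``remove_negative`` from above:: 51 | 52 | from npstreams import iload, ipipe, iaverage, last 53 | from skimage.io import imread 54 | from skimage.exposure import adjust_gamma 55 | 56 | arrays = iload('images_*.tif', load_func = imread) # lazy stream of images 57 | pipeline = ipipe(adjust_gamma, remove_negative, arrays, processes = None) 58 | avg = last(iaverage(pipeline)) 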
-------------------------------------------------------------------------------- /docs/cuda.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _cuda: 4 | 5 | ============ 6 | CUDA support 7 | ============ 8 | 9 | .. currentmodule:: npstreams 10 | 11 | What is CUDA 12 | ============ 13 | 14 | `CUDA `_ is a computing platform taking advantage of Nvidia hardware. 15 | It effectively allows for array computations on Graphics Processing Units (GPUs). 16 | 17 | :mod:`npstreams` relies on the (optional) `PyCUDA`_ library 18 | to access CUDA functionality. 19 | 20 | Advantages of CUDA 21 | ------------------ 22 | 23 | TODO: benchmarks 24 | 25 | CUDA in npstreams 26 | ================= 27 | 28 | `PyCUDA`_ is an optional dependency. Therefore, the CUDA-enabled functions are located in a separate 29 | module, the :mod:`npstreams.cuda` submodule. 30 | 31 | Importing from :mod:`npstreams.cuda` submodule 32 | ---------------------------------------------- 33 | 34 | Importing anything from the :mod:`npstreams.cuda` submodule will raise an ``ImportError`` in the following cases: 35 | 36 | * `PyCUDA`_ is not installed; 37 | * No GPUs are available; 38 | * CUDA compilation backend is not available, possibly due to incomplete installation. 39 | 40 | With this in mind, it is wise to wrap import statements from :mod:`npstreams.cuda` in a ``try/except`` block. 41 | 42 | CUDA-enabled routines 43 | --------------------- 44 | 45 | A limited set of functions implemented in npstreams also have CUDA-enabled equivalents. For performance reasons, 46 | all CUDA-enabled routines operate along the 'stream' axis, i.e. as if the arrays had been stacked 47 | along a new dimension. -------------------------------------------------------------------------------- /npstreams/tests/test_array_stream.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | from npstreams.array_stream import array_stream, ArrayStream 5 | 6 | 7 | @array_stream 8 | def iden(arrays): 9 | yield from arrays 10 | 11 | 12 | def test_array_stream_decorator_type(): 13 | """Test that all objects from an array stream are ndarrays""" 14 | 15 | stream = [0, 1, np.array([1])] 16 | for arr in iden(stream): 17 | assert isinstance(arr, np.ndarray) 18 | 19 | 20 | def test_single_array(): 21 | """Test that a 'stream' consisting of a single array is repackaged into an iterable""" 22 | stream = np.array([1, 2, 3]) 23 | assert len(list(iden(stream))) == 1 24 | 25 | 26 | def test_array_stream_length_hint_sized_iterable(): 27 | """Test the accuracy of __length_hint__ for ArrayStream constructed 28 | from a sized iterable""" 29 | iterable = [1, 2, 3, 4, 5] 30 | a = ArrayStream(iterable) 31 | assert len(iterable) == a.__length_hint__() 32 | 33 | 34 | def test_array_stream_length_hint_not_sized_iterable(): 35 | """Test that __length_hint__ returns NotImplemented for ArrayStream constructed 36 | from an unsized iterable""" 37 | iterable = (0 for _ in range(10)) 38 | a = ArrayStream(iterable) 39 | assert a.__length_hint__() is NotImplemented 40 | 41 | 42 | def test_array_stream_conversion_to_array(): 43 | """Test that numpy.array(ArrayStream(...)) returns an array built as a stack of arrays""" 44 | a = ArrayStream([np.random.random((16, 16)) for _ in range(10)]) 45 | arr = np.array(a) 46 | assert arr.shape == (16, 16, 10) 47 | -------------------------------------------------------------------------------- 
/npstreams/array_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Array utilities 4 | --------------- 5 | """ 6 | import numpy as np 7 | 8 | 9 | def nan_to_num(array, fill_value=0.0, copy=True): 10 | """ 11 | Replace NaNs with another fill value. 12 | 13 | Parameters 14 | ---------- 15 | array : array_like 16 | Input data. 17 | fill_value : float, optional 18 | NaNs will be replaced by ``fill_value``. Default is 0.0, in keeping 19 | with ``numpy.nan_to_num``. 20 | copy : bool, optional 21 | Whether to create a copy of `array` (True) or to replace values 22 | in-place (False). The in-place operation only occurs if 23 | casting to an array does not require a copy. 24 | 25 | Returns 26 | ------- 27 | out : ndarray 28 | Array without NaNs. If ``array`` was not of floating or complex type, 29 | ``array`` is returned unchanged. 30 | 31 | Notes 32 | ----- 33 | Contrary to ``numpy.nan_to_num``, this function does not handle 34 | infinite values. 35 | 36 | See Also 37 | -------- 38 | numpy.nan_to_num : replace NaNs and Infs with zeroes. 39 | """ 40 | array = np.array(array, subok=True, copy=copy) 41 | dtype = array.dtype.type 42 | 43 | # Non-inexact types do not have NaNs 44 | if not np.issubdtype(dtype, np.inexact): 45 | return array 46 | 47 | iscomplex = np.issubdtype(dtype, np.complexfloating) 48 | dest = (array.real, array.imag) if iscomplex else (array,) 49 | for d in dest: 50 | np.copyto(d, fill_value, where=np.isnan(d)) 51 | return array 52 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2020, Laurent P. René de Cotret. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 2 | Release 1.7.0 3 | ------------- 4 | 5 | * Explicit support for NumPy 2, in addition to NumPy 1. 6 | 7 | Release 1.6.6 8 | ------------- 9 | 10 | * Added the ability to automatically publish to PyPI. 11 | 12 | Release 1.6.5 13 | ------------- 14 | 15 | * `Support for Python 3.6 and NumPy<1.17 has been dropped `_ 16 | * Migration of testing infrastructure to pytest. 17 | * Tests are now included in the package itself. 18 | * Fixed some deprecation warnings from NumPy 1.20+. 19 | 20 | Release 1.6.4 21 | ------------- 22 | 23 | * Fixed an issue regarding a deprecation of `collections.Sized` (in favour of `collections.abc.Sized`) in Python 3.10+ 24 | * Code snippets in documentation are now tested for correctness. 25 | * Tests are now included in source distributions. 26 | 27 | Release 1.6.3 28 | ------------- 29 | 30 | * Added support for Python 3.9 31 | 32 | Release 1.6.2 33 | ------------- 34 | 35 | * Added the ability to run default benchmarks from the command line with ``python -m npstreams.benchmarks``. 36 | * Added explicit support for Python 3.8. 37 | * Bumped requirement for `numpy >= 1.14`. 38 | 39 | Release 1.6.1 40 | ------------- 41 | 42 | * Added a changelog. 43 | * Added the possibility to use weights in ``ihistogram``. 44 | * Added the function ``average_and_var`` to compute the average and variance in a single pass. 45 | * Documentation regarding the ``ddof`` keyword in many statistical functions wrongly stated that the default value was 1. This has been corrected. 46 | 47 | Release 1.6 48 | ----------- 49 | 50 | * Fixed some issues with NumPy versions above 1.16. 51 | 52 | Release 1.5.2 53 | ------------- 54 | 55 | * Added benchmarking capabilities. 56 | * Added the ``array_stream`` decorator. 57 | * Removed support for Python < 3.6. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Visual studio cache 10 | *.vs/ 11 | *.vscode/ 12 | 13 | # autogenerated documentation 14 | docs/source/functions/ 15 | docs/source/classes/ 16 | 17 | # Jupyter notebooks 18 | notebooks/ 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | # These directories are autogenerated 77 | docs/_build/ 78 | docs/functions/ 79 | docs/classes/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # IPython Notebook 85 | .ipynb_checkpoints 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # PyCharm 107 | .idea/ -------------------------------------------------------------------------------- /npstreams/tests/test_linalg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from random import randint, random 4 | 5 | import numpy as np 6 | 7 | from npstreams import idot, itensordot, iinner, ieinsum, last 8 | import pytest 9 | 10 | 11 | def test_idot_against_numpy_multidot(): 12 | """Test against numpy.linalg.multi_dot in 2D case""" 13 | stream = [np.random.random((8, 8)) for _ in range(7)] 14 | 15 | from_numpy = np.linalg.multi_dot(stream) 16 | from_stream = last(idot(stream)) 17 | 18 | assert from_numpy.shape == from_stream.shape 19 | assert np.allclose(from_numpy, from_stream) 20 | 21 | 22 | @pytest.mark.parametrize("axis", (0, 1, 2)) 23 | def test_itensordot_against_numpy_tensordot(axis): 24 | """Test against numpy.tensordot in 2D case""" 25 | stream = tuple(np.random.random((8, 8)) for _ in range(2)) 26 | 27 | from_numpy = np.tensordot(*stream) 28 | from_stream = last(itensordot(stream)) 29 | 30 | assert from_numpy.shape == from_stream.shape 31 | assert np.allclose(from_numpy, from_stream) 32 | 33 | 34 | @pytest.mark.parametrize("axis", (0, 1, 2)) 35 | def test_iinner_against_numpy_inner(axis): 36 | """Test against numpy.inner in 2D case""" 37 | stream = tuple(np.random.random((8, 8)) for _ in range(2)) 38 | 39 | from_numpy = np.inner(*stream) 40 | from_stream = last(iinner(stream)) 41 | 42 | assert from_numpy.shape == from_stream.shape 43 | assert np.allclose(from_numpy, from_stream) 44 | 45 | 46 | def test_ieinsum_against_numpy_einsum(): 47 | """Test against numpy.einsum""" 48 | a = np.arange(60.0).reshape(3, 4, 5) 49 | b = np.arange(24.0).reshape(4, 3, 2) 50 | stream = [a, b] 51 | 52 | from_numpy = np.einsum("ijk,jil->kl", a, b) 53 | from_stream = last(ieinsum(stream, "ijk,jil->kl")) 54 | 55 | assert from_numpy.shape == from_stream.shape 56 | assert np.allclose(from_numpy, from_stream) 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["build", "setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools.dynamic] 6 | version = {attr = "npstreams.__version__"} 7 | 8 | [project] 9 | name = "npstreams" 10 | dynamic = ["version"] 11 | authors = [ 12 | { name="Laurent P. 
René de Cotret", email="laurent.decotret@outlook.com" }, 13 | ] 14 | maintainers = [ 15 | { name="Laurent P. René de Cotret", email="laurent.decotret@outlook.com" }, 16 | ] 17 | description = "Streaming operations on NumPy arrays" 18 | readme = "README.md" 19 | license = {file = "LICENSE.txt"} 20 | requires-python = ">=3.7, <4" 21 | dependencies = ["numpy >= 1.17, <3"] 22 | keywords=["streaming", "numpy", "math"] 23 | classifiers = [ 24 | "Environment :: Console", 25 | "Intended Audience :: Science/Research", 26 | "Topic :: Scientific/Engineering", 27 | "License :: OSI Approved :: BSD License", 28 | "Natural Language :: English", 29 | "Operating System :: OS Independent", 30 | "Programming Language :: Python", 31 | "Programming Language :: Python :: 3", 32 | ] 33 | 34 | [project.optional-dependencies] 35 | development = [ 36 | "Sphinx >= 3", 37 | "sphinx_rtd_theme >= 0.4", 38 | "pytest >= 6", 39 | "scipy >= 1", 40 | ] 41 | 42 | [project.urls] 43 | Documentation = "https://npstreams.readthedocs.io/" 44 | Repository = "https://github.com/LaurentRDC/npstreams" 45 | "Bug Tracker" = "https://github.com/LaurentRDC/npstreams/issues" 46 | 47 | [tool.black] 48 | line-length = 120 49 | include = '\.pyi?$' 50 | 51 | [tool.isort] 52 | profile = "black" 53 | 54 | [tool.pytest.ini_options] 55 | minversion = "6.0" 56 | log_cli_level = "INFO" 57 | addopts = ["--doctest-modules"] 58 | testpaths = ["npstreams/tests"] 59 | 60 | # See here for an explanation of how to include package data: 61 | # https://setuptools.pypa.io/en/latest/userguide/datafiles.html#package-data 62 | [tool.setuptools.package-data] 63 | npstreams = ["tests/data/*.npy"] 64 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _installation: 4 | 5 | ************ 6 | Installation 7 | ************ 8 | 9 | Requirements 10 | ============ 11 | 12 | **npstreams** works on Linux, Mac OS X and Windows. It requires Python 3.7+ 13 | as well as `numpy`_. `scipy`_ is an optional dependency that is only used in 14 | tests; however, if SciPy cannot be imported, tests will not fail. 15 | 16 | To get access to the :mod:`npstreams.cuda` module, which contains CUDA-enabled routines, 17 | PyCUDA_ must be installed as well. 18 | 19 | Install npstreams 20 | ================= 21 | 22 | npstreams is available on PyPI; it can be installed with `pip `_:: 23 | 24 | python -m pip install npstreams 25 | 26 | npstreams can also be installed with the conda package manager, from the conda-forge channel:: 27 | 28 | conda config --add channels conda-forge 29 | conda install npstreams 30 | 31 | You can install the latest developer version of npstreams by cloning the git 32 | repository:: 33 | 34 | git clone https://github.com/LaurentRDC/npstreams.git 35 | 36 | ...then installing the package with:: 37 | 38 | cd npstreams 39 | pip install . 40 | 41 | 42 | Testing 43 | ======= 44 | 45 | If you want to check that all the tests are running correctly with your Python 46 | configuration, type:: 47 | 48 | pip install .[development] 49 | pytest 50 | 51 | 52 | Embedding in applications 53 | ========================= 54 | 55 | `npstreams` is designed to be used in conjunction with multiprocessing libraries, such as the standard 56 | `multiprocessing` library. `npstreams` even uses `multiprocessing` directly in certain functions. 
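57 | 58 | As a minimal sketch (the ``transform`` function and array sizes below are illustrative), a script that distributes work over two processes with :func:`pmap` could look like this; the ``freeze_support()`` call only matters for frozen applications, as explained below:: 59 | 60 | import numpy as np 61 | from multiprocessing import freeze_support 62 | from npstreams import pmap 63 | 64 | def transform(arr): 65 | return np.square(arr) 66 | 67 | if __name__ == "__main__": 68 | freeze_support() # no-op outside of frozen executables 69 | arrays = [np.random.random((64, 64)) for _ in range(16)] 70 | for result in pmap(transform, arrays, processes = 2): 71 | print(result.mean()) 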
72 | 73 | In order to use the multicore functionality of `npstreams` in applications frozen with `py2exe`, `PyInstaller`, or `cx_Freeze`, 74 | you will need to call the ``multiprocessing.freeze_support()`` function. `You can read more 75 | about it here. <https://docs.python.org/3.7/library/multiprocessing.html#multiprocessing.freeze_support>`_ -------------------------------------------------------------------------------- /npstreams/tests/test_parallel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from npstreams import pmap, pmap_unordered, preduce 3 | from functools import reduce 4 | import numpy as np 5 | from operator import add 6 | 7 | 8 | def identity(obj, *args, **kwargs): 9 | """ignores args and kwargs""" 10 | return obj 11 | 12 | 13 | def test_preduce_preduce_one_process(): 14 | """Test that preduce reduces to functools.reduce for a single process""" 15 | integers = list(range(0, 10)) 16 | preduce_results = preduce(add, integers, processes=1) 17 | reduce_results = reduce(add, integers) 18 | 19 | assert preduce_results == reduce_results 20 | 21 | 22 | def test_preduce_preduce_multiple_processes(): 23 | """Test that preduce agrees with functools.reduce for multiple processes""" 24 | integers = list(range(0, 10)) 25 | preduce_results = preduce(add, integers, processes=2) 26 | reduce_results = reduce(add, integers) 27 | 28 | assert preduce_results == reduce_results 29 | 30 | 31 | def test_preduce_on_numpy_arrays(): 32 | """Test sum of numpy arrays as parallel reduce""" 33 | arrays = [np.zeros((32, 32)) for _ in range(10)] 34 | s = preduce(add, arrays, processes=2) 35 | 36 | assert np.allclose(s, arrays[0]) 37 | 38 | 39 | def test_preduce_with_kwargs(): 40 | """Test preduce with keyword-arguments""" 41 | pass 42 | 43 | 44 | def test_pmap_trivial_map_no_args(): 45 | """Test that pmap is working with no positional arguments""" 46 | integers = list(range(0, 10)) 47 | result = list(pmap(identity, integers, processes=2)) 48 | assert integers == result 49 | 50 | 51 | def test_pmap_trivial_map_kwargs(): 52 | """Test that pmap is working with args and kwargs""" 53 | integers = list(range(0, 10)) 54 | result = list(pmap(identity, integers, processes=2, kwargs={"test": True})) 55 | assert result == integers 56 | 57 | 58 | def test_pmap_unordered_trivial_map_no_args(): 59 | """Test that pmap_unordered is working with no positional arguments""" 60 | integers = list(range(0, 10)) 61 | result = list(sorted(pmap_unordered(identity, integers, processes=2))) 62 | assert integers == result 63 | 64 | 65 | def test_pmap_unordered_trivial_map_kwargs(): 66 | """Test that pmap_unordered is working with args and kwargs""" 67 | integers = list(range(0, 10)) 68 | result = list( 69 | sorted(pmap_unordered(identity, integers, processes=2, kwargs={"test": True})) 70 | ) 71 | assert result == integers 72 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous integration 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | build: 9 | # To prevent this job from running, have "[skip ci]" or "[ci skip]" in the commit message 10 | if: contains(toJson(github.event.commits), '[ci skip]') == false && contains(toJson(github.event.commits), '[skip ci]') == false 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: [ubuntu-latest, macos-latest, windows-latest] 17 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 18 | 19 | steps: 20 | - uses: 
actions/checkout@v4 21 | 22 | - name: Set up Python ${{ matrix.python-version }} on ${{ matrix.os }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install .[development] 31 | 32 | # Note the use of the -Wa flag to show DeprecationWarnings 33 | # We run the tests on the installed package 34 | - name: Unit tests and doctests 35 | run: | 36 | python -Wa -m pytest 37 | 38 | - name: Build documentation 39 | run: 40 | sphinx-build -M html docs build/docs 41 | 42 | 43 | release: 44 | if: startsWith(github.ref, 'refs/tags/v') 45 | needs: [build] 46 | runs-on: ubuntu-latest 47 | permissions: 48 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 49 | contents: write # To create a release 50 | steps: 51 | - uses: actions/checkout@v4 52 | 53 | - name: Set up Python 54 | uses: actions/setup-python@v5 55 | with: 56 | python-version: "3.10" 57 | 58 | - name: Install dependencies 59 | run: | 60 | pip install build 61 | pip install .[development] 62 | 63 | - name: Create release description 64 | run: | 65 | python release-description.py CHANGELOG.rst > description.md 66 | cat description.md 67 | 68 | - name: Create source distribution 69 | run: | 70 | python -m build 71 | 72 | - name: Create release 73 | uses: softprops/action-gh-release@v2 74 | with: 75 | body_path: description.md 76 | files: | 77 | dist/* 78 | 79 | # Github Actions have been set as a trusted publisher on PyPI's npstreams project, 80 | # hence why no username, password, or token is required. 81 | - name: Upload to PyPI 82 | if: always() 83 | uses: pypa/gh-action-pypi-publish@release/v1 84 | -------------------------------------------------------------------------------- /docs/conventions.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _conventions: 4 | 5 | *********** 6 | Conventions 7 | *********** 8 | 9 | .. currentmodule:: npstreams 10 | 11 | Stream Conventions 12 | ------------------ 13 | 14 | Most (all?) functions in :mod:`npstreams` are designed to work on streams, or 15 | iterables of NumPy arrays. These iterables can be infinite. 16 | The quintessential example is a stream of images progressively read from disk. 17 | These streams of arrays must contain arrays that all have the same shape and data-type, 18 | unless specified otherwise. 19 | 20 | An example of a function that operates on a stream of arrays of different shapes is :func:`ieinsum`. 21 | 22 | A single NumPy array can be passed where a stream is expected; the array will be repackaged 23 | into a stream of a single array. 24 | 25 | Naming Conventions 26 | ------------------ 27 | 28 | In order to facilitate documentation, functions in :mod:`npstreams` follow these conventions: 29 | 30 | * Routines are named after their closest equivalent in :mod:`numpy` and :mod:`scipy`. 31 | * Routines with names starting with 'i' (e.g. :func:`iprod`) are generator functions; they yield running results 32 | as they are being computed. Usually, these functions have a non-generator equivalent that 33 | consumes the entire stream (e.g. :func:`iaverage` vs. :func:`average`). 34 | * Routines with names starting with 'c' (e.g. :func:`csum`) are CUDA-enabled (requires :mod:`pycuda`). 35 | * Routines with names starting with 'p' (e.g. :func:`pmap`) can be parallelized. The default 36 | behavior is always to not use multiple cores. 
For example, the default behavior of :func:`pmap` 37 | is to behave like :func:`map`. 38 | 39 | Axis Conventions 40 | ---------------- 41 | 42 | NumPy arrays provide operations along axes. Similarly, :mod:`npstreams` also 43 | exposes the :data:`axis` keyword in some (most?) reduction functions like :func:`isum` 44 | and :func:`iprod`. 45 | 46 | The convention for specification of the :data:`axis` parameter is as follows: 47 | 48 | * If ``axis = None``, arrays are flattened before being combined. The result will 49 | be a scalar or a 0d array. 50 | * The default (``axis = -1``) always corresponds to combining arrays along a 51 | new axis. For example, summing images together along ``axis = -1`` is equivalent 52 | to stacking images along a new axis, then summing along this new axis. 53 | * If ``axis`` is an ``int``, then arrays are reduced according to this axis, and then combined. 54 | 55 | CUDA-enabled functions 56 | ---------------------- 57 | Some functions are implemented using CUDA; see :ref:`cuda` for details. -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _api: 4 | 5 | ************* 6 | Reference/API 7 | ************* 8 | 9 | .. currentmodule:: npstreams 10 | 11 | Click on any function below to see detailed information. 12 | 13 | Creation of Streams 14 | ------------------- 15 | 16 | Decorator for streaming functions which guarantees that the stream elements will be converted to arrays. 17 | 18 | .. autosummary:: 19 | :toctree: functions/ 20 | 21 | array_stream 22 | 23 | The :func:`array_stream` decorator wraps iterables into an :class:`ArrayStream` iterator. This is not 24 | required to use the functions defined here, but it provides some nice guarantees. 25 | 26 | .. autosummary:: 27 | :toctree: classes/ 28 | 29 | ArrayStream 30 | 31 | Statistical Functions 32 | --------------------- 33 | 34 | .. autosummary:: 35 | :toctree: functions/ 36 | 37 | imean 38 | iaverage 39 | istd 40 | ivar 41 | isem 42 | ihistogram 43 | 44 | The following functions consume entire streams. By avoiding costly intermediate steps, 45 | they can perform much faster than their generator versions. 46 | 47 | .. autosummary:: 48 | :toctree: functions/ 49 | 50 | mean 51 | average 52 | std 53 | var 54 | sem 55 | average_and_var 56 | 57 | Numerics 58 | -------- 59 | 60 | .. autosummary:: 61 | :toctree: functions/ 62 | 63 | isum 64 | iprod 65 | isub 66 | 67 | .. autosummary:: 68 | :toctree: functions/ 69 | 70 | sum 71 | prod 72 | 73 | Linear Algebra 74 | -------------- 75 | .. autosummary:: 76 | :toctree: functions/ 77 | 78 | idot 79 | iinner 80 | itensordot 81 | ieinsum 82 | 83 | Control Flow 84 | ------------ 85 | .. autosummary:: 86 | :toctree: functions/ 87 | 88 | ipipe 89 | iload 90 | pload 91 | 92 | Comparisons 93 | ----------- 94 | .. autosummary:: 95 | :toctree: functions/ 96 | 97 | iany 98 | iall 99 | imax 100 | imin 101 | 102 | Parallelization 103 | --------------- 104 | .. autosummary:: 105 | :toctree: functions/ 106 | 107 | pmap 108 | pmap_unordered 109 | preduce 110 | 111 | Stacking 112 | -------- 113 | .. autosummary:: 114 | :toctree: functions/ 115 | 116 | stack 117 | 118 | Iterator Utilities 119 | ------------------ 120 | .. autosummary:: 121 | :toctree: functions/ 122 | 123 | last 124 | cyclic 125 | itercopy 126 | chunked 127 | linspace 128 | multilinspace 129 | peek 130 | primed 131 | length_hint 132 | 133 | Array Utilities 134 | --------------- 135 | .. 
autosummary:: 136 | :toctree: functions/ 137 | 138 | nan_to_num 139 | 140 | Benchmarking 141 | ------------ 142 | .. autosummary:: 143 | :toctree: functions/ 144 | 145 | benchmark -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _npstreams: 4 | 5 | ************************************** 6 | `npstreams`: streaming NumPy functions 7 | ************************************** 8 | 9 | :mod:`npstreams` is an open-source Python package for streaming NumPy array operations. 10 | The goal is to provide tested, (almost) drop-in replacements for NumPy functions (where possible) 11 | that operate on streams of arrays instead of dense arrays. 12 | 13 | :mod:`npstreams` also provides some utilities for parallelization. These parallelization 14 | generators can be combined with the streaming functions to drastically improve performance 15 | in some cases. 16 | 17 | The code presented herein has been in use at some point by the 18 | `Siwick research group `_. 19 | 20 | Example 21 | ======= 22 | 23 | Consider the following snippet to combine 50 images 24 | from an iterable :data:`source`:: 25 | 26 | import numpy as np 27 | 28 | images = np.empty( shape = (2048, 2048, 50) ) 29 | for index, im in enumerate(source): 30 | images[:,:,index] = im 31 | 32 | avg = np.average(images, axis = 2) 33 | 34 | If the :data:`source` iterable provided 10000 images, the above routine would 35 | not work on most machines. Moreover, what if we want to transform the images 36 | one by one before averaging them? What about looking at the average while it 37 | is being computed? Let's look at an example:: 38 | 39 | import numpy as np 40 | from npstreams import iaverage 41 | from scipy.misc import imread 42 | 43 | stream = map(imread, list_of_filenames) 44 | averaged = iaverage(stream) 45 | 46 | At this point, the generators :func:`map` and :func:`iaverage` are 'wired' 47 | but will not compute anything until results are requested. We can watch the average evolve:: 48 | 49 | import matplotlib.pyplot as plt 50 | for avg in averaged: 51 | plt.imshow(avg); plt.show() 52 | 53 | We can also use :func:`last` to get at the final average:: 54 | 55 | from npstreams import last 56 | 57 | total = last(averaged) # average of the entire stream. See also npstreams.average 58 | 59 | Benchmark 60 | ========= 61 | 62 | npstreams provides a function for benchmarking common use cases. 63 | 64 | To run the benchmark with default parameters, from the interpreter:: 65 | 66 | from npstreams import benchmark 67 | benchmark() 68 | 69 | From a command-line terminal:: 70 | 71 | python -m npstreams.benchmarks 72 | 73 | The results will be printed to the screen. 74 | 75 | Links 76 | ===== 77 | 78 | * `Source code `_ 79 | * `Issues `_ 80 | * `Docs `_ 81 | 82 | .. _npstreams_docs: 83 | 84 | General Documentation 85 | ===================== 86 | 87 | .. toctree:: 88 | :maxdepth: 3 89 | 90 | installation 91 | whatsnew 92 | conventions 93 | api 94 | cuda 95 | control_flow 96 | making_your_own 97 | recipes 98 | 99 | Authors 100 | ======= 101 | 102 | * Laurent P. 
René de Cotret -------------------------------------------------------------------------------- /npstreams/array_stream.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from collections.abc import Iterator 4 | from functools import wraps 5 | 6 | import numpy as np 7 | from numpy import asanyarray 8 | 9 | from .iter_utils import length_hint, peek 10 | 11 | 12 | class ArrayStream(Iterator): 13 | """ 14 | Iterator of arrays. Elements from the stream are converted to 15 | NumPy arrays. If ``stream`` is a single array, it will be 16 | repackaged as a length 1 iterable. 17 | 18 | Arrays in the stream will be cast to the same data-type as the first 19 | array in the stream. The stream data-type is located in the `dtype` attribute. 20 | 21 | .. versionadded:: 1.5.2 22 | """ 23 | 24 | def __init__(self, stream): 25 | if isinstance(stream, np.ndarray): 26 | stream = (stream,) 27 | 28 | self._sequence_length = length_hint(stream, default=NotImplemented) 29 | 30 | # Once length_hint has been determined, we can peek into the stream 31 | first, stream = peek(stream) 32 | self._iterator = iter(stream) 33 | 34 | first = asanyarray(first) 35 | self.dtype = first.dtype 36 | 37 | def __repr__(self): 38 | """Verbose string representation""" 39 | representation = f"< {self.__class__.__name__} object" 40 | representation += f" of data-type {self.dtype}" 41 | 42 | if not (self._sequence_length is NotImplemented): 43 | representation += f" and a sequence length of {self._sequence_length}" 44 | else: 45 | representation += " of unknown length" 46 | 47 | return representation + " >" 48 | 49 | def __array__(self, *_, **__): 50 | """Returns a dense array created from this stream.""" 51 | # As of numpy version 1.14, arrays are expanded into a list before concatenation 52 | # Therefore, it's ok to build that list first 53 | arraylist = list(self) 54 | return np.stack(arraylist, axis=-1) 55 | 56 | def __length_hint__(self): 57 | """ 58 | In certain cases, an ArrayStream can have a definite size. 59 | See https://www.python.org/dev/peps/pep-0424/ 60 | """ 61 | return self._sequence_length 62 | 63 | def __next__(self): 64 | n = self._iterator.__next__() 65 | return asanyarray(n, dtype=self.dtype) 66 | 67 | 68 | def array_stream(func): 69 | """ 70 | Decorates streaming functions to make sure that the stream 71 | is a stream of ndarrays. Objects that are not arrays are transformed 72 | into arrays. If the stream is in fact a single ndarray, this ndarray 73 | is repackaged into a sequence of length 1. 74 | 75 | The first argument of the decorated function is assumed to be an iterable of 76 | arrays, or an iterable of objects that can be cast to arrays. 77 | 78 | Note that using this decorator also ensures that the stream is only wrapped once 79 | by the conversion function. 
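80 | 81 | Below is a minimal, illustrative example (the decorated function is a stand-in): 82 | 83 | >>> @array_stream 84 | ... def take_one(arrays): 85 | ...     return next(arrays) 86 | >>> take_one([[1, 2], [3, 4]]) 87 | array([1, 2]) 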
88 | """ 89 | 90 | @wraps(func) 91 | def decorated(arrays, *args, **kwargs): 92 | if isinstance(arrays, ArrayStream): 93 | return func(arrays, *args, **kwargs) 94 | return func(ArrayStream(arrays), *args, **kwargs) 95 | 96 | return decorated 97 | -------------------------------------------------------------------------------- /npstreams/tests/test_flow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from pathlib import Path 5 | from npstreams import array_stream, ipipe, last, iload, pload, isum 6 | 7 | 8 | @array_stream 9 | def iden(arrays): 10 | yield from arrays 11 | 12 | 13 | def test_ipipe_order(): 14 | """Test that ipipe(f, g, h, arrays) -> f(g(h(arr))) for arr in arrays""" 15 | stream = [np.random.random((15, 7, 2, 1)) for _ in range(10)] 16 | squared = [np.cbrt(np.square(arr)) for arr in stream] 17 | pipeline = ipipe(np.cbrt, np.square, stream) 18 | 19 | assert all(np.allclose(s, p) for s, p in zip(pipeline, squared)) 20 | 21 | 22 | def test_ipipe_multiprocessing(): 23 | """Test that ipipe(f, g, h, arrays) -> f(g(h(arr))) for arr in arrays""" 24 | stream = [np.random.random((15, 7, 2, 1)) for _ in range(10)] 25 | squared = [np.cbrt(np.square(arr)) for arr in stream] 26 | pipeline = ipipe(np.cbrt, np.square, stream, processes=2) 27 | 28 | assert all(np.allclose(s, p) for s, p in zip(pipeline, squared)) 29 | 30 | 31 | def test_iload_glob(): 32 | """Test that iload works on glob-like patterns""" 33 | stream = iload(Path(__file__).parent / "data" / "test_data*.npy", load_func=np.load) 34 | s = last(isum(stream)).astype(float) # Cast to float for np.allclose 35 | assert np.allclose(s, np.zeros_like(s)) 36 | 37 | 38 | def test_iload_file_list(): 39 | """Test that iload works on iterable of filenames""" 40 | files = [ 41 | Path(__file__).parent / "data" / "test_data1.npy", 42 | Path(__file__).parent / "data" / "test_data2.npy", 43 | Path(__file__).parent / "data" / "test_data3.npy", 44 | ] 45 | stream = iload(files, load_func=np.load) 46 | s = last(isum(stream)).astype(float) # Cast to float for np.allclose 47 | assert np.allclose(s, np.zeros_like(s)) 48 | 49 | 50 | def test_pload_glob(): 51 | """Test that pload works on glob-like patterns""" 52 | stream = pload(Path(__file__).parent / "data" / "test_data*.npy", load_func=np.load) 53 | s = last(isum(stream)).astype(float) # Cast to float for np.allclose 54 | assert np.allclose(s, np.zeros_like(s)) 55 | 56 | stream = pload( 57 | Path(__file__).parent / "data" / "test_data*.npy", 58 | load_func=np.load, 59 | processes=2, 60 | ) 61 | s = last(isum(stream)).astype(float) # Cast to float for np.allclose 62 | assert np.allclose(s, np.zeros_like(s)) 63 | 64 | 65 | def test_pload_file_list(): 66 | """Test that pload works on iterable of filenames""" 67 | files = [ 68 | Path(__file__).parent / "data" / "test_data1.npy", 69 | Path(__file__).parent / "data" / "test_data2.npy", 70 | Path(__file__).parent / "data" / "test_data3.npy", 71 | ] 72 | stream = pload(files, load_func=np.load) 73 | s = last(isum(stream)).astype(float) # Cast to float for np.allclose 74 | assert np.allclose(s, np.zeros_like(s)) 75 | 76 | files = [ 77 | Path(__file__).parent / "data" / "test_data1.npy", 78 | Path(__file__).parent / "data" / "test_data2.npy", 79 | Path(__file__).parent / "data" / "test_data3.npy", 80 | ] 81 | stream = pload(files, load_func=np.load, processes=2) 82 | s = last(isum(stream)).astype(float) # Cast to float for np.allclose 83 | assert np.allclose(s, 
np.zeros_like(s)) 84 | -------------------------------------------------------------------------------- /npstreams/tests/test_cuda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from itertools import repeat 4 | import numpy as np 5 | import pytest 6 | 7 | try: 8 | from npstreams.cuda import csum, cprod, caverage, cmean 9 | 10 | WITH_CUDA = True 11 | except ImportError: 12 | WITH_CUDA = False 13 | 14 | 15 | skip_if_no_cuda = pytest.mark.skipif( 16 | not WITH_CUDA, reason="PyCUDA is not installed/available" 17 | ) 18 | 19 | 20 | @skip_if_no_cuda 21 | def test_csum_zero_sum(): 22 | stream = repeat(np.zeros((16, 16), dtype=float), times=5) 23 | s = csum(stream) 24 | assert np.allclose(s, np.zeros((16, 16))) 25 | 26 | 27 | @skip_if_no_cuda 28 | def test_csum_dtype(): 29 | stream = repeat(np.zeros((16, 16), dtype=float), times=5) 30 | s = csum(stream, dtype=np.int16) 31 | assert np.allclose(s, np.zeros((16, 16))) 32 | assert s.dtype == np.int16 33 | 34 | 35 | @skip_if_no_cuda 36 | def test_csum_ignore_nans(): 37 | """Test a sum of zeros with NaNs sprinkled""" 38 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 39 | source.append(np.full((16,), fill_value=np.nan)) 40 | summed = csum(source, ignore_nan=True) 41 | assert np.allclose(summed, np.zeros_like(summed)) 42 | 43 | 44 | @skip_if_no_cuda 45 | def test_cprod_ones_prod(): 46 | stream = repeat(np.ones((16, 16), dtype=float), times=5) 47 | s = cprod(stream) 48 | assert np.allclose(s, np.ones((16, 16))) 49 | 50 | 51 | @skip_if_no_cuda 52 | def test_cprod_ignore_nans(): 53 | """Test that NaNs are ignored.""" 54 | source = [np.ones((16,), dtype=float) for _ in range(10)] 55 | source.append(np.full_like(source[0], np.nan)) 56 | product = cprod(source, ignore_nan=True) 57 | assert np.allclose(product, np.ones_like(product)) 58 | 59 | 60 | @skip_if_no_cuda 61 | def test_cprod_dtype(): 62 | """Test that dtype argument is working""" 63 | source = [np.ones((16,), dtype=float) for _ in range(10)] 64 | product = cprod(source, dtype=int) 65 | assert np.allclose(product, np.ones_like(product)) 66 | assert product.dtype == int 67 | 68 | 69 | @skip_if_no_cuda 70 | def test_cavg_no_weights(): 71 | stream = [np.random.random(size=(16, 16)) for _ in range(5)] 72 | from_caverage = caverage(stream) 73 | from_numpy = np.average(np.dstack(stream), axis=2) 74 | assert np.allclose(from_caverage, from_numpy) 75 | 76 | 77 | @skip_if_no_cuda 78 | def test_cavg_weighted_average(): 79 | """Test results of weighted average against numpy.average""" 80 | stream = [np.random.random(size=(16, 16)) for _ in range(5)] 81 | 82 | weights = [np.random.random(size=stream[0].shape) for _ in stream] 83 | from_caverage = caverage(stream, weights=weights) 84 | from_numpy = np.average(np.dstack(stream), axis=2, weights=np.dstack(weights)) 85 | assert np.allclose(from_caverage, from_numpy) 86 | 87 | 88 | @skip_if_no_cuda 89 | def test_cmean_of_ones(): 90 | stream = repeat(np.ones((16, 16), dtype=float), times=5) 91 | s = cmean(stream) 92 | assert np.allclose(s, np.ones((16, 16))) 93 | 94 | 95 | @skip_if_no_cuda 96 | def test_cmean_random(): 97 | """Test cmean against numpy.mean on random data""" 98 | stream = [np.random.random(size=(16, 16)) for _ in range(5)] 99 | from_cmean = cmean(stream) 100 | from_numpy = np.mean(np.dstack(stream), axis=2) 101 | assert np.allclose(from_cmean, from_numpy) 102 | -------------------------------------------------------------------------------- 
/npstreams/tests/test_iter_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from itertools import repeat 4 | from npstreams import last, chunked, linspace, multilinspace, cyclic, length_hint 5 | import pytest 6 | 7 | 8 | def test_last_trivial(): 9 | """Test last() on iterable of identical values""" 10 | i = repeat(1, 10) 11 | assert last(i) == 1 12 | 13 | 14 | def test_last_on_empty_iterable(): 15 | """Test that last() raises RuntimeError for empty iterable""" 16 | with pytest.raises(RuntimeError): 17 | last(list()) 18 | 19 | 20 | def test_cyclic_numbers(): 21 | """Test that cyclic() yields all cyclic permutations of a tuple""" 22 | permutations = set(cyclic((1, 2, 3))) 23 | assert (1, 2, 3) in permutations 24 | assert (2, 3, 1) in permutations 25 | assert (3, 1, 2) in permutations 26 | assert len(permutations) == 3 27 | 28 | 29 | def test_linspace_endpoint(): 30 | """Test that the endpoint is included by linspace() when appropriate""" 31 | space = linspace(0, 1, num=10, endpoint=True) 32 | assert last(space) == 1 33 | 34 | space = linspace(0, 1, num=10, endpoint=False) 35 | assert round(abs(last(space) - 0.9), 7) == 0 36 | 37 | 38 | def test_linspace_length(): 39 | """Test that linspace() returns an iterable of the correct length""" 40 | space = list(linspace(0, 1, num=13, endpoint=True)) 41 | assert len(space) == 13 42 | 43 | space = list(linspace(0, 1, num=13, endpoint=False)) 44 | assert len(space) == 13 45 | 46 | 47 | def test_multilinspace_endpoint(): 48 | """Test that the endpoint is included by multilinspace() when appropriate""" 49 | space = multilinspace((0, 0), (1, 1), num=10, endpoint=True) 50 | assert last(space) == (1, 1) 51 | 52 | space = multilinspace((0, 0), (1, 1), num=10, endpoint=False) 53 | # Unfortunately there is no assertSequenceAlmostEqual 54 | assert last(space) == (0.8999999999999999, 0.8999999999999999) 55 | 56 | 57 | def test_multilinspace_length(): 58 | """Test that multilinspace() returns an iterable of the correct length""" 59 | space = list(multilinspace((0, 0), (1, 1), num=13, endpoint=True)) 60 | assert len(space) == 13 61 | 62 | space = list(multilinspace((0, 0), (1, 1), num=13, endpoint=False)) 63 | assert len(space) == 13 64 | 65 | 66 | def test_chunked_larger_chunksize(): 67 | """Test chunked() with a chunksize larger than the iterable itself""" 68 | i = repeat(1, 10) 69 | chunks = chunked(i, chunksize=15) 70 | assert len(list(chunks)) == 1 # One single chunk is returned 71 | 72 | 73 | def test_chunked_on_infinite_generator(): 74 | """Test chunked() on an infinite iterable""" 75 | i = repeat(1) 76 | chunks = chunked(i, chunksize=15) 77 | for _ in range(10): 78 | assert len(next(chunks)) == 15 79 | 80 | 81 | def test_chunked_chunked_nonint_chunksize(): 82 | """Test that chunked raises a TypeError immediately if `chunksize` is not an integer""" 83 | with pytest.raises(TypeError): 84 | i = repeat(1) 85 | chunks = chunked(i, chunksize=15.0) 86 | 87 | 88 | def test_length_hint_on_sized(): 89 | """Test length_hint on a sized iterable""" 90 | l = [1, 2, 3, 4, 5] 91 | assert length_hint(l) == len(l) 92 | 93 | 94 | def test_length_hint_on_unsized(): 95 | """Test length_hint on an unsized iterable returns the default""" 96 | l = (0 for _ in range(10)) 97 | assert length_hint(l, default=0) == 0 98 | 99 | 100 | def test_length_hint_on_method_if_implemented(): 101 | """Test length_hint returns the same as __length_hint__ if implemented""" 102 | 103 | class WithHint: 104 | """Some dummy class with a length hint""" 105 | 106 | def __length_hint__(self): 
return 1 108 | 109 | assert length_hint(WithHint(), default=0) == 1 110 | -------------------------------------------------------------------------------- /npstreams/flow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Flow controls 4 | ------------- 5 | """ 6 | from functools import partial 7 | from glob import iglob 8 | from pathlib import Path 9 | 10 | from .array_stream import ArrayStream 11 | from .parallel import pmap, pmap_unordered 12 | 13 | 14 | def iload(files, load_func, **kwargs): 15 | """ 16 | Create a stream of arrays from files, which are loaded lazily. 17 | 18 | In cases where the consumer function is much faster than data loading, 19 | consider using :func:`pload` instead. 20 | 21 | Parameters 22 | ---------- 23 | files : iterable of str or str 24 | Either an iterable of filenames or a glob-like pattern str. 25 | load_func : callable 26 | Function taking a filename as its first argument. 27 | kwargs 28 | Keyword arguments are passed to ``load_func``. 29 | 30 | Yields 31 | ------ 32 | arr: `~numpy.ndarray` 33 | Loaded data. 34 | 35 | See Also 36 | -------- 37 | pload : load files from parallel processes. 38 | 39 | Examples 40 | -------- 41 | To load images using scikit-image :: 42 | 43 | from skimage.io import imread 44 | ims = iload('images_*.tif', imread) 45 | 46 | Keyword arguments are passed to the ``load_func``; for example, 47 | to specify the scikit-image plugin ``'tifffile'``:: 48 | 49 | ims = iload('images_*.tif', imread, plugin = 'tifffile') 50 | 51 | In case the list of images is already known:: 52 | 53 | ims = iload(['im1.tif', 'im2.tif', 'im3.tif'], imread) 54 | """ 55 | # TODO: better handling of Paths 56 | if isinstance(files, Path): 57 | files = str(files) 58 | 59 | if isinstance(files, str): 60 | files = iglob(files) 61 | files = iter(files) 62 | 63 | yield from map(partial(load_func, **kwargs), files) 64 | 65 | 66 | def pload(files, load_func, processes=1, **kwargs): 67 | """ 68 | Create a stream of arrays from files, which are loaded lazily 69 | from multiple processes. 70 | 71 | This function should be preferred to :func:`iload` in cases where 72 | the consumer function is much faster than the data can be loaded. 73 | 74 | Parameters 75 | ---------- 76 | files : iterable of str or str 77 | Either an iterable of filenames or a glob-like pattern str. 78 | load_func : callable 79 | Function taking a filename as its first argument. 80 | processes : int or None, optional 81 | Number of processes to use. If `None`, maximal number of processes 82 | is used. Default is one. 83 | kwargs 84 | Keyword arguments are passed to ``load_func``. 85 | 86 | Yields 87 | ------ 88 | arr: `~numpy.ndarray` 89 | Loaded data. 90 | 91 | See Also 92 | -------- 93 | iload : load files lazily 94 | """ 95 | if processes == 1: 96 | yield from iload(files, load_func, **kwargs) 97 | return 98 | 99 | # TODO: better handling of Paths 100 | if isinstance(files, Path): 101 | files = str(files) 102 | 103 | if isinstance(files, str): 104 | files = iglob(files) 105 | files = iter(files) 106 | 107 | yield from pmap_unordered(partial(load_func, **kwargs), files, processes=processes) 108 | 109 | 110 | # pmap does not support local functions 111 | def _pipe(funcs, array): 112 | for func in funcs: 113 | array = func(array) 114 | return array 115 | 116 | 117 | def ipipe(*args, **kwargs): 118 | """ 119 | Pipe arrays through a sequence of functions.
For example: 120 | 121 | ``pipe(f, g, h, stream)`` is equivalent to :: 122 | 123 | for arr in stream: 124 | yield f(g(h(arr))) 125 | 126 | Parameters 127 | ---------- 128 | *funcs : callable 129 | Callable that support Numpy arrays in their first argument. These 130 | should *NOT* be generator functions. 131 | arrays : iterable 132 | Stream of arrays to be passed. 133 | processes : int or None, optional, keyword-only 134 | Number of processes to use. If `None`, maximal number of processes 135 | is used. Default is one. 136 | ntotal : int or None, optional, keyword-only 137 | If the length of `arrays` is known, but passing `arrays` as a list 138 | would take too much memory, the total number of arrays `ntotal` can be specified. This 139 | allows for `pmap` to chunk better in case of ``processes > 1``. 140 | 141 | Yields 142 | ------ 143 | piped : ndarray 144 | """ 145 | arrays = ArrayStream(args[-1]) 146 | functions = tuple(reversed(args[:-1])) 147 | yield from pmap(partial(_pipe, functions), arrays, **kwargs) 148 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # This file is execfile()d with the current directory set to its 5 | # containing dir. 6 | # 7 | # Note that not all possible configuration values are present in this 8 | # autogenerated file. 9 | # 10 | # All configuration values have a default; values that are commented out 11 | # serve to show the default. 12 | 13 | # If extensions (or modules to document with autodoc) are in another directory, 14 | # add these directories to sys.path here. If the directory is relative to the 15 | # documentation root, use os.path.abspath to make it absolute, like shown here. 16 | # 17 | import os 18 | import sys 19 | 20 | currentpath = os.path.dirname(__file__) 21 | sys.path.append(os.path.join(currentpath, "..")) 22 | 23 | import npstreams 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.5' 30 | from datetime import datetime 31 | import alabaster 32 | 33 | year = datetime.now().year 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | "alabaster", 40 | "sphinx.ext.todo", 41 | "sphinx.ext.intersphinx", 42 | "sphinx.ext.autosummary", 43 | "sphinx.ext.autodoc", 44 | "sphinx.ext.napoleon", 45 | "sphinx.ext.mathjax", 46 | "sphinx.ext.doctest", 47 | ] 48 | 49 | intersphinx_mapping = {"numpy": ("http://docs.scipy.org/doc/numpy/", None)} 50 | 51 | napoleon_google_docstring = False 52 | autosummary_generate = True 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # 57 | # source_suffix = ['.rst', '.md'] 58 | source_suffix = ".rst" 59 | 60 | # The master toctree document. 61 | master_doc = "index" 62 | 63 | # Releases changelog extension 64 | releases_release_uri = "https://github.com/LaurentRDC/npstreams/tree/%s" 65 | releases_issue_uri = "https://github.com/LaurentRDC/npstreams/issues/%s" 66 | 67 | # General information about the project. 68 | project = "npstreams" 69 | copyright = "%d Laurent P. René de Cotret" % year 70 | author = "Laurent P. 
René de Cotret" 71 | 72 | # The version info for the project you're documenting, acts as replacement for 73 | # |version| and |release|, also used in various other places throughout the 74 | # built documents. 75 | # 76 | # The short X.Y version. 77 | version = npstreams.__version__ 78 | # The full version, including alpha/beta/rc tags. 79 | release = version 80 | 81 | # The language for content autogenerated by Sphinx. Refer to documentation 82 | # for a list of supported languages. 83 | # 84 | # This is also used if you do content translation via gettext catalogs. 85 | # Usually you set "language" from the command line for these cases. 86 | language = None 87 | 88 | # List of patterns, relative to source directory, that match files and 89 | # directories to ignore when looking for source files. 90 | # This patterns also effect to html_static_path and html_extra_path 91 | exclude_patterns = [] 92 | exclude_trees = ["_build"] 93 | 94 | # The name of the Pygments (syntax highlighting) style to use. 95 | pygments_style = "sphinx" 96 | 97 | # If true, `todo` and `todoList` produce output, else they produce nothing. 98 | todo_include_todos = True 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | # 106 | html_theme = "sphinx_rtd_theme" 107 | html_theme_path = ["_themes"] 108 | html_sidebars = { 109 | "**": [ 110 | "about.html", 111 | "navigation.html", 112 | "searchbox.html", 113 | "localtoc.html", 114 | "sourcelink.html", 115 | ] 116 | } 117 | # html_show_sourcelink = True 118 | 119 | # Everything intersphinx's to Python. 120 | intersphinx_mapping = {"python": ("https://docs.python.org", None)} 121 | 122 | # Autodoc settings 123 | autodoc_default_flags = ["members", "special-members"] 124 | autoclass_content = "both" 125 | 126 | 127 | def autodoc_skip_member(app, what, name, obj, skip, options): 128 | exclusions = {"__weakref__", "__doc__", "__module__", "__dict__"} 129 | exclude = name in exclusions 130 | return skip or exclude 131 | 132 | 133 | def setup(app): 134 | app.connect("autodoc-skip-member", autodoc_skip_member) 135 | 136 | 137 | doctest_global_setup = """ 138 | import npstreams as ns 139 | """ 140 | 141 | 142 | # Add any paths that contain custom static files (such as style sheets) here, 143 | # relative to this directory. They are copied after the builtin static files, 144 | # so a file named "default.css" will overwrite the builtin "default.css". 145 | html_static_path = [] 146 | 147 | # Suppress the warning about a non-local URI for status shields. 148 | suppress_warnings = ["image.nonlocal_uri"] 149 | 150 | # Enable releases 'unstable prehistory' mode. 
151 | releases_unstable_prehistory = True 152 | -------------------------------------------------------------------------------- /npstreams/linalg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Linear Algebra Functions 4 | ------------------------ 5 | """ 6 | from functools import partial 7 | 8 | import numpy as np 9 | 10 | from .array_stream import array_stream 11 | 12 | 13 | @array_stream 14 | def _ireduce_linalg(arrays, func, **kwargs): 15 | """ 16 | Yield the cumulative reduction of a linear algebra function 17 | """ 18 | arrays = iter(arrays) 19 | first = next(arrays) 20 | second = next(arrays) 21 | 22 | func = partial(func, **kwargs) 23 | 24 | accumulator = func(first, second) 25 | yield accumulator 26 | 27 | for array in arrays: 28 | func(accumulator, array, out=accumulator) 29 | yield accumulator 30 | 31 | 32 | def idot(arrays): 33 | """ 34 | Yields the cumulative array inner product (dot product) of arrays. 35 | 36 | Parameters 37 | ---------- 38 | arrays : iterable 39 | Arrays to be reduced. 40 | 41 | Yields 42 | ------ 43 | online_dot : ndarray 44 | 45 | See Also 46 | -------- 47 | numpy.linalg.multi_dot : Compute the dot product of two or more arrays in a single function call, 48 | while automatically selecting the fastest evaluation order. 49 | """ 50 | yield from _ireduce_linalg(arrays=arrays, func=np.dot) 51 | 52 | 53 | def itensordot(arrays, axes=2): 54 | """ 55 | Yields the cumulative tensor contraction (tensor dot product) of arrays. 56 | 57 | Parameters 58 | ---------- 59 | arrays : iterable 60 | Arrays to be reduced. 61 | axes : int or (2,) array_like 62 | * integer_like: If an int N, sum over the last N axes of a 63 | and the first N axes of b in order. The sizes of the corresponding axes must match. 64 | * (2,) array_like: Or, a list of axes to be summed over, first sequence applying to a, 65 | second to b. Both elements array_like must be of the same length. 66 | 67 | Yields 68 | ------ 69 | online_tensordot : ndarray 70 | 71 | See Also 72 | -------- 73 | numpy.tensordot : Compute the tensordot on two tensors. 74 | """ 75 | yield from _ireduce_linalg(arrays=arrays, func=np.tensordot, axes=axes) 76 | 77 | 78 | def iinner(arrays): 79 | """ 80 | Cumulative inner product of all arrays in a stream. 81 | 82 | Parameters 83 | ---------- 84 | arrays : iterable 85 | Arrays to be reduced. 86 | 87 | Yields 88 | ------ 89 | online_inner : ndarray or scalar 90 | """ 91 | yield from _ireduce_linalg(arrays=arrays, func=np.inner) 92 | 93 | 94 | def ieinsum(arrays, subscripts, **kwargs): 95 | """ 96 | Evaluates the Einstein summation convention on the operands. 97 | 98 | Using the Einstein summation convention, many common multi-dimensional 99 | array operations can be represented in a simple fashion. 100 | 101 | Parameters 102 | ---------- 103 | arrays : iterable 104 | Arrays to be reduced. 105 | subscripts : str 106 | Specifies the subscripts for summation. 107 | dtype : numpy.dtype or None, optional 108 | The type of the yielded array and of the accumulator in which the elements 109 | are combined. The dtype of a is used by default unless a has an integer dtype 110 | of less precision than the default platform integer. In that case, if a is 111 | signed then the platform integer is used while if a is unsigned then an 112 | unsigned integer of the same precision as the platform integer is used. 113 | order : {'C', 'F', 'A', 'K'}, optional 114 | Controls the memory layout of the output.
'C' means it should 115 | be C contiguous. 'F' means it should be Fortran contiguous, 116 | 'A' means it should be 'F' if the inputs are all 'F', 'C' otherwise. 117 | 'K' means it should be as close to the layout of the inputs as 118 | is possible, including arbitrarily permuted axes. 119 | Default is 'K'. 120 | casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional 121 | Controls what kind of data casting may occur. Setting this to 122 | 'unsafe' is not recommended, as it can adversely affect accumulations. 123 | 124 | * 'no' means the data types should not be cast at all. 125 | * 'equiv' means only byte-order changes are allowed. 126 | * 'safe' means only casts which can preserve values are allowed. 127 | * 'same_kind' means only safe casts or casts within a kind, 128 | like float64 to float32, are allowed. 129 | * 'unsafe' means any data conversions may be done. 130 | 131 | Default is 'safe'. 132 | optimize : {False, True, 'greedy', 'optimal'}, optional 133 | Controls if intermediate optimization should occur. No optimization 134 | will occur if False, and True will default to the 'greedy' algorithm. 135 | Also accepts an explicit contraction list from the ``np.einsum_path`` 136 | function. See ``np.einsum_path`` for more details. Default is False. 137 | 138 | Yields 139 | ------ 140 | online_einsum : ndarray 141 | Cumulative Einstein summation 142 | """ 143 | yield from _ireduce_linalg( 144 | arrays=arrays, func=partial(np.einsum, subscripts), **kwargs 145 | ) 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # npstreams 2 | 3 | [![Documentation Build Status](https://readthedocs.org/projects/npstreams/badge/?version=master)](http://npstreams.readthedocs.io) [![PyPI Version](https://img.shields.io/pypi/v/npstreams.svg)](https://pypi.python.org/pypi/npstreams) [![Conda-forge Version](https://img.shields.io/conda/vn/conda-forge/npstreams.svg)](https://anaconda.org/conda-forge/npstreams) [![DOI badge](https://img.shields.io/badge/DOI-10.1186%2Fs40679--018--0060--y-blue)](https://doi.org/10.1186/s40679-018-0060-y) 4 | 5 | npstreams is an open-source Python package for streaming NumPy array 6 | operations. The goal is to provide tested routines that operate on 7 | streams (or generators) of arrays instead of dense arrays. 8 | 9 | Streaming reduction operations (sums, averages, etc.) can be implemented 10 | in constant memory, which in turn allows for easy parallelization. 11 | 12 | This approach has been a huge boon when working with lots of images; the 13 | images are read one-by-one from disk and combined/processed in a 14 | streaming fashion. 15 | 16 | This package is developed in conjunction with other software projects in 17 | the [Siwick research group](http://www.physics.mcgill.ca/siwicklab/). 18 | 19 | ## Motivating Example 20 | 21 | Consider the following snippet to combine 50 images from an iterable 22 | `source`: 23 | 24 | ```python 25 | import numpy as np 26 | 27 | images = np.empty( shape = (2048, 2048, 50) ) 28 | for index, im in enumerate(source): 29 | images[:,:,index] = im 30 | 31 | avg = np.average(images, axis = 2) 32 | ``` 33 | 34 | If the `source` iterable provided 1000 images, the above routine would 35 | not work on most machines. Moreover, what if we want to transform the 36 | images one by one before averaging them? What about looking at the 37 | average while it is being computed?
Let's look at an example: 38 | 39 | ```python 40 | import numpy as np 41 | from npstreams import iaverage 42 | from scipy.misc import imread 43 | 44 | stream = map(imread, list_of_filenames) 45 | averaged = iaverage(stream) 46 | ``` 47 | 48 | At this point, the generators `map` and `iaverage` are 'wired' but 49 | will not compute anything until it is requested. We can watch the 50 | average evolve: 51 | 52 | ```python 53 | import matplotlib.pyplot as plt 54 | for avg in averaged: 55 | plt.imshow(avg); plt.show() 56 | ``` 57 | 58 | We can also use `last` to get at the final average: 59 | 60 | ```python 61 | from npstreams import last 62 | 63 | total = last(averaged) # average of the entire stream 64 | ``` 65 | 66 | ## Streaming Functions 67 | 68 | npstreams comes with some streaming functions built-in. Some examples: 69 | 70 | - Numerics : `isum`, `iprod`, `isub`, etc. 71 | - Statistics : `iaverage` (weighted mean), `ivar` (single-pass 72 | variance), etc. 73 | 74 | More importantly, npstreams gives you all the tools required to build 75 | your own streaming function. All routines are documented in the [API 76 | Reference on readthedocs.io](http://npstreams.readthedocs.io). 77 | 78 | ## Benchmarking 79 | 80 | npstreams provides a function for benchmarking common use cases. 81 | 82 | To run the benchmark with default parameters, from the interpreter: 83 | 84 | ```python 85 | from npstreams import benchmark 86 | benchmark() 87 | ``` 88 | 89 | From a command-line terminal: 90 | 91 | ```bash 92 | python -c 'import npstreams; npstreams.benchmark()' 93 | ``` 94 | 95 | The results will be printed to the screen. 96 | 97 | ## Future Work 98 | 99 | Some of the features I want to implement in this package in the near 100 | future: 101 | 102 | - Optimize the CUDA-enabled routines 103 | - More functions : more streaming functions borrowed from NumPy and 104 | SciPy. 105 | 106 | ## API Reference 107 | 108 | The [API Reference on readthedocs.io](http://npstreams.readthedocs.io) 109 | provides API-level documentation, as well as tutorials. 110 | 111 | ## Installation 112 | 113 | The only requirement is NumPy. To have access to CUDA-enabled routines, 114 | PyCUDA must also be installed. npstreams is available on PyPI; it can be 115 | installed with [pip](https://pip.pypa.io): 116 | 117 | ```bash 118 | python -m pip install npstreams 119 | ``` 120 | 121 | npstreams can also be installed with the conda package manager, from the 122 | conda-forge channel: 123 | 124 | ```bash 125 | conda config --add channels conda-forge 126 | conda install npstreams 127 | ``` 128 | 129 | To install the latest development version from 130 | [Github](https://github.com/LaurentRDC/npstreams): 131 | 132 | ```bash 133 | python -m pip install git+git://github.com/LaurentRDC/npstreams.git 134 | ``` 135 | 136 | Tests can be run using the `pytest` package. 137 | 138 | ## Citations 139 | 140 | If you find this software useful, please consider citing the following 141 | publication: 142 | 143 | > L. P. René de Cotret, M. R. Otto, M. J. Stern, and B. J. Siwick, *An open-source software ecosystem for the interactive exploration of ultrafast electron scattering data*, Advanced Structural and Chemical Imaging 4:11 (2018) [DOI: 10.1186/s40679-018-0060-y.](https://ascimaging.springeropen.com/articles/10.1186/s40679-018-0060-y) 144 | 145 | 146 | ## Support / Report Issues 147 | 148 | All support requests and issue reports should be [filed on Github as an 149 | issue](https://github.com/LaurentRDC/npstreams/issues).
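The built-in streaming functions compose with ordinary Python iterator tools; as a small sketch complementing the `iaverage` example above (the file names are hypothetical), transforming images lazily before reducing them in constant memory:

```python
# Hypothetical sketch: square each array before summing, in constant memory.
import numpy as np
from npstreams import isum, last

filenames = ["im1.npy", "im2.npy", "im3.npy"]  # placeholder file names
stream = map(np.load, filenames)               # arrays are loaded lazily
squared = map(np.square, stream)               # transformed one at a time
total = last(isum(squared, ignore_nan=True))   # final cumulative sum
```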
150 | 151 | ## License 152 | 153 | npstreams is made available under the BSD License, same as NumPy. For 154 | more details, see 155 | [LICENSE.txt](https://github.com/LaurentRDC/npstreams/blob/master/LICENSE.txt). 156 | -------------------------------------------------------------------------------- /docs/making_your_own.rst: -------------------------------------------------------------------------------- 1 | .. include:: references.txt 2 | 3 | .. _making_your_own: 4 | 5 | ******************************************** 6 | Making your own Streaming Reduction Function 7 | ******************************************** 8 | 9 | .. currentmodule:: npstreams 10 | 11 | ============================================ 12 | The :func:`ireduce_ufunc` generator function 13 | ============================================ 14 | 15 | You can assemble your own streaming reduction function from a **binary** NumPy ufunc 16 | using the following generator function: 17 | 18 | .. autofunction:: ireduce_ufunc 19 | 20 | The non-generator version is also available: 21 | 22 | .. autofunction:: reduce_ufunc 23 | 24 | Note that while all NumPy ufuncs have a :meth:`reduce` method, not all of them are useful. 25 | This is why :func:`ireduce_ufunc` and :func:`reduce_ufunc` will only work with **binary** ufuncs, 26 | most of which are listed below. For performance reasons, we further restrict the use of 27 | :func:`ireduce_ufunc` and :func:`reduce_ufunc` to ufuncs that have the same input types 28 | as output types. Therefore, for example, :func:`numpy.greater` cannot be made to work with 29 | :func:`ireduce_ufunc` and :func:`reduce_ufunc`. 30 | 31 | NaNs handling 32 | ------------- 33 | 34 | NumPy ufuncs can have an identity value, that is, a value such that ``ufunc(x1, identity)`` is always ``x1``. For such ufuncs, 35 | :func:`ireduce_ufunc` and :func:`reduce_ufunc` can replace NaNs in the stream with the ufunc's identity value, if ``ignore_nan = True``. 36 | Note that not all ufuncs have an identity value; for example, how would you define the identity value of ``numpy.maximum``? There is no answer. 37 | 38 | .. _numpy_binary_ufuncs: 39 | 40 | =================== 41 | NumPy Binary Ufuncs 42 | =================== 43 | 44 | :func:`ireduce_ufunc` is tested to work on the following binary ufuncs, which are available in `NumPy`_. 45 | 46 | 47 | Arithmetics 48 | ----------- 49 | 50 | .. autosummary:: 51 | :nosignatures: 52 | 53 | numpy.add 54 | numpy.subtract 55 | numpy.multiply 56 | numpy.divide 57 | numpy.logaddexp 58 | numpy.logaddexp2 59 | numpy.true_divide 60 | numpy.floor_divide 61 | numpy.power 62 | numpy.remainder 63 | numpy.mod 64 | numpy.fmod 65 | 66 | Trigonometric functions 67 | ----------------------- 68 | 69 | .. autosummary:: 70 | :nosignatures: 71 | 72 | numpy.arctan2 73 | numpy.hypot 74 | 75 | Bit-twiddling functions 76 | ----------------------- 77 | 78 | .. autosummary:: 79 | :nosignatures: 80 | 81 | numpy.bitwise_and 82 | numpy.bitwise_or 83 | numpy.bitwise_xor 84 | numpy.left_shift 85 | numpy.right_shift 86 | 87 | Comparison functions 88 | -------------------- 89 | 90 | .. autosummary:: 91 | :nosignatures: 92 | 93 | numpy.maximum 94 | numpy.fmax 95 | numpy.minimum 96 | numpy.fmin 97 | 98 | Floating functions 99 | ------------------ 100 | 101 | .. 
autosummary:: 102 | :nosignatures: 103 | 104 | numpy.copysign 105 | numpy.nextafter 106 | numpy.ldexp 107 | 108 | ========================== 109 | Example: Streaming Maximum 110 | ========================== 111 | 112 | Let's create a streaming maximum function for a stream. First, we have to choose 113 | how to handle NaNs; since ``numpy.maximum`` does not have an identity value, we must find 114 | another way. We can proceed as follows: 115 | 116 | * If we want to propagate NaNs, we should use :func:`numpy.maximum` 117 | * If we want to ignore NaNs, we should use :func:`numpy.fmax` 118 | 119 | Both of those functions are binary ufuncs, so we can use :func:`ireduce_ufunc`. Note that any function based 120 | on :func:`ireduce_ufunc` or :func:`reduce_ufunc` will automatically work on streams of numbers thanks to the 121 | :func:`array_stream` decorator. 122 | 123 | Putting it all together:: 124 | 125 | from npstreams import ireduce_ufunc 126 | from numpy import maximum, fmax 127 | 128 | def imax(arrays, axis = -1, ignore_nan = False, **kwargs): 129 | """ 130 | Streaming cumulative maximum along an axis. 131 | 132 | Parameters 133 | ---------- 134 | arrays : iterable 135 | Stream of arrays to be compared. 136 | axis : int or None, optional 137 | Axis along which to compute the maximum. If None, 138 | arrays are flattened before reduction. 139 | ignore_nan : bool, optional 140 | If True, NaNs are ignored. Default is False. 141 | 142 | Yields 143 | ------ 144 | online_max : ndarray 145 | """ 146 | ufunc = fmax if ignore_nan else maximum 147 | yield from ireduce_ufunc(arrays, ufunc, axis = axis, **kwargs) 148 | 149 | This will provide us with a streaming function, meaning that we can look at the progress 150 | as it is being computed. We can also create a function that returns the max of the stream 151 | like :meth:`numpy.ndarray.max()` using the :func:`reduce_ufunc` function:: 152 | 153 | from npstreams import reduce_ufunc 154 | 155 | def smax(*args, **kwargs): # s for stream 156 | """ 157 | Maximum of a stream along an axis. 158 | 159 | Parameters 160 | ---------- 161 | arrays : iterable 162 | Stream of arrays to be compared. 163 | axis : int or None, optional 164 | Axis along which to compute the maximum. If None, 165 | arrays are flattened before reduction. 166 | ignore_nan : bool, optional 167 | If True, NaNs are ignored. Default is False. 168 | 169 | Yields 170 | ------ 171 | max : ndarray 172 | """ 173 | return reduce_ufunc(*args, **kwargs) -------------------------------------------------------------------------------- /npstreams/parallel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Parallelization utilities 4 | ------------------------- 5 | """ 6 | from collections.abc import Sized 7 | from functools import partial, reduce 8 | from multiprocessing import Pool 9 | 10 | from .iter_utils import chunked 11 | 12 | 13 | def preduce(func, iterable, args=None, kwargs=None, processes=1, ntotal=None): 14 | """ 15 | Parallel application of the reduce function, with keyword arguments. 16 | 17 | Parameters 18 | ---------- 19 | func : callable 20 | Function to be applied to every element of `iterable`. 21 | iterable : iterable 22 | Iterable of items to be reduced. Generators are consumed. 23 | args : tuple or None, optional 24 | Positional arguments of `function`. 25 | kwargs : dictionary or None, optional 26 | Keyword arguments of `function`. 27 | processes : int or None, optional 28 | Number of processes to use. 
If `None`, maximal number of processes 29 | is used. Default is one. 30 | ntotal : int or None, optional 31 | If the length of `iterable` is known, but passing `iterable` as a list 32 | would take too much memory, the total length `ntotal` can be specified. This 33 | allows for `preduce` to chunk better. 34 | 35 | Returns 36 | ------- 37 | reduced : object 38 | 39 | Notes 40 | ----- 41 | If `processes` is 1, `preduce` is equivalent to functools.reduce with the 42 | added benefit of using `args` and `kwargs`, but `initializer` is not supported. 43 | """ 44 | if kwargs is None: 45 | kwargs = dict() 46 | 47 | if args is None: 48 | args = tuple() 49 | 50 | func = partial(func, *args, **kwargs) 51 | 52 | if processes == 1: 53 | return reduce(func, iterable) 54 | 55 | with Pool(processes) as pool: 56 | chunksize = 1 57 | if isinstance(iterable, Sized): 58 | chunksize = max(1, int(len(iterable) / pool._processes)) 59 | elif ntotal is not None: 60 | chunksize = max(1, int(ntotal / pool._processes)) 61 | 62 | # Some reductions are order-sensitive 63 | res = pool.imap(partial(reduce, func), tuple(chunked(iterable, chunksize))) 64 | return reduce(func, res) 65 | 66 | 67 | def pmap(func, iterable, args=None, kwargs=None, processes=1, ntotal=None): 68 | """ 69 | Parallel application of a function with keyword arguments. 70 | 71 | Parameters 72 | ---------- 73 | func : callable 74 | Function to be applied to every element of `iterable`. 75 | iterable : iterable 76 | Iterable of items to be mapped. 77 | args : tuple or None, optional 78 | Positional arguments of `function`. 79 | kwargs : dictionary or None, optional 80 | Keyword arguments of `function`. 81 | processes : int or None, optional 82 | Number of processes to use. If `None`, maximal number of processes 83 | is used. Default is one. 84 | ntotal : int or None, optional 85 | If the length of `iterable` is known, but passing `iterable` as a list 86 | would take too much memory, the total length `ntotal` can be specified. This 87 | allows for `pmap` to chunk better. 88 | 89 | Yields 90 | ------ 91 | Mapped values. 92 | 93 | See Also 94 | -------- 95 | pmap_unordered : parallel map that does not preserve order 96 | 97 | Notes 98 | ----- 99 | If `processes` is 1, `pmap` reduces to `map`, with the added benefit 100 | of using `kwargs`. 101 | """ 102 | if kwargs is None: 103 | kwargs = dict() 104 | 105 | if args is None: 106 | args = tuple() 107 | 108 | func = partial(func, *args, **kwargs) 109 | 110 | if processes == 1: 111 | yield from map(func, iterable) 112 | return 113 | 114 | with Pool(processes) as pool: 115 | chunksize = 1 116 | if isinstance(iterable, Sized): 117 | chunksize = max(1, int(len(iterable) / pool._processes)) 118 | elif ntotal is not None: 119 | chunksize = max(1, int(ntotal / pool._processes)) 120 | 121 | yield from pool.imap(func=func, iterable=iterable, chunksize=chunksize) 122 | 123 | 124 | def pmap_unordered(func, iterable, args=None, kwargs=None, processes=1, ntotal=None): 125 | """ 126 | Parallel application of a function with keyword arguments in no particular order. 127 | This can reduce memory usage because results do not need to be buffered to preserve their order. 128 | 129 | Parameters 130 | ---------- 131 | func : callable 132 | Function to be applied to every element of `iterable`. 133 | iterable : iterable 134 | Iterable of items to be mapped. 135 | args : tuple or None, optional 136 | Positional arguments of `function`. 137 | kwargs : dictionary or None, optional 138 | Keyword arguments of `function`.
139 | processes : int or None, optional 140 | Number of processes to use. If `None`, maximal number of processes 141 | is used. Default is one. 142 | ntotal : int or None, optional 143 | If the length of `iterable` is known, but passing `iterable` as a list 144 | would take too much memory, the total length `ntotal` can be specified. This 145 | allows for `pmap` to chunk better. 146 | 147 | Yields 148 | ------ 149 | Mapped values. 150 | 151 | See Also 152 | -------- 153 | pmap : parallel map that preserves order 154 | 155 | Notes 156 | ----- 157 | If `processes` is 1, `pmap_unordered` reduces to `map`, with the added benefit of 158 | of using `kwargs` 159 | """ 160 | if kwargs is None: 161 | kwargs = dict() 162 | 163 | if args is None: 164 | args = tuple() 165 | 166 | func = partial(func, *args, **kwargs) 167 | 168 | if processes == 1: 169 | yield from map(func, iterable) 170 | return 171 | 172 | with Pool(processes) as pool: 173 | chunksize = 1 174 | if isinstance(iterable, Sized): 175 | chunksize = max(1, int(len(iterable) / pool._processes)) 176 | elif ntotal is not None: 177 | chunksize = max(1, int(ntotal / pool._processes)) 178 | 179 | yield from pool.imap_unordered( 180 | func=func, iterable=iterable, chunksize=chunksize 181 | ) 182 | -------------------------------------------------------------------------------- /npstreams/tests/test_reduce.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from npstreams import ireduce_ufunc, preduce_ufunc, last, nan_to_num, reduce_ufunc 6 | import pytest 7 | 8 | # Only testing binary ufuncs that support floats 9 | # i.e. leaving bitwise_* and logical_* behind 10 | # Also, numpy.ldexp takes in ints and floats separately, so 11 | # leave it behind 12 | UFUNCS = ( 13 | np.add, 14 | np.subtract, 15 | np.multiply, 16 | np.divide, 17 | np.logaddexp, 18 | np.logaddexp2, 19 | np.true_divide, 20 | np.floor_divide, 21 | np.power, 22 | np.remainder, 23 | np.mod, 24 | np.fmod, 25 | np.arctan2, 26 | np.hypot, 27 | np.maximum, 28 | np.fmax, 29 | np.minimum, 30 | np.fmin, 31 | np.copysign, 32 | np.nextafter, 33 | ) 34 | 35 | UFUNCS_WITH_IDENTITY = list(filter(lambda u: u.identity is not None, UFUNCS)) 36 | 37 | 38 | def test_ireduce_ufunc_no_side_effects(): 39 | """Test that no arrays in the stream are modified""" 40 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 41 | stack = np.stack(source, axis=-1) 42 | for arr in source: 43 | arr.setflags(write=False) 44 | out = last(ireduce_ufunc(source, np.add)) 45 | 46 | 47 | def test_ireduce_ufunc_single_array(): 48 | """Test ireduce_ufunc on a single array, not a sequence""" 49 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 50 | stack = np.stack(source, axis=-1) 51 | source = np.ones((16, 16), dtype=int) 52 | out = last(ireduce_ufunc(source, np.add, axis=-1)) 53 | assert np.allclose(source, out) 54 | 55 | 56 | def test_ireduce_ufunc_out_parameter(): 57 | """Test that the kwargs ``out`` is correctly passed to reduction function""" 58 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 59 | stack = np.stack(source, axis=-1) 60 | not_out = last(ireduce_ufunc(source, np.add, axis=-1)) 61 | out = np.empty_like(source[0]) 62 | last(ireduce_ufunc(source, ufunc=np.add, out=out)) 63 | 64 | assert np.allclose(not_out, out) 65 | 66 | not_out = last(ireduce_ufunc(source, np.add, axis=2)) 67 | out = np.empty_like(source[0]) 68 | from_out = last(ireduce_ufunc(source, ufunc=np.add, out=out, axis=2)) 
69 | 70 | assert np.allclose(not_out, from_out) 71 | 72 | 73 | def test_ireduce_ufunc_ignore_nan_no_identity(): 74 | """Test ireduce_ufunc on an ufunc with no identity raises 75 | an error for ignore_nan = True""" 76 | source = [np.ones((16, 16), dtype=int) for _ in range(5)] 77 | with pytest.raises(ValueError): 78 | ireduce_ufunc(source, np.maximum, axis=-1, ignore_nan=True) 79 | 80 | 81 | def test_ireduce_ufunc_non_ufunc(): 82 | """Test that ireduce_ufunc raises TypeError when a non-ufunc is passed""" 83 | with pytest.raises(TypeError): 84 | ireduce_ufunc(range(10), ufunc=lambda x: x) 85 | 86 | 87 | def test_ireduce_ufunc_non_binary_ufunc(): 88 | """Test that ireduce_ufunc raises ValueError if non-binary ufunc is used""" 89 | with pytest.raises(ValueError): 90 | ireduce_ufunc(range(10), ufunc=np.absolute) 91 | 92 | 93 | @pytest.mark.parametrize("axis", (0, 1, 2, 3, None)) 94 | def test_ireduce_ufunc_output_shape(axis): 95 | """Test output shape""" 96 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 97 | stack = np.stack(source, axis=-1) 98 | 99 | from_numpy = np.add.reduce(stack, axis=axis) 100 | out = last(ireduce_ufunc(source, np.add, axis=axis)) 101 | assert from_numpy.shape == out.shape 102 | assert np.allclose(out, from_numpy) 103 | 104 | 105 | @pytest.mark.parametrize("axis", (0, 1, 2, 3, None)) 106 | def test_ireduce_ufunc_length(axis): 107 | """Test that the number of elements yielded by ireduce_ufunc is correct""" 108 | 109 | source = (np.zeros((16, 5, 8)) for _ in range(10)) 110 | out = list(ireduce_ufunc(source, np.add, axis=axis)) 111 | assert 10 == len(out) 112 | 113 | 114 | @pytest.mark.parametrize("axis", (0, 1, 2, 3, None)) 115 | def test_ireduce_ufunc_ignore_nan(axis): 116 | """Test that ignore_nan is working""" 117 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 118 | stack = np.stack(source, axis=-1) 119 | 120 | out = last(ireduce_ufunc(source, np.add, axis=axis, ignore_nan=True)) 121 | assert not np.any(np.isnan(out)) 122 | 123 | 124 | def test_preduce_ufunc_trivial(): 125 | """Test preduce_ufunc for a sum of zeroes over two processes""" 126 | stream = [np.zeros((8, 8)) for _ in range(10)] 127 | s = preduce_ufunc(stream, ufunc=np.add, processes=2, ntotal=10) 128 | assert np.allclose(s, np.zeros_like(s)) 129 | 130 | 131 | def test_preduce_ufunc_correctess(): 132 | """Test preduce_ufunc is equivalent to reduce_ufunc for random sums""" 133 | stream = [np.random.random((8, 8)) for _ in range(20)] 134 | s = preduce_ufunc(stream, ufunc=np.add, processes=3, ntotal=10) 135 | assert np.allclose(s, reduce_ufunc(stream, np.add)) 136 | 137 | 138 | # Dynamics generation of tests on binary ufuncs 139 | @pytest.mark.parametrize("ufunc", UFUNCS) 140 | @pytest.mark.parametrize("axis", (0, 1, 2, -1)) 141 | def test_binary_ufunc(ufunc, axis): 142 | """Generate a test to ensure that ireduce_ufunc(..., ufunc, ...) 143 | works as intendent.""" 144 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 145 | stack = np.stack(source, axis=-1) 146 | 147 | def sufunc(arrays, axis=-1): # s for stream 148 | return last(ireduce_ufunc(arrays, ufunc, axis=axis)) 149 | 150 | from_numpy = ufunc.reduce(stack, axis=axis) 151 | from_sufunc = sufunc(source, axis=axis) 152 | assert from_sufunc.shape == from_numpy.shape 153 | assert np.allclose(from_numpy, from_sufunc) 154 | 155 | 156 | @pytest.mark.parametrize("ufunc", UFUNCS_WITH_IDENTITY) 157 | def test_binary_ufunc_ignore_nan(ufunc): 158 | """Generate a test to ensure that ireduce_ufunc(..., ufunc, ...) 
159 | works as intendent with NaNs in stream.""" 160 | 161 | source = [np.random.random((16, 5, 8)) for _ in range(10)] 162 | source[0][0, 0, 0] = np.nan 163 | stack = nan_to_num(np.stack(source, axis=-1), fill_value=ufunc.identity) 164 | 165 | def sufunc(arrays, ignore_nan=False): # s for stream 166 | return last(ireduce_ufunc(arrays, ufunc, axis=1, ignore_nan=True)) 167 | 168 | from_numpy = ufunc.reduce(stack, axis=1) 169 | from_sufunc = sufunc(source) 170 | assert from_numpy.shape == from_sufunc.shape 171 | assert np.allclose(from_numpy, from_sufunc) 172 | -------------------------------------------------------------------------------- /npstreams/iter_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Iterator/Generator utilities 4 | ---------------------------- 5 | """ 6 | from collections import deque 7 | from functools import wraps 8 | from itertools import chain, islice, tee 9 | 10 | 11 | def primed(gen): 12 | """ 13 | Decorator that primes a generator function, i.e. runs the function 14 | until the first ``yield`` statement. Useful in cases where there 15 | are preliminary checks when creating the generator. 16 | """ 17 | 18 | @wraps(gen) 19 | def primed_gen(*args, **kwargs): 20 | generator = gen(*args, **kwargs) 21 | next(generator) 22 | return generator 23 | 24 | return primed_gen 25 | 26 | 27 | @primed 28 | def chunked(iterable, chunksize): 29 | """ 30 | Generator yielding multiple iterables of length 'chunksize'. 31 | 32 | Parameters 33 | ---------- 34 | iterable : iterable 35 | Iterable to be chunked. 36 | chunksize : int 37 | Chunk size. 38 | 39 | Yields 40 | ------ 41 | chunk : iterable 42 | Iterable of size `chunksize`. In special case of iterable not being 43 | divisible by `chunksize`, the last `chunk` will be smaller. 44 | 45 | Raises 46 | ------ 47 | TypeError : if `chunksize` is not an integer. 48 | """ 49 | if not isinstance(chunksize, int): 50 | raise TypeError( 51 | f"Expected `chunksize` to be an integer, but received {chunksize}" 52 | ) 53 | 54 | yield 55 | 56 | iterable = iter(iterable) 57 | 58 | next_chunk = tuple(islice(iterable, chunksize)) 59 | while next_chunk: 60 | yield next_chunk 61 | next_chunk = tuple(islice(iterable, chunksize)) 62 | 63 | 64 | def peek(iterable): 65 | """ 66 | Peek ahead in an iterable. 67 | 68 | Parameters 69 | ---------- 70 | iterable : iterable 71 | 72 | Returns 73 | ------- 74 | first : object 75 | First element of ``iterable`` 76 | stream : iterable 77 | Iterable containing ``first`` and all other elements from ``iterable`` 78 | """ 79 | iterable = iter(iterable) 80 | ahead = next(iterable) 81 | return ahead, chain([ahead], iterable) 82 | 83 | 84 | def itercopy(iterable, copies=2): 85 | """ 86 | Split iterable into 'copies'. Once this is done, the original iterable *should 87 | not* be used again. 88 | 89 | Parameters 90 | ---------- 91 | iterable : iterable 92 | Iterable to be split. Once it is split, the original iterable 93 | should not be used again. 94 | copies : int, optional 95 | Number of copies. Also determines the number of returned iterables. 96 | 97 | Returns 98 | ------- 99 | iter1, iter2, ... : iterable 100 | Copies of ``iterable``. 101 | 102 | Examples 103 | -------- 104 | By rebinding the name of the original iterable, we make sure that it 105 | will never be used again. 
106 | 107 | >>> from npstreams import itercopy 108 | >>> evens = (2*n for n in range(1000)) 109 | >>> evens, evens_copy = itercopy(evens, copies = 2) 110 | 111 | See Also 112 | -------- 113 | itertools.tee : equivalent function 114 | """ 115 | # itercopy is included because documentation of itertools.tee isn't obvious 116 | # to everyone 117 | return tee(iterable, copies) 118 | 119 | 120 | def linspace(start, stop, num, endpoint=True): 121 | """ 122 | Generate linear space. This is sometimes more appropriate than 123 | using `range`. 124 | 125 | Parameters 126 | ---------- 127 | start : float 128 | The starting value of the sequence. 129 | stop : float 130 | The end value of the sequence. 131 | num : int 132 | Number of samples to generate. 133 | endpoint : bool, optional 134 | If True (default), the endpoint is included in the linear space. 135 | 136 | Yields 137 | ------ 138 | val : float 139 | 140 | See also 141 | -------- 142 | numpy.linspace : generate linear space as a dense array. 143 | """ 144 | # If endpoint are to be counted in, 145 | # step does not count the last yield 146 | if endpoint: 147 | num -= 1 148 | 149 | step = (stop - start) / num 150 | 151 | val = start 152 | for _ in range(num): 153 | yield val 154 | val += step 155 | 156 | if endpoint: 157 | yield stop 158 | 159 | 160 | def multilinspace(start, stop, num, endpoint=True): 161 | """ 162 | Generate multilinear space, for joining the values in two iterables. 163 | 164 | Parameters 165 | ---------- 166 | start : iterable of floats 167 | The starting value. This iterable will be consumed. 168 | stop : iterable of floats 169 | The end value. This iterable will be consumed. 170 | num : int 171 | Number of samples to generate. 172 | endpoint : bool, optional 173 | If True (default), the endpoint is included in the linear space. 174 | 175 | Yields 176 | ------ 177 | val : tuple 178 | Tuple of the same length as start and stop 179 | 180 | Examples 181 | -------- 182 | >>> from npstreams import multilinspace 183 | >>> multispace = multilinspace(start = (0, 0), stop = (1, 1), num = 4, endpoint = False) 184 | >>> print(list(multispace)) 185 | [(0, 0), (0.25, 0.25), (0.5, 0.5), (0.75, 0.75)] 186 | 187 | See also 188 | -------- 189 | linspace : generate a linear space between two numbers 190 | """ 191 | start, stop = tuple(start), tuple(stop) 192 | if len(start) != len(stop): 193 | raise ValueError("start and stop must have the same length") 194 | 195 | spaces = tuple( 196 | linspace(a, b, num=num, endpoint=endpoint) for a, b in zip(start, stop) 197 | ) 198 | yield from zip(*spaces) 199 | 200 | 201 | def last(stream): 202 | """ 203 | Retrieve the last item from a stream/iterator, consuming 204 | iterables in the process. If empty stream, a RuntimeError is raised. 205 | """ 206 | # Wonderful idea from itertools recipes 207 | # https://docs.python.org/3.9/library/itertools.html#itertools-recipes 208 | try: 209 | return deque(stream, maxlen=1)[0] 210 | except IndexError: 211 | raise RuntimeError("Empty stream") 212 | 213 | 214 | def cyclic(iterable): 215 | """ 216 | Yields cyclic permutations of an iterable. 217 | 218 | Examples 219 | -------- 220 | >>> from npstreams import cyclic 221 | >>> list(cyclic((1,2,3))) 222 | [(1, 2, 3), (3, 1, 2), (2, 3, 1)] 223 | """ 224 | iterable = tuple(iterable) 225 | n = len(iterable) 226 | yield from (tuple(iterable[i - j] for i in range(n)) for j in range(n)) 227 | 228 | 229 | def length_hint(obj, default=0): 230 | """ 231 | Return an estimate of the number of items in ``obj``. 
232 | 233 | This is useful for presizing containers when building from an 234 | iterable. 235 | 236 | If the object supports len(), the result will be 237 | exact. Otherwise, it may over- or under-estimate by an 238 | arbitrary amount. The result will be an integer >= 0. 239 | 240 | Notes 241 | ----- 242 | Source : https://www.python.org/dev/peps/pep-0424/ 243 | 244 | Examples 245 | -------- 246 | >>> from npstreams import length_hint 247 | >>> length_hint([1,2,3,4,5]) # Should be exact 248 | 5 249 | >>> length_hint(None, default = 15) # Does not implement __length_hint__ 250 | 15 251 | """ 252 | try: 253 | return len(obj) 254 | except TypeError: 255 | try: 256 | get_hint = type(obj).__length_hint__ 257 | except AttributeError: 258 | return default 259 | try: 260 | hint = get_hint(obj) 261 | except TypeError: 262 | return default 263 | if hint is NotImplemented: 264 | return default 265 | if not isinstance(hint, int): 266 | raise TypeError("Length hint must be an integer, not %r" % type(hint)) 267 | if hint < 0: 268 | raise ValueError("__length_hint__() should return >= 0") 269 | return hint 270 | -------------------------------------------------------------------------------- /npstreams/cuda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | CUDA-accelerated streaming operations 4 | ------------------------------------- 5 | """ 6 | from functools import partial 7 | from itertools import repeat 8 | from operator import iadd, imul 9 | from subprocess import run, PIPE 10 | 11 | import numpy as np 12 | 13 | from . import array_stream, itercopy, nan_to_num, peek 14 | 15 | # Determine if 16 | # 1. pycuda is installed; 17 | # 2. pycuda can compile with nvcc 18 | # 3. a GPU is available 19 | 20 | try: 21 | import pycuda.gpuarray as gpuarray 22 | import pycuda.autoinit 23 | except ImportError: 24 | raise ImportError("PyCUDA is not installed. CUDA capabilities are not available.") 25 | else: 26 | import pycuda.driver as driver 27 | from pycuda.compiler import SourceModule 28 | 29 | # Check if nvcc compiler is installed at all 30 | nvcc_installed = run(["nvcc", "-h"], stdout=PIPE).returncode == 0 31 | if not nvcc_installed: 32 | raise ImportError("CUDA compiler `nvcc` not installed.") 33 | 34 | # Check that nvcc is at least set up properly 35 | # For example, if nvcc is installed but C++ compiler is not in path 36 | try: 37 | SourceModule("") 38 | except driver.CompileError: 39 | raise ImportError("CUDA compiler `nvcc` is not properly set up.") 40 | 41 | if driver.Device.count() == 0: 42 | raise ImportError("No GPU is available.") 43 | 44 | 45 | @array_stream 46 | def cuda_inplace_reduce(arrays, operator, dtype=None, ignore_nan=False, identity=0): 47 | """ 48 | Inplace reduce on GPU arrays. 49 | 50 | Parameters 51 | ---------- 52 | arrays : iterable 53 | Arrays to be reduced. 54 | operator : callable 55 | Callable of two arguments. This operator should operate in-place, storing the results into 56 | the buffer of the first argument, e.g. operator.iadd 57 | dtype : numpy.dtype, optional 58 | Arrays of the stream are cast to this dtype before reduction. 59 | ignore_nan : bool, optional 60 | If True, NaNs are replaced with ``identity``. Default is propagation of NaNs. 61 | identity : float, optional 62 | If ``ignore_nan = True``, NaNs are replaced with this value. 
63 | 64 | Returns 65 | ------- 66 | out : ndarray 67 | """ 68 | # No need to cast all arrays if ``dtype`` is the same 69 | # type as the stream 70 | first, arrays = peek(arrays) 71 | if (dtype is not None) and (first.dtype != dtype): 72 | arrays = map(lambda arr: arr.astype(dtype), arrays) 73 | 74 | if ignore_nan: 75 | arrays = map(partial(nan_to_num, fill_value=identity), arrays) 76 | 77 | acc_gpu = gpuarray.to_gpu(next(arrays)) # Accumulator 78 | arr_gpu = gpuarray.empty_like(acc_gpu) # GPU memory location for each array 79 | for arr in arrays: 80 | arr_gpu.set(arr) 81 | operator(acc_gpu, arr_gpu) 82 | 83 | return acc_gpu.get() 84 | 85 | 86 | def csum(arrays, dtype=None, ignore_nan=False): 87 | """ 88 | CUDA-enabled sum of stream of arrays. Arrays are summed along 89 | the streaming axis for performance reasons. 90 | 91 | Parameters 92 | ---------- 93 | arrays : iterable 94 | Arrays to be summed. 95 | ignore_nan : bool, optional 96 | If True, NaNs are ignored. Default is propagation of NaNs. 97 | 98 | Returns 99 | ------- 100 | cuda_sum : ndarray 101 | 102 | See Also 103 | -------- 104 | isum : streaming sum of array elements, possibly along different axes 105 | """ 106 | return cuda_inplace_reduce( 107 | arrays, operator=iadd, dtype=dtype, ignore_nan=ignore_nan, identity=0 108 | ) 109 | 110 | 111 | def cprod(arrays, dtype=None, ignore_nan=False): 112 | """ 113 | CUDA-enabled product of a stream of arrays. Arrays are multiplied 114 | along the streaming axis for performance reasons. 115 | 116 | Parameters 117 | ---------- 118 | arrays : iterable 119 | Arrays to be multiplied. 120 | dtype : numpy.dtype, optional 121 | The type of the yielded array and of the accumulator in which the elements 122 | are summed. The dtype of a is used by default unless a has an integer dtype 123 | of less precision than the default platform integer. In that case, if a is 124 | signed then the platform integer is used while if a is unsigned then an 125 | unsigned integer of the same precision as the platform integer is used. 126 | ignore_nan : bool, optional 127 | If True, NaNs are ignored. Default is propagation of NaNs. 128 | 129 | Yields 130 | ------ 131 | online_prod : ndarray 132 | """ 133 | return cuda_inplace_reduce( 134 | arrays, operator=imul, dtype=dtype, ignore_nan=ignore_nan, identity=1 135 | ) 136 | 137 | 138 | @array_stream 139 | def cmean(arrays, ignore_nan=False): 140 | """ 141 | CUDA-enabled mean of stream of arrays (i.e. unweighted average). Arrays are averaged 142 | along the streaming axis for performance reasons. 143 | 144 | Parameters 145 | ---------- 146 | arrays : iterable of ndarrays 147 | Arrays to be averaged. This iterable can also a generator. 148 | ignore_nan : bool, optional 149 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 
150 | 151 | Returns 152 | ------- 153 | cuda_mean : ndarray 154 | 155 | See also 156 | -------- 157 | caverage : CUDA-enabled weighted average 158 | imean : streaming mean of arrays, possibly along different axes 159 | """ 160 | first, arrays = peek(arrays) 161 | 162 | # Need to know which arrays have NaNs, and build the weights stream accordingly 163 | if ignore_nan: 164 | arrays, arrays2 = itercopy(arrays) 165 | weights = map( 166 | lambda arr: np.logical_not(np.isnan(arr)).astype(first.dtype), arrays2 167 | ) 168 | arrays = map(np.nan_to_num, arrays) 169 | return caverage(arrays, weights, ignore_nan=False) 170 | 171 | accumulator = gpuarray.to_gpu(next(arrays)) 172 | array_gpu = gpuarray.empty_like(accumulator) 173 | num_arrays = 1 174 | for arr in arrays: 175 | num_arrays += 1 176 | array_gpu.set(arr) 177 | accumulator += array_gpu 178 | 179 | return accumulator.get() / num_arrays 180 | 181 | 182 | @array_stream 183 | def caverage(arrays, weights=None, ignore_nan=False): 184 | """ 185 | CUDA-enabled average of stream of arrays, possibly weighted. Arrays are averaged 186 | along the streaming axis for performance reasons. 187 | 188 | Parameters 189 | ---------- 190 | arrays : iterable of ndarrays 191 | Arrays to be averaged. This iterable can also be a generator. 192 | weights : iterable of ndarray, iterable of floats, or None, optional 193 | Iterable of weights associated with the values in each item of `arrays`. 194 | Each value in an element of `arrays` contributes to the average 195 | according to its associated weight. The weights array can either be a float 196 | or an array of the same shape as any element of `arrays`. If weights=None, 197 | then all data in each element of `arrays` are assumed to have a weight equal to one. 198 | ignore_nan : bool, optional 199 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 200 | 201 | Returns 202 | ------- 203 | cuda_avg : ndarray 204 | 205 | See also 206 | -------- 207 | iaverage : streaming weighted average, possibly along different axes 208 | """ 209 | if weights is None: 210 | return cmean(arrays, ignore_nan) 211 | 212 | first, arrays = peek(arrays) 213 | 214 | # We make sure that weights is always an array 215 | # This simplifies the handling of NaNs.
216 | if weights is None: 217 | weights = repeat(1) 218 | weights = map(partial(np.broadcast_to, shape=first.shape), weights) 219 | weights = map( 220 | lambda arr: arr.astype(first.dtype), weights 221 | ) # Won't work without this 222 | 223 | # Need to know which array has NaNs, and modify the weights stream accordingly 224 | if ignore_nan: 225 | arrays, arrays2 = itercopy(arrays) 226 | weights = map( 227 | lambda arr, wgt: np.logical_not(np.isnan(arr)) * wgt, arrays2, weights 228 | ) 229 | arrays = map(np.nan_to_num, arrays) 230 | 231 | first = next(arrays) 232 | fst_wgt = next(weights) 233 | 234 | arr_gpu = gpuarray.to_gpu(first * fst_wgt) 235 | wgt_gpu = gpuarray.to_gpu(fst_wgt) 236 | for arr, wgt in zip(arrays, weights): 237 | arr_gpu += gpuarray.to_gpu(arr) * gpuarray.to_gpu(wgt) 238 | wgt_gpu += gpuarray.to_gpu(wgt) 239 | 240 | arr_gpu /= wgt_gpu 241 | return arr_gpu.get() 242 | -------------------------------------------------------------------------------- /npstreams/tests/test_numerics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from random import randint, random 4 | 5 | import numpy as np 6 | 7 | from npstreams import isum, iprod, last, isub, iany, iall, prod 8 | from npstreams import sum as nssum # avoiding name clashes 9 | import pytest 10 | 11 | 12 | def test_isum_trivial(): 13 | """Test a sum of zeros""" 14 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 15 | summed = last(isum(source)) 16 | assert np.allclose(summed, np.zeros_like(summed)) 17 | 18 | 19 | def test_isum_ignore_nans(): 20 | """Test a sum of zeros with NaNs sprinkled""" 21 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 22 | source.append(np.full((16,), fill_value=np.nan)) 23 | summed = last(isum(source, ignore_nan=True)) 24 | assert np.allclose(summed, np.zeros_like(summed)) 25 | 26 | 27 | def test_isum_length(): 28 | """Test that the number of yielded elements is the same as source""" 29 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 30 | summed = list(isum(source, axis=0)) 31 | assert 10 == len(summed) 32 | 33 | 34 | def test_isum_dtype(): 35 | """Test a sum of floating zeros with an int accumulator""" 36 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 37 | summed = last(isum(source, dtype=int)) 38 | assert np.allclose(summed, np.zeros_like(summed)) 39 | assert summed.dtype == int 40 | 41 | 42 | def test_isum_axis(): 43 | """Test that isum(axis = 0) yields 0d arrays""" 44 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 45 | 46 | summed = last(isum(source, axis=0)) 47 | assert np.allclose(summed, np.zeros_like(summed)) 48 | 49 | summed = last(isum(source, axis=None)) 50 | assert np.allclose(summed, 0) 51 | 52 | 53 | def test_isum_return_shape(): 54 | """Test that the shape of output is as expected""" 55 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 56 | 57 | summed = last(isum(source, axis=0)) 58 | assert summed.shape == (1, 10) 59 | 60 | 61 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 62 | def test_isum_against_numpy(axis): 63 | """Test that isum() returns the same as numpy.sum() for various axis inputs""" 64 | 65 | stream = [np.random.random((16, 16)) for _ in range(10)] 66 | stack = np.dstack(stream) 67 | 68 | from_numpy = np.sum(stack, axis=axis) 69 | from_isum = last(isum(stream, axis=axis)) 70 | assert np.allclose(from_isum, from_numpy) 71 | 72 | 73 | def test_sum_trivial(): 74 | """Test a sum of zeros""" 75 | source = [np.zeros((16,), 
dtype=float) for _ in range(10)] 76 | summed = nssum(source) 77 | assert np.allclose(summed, np.zeros_like(summed)) 78 | 79 | 80 | def test_sum_ignore_nans(): 81 | """Test a sum of zeros with NaNs sprinkled""" 82 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 83 | source.append(np.full((16,), fill_value=np.nan)) 84 | summed = nssum(source, ignore_nan=True) 85 | assert np.allclose(summed, np.zeros_like(summed)) 86 | 87 | 88 | def test_sum_dtype(): 89 | """Test a sum of floating zeros with an int accumulator""" 90 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 91 | summed = nssum(source, dtype=int) 92 | assert np.allclose(summed, np.zeros_like(summed)) 93 | assert summed.dtype == int 94 | 95 | 96 | def test_sum_axis(): 97 | """Test that isum(axis = 0) yields 0d arrays""" 98 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 99 | 100 | summed = nssum(source, axis=0) 101 | assert np.allclose(summed, np.zeros_like(summed)) 102 | 103 | summed = nssum(source, axis=None) 104 | assert np.allclose(summed, 0) 105 | 106 | 107 | def test_sum_return_shape(): 108 | """Test that the shape of output is as expected""" 109 | source = [np.zeros((16,), dtype=float) for _ in range(10)] 110 | 111 | summed = nssum(source, axis=0) 112 | assert summed.shape == (1, 10) 113 | 114 | 115 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 116 | def test_sum_against_numpy(axis): 117 | """Test that isum() returns the same as numpy.sum() for various axis inputs""" 118 | 119 | stream = [np.random.random((16, 16)) for _ in range(10)] 120 | stack = np.dstack(stream) 121 | 122 | from_numpy = np.sum(stack, axis=axis) 123 | from_sum = nssum(stream, axis=axis) 124 | assert np.allclose(from_sum, from_numpy) 125 | 126 | 127 | def test_iprod_trivial(): 128 | """Test a product of ones""" 129 | source = [np.ones((16,), dtype=float) for _ in range(10)] 130 | product = last(iprod(source)) 131 | assert np.allclose(product, np.ones_like(product)) 132 | 133 | 134 | def test_iprod_ignore_nans(): 135 | """Test that NaNs are ignored.""" 136 | source = [np.ones((16,), dtype=float) for _ in range(10)] 137 | source.append(np.full_like(source[0], np.nan)) 138 | product = last(iprod(source, ignore_nan=True)) 139 | assert np.allclose(product, np.ones_like(product)) 140 | 141 | 142 | def test_iprod_dtype(): 143 | """Test that dtype argument is working""" 144 | source = [np.ones((16,), dtype=float) for _ in range(10)] 145 | product = last(iprod(source, dtype=int)) 146 | assert np.allclose(product, np.ones_like(product)) 147 | assert product.dtype == int 148 | 149 | 150 | def test_iprod_axis(): 151 | """Test that iprod(axis = 0) yields 0d arrays""" 152 | source = [np.ones((16,), dtype=float) for _ in range(10)] 153 | 154 | summed = last(iprod(source, axis=0)) 155 | assert np.all(summed == 1) 156 | 157 | summed = last(iprod(source, axis=None)) 158 | assert np.allclose(summed, np.ones_like(summed)) 159 | 160 | 161 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 162 | def test_iprod_against_numpy(axis): 163 | """Test that iprod() returns the same as numpy.prod() for various axis inputs""" 164 | 165 | stream = [np.random.random((16, 16)) for _ in range(10)] 166 | stack = np.dstack(stream) 167 | 168 | from_numpy = np.prod(stack, axis=axis) 169 | from_stream = last(iprod(stream, axis=axis)) 170 | assert np.allclose(from_stream, from_numpy) 171 | 172 | 173 | def test_prod_trivial(): 174 | """Test a product of ones""" 175 | source = [np.ones((16,), dtype=float) for _ in range(10)] 176 | product = prod(source) 177 | 
assert np.allclose(product, np.ones_like(product)) 178 | 179 | 180 | def test_prod_ignore_nans(): 181 | """Test that NaNs are ignored.""" 182 | source = [np.ones((16,), dtype=float) for _ in range(10)] 183 | source.append(np.full_like(source[0], np.nan)) 184 | product = prod(source, ignore_nan=True) 185 | assert np.allclose(product, np.ones_like(product)) 186 | 187 | 188 | def test_prod_dtype(): 189 | """Test that dtype argument is working""" 190 | source = [np.ones((16,), dtype=float) for _ in range(10)] 191 | product = prod(source, dtype=int) 192 | assert np.allclose(product, np.ones_like(product)) 193 | assert product.dtype == int 194 | 195 | 196 | def test_prod_axis(): 197 | """Test that the axis parameter of prod is handled correctly""" 198 | source = [np.ones((16,), dtype=float) for _ in range(10)] 199 | 200 | summed = prod(source, axis=0) 201 | assert np.all(summed == 1) 202 | 203 | summed = prod(source, axis=None) 204 | assert np.allclose(summed, np.ones_like(summed)) 205 | 206 | 207 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 208 | def test_prod_against_numpy(axis): 209 | """Test that prod() returns the same as numpy.prod() for various axis inputs""" 210 | 211 | stream = [np.random.random((16, 16)) for _ in range(10)] 212 | stack = np.dstack(stream) 213 | 214 | from_numpy = np.prod(stack, axis=axis) 215 | from_stream = prod(stream, axis=axis) 216 | assert np.allclose(from_stream, from_numpy) 217 | 218 | 219 | @pytest.mark.parametrize("axis", (0, 1, 2)) 220 | def test_isub_against_numpy(axis): 221 | """Test against numpy.subtract.reduce""" 222 | stream = [np.random.random((8, 16, 2)) for _ in range(11)] 223 | stack = np.stack(stream, axis=-1) 224 | 225 | from_numpy = np.subtract.reduce(stack, axis=axis) 226 | from_stream = last(isub(stream, axis=axis)) 227 | assert np.allclose(from_numpy, from_stream) 228 | 229 | 230 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 231 | def test_iall_against_numpy(axis): 232 | """Test iall against numpy.all""" 233 | stream = [np.zeros((8, 16, 2)) for _ in range(11)] 234 | stream[3][3, 0, 1] = 1 # so that np.all(axis = None) evaluates to False 235 | stack = np.stack(stream, axis=-1) 236 | 237 | from_numpy = np.all(stack, axis=axis) 238 | from_stream = last(iall(stream, axis=axis)) 239 | assert np.allclose(from_numpy, from_stream) 240 | 241 | 242 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 243 | def test_iany_against_numpy(axis): 244 | """Test iany against numpy.any""" 245 | stream = [np.zeros((8, 16, 2)) for _ in range(11)] 246 | stream[3][3, 0, 1] = 1 # so that np.any(axis = None) evaluates to True 247 | stack = np.stack(stream, axis=-1) 248 | 249 | from_numpy = np.any(stack, axis=axis) 250 | from_stream = last(iany(stream, axis=axis)) 251 | assert np.allclose(from_numpy, from_stream) 252 | -------------------------------------------------------------------------------- /npstreams/benchmarks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Reliably benchmarking npstreams performance. 4 | """ 5 | import inspect 6 | import sys 7 | import timeit 8 | from collections import namedtuple 9 | from contextlib import redirect_stdout 10 | from functools import partial 11 | from shutil import get_terminal_size 12 | 13 | import numpy as np 14 | 15 | from .
import __version__ 16 | from .reduce import _check_binary_ufunc 17 | 18 | UFUNC_SETUP = """ 19 | from npstreams import reduce_ufunc, stack 20 | import numpy as np 21 | from numpy import {ufunc.__name__} 22 | 23 | np.random.seed(42056) 24 | 25 | def stream(): 26 | return (np.random.random({shape}) for _ in range(10)) 27 | """ 28 | 29 | FUNC_SETUP = """ 30 | from npstreams import stack 31 | import numpy as np 32 | from numpy import {func.__name__} as np_{func.__name__} 33 | from npstreams import {func.__name__} as ns_{func.__name__} 34 | 35 | np.random.seed(42056) 36 | 37 | def stream(): 38 | return (np.random.random({shape}) for _ in range(10)) 39 | """ 40 | 41 | BenchmarkResults = namedtuple( 42 | "BenchmarkResults", field_names=["numpy_time", "npstreams_time", "shape"] 43 | ) 44 | 45 | 46 | def autotimeit(statement, setup="pass", repeat=3): 47 | """ 48 | Time a statement, automatically determining the number of times to 49 | run the statement so that the total execution time is not too short. 50 | 51 | .. versionadded:: 1.5.2 52 | 53 | Parameters 54 | ---------- 55 | statement : string 56 | Statement to time. The statement will be executed after the `setup` statement. 57 | setup : string, optional 58 | Setup statement executed before timing starts. 59 | repeat : int, optional 60 | Number of timing repetitions to execute. 61 | 62 | Returns 63 | ------- 64 | time : float 65 | Minimal time per execution of `statement` [seconds]. 66 | """ 67 | timer = timeit.Timer(stmt=statement, setup=setup) 68 | number, _ = timer.autorange() 69 | return min(timer.repeat(repeat=repeat, number=number)) / number 70 | 71 | 72 | def benchmark( 73 | funcs=[np.average, np.mean, np.std, np.sum, np.prod], 74 | ufuncs=[np.add, np.multiply, np.power, np.true_divide, np.mod], 75 | shapes=[(4, 4), (8, 8), (16, 16), (64, 64)], 76 | file=None, 77 | ): 78 | """ 79 | Benchmark npstreams against numpy and print the results. 80 | 81 | There are two categories of benchmarks. The first category compares NumPy functions against 82 | npstreams versions of the same functions. The second category compares NumPy universal functions 83 | against dynamically-generated npstreams versions of those same universal functions. 84 | 85 | All benchmarks compare a reduction operation on a stream of arrays of varying sizes. The sequence length is fixed. 86 | 87 | .. versionadded:: 1.5.2 88 | 89 | Parameters 90 | ---------- 91 | funcs : iterable of NumPy functions, optional 92 | NumPy functions to compare. An equivalent must exist in npstreams, e.g. `np.average` and `npstreams.average`. 93 | Functions without equivalents will be skipped. 94 | ufuncs : iterable of NumPy ufunc, optional 95 | NumPy universal functions to compare. Invalid ufuncs (e.g. non-binary ufuncs) will be skipped. 96 | shapes : iterable of tuples, optional 97 | Shapes of arrays to test. Streams of random numbers will be generated with arrays of those shapes. 98 | The sequence lengths are fixed. 99 | file : file-like or None, optional 100 | File to which the benchmark results will be written. If None, sys.stdout will be used.
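
Examples
--------
A minimal usage sketch (the doctest is skipped because timing results vary
between machines); any of the keyword arguments documented above can be
narrowed down to shorten the run:

>>> import numpy as np
>>> from npstreams.benchmarks import benchmark
>>> benchmark(funcs=[np.sum], ufuncs=[np.add], shapes=[(8, 8)])  # doctest: +SKIP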
101 | """ 102 | # Preliminaries 103 | console_width = min(get_terminal_size().columns, 80) 104 | func_test_name = "numpy.{f.__name__} vs npstreams.{f.__name__}".format 105 | ufunc_test_name = ( 106 | "numpy.{f.__name__} vs npstreams.reduce_ufunc(numpy.{f.__name__}, ...)".format 107 | ) 108 | 109 | # Determine justification based on maximal shape functions 110 | sh_just = max(map(lambda s: len(str(s)), shapes)) + 10 111 | 112 | # To make it easy to either write the results in a file or print to stdout, 113 | # We actually redirect stdout. 114 | if file is None: 115 | file = sys.stdout 116 | 117 | with redirect_stdout(file): 118 | # Start benchmarks -------------------------------------------------------- 119 | print( 120 | "".ljust(console_width, "*"), 121 | "npstreams performance benchmark".upper().center(console_width), 122 | "", 123 | " npstreams".ljust(15) + f" {__version__}", 124 | " NumPy".ljust(15) + f" {np.__version__}", 125 | "", 126 | " Speedup is NumPy time divided by npstreams time (Higher is better)", 127 | "".ljust(console_width, "*"), 128 | sep="\n", 129 | ) 130 | 131 | # Determine valid ufuncs and funcs first ---------------------------------- 132 | valid_ufuncs = comparable_ufuncs(ufuncs, file) 133 | valid_funcs = comparable_funcs(funcs, file) 134 | 135 | # Benchmarking functions -------------------------------------------------- 136 | for func in sorted(valid_funcs, key=lambda fn: fn.__name__): 137 | print(func_test_name(f=func).center(console_width), "\n") 138 | 139 | for (np_time, ns_time, shape) in benchmark_func(func, shapes): 140 | speedup = np_time / ns_time 141 | print( 142 | " ", 143 | f"shape = {shape}".ljust(sh_just), 144 | f"speedup = {speedup:.4f}x", 145 | ) 146 | 147 | print("".ljust(console_width, "-")) 148 | 149 | # Benchmarking universal functions ---------------------------------------- 150 | for ufunc in sorted(valid_ufuncs, key=lambda fn: fn.__name__): 151 | print(ufunc_test_name(f=ufunc).center(console_width), "\n") 152 | 153 | for (np_time, ns_time, shape) in benchmark_ufunc(ufunc, shapes): 154 | speedup = np_time / ns_time 155 | print( 156 | " ", 157 | f"shape = {shape}".ljust(sh_just), 158 | f"speedup = {speedup:.4f}x", 159 | ) 160 | 161 | print("".ljust(console_width, "-")) 162 | 163 | 164 | def benchmark_ufunc(ufunc, shapes): 165 | """ 166 | Compare the running time between a NumPy ufunc and the npstreams equivalent. 167 | 168 | Parameters 169 | ---------- 170 | ufunc : NumPy ufunc 171 | 172 | shapes : iterable of tuples, optional 173 | Shapes of arrays to test. Streams of random numbers will be generated with arrays of those shapes. 174 | The sequence lengths are fixed. 175 | 176 | Yields 177 | ------ 178 | results : BenchmarkResults 179 | """ 180 | for shape in shapes: 181 | 182 | numpy_statement = f"{ufunc.__name__}.reduce(stack(stream()), axis = -1)" 183 | npstreams_statement = f"reduce_ufunc(stream(), {ufunc.__name__}, axis = -1)" 184 | 185 | with np.errstate(invalid="ignore"): 186 | np_time = autotimeit( 187 | numpy_statement, UFUNC_SETUP.format(ufunc=ufunc, shape=shape) 188 | ) 189 | ns_time = autotimeit( 190 | npstreams_statement, UFUNC_SETUP.format(ufunc=ufunc, shape=shape) 191 | ) 192 | 193 | yield BenchmarkResults(np_time, ns_time, shape) 194 | 195 | 196 | def benchmark_func(func, shapes): 197 | """ 198 | Compare the running time between a NumPy func and the npstreams equivalent. 199 | 200 | Parameters 201 | ---------- 202 | func : NumPy func 203 | 204 | shapes : iterable of tuples, optional 205 | Shapes of arrays to test. 
Streams of random numbers will be generated with arrays of those shapes. 206 | The sequence lengths are fixed. 207 | 208 | Yields 209 | ------ 210 | results : BenchmarkResults 211 | """ 212 | for shape in shapes: 213 | 214 | numpy_statement = f"np_{func.__name__}(stack(stream()), axis = -1)" 215 | npstreams_statement = f"ns_{func.__name__}(stream(), axis = -1)" 216 | 217 | with np.errstate(invalid="ignore"): 218 | np_time = autotimeit( 219 | numpy_statement, FUNC_SETUP.format(func=func, shape=shape) 220 | ) 221 | ns_time = autotimeit( 222 | npstreams_statement, FUNC_SETUP.format(func=func, shape=shape) 223 | ) 224 | 225 | yield BenchmarkResults(np_time, ns_time, shape) 226 | 227 | 228 | def comparable_ufuncs(ufuncs, file): 229 | """ 230 | Yields ufuncs that can be compared between numpy and npstreams. 231 | 232 | Parameters 233 | ---------- 234 | ufuncs : iterable of NumPy ufunc 235 | NumPy ufuncs to check. Ufuncs that cannot be compared will be skipped. 236 | 237 | Yields 238 | ------ 239 | ufunc : callable 240 | NumPy ufuncs that can be compared with npstreams. 241 | """ 242 | for ufunc in ufuncs: 243 | if not isinstance(ufunc, np.ufunc): 244 | print( 245 | f"Skipping function {ufunc.__name__} as it is not a NumPy Universal Function" 246 | ) 247 | continue 248 | 249 | try: 250 | _check_binary_ufunc(ufunc) 251 | except ValueError: 252 | print( 253 | f"Skipping function {ufunc.__name__} as it is not a valid binary ufunc" 254 | ) 255 | else: 256 | yield ufunc 257 | 258 | 259 | def comparable_funcs(funcs, file): 260 | """ 261 | Yields NumPy functions that have npstreams equivalents. 262 | 263 | Parameters 264 | ---------- 265 | ufuncs : iterable of NumPy functions 266 | NumPy funcs to check. 267 | 268 | Yields 269 | ------ 270 | ufunc : callable 271 | NumPy funcs that have npstreams equivalents. 272 | """ 273 | import npstreams 274 | 275 | npstreams_functions = set( 276 | name for name, value in inspect.getmembers(npstreams, inspect.isfunction) 277 | ) 278 | for func in funcs: 279 | if func.__name__ not in npstreams_functions: 280 | print( 281 | f"Skipping function {func.__name__} as there is no npstreams equivalent" 282 | ) 283 | else: 284 | yield func 285 | 286 | 287 | if __name__ == "__main__": 288 | benchmark() 289 | -------------------------------------------------------------------------------- /npstreams/numerics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Numerics Functions 4 | ------------------ 5 | """ 6 | import numpy as np 7 | 8 | from .reduce import ireduce_ufunc, reduce_ufunc 9 | 10 | 11 | def isum(arrays, axis=-1, dtype=None, ignore_nan=False): 12 | """ 13 | Streaming sum of array elements. 14 | 15 | Parameters 16 | ---------- 17 | arrays : iterable 18 | Arrays to be summed. 19 | axis : int or None, optional 20 | Reduction axis. Default is to sum the arrays in the stream as if 21 | they had been stacked along a new axis, then sum along this new axis. 22 | If None, arrays are flattened before summing. If `axis` is an int larger that 23 | the number of dimensions in the arrays of the stream, arrays are summed 24 | along the new axis. 25 | dtype : numpy.dtype, optional 26 | The type of the yielded array and of the accumulator in which the elements 27 | are summed. The dtype of a is used by default unless a has an integer dtype 28 | of less precision than the default platform integer. 
In that case, if a is 29 | signed then the platform integer is used while if a is unsigned then an 30 | unsigned integer of the same precision as the platform integer is used. 31 | ignore_nan : bool, optional 32 | If True, NaNs are ignored. Default is propagation of NaNs. 33 | 34 | Yields 35 | ------ 36 | online_sum : ndarray 37 | """ 38 | yield from ireduce_ufunc( 39 | arrays, ufunc=np.add, axis=axis, ignore_nan=ignore_nan, dtype=dtype 40 | ) 41 | 42 | 43 | def sum(arrays, axis=-1, dtype=None, ignore_nan=False): 44 | """ 45 | Sum of arrays in a stream. 46 | 47 | Parameters 48 | ---------- 49 | arrays : iterable 50 | Arrays to be summed. 51 | axis : int or None, optional 52 | Reduction axis. Default is to sum the arrays in the stream as if 53 | they had been stacked along a new axis, then sum along this new axis. 54 | If None, arrays are flattened before summing. If `axis` is an int larger that 55 | the number of dimensions in the arrays of the stream, arrays are summed 56 | along the new axis. 57 | dtype : numpy.dtype, optional 58 | The type of the yielded array and of the accumulator in which the elements 59 | are summed. The dtype of a is used by default unless a has an integer dtype 60 | of less precision than the default platform integer. In that case, if a is 61 | signed then the platform integer is used while if a is unsigned then an 62 | unsigned integer of the same precision as the platform integer is used. 63 | ignore_nan : bool, optional 64 | If True, NaNs are ignored. Default is propagation of NaNs. 65 | 66 | Returns 67 | ------- 68 | sum : ndarray 69 | """ 70 | return reduce_ufunc( 71 | arrays, ufunc=np.add, axis=axis, dtype=dtype, ignore_nan=ignore_nan 72 | ) 73 | 74 | 75 | def iprod(arrays, axis=-1, dtype=None, ignore_nan=False): 76 | """ 77 | Streaming product of array elements. 78 | 79 | Parameters 80 | ---------- 81 | arrays : iterable 82 | Arrays to be multiplied. 83 | axis : int or None, optional 84 | Reduction axis. Default is to multiply the arrays in the stream as if 85 | they had been stacked along a new axis, then multiply along this new axis. 86 | If None, arrays are flattened before multiplication. If `axis` is an int larger that 87 | the number of dimensions in the arrays of the stream, arrays are multiplied 88 | along the new axis. 89 | dtype : numpy.dtype, optional 90 | The type of the yielded array and of the accumulator in which the elements 91 | are summed. The dtype of a is used by default unless a has an integer dtype 92 | of less precision than the default platform integer. In that case, if a is 93 | signed then the platform integer is used while if a is unsigned then an 94 | unsigned integer of the same precision as the platform integer is used. 95 | ignore_nan : bool, optional 96 | If True, NaNs are ignored. Default is propagation of NaNs. 97 | 98 | Yields 99 | ------ 100 | online_prod : ndarray 101 | """ 102 | yield from ireduce_ufunc( 103 | arrays, ufunc=np.multiply, axis=axis, dtype=dtype, ignore_nan=ignore_nan 104 | ) 105 | 106 | 107 | def prod(arrays, axis=-1, dtype=None, ignore_nan=False): 108 | """ 109 | Product of arrays in a stream. 110 | 111 | Parameters 112 | ---------- 113 | arrays : iterable 114 | Arrays to be multiplied. 115 | axis : int or None, optional 116 | Reduction axis. Default is to multiply the arrays in the stream as if 117 | they had been stacked along a new axis, then multiply along this new axis. 118 | If None, arrays are flattened before multiplication. 
If `axis` is an int larger than 119 | the number of dimensions in the arrays of the stream, arrays are multiplied 120 | along the new axis. 121 | dtype : numpy.dtype, optional 122 | The type of the yielded array and of the accumulator in which the elements 123 | are multiplied. The dtype of a is used by default unless a has an integer dtype 124 | of less precision than the default platform integer. In that case, if a is 125 | signed then the platform integer is used while if a is unsigned then an 126 | unsigned integer of the same precision as the platform integer is used. 127 | ignore_nan : bool, optional 128 | If True, NaNs are ignored. Default is propagation of NaNs. 129 | 130 | Returns 131 | ------- 132 | product : ndarray 133 | """ 134 | return reduce_ufunc( 135 | arrays, ufunc=np.multiply, axis=axis, dtype=dtype, ignore_nan=ignore_nan 136 | ) 137 | 138 | 139 | def isub(arrays, axis=-1, dtype=None): 140 | """ 141 | Subtract elements in a reduction fashion. Equivalent to ``numpy.subtract.reduce`` on a dense array. 142 | 143 | Parameters 144 | ---------- 145 | arrays : iterable 146 | Arrays to be subtracted. 147 | axis : int, optional 148 | Reduction axis. Since subtraction is not reorderable (unlike a sum, for example), 149 | `axis` must be specified as an int; full reduction (``axis = None``) will raise an exception. 150 | Default is to subtract the arrays in the stream as if they had been stacked along a new axis, 151 | then subtract along this new axis. 152 | If `axis` is an int larger than the number of dimensions in the arrays of the stream, 153 | arrays are subtracted along the new axis. 154 | dtype : numpy.dtype, optional 155 | The type of the yielded array and of the accumulator in which the elements 156 | are combined. The dtype of a is used by default unless a has an integer dtype 157 | of less precision than the default platform integer. In that case, if a is 158 | signed then the platform integer is used while if a is unsigned then an 159 | unsigned integer of the same precision as the platform integer is used. 160 | 161 | Yields 162 | ------ 163 | online_sub : ndarray 164 | 165 | Raises 166 | ------ 167 | ValueError 168 | If `axis` is None. Since subtraction is not reorderable (unlike a sum, for example), 169 | `axis` must be specified as an int. 170 | """ 171 | if axis is None: 172 | raise ValueError( 173 | "Subtraction is not a reorderable operation, and \ 174 | therefore a specific axis must be given." 175 | ) 176 | yield from ireduce_ufunc(arrays, ufunc=np.subtract, axis=axis, dtype=dtype) 177 | 178 | 179 | def iall(arrays, axis=-1): 180 | """ 181 | Test whether all array elements along a given axis evaluate to True. 182 | 183 | Parameters 184 | ---------- 185 | arrays : iterable 186 | Arrays to be reduced. 187 | axis : int or None, optional 188 | Axis along which a logical AND reduction is performed. The default 189 | is to perform a logical AND along the 'stream axis', as if all arrays in ``arrays`` 190 | were stacked along a new dimension. If ``axis = None``, arrays in ``arrays`` are flattened 191 | before reduction. 192 | 193 | Yields 194 | ------ 195 | all : ndarray, dtype bool 196 | """ 197 | # TODO: use ``where`` keyword to only check places that are already ``True`` 198 | yield from ireduce_ufunc(arrays, ufunc=np.logical_and, axis=axis) 199 | 200 | 201 | def iany(arrays, axis=-1): 202 | """ 203 | Test whether any array elements along a given axis evaluate to True.
204 | 205 | Parameters 206 | ---------- 207 | arrays : iterable 208 | Arrays to be reduced. 209 | axis : int or None, optional 210 | Axis along which a logical OR reduction is performed. The default 211 | is to perform a logical OR along the 'stream axis', as if all arrays in ``arrays`` 212 | were stacked along a new dimension. If ``axis = None``, arrays in ``arrays`` are flattened 213 | before reduction. 214 | 215 | Yields 216 | ------ 217 | any : ndarray, dtype bool 218 | """ 219 | # TODO: use ``where`` keyword to only check places that are not already ``True`` 220 | yield from ireduce_ufunc(arrays, ufunc=np.logical_or, axis=axis) 221 | 222 | 223 | def imax(arrays, axis, ignore_nan=False): 224 | """ 225 | Maximum of a stream of arrays along an axis. 226 | 227 | Parameters 228 | ---------- 229 | arrays : iterable 230 | Arrays to be reduced. 231 | axis : int or None 232 | Axis along which the maximum is found. If ``axis = -1``, the maximum 233 | is found along the 'stream axis', as if all arrays in ``arrays`` 234 | were stacked along a new dimension. If ``axis = None``, arrays in ``arrays`` are flattened 235 | before reduction. 236 | ignore_nan : bool, optional 237 | If True, NaNs are ignored. Default is propagation of NaNs. 238 | 239 | Yields 240 | ------ 241 | online_max : ndarray 242 | Cumulative maximum. 243 | """ 244 | ufunc = np.fmax if ignore_nan else np.maximum 245 | yield from ireduce_ufunc(arrays, ufunc, axis) 246 | 247 | 248 | def imin(arrays, axis, ignore_nan=False): 249 | """ 250 | Minimum of a stream of arrays along an axis. 251 | 252 | Parameters 253 | ---------- 254 | arrays : iterable 255 | Arrays to be reduced. 256 | axis : int or None 257 | Axis along which the minimum is found. If ``axis = -1``, the minimum 258 | is found along the 'stream axis', as if all arrays in ``arrays`` 259 | were stacked along a new dimension. If ``axis = None``, arrays in ``arrays`` are flattened 260 | before reduction. 261 | ignore_nan : bool, optional 262 | If True, NaNs are ignored. Default is propagation of NaNs. 263 | 264 | Yields 265 | ------ 266 | online_min : ndarray 267 | Cumulative minimum. 268 | """ 269 | ufunc = np.fmin if ignore_nan else np.minimum 270 | yield from ireduce_ufunc(arrays, ufunc, axis) 271 | -------------------------------------------------------------------------------- /npstreams/reduce.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | General stream reduction 4 | ------------------------ 5 | """ 6 | from functools import lru_cache, partial 7 | from itertools import islice, repeat 8 | from multiprocessing import Pool 9 | 10 | import numpy as np 11 | 12 | from .array_stream import array_stream 13 | from .array_utils import nan_to_num 14 | from .iter_utils import chunked, last, peek, primed 15 | from .parallel import preduce 16 | 17 | identity = lambda i: i 18 | 19 | 20 | @lru_cache(maxsize=128) 21 | def _check_binary_ufunc(ufunc): 22 | """ 23 | Check that ufunc is suitable for ``ireduce_ufunc``. 24 | 25 | Specifically, a binary ``numpy.ufunc`` function is required. Functions 26 | that return a boolean are also not suitable because they cannot be accumulated. 27 | 28 | This function does not return anything. 29 | 30 | Parameters 31 | ---------- 32 | ufunc : callable 33 | Function to check. 34 | 35 | Raises 36 | ------ 37 | TypeError : if ``ufunc`` is not a ``numpy.ufunc`` 38 | ValueError: if ``ufunc`` is not binary or the return type is boolean.
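
Examples
--------
A quick sketch of the expected behavior: ``numpy.add`` is a binary ufunc and
passes silently, while the unary ``numpy.sqrt`` is rejected:

>>> import numpy as np
>>> _check_binary_ufunc(np.add)      # no exception raised
>>> _check_binary_ufunc(np.sqrt)     # doctest: +SKIP
Traceback (most recent call last):
    ...
ValueError: Only binary ufuncs are supported, and sqrt is not one of them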
39 | """ 40 | if not isinstance(ufunc, np.ufunc): 41 | raise TypeError(f"{ufunc.__name__} is not a NumPy Ufunc") 42 | if ufunc.nin != 2: 43 | raise ValueError( 44 | f"Only binary ufuncs are supported, and {ufunc.__name__} is not one of them" 45 | ) 46 | 47 | 48 | @primed 49 | @array_stream 50 | def ireduce_ufunc(arrays, ufunc, axis=-1, dtype=None, ignore_nan=False, **kwargs): 51 | """ 52 | Streaming reduction generator function from a binary NumPy ufunc. Generator 53 | version of `reduce_ufunc`. 54 | 55 | ``ufunc`` must be a NumPy binary Ufunc (i.e. it takes two arguments). Moreover, 56 | for performance reasons, ufunc must have the same return types as input types. 57 | This precludes the use of ``numpy.greater``, for example. 58 | 59 | Note that performance is much better for the default ``axis = -1``. In such a case, 60 | reduction operations can occur in-place. This also allows to operate in constant-memory. 61 | 62 | Parameters 63 | ---------- 64 | arrays : iterable 65 | Arrays to be reduced. 66 | ufunc : numpy.ufunc 67 | Binary universal function. 68 | axis : int or None, optional 69 | Reduction axis. Default is to reduce the arrays in the stream as if 70 | they had been stacked along a new axis, then reduce along this new axis. 71 | If None, arrays are flattened before reduction. If `axis` is an int larger that 72 | the number of dimensions in the arrays of the stream, arrays are reduced 73 | along the new axis. Note that not all of NumPy Ufuncs support 74 | ``axis = None``, e.g. ``numpy.subtract``. 75 | dtype : numpy.dtype or None, optional 76 | Overrides the dtype of the calculation and output arrays. 77 | ignore_nan : bool, optional 78 | If True and ufunc has an identity value (e.g. ``numpy.add.identity`` is 0), then NaNs 79 | are replaced with this identity. An error is raised if ``ufunc`` has no identity 80 | (e.g. ``numpy.maximum.identity`` is ``None``). 81 | kwargs 82 | Keyword arguments are passed to ``ufunc``. Note that some valid ufunc keyword arguments 83 | (e.g. ``keepdims``) are not valid for all streaming functions. Also, contrary to NumPy 84 | v. 1.10+, ``casting = 'unsafe`` is the default in npstreams. 85 | 86 | Yields 87 | ------ 88 | reduced : ndarray or scalar 89 | 90 | Raises 91 | ------ 92 | TypeError : if ``ufunc`` is not NumPy ufunc. 93 | ValueError : if ``ignore_nan`` is True but ``ufunc`` has no identity 94 | ValueError : if ``ufunc`` is not a binary ufunc 95 | ValueError : if ``ufunc`` does not have the same input type as output type 96 | """ 97 | kwargs.update({"dtype": dtype, "axis": axis}) 98 | 99 | _check_binary_ufunc(ufunc) 100 | 101 | if ignore_nan: 102 | if ufunc.identity is None: 103 | raise ValueError( 104 | f"Cannot ignore NaNs because {ufunc.__name__} has no identity value" 105 | ) 106 | # TODO: use the ``where`` keyword in ufuncs instead 107 | arrays = map(partial(nan_to_num, fill_value=ufunc.identity, copy=False), arrays) 108 | 109 | # Since ireduce_ufunc is primed, we need to wait here 110 | # Priming is a way to start error checking before actually running 111 | # any computations. 
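    # Illustrative sketch of what priming buys us: a call such as
    #     ireduce_ufunc(stream_of_arrays, np.sqrt)   # hypothetical stream
    # raises ValueError immediately at the call site (np.sqrt is not a
    # binary ufunc), rather than only once the caller first iterates over
    # the returned generator.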
112 | yield 113 | 114 | if kwargs["axis"] == -1: 115 | yield from _ireduce_ufunc_new_axis(arrays, ufunc, **kwargs) 116 | return 117 | 118 | if kwargs["axis"] is None: 119 | yield from _ireduce_ufunc_all_axes(arrays, ufunc, **kwargs) 120 | return 121 | 122 | first, arrays = peek(arrays) 123 | 124 | if kwargs["axis"] >= first.ndim: 125 | kwargs["axis"] = -1 126 | yield from ireduce_ufunc(arrays, ufunc, **kwargs) 127 | return 128 | 129 | yield from _ireduce_ufunc_existing_axis(arrays, ufunc, **kwargs) 130 | 131 | 132 | def reduce_ufunc(arrays, ufunc, axis=-1, dtype=None, ignore_nan=False, **kwargs): 133 | """ 134 | Reduce a stream using a binary NumPy ufunc. Function version of ``ireduce_ufunc``. 135 | 136 | ``ufunc`` must be a NumPy binary Ufunc (i.e. it takes two arguments). Moreover, 137 | for performance reasons, ufunc must have the same return types as input types. 138 | This precludes the use of ``numpy.greater``, for example. 139 | 140 | Note that performance is much better for the default ``axis = -1``. In such a case, 141 | reduction operations can occur in-place. This also allows to operate in constant-memory. 142 | 143 | Parameters 144 | ---------- 145 | arrays : iterable 146 | Arrays to be reduced. 147 | ufunc : numpy.ufunc 148 | Binary universal function. 149 | axis : int or None, optional 150 | Reduction axis. Default is to reduce the arrays in the stream as if 151 | they had been stacked along a new axis, then reduce along this new axis. 152 | If None, arrays are flattened before reduction. If `axis` is an int larger that 153 | the number of dimensions in the arrays of the stream, arrays are reduced 154 | along the new axis. Note that not all of NumPy Ufuncs support 155 | ``axis = None``, e.g. ``numpy.subtract``. 156 | dtype : numpy.dtype or None, optional 157 | Overrides the dtype of the calculation and output arrays. 158 | ignore_nan : bool, optional 159 | If True and ufunc has an identity value (e.g. ``numpy.add.identity`` is 0), then NaNs 160 | are replaced with this identity. An error is raised if ``ufunc`` has no identity (e.g. ``numpy.maximum.identity`` is ``None``). 161 | kwargs 162 | Keyword arguments are passed to ``ufunc``. Note that some valid ufunc keyword arguments 163 | (e.g. ``keepdims``) are not valid for all streaming functions. Note that 164 | contrary to NumPy v. 1.10+, ``casting = 'unsafe`` is the default in npstreams. 165 | 166 | Returns 167 | ------- 168 | reduced : ndarray or scalar 169 | 170 | Raises 171 | ------ 172 | TypeError : if ``ufunc`` is not NumPy ufunc. 173 | ValueError : if ``ignore_nan`` is True but ``ufunc`` has no identity 174 | ValueError: if ``ufunc`` is not a binary ufunc 175 | ValueError: if ``ufunc`` does not have the same input type as output type 176 | """ 177 | return last( 178 | ireduce_ufunc( 179 | arrays, ufunc, axis=axis, dtype=dtype, ignore_nan=ignore_nan, **kwargs 180 | ) 181 | ) 182 | 183 | 184 | @array_stream 185 | def preduce_ufunc( 186 | arrays, 187 | ufunc, 188 | axis=-1, 189 | dtype=None, 190 | ignore_nan=False, 191 | processes=1, 192 | ntotal=None, 193 | **kwargs, 194 | ): 195 | """ 196 | Parallel reduction of array streams. 197 | 198 | ``ufunc`` must be a NumPy binary Ufunc (i.e. it takes two arguments). Moreover, 199 | for performance reasons, ufunc must have the same return types as input types. 200 | This precludes the use of ``numpy.greater``, for example. 201 | 202 | Parameters 203 | ---------- 204 | arrays : iterable 205 | Arrays to be reduced. 206 | ufunc : numpy.ufunc 207 | Binary universal function. 
208 | axis : int or None, optional 209 | Reduction axis. Default is to reduce the arrays in the stream as if 210 | they had been stacked along a new axis, then reduce along this new axis. 211 | If None, arrays are flattened before reduction. If `axis` is an int larger that 212 | the number of dimensions in the arrays of the stream, arrays are reduced 213 | along the new axis. Note that not all of NumPy Ufuncs support 214 | ``axis = None``, e.g. ``numpy.subtract``. 215 | dtype : numpy.dtype or None, optional 216 | Overrides the dtype of the calculation and output arrays. 217 | ignore_nan : bool, optional 218 | If True and ufunc has an identity value (e.g. ``numpy.add.identity`` is 0), then NaNs 219 | are replaced with this identity. An error is raised if ``ufunc`` has no identity (e.g. ``numpy.maximum.identity`` is ``None``). 220 | processes : int or None, optional 221 | Number of processes to use. If `None`, maximal number of processes 222 | is used. Default is 1. 223 | kwargs 224 | Keyword arguments are passed to ``ufunc``. Note that some valid ufunc keyword arguments 225 | (e.g. ``keepdims``) are not valid for all streaming functions. Also, contrary to NumPy 226 | v. 1.10+, ``casting = 'unsafe`` is the default in npstreams. 227 | """ 228 | if processes == 1: 229 | return reduce_ufunc(arrays, ufunc, axis, dtype, ignore_nan, **kwargs) 230 | 231 | kwargs.update( 232 | {"ufunc": ufunc, "ignore_nan": ignore_nan, "dtype": dtype, "axis": axis} 233 | ) 234 | reduce = partial(reduce_ufunc, **kwargs) 235 | # return preduce(reduce, arrays, processes = processes, ntotal = ntotal) 236 | 237 | with Pool(processes) as pool: 238 | chunksize = 1 239 | if ntotal is not None: 240 | chunksize = max(1, int(ntotal / pool._processes)) 241 | res = pool.imap(reduce, chunked(arrays, chunksize)) 242 | return reduce(res) 243 | 244 | 245 | def _ireduce_ufunc_new_axis(arrays, ufunc, **kwargs): 246 | """ 247 | Reduction operation for arrays, in the direction of a new axis (i.e. stacking). 248 | 249 | Parameters 250 | ---------- 251 | arrays : iterable 252 | Arrays to be reduced. 253 | ufunc : numpy.ufunc 254 | Binary universal function. Must have a signature of the form ufunc(x1, x2, ...) 255 | kwargs 256 | Keyword arguments are passed to ``ufunc``. 257 | 258 | Yields 259 | ------ 260 | reduced : ndarray 261 | """ 262 | arrays = iter(arrays) 263 | first = next(arrays) 264 | 265 | kwargs.pop("axis") 266 | 267 | dtype = kwargs.get("dtype", None) 268 | if dtype is None: 269 | dtype = first.dtype 270 | else: 271 | kwargs["casting"] = "unsafe" 272 | 273 | # If the out parameter was already given 274 | # we create the accumulator from it 275 | # Otherwise, it is a copy of the first array 276 | accumulator = kwargs.pop("out", None) 277 | if accumulator is not None: 278 | accumulator[:] = first 279 | else: 280 | accumulator = np.array(first, copy=True).astype(dtype) 281 | yield accumulator 282 | 283 | for array in arrays: 284 | ufunc(accumulator, array, out=accumulator, **kwargs) 285 | yield accumulator 286 | 287 | 288 | def _ireduce_ufunc_existing_axis(arrays, ufunc, **kwargs): 289 | """ 290 | Reduction operation for arrays, in the direction of an existing axis. 291 | 292 | Parameters 293 | ---------- 294 | arrays : iterable 295 | Arrays to be reduced. 296 | ufunc : numpy.ufunc 297 | Binary universal function. Must have a signature of the form ufunc(x1, x2, ...) 298 | kwargs 299 | Keyword arguments are passed to ``ufunc``. The ``out`` parameter is ignored. 
300 | 301 | Yields 302 | ------ 303 | reduced : ndarray 304 | """ 305 | arrays = iter(arrays) 306 | first = next(arrays) 307 | 308 | if kwargs["axis"] not in range(first.ndim): 309 | axis = kwargs["axis"] 310 | raise ValueError(f"Axis {axis} not supported on arrays of shape {first.shape}.") 311 | 312 | # Remove parameters that will not be used. 313 | kwargs.pop("out", None) 314 | 315 | dtype = kwargs.get("dtype") 316 | if dtype is None: 317 | dtype = first.dtype 318 | 319 | axis_reduce = partial(ufunc.reduce, **kwargs) 320 | 321 | accumulator = np.atleast_1d(axis_reduce(first)) 322 | yield accumulator 323 | 324 | # On the first pass of the following loop, accumulator is missing a dimensions 325 | # therefore, the stacking function cannot be 'concatenate' 326 | second = next(arrays) 327 | accumulator = np.stack([accumulator, np.atleast_1d(axis_reduce(second))], axis=-1) 328 | yield accumulator 329 | 330 | # On the second pass, the new dimensions exists, and thus we switch to 331 | # using concatenate. 332 | for array in arrays: 333 | reduced = np.expand_dims( 334 | np.atleast_1d(axis_reduce(array)), axis=accumulator.ndim - 1 335 | ) 336 | accumulator = np.concatenate([accumulator, reduced], axis=accumulator.ndim - 1) 337 | yield accumulator 338 | 339 | 340 | def _ireduce_ufunc_all_axes(arrays, ufunc, **kwargs): 341 | """ 342 | Reduction operation for arrays, over all axes. 343 | 344 | Parameters 345 | ---------- 346 | arrays : iterable 347 | Arrays to be reduced. 348 | ufunc : numpy.ufunc 349 | Binary universal function. Must have a signature of the form ufunc(x1, x2, ...) 350 | kwargs 351 | Keyword arguments are passed to ``ufunc``. The ``out`` parameter is ignored. 352 | 353 | Yields 354 | ------ 355 | reduced : scalar 356 | """ 357 | arrays = iter(arrays) 358 | first = next(arrays) 359 | 360 | kwargs.pop("out", None) 361 | 362 | kwargs["axis"] = None 363 | axis_reduce = partial(ufunc.reduce, **kwargs) 364 | 365 | accumulator = axis_reduce(first) 366 | yield accumulator 367 | 368 | for array in arrays: 369 | accumulator = axis_reduce([accumulator, axis_reduce(array)]) 370 | yield accumulator 371 | -------------------------------------------------------------------------------- /npstreams/tests/test_stats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from itertools import repeat 4 | from random import randint, random, seed 5 | from warnings import catch_warnings, simplefilter 6 | import pytest 7 | 8 | import numpy as np 9 | 10 | try: 11 | from scipy.stats import sem as scipy_sem 12 | 13 | WITH_SCIPY = True 14 | except ImportError: 15 | WITH_SCIPY = False 16 | 17 | from npstreams import ( 18 | iaverage, 19 | imean, 20 | isem, 21 | istd, 22 | ivar, 23 | last, 24 | ihistogram, 25 | mean, 26 | average, 27 | sem, 28 | std, 29 | var, 30 | ) 31 | 32 | seed(23) 33 | 34 | 35 | def test_average_trivial(): 36 | """Test average() on a stream of zeroes""" 37 | stream = repeat(np.zeros((64, 64), dtype=float), times=5) 38 | for av in average(stream): 39 | assert np.allclose(av, np.zeros_like(av)) 40 | 41 | 42 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 43 | def test_average_vs_numpy(axis): 44 | """Test average vs. 
numpy.average""" 45 | stream = [np.random.random(size=(64, 64)) for _ in range(5)] 46 | stack = np.dstack(stream) 47 | 48 | from_stream = average(stream, axis=axis) 49 | from_numpy = np.average(stack, axis=axis) 50 | assert np.allclose(from_numpy, from_stream) 51 | 52 | 53 | def test_average_weighted_average(): 54 | """Test results of weighted average against numpy.average""" 55 | stream = [np.random.random(size=(16, 16)) for _ in range(5)] 56 | 57 | weights = [random() for _ in stream] 58 | from_average = average(stream, weights=weights) 59 | from_numpy = np.average(np.dstack(stream), axis=2, weights=np.array(weights)) 60 | assert np.allclose(from_average, from_numpy) 61 | 62 | weights = [np.random.random(size=stream[0].shape) for _ in stream] 63 | from_average = average(stream, weights=weights) 64 | from_numpy = np.average(np.dstack(stream), axis=2, weights=np.dstack(weights)) 65 | assert np.allclose(from_average, from_numpy) 66 | 67 | 68 | def test_average_ignore_nan(): 69 | """Test that NaNs are handled correctly""" 70 | stream = [np.random.random(size=(16, 12)) for _ in range(5)] 71 | for s in stream: 72 | s[randint(0, 15), randint(0, 11)] = np.nan 73 | 74 | with catch_warnings(): 75 | simplefilter("ignore") 76 | from_average = average(stream, ignore_nan=True) 77 | from_numpy = np.nanmean(np.dstack(stream), axis=2) 78 | assert np.allclose(from_average, from_numpy) 79 | 80 | 81 | def test_iaverage_trivial(): 82 | """Test iaverage on stream of zeroes""" 83 | stream = repeat(np.zeros((64, 64), dtype=float), times=5) 84 | for av in iaverage(stream): 85 | assert np.allclose(av, np.zeros_like(av)) 86 | 87 | 88 | def test_iaverage_weighted_average(): 89 | """Test results of weighted iaverage against numpy.average""" 90 | stream = [np.random.random(size=(16, 16)) for _ in range(5)] 91 | 92 | weights = [random() for _ in stream] 93 | from_iaverage = last(iaverage(stream, weights=weights)) 94 | from_numpy = np.average(np.dstack(stream), axis=2, weights=np.array(weights)) 95 | assert np.allclose(from_iaverage, from_numpy) 96 | 97 | weights = [np.random.random(size=stream[0].shape) for _ in stream] 98 | from_iaverage = last(iaverage(stream, weights=weights)) 99 | from_numpy = np.average(np.dstack(stream), axis=2, weights=np.dstack(weights)) 100 | assert np.allclose(from_iaverage, from_numpy) 101 | 102 | 103 | def test_iaverage_ignore_nan(): 104 | """Test that NaNs are handled correctly""" 105 | stream = [np.random.random(size=(16, 12)) for _ in range(5)] 106 | for s in stream: 107 | s[randint(0, 15), randint(0, 11)] = np.nan 108 | 109 | with catch_warnings(): 110 | simplefilter("ignore") 111 | from_iaverage = last(iaverage(stream, ignore_nan=True)) 112 | from_numpy = np.nanmean(np.dstack(stream), axis=2) 113 | assert np.allclose(from_iaverage, from_numpy) 114 | 115 | 116 | def test_iaverage_length(): 117 | """Test that the number of yielded elements is the same as source""" 118 | source = (np.zeros((16,)) for _ in range(5)) 119 | avg = list(iaverage(source, axis=0)) 120 | assert len(avg) == 5 121 | 122 | 123 | @pytest.mark.parametrize("dtype", (np.uint8, bool, np.int16, np.float16)) 124 | def test_iaverage_output_dtype(dtype): 125 | """Test that the yielded arrays are always floats""" 126 | source = (np.zeros((16,), dtype=dtype) for _ in range(5)) 127 | avg = last(iaverage(source)) 128 | assert avg.dtype == float 129 | 130 | 131 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 132 | def test_iaverage_output_shape(axis): 133 | """Test output shape""" 134 | source = [np.random.random((16, 12,
5)) for _ in range(10)] 135 | stack = np.stack(source, axis=-1) 136 | 137 | from_numpy = np.average(stack, axis=axis) 138 | out = last(iaverage(source, axis=axis)) 139 | assert from_numpy.shape == out.shape 140 | assert np.allclose(out, from_numpy) 141 | 142 | 143 | def test_mean_trivial(): 144 | """Test mean() on a stream of zeroes""" 145 | stream = repeat(np.zeros((64, 64), dtype=float), times=5) 146 | for av in mean(stream): 147 | assert np.allclose(av, np.zeros_like(av)) 148 | 149 | 150 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 151 | def test_mean_vs_numpy(axis): 152 | """Test mean vs. numpy.mean""" 153 | stream = [np.random.random(size=(64, 64)) for _ in range(5)] 154 | stack = np.dstack(stream) 155 | 156 | from_stream = mean(stream, axis=axis) 157 | from_numpy = np.mean(stack, axis=axis) 158 | assert np.allclose(from_numpy, from_stream) 159 | 160 | 161 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 162 | def test_mean_against_numpy_nanmean(axis): 163 | """Test results against numpy.mean""" 164 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 165 | for arr in source: 166 | arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan 167 | stack = np.stack(source, axis=-1) 168 | 169 | from_numpy = np.nanmean(stack, axis=axis) 170 | out = mean(source, axis=axis, ignore_nan=True) 171 | assert from_numpy.shape == out.shape 172 | assert np.allclose(out, from_numpy) 173 | 174 | 175 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 176 | def test_imean_against_numpy_mean(axis): 177 | """Test results against numpy.mean""" 178 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 179 | stack = np.stack(source, axis=-1) 180 | 181 | from_numpy = np.mean(stack, axis=axis) 182 | out = last(imean(source, axis=axis)) 183 | assert from_numpy.shape == out.shape 184 | assert np.allclose(out, from_numpy) 185 | 186 | 187 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 188 | def test_imean_against_numpy_nanmean(axis): 189 | """Test results against numpy.mean""" 190 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 191 | for arr in source: 192 | arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan 193 | stack = np.stack(source, axis=-1) 194 | 195 | from_numpy = np.nanmean(stack, axis=axis) 196 | out = last(imean(source, axis=axis, ignore_nan=True)) 197 | assert from_numpy.shape == out.shape 198 | assert np.allclose(out, from_numpy) 199 | 200 | 201 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 202 | def test_var_vs_numpy(axis): 203 | """Test that the axis parameter is handled correctly""" 204 | stream = [np.random.random((16, 7, 3)) for _ in range(5)] 205 | stack = np.stack(stream, axis=-1) 206 | 207 | from_numpy = np.var(stack, axis=axis) 208 | from_var = var(stream, axis=axis) 209 | assert from_numpy.shape == from_var.shape 210 | assert np.allclose(from_var, from_numpy) 211 | 212 | 213 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 214 | @pytest.mark.parametrize("ddof", range(4)) 215 | def test_var_ddof(axis, ddof): 216 | """Test that the ddof parameter is equivalent to numpy's""" 217 | stream = [np.random.random((16, 7, 3)) for _ in range(10)] 218 | stack = np.stack(stream, axis=-1) 219 | 220 | with catch_warnings(): 221 | simplefilter("ignore") 222 | 223 | from_numpy = np.var(stack, axis=axis, ddof=ddof) 224 | from_var = var(stream, axis=axis, ddof=ddof) 225 | assert from_numpy.shape == from_var.shape 226 | assert np.allclose(from_var, from_numpy) 227 | 228 | 229 | def test_ivar_first(): 230 | """Test that the first yielded value of 
ivar is an array of zeros""" 231 | stream = repeat(np.random.random(size=(64, 64)), times=5) 232 | first = next(ivar(stream)) 233 | 234 | assert np.allclose(first, np.zeros_like(first)) 235 | 236 | 237 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 238 | def test_ivar_output_shape(axis): 239 | """Test that the axis parameter is handled correctly""" 240 | stream = [np.random.random((16, 7, 3)) for _ in range(5)] 241 | stack = np.stack(stream, axis=-1) 242 | 243 | from_numpy = np.var(stack, axis=axis) 244 | from_ivar = last(ivar(stream, axis=axis)) 245 | assert from_numpy.shape == from_ivar.shape 246 | assert np.allclose(from_ivar, from_numpy) 247 | 248 | 249 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 250 | @pytest.mark.parametrize("ddof", range(4)) 251 | def test_ivar_ddof(axis, ddof): 252 | """Test that the ddof parameter is equivalent to numpy's""" 253 | stream = [np.random.random((16, 7, 3)) for _ in range(10)] 254 | stack = np.stack(stream, axis=-1) 255 | 256 | with catch_warnings(): 257 | simplefilter("ignore") 258 | 259 | from_numpy = np.var(stack, axis=axis, ddof=ddof) 260 | from_ivar = last(ivar(stream, axis=axis, ddof=ddof)) 261 | assert from_numpy.shape == from_ivar.shape 262 | assert np.allclose(from_ivar, from_numpy) 263 | 264 | 265 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 266 | @pytest.mark.parametrize("ddof", range(4)) 267 | def test_std_against_numpy_std(axis, ddof): 268 | stream = [np.random.random((16, 7, 3)) for _ in range(10)] 269 | stack = np.stack(stream, axis=-1) 270 | 271 | with catch_warnings(): 272 | simplefilter("ignore") 273 | 274 | from_numpy = np.std(stack, axis=axis, ddof=ddof) 275 | from_ivar = std(stream, axis=axis, ddof=ddof) 276 | assert from_numpy.shape == from_ivar.shape 277 | assert np.allclose(from_ivar, from_numpy) 278 | 279 | 280 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 281 | @pytest.mark.parametrize("ddof", range(4)) 282 | def test_std_against_numpy_nanstd(axis, ddof): 283 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 284 | for arr in source: 285 | arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan 286 | stack = np.stack(source, axis=-1) 287 | 288 | from_numpy = np.nanstd(stack, axis=axis, ddof=ddof) 289 | from_ivar = std(source, axis=axis, ddof=ddof, ignore_nan=True) 290 | assert from_numpy.shape == from_ivar.shape 291 | assert np.allclose(from_ivar, from_numpy) 292 | 293 | 294 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 295 | @pytest.mark.parametrize("ddof", range(4)) 296 | def test_istd_against_numpy_std(axis, ddof): 297 | stream = [np.random.random((16, 7, 3)) for _ in range(10)] 298 | stack = np.stack(stream, axis=-1) 299 | 300 | with catch_warnings(): 301 | simplefilter("ignore") 302 | 303 | from_numpy = np.std(stack, axis=axis, ddof=ddof) 304 | from_ivar = last(istd(stream, axis=axis, ddof=ddof)) 305 | assert from_numpy.shape == from_ivar.shape 306 | assert np.allclose(from_ivar, from_numpy) 307 | 308 | 309 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 310 | @pytest.mark.parametrize("ddof", range(4)) 311 | def test_istd_against_numpy_nanstd(axis, ddof): 312 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 313 | for arr in source: 314 | arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan 315 | stack = np.stack(source, axis=-1) 316 | 317 | from_numpy = np.nanstd(stack, axis=axis, ddof=ddof) 318 | from_ivar = last(istd(source, axis=axis, ddof=ddof, ignore_nan=True)) 319 | assert from_numpy.shape == from_ivar.shape 320 | assert np.allclose(from_ivar,
from_numpy) 321 | 322 | 323 | @pytest.mark.skipif(not WITH_SCIPY, reason="SciPy is not installed/importable") 324 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 325 | @pytest.mark.parametrize("ddof", range(4)) 326 | def test_sem_against_scipy_no_nans(axis, ddof): 327 | """Test that isem outputs the same as scipy.stats.sem""" 328 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 329 | stack = np.stack(source, axis=-1) 330 | 331 | from_scipy = scipy_sem(stack, axis=axis, ddof=ddof) 332 | from_isem = sem(source, axis=axis, ddof=ddof) 333 | assert from_scipy.shape == from_isem.shape 334 | assert np.allclose(from_isem, from_scipy) 335 | 336 | 337 | @pytest.mark.skipif(not WITH_SCIPY, reason="SciPy is not installed/importable") 338 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 339 | @pytest.mark.parametrize("ddof", range(4)) 340 | def test_sem_against_scipy_with_nans(axis, ddof): 341 | """Test that isem outputs the same as scipy.stats.sem when NaNs are ignored.""" 342 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 343 | for arr in source: 344 | arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan 345 | stack = np.stack(source, axis=-1) 346 | 347 | from_scipy = scipy_sem(stack, axis=axis, ddof=ddof, nan_policy="omit") 348 | from_isem = sem(source, axis=axis, ddof=ddof, ignore_nan=True) 349 | assert from_scipy.shape == from_isem.shape 350 | assert np.allclose(from_isem, from_scipy) 351 | 352 | 353 | @pytest.mark.skipif(not WITH_SCIPY, reason="SciPy is not installed/importable") 354 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 355 | @pytest.mark.parametrize("ddof", range(4)) 356 | def test_isem_against_scipy_no_nans(axis, ddof): 357 | """Test that isem outputs the same as scipy.stats.sem""" 358 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 359 | stack = np.stack(source, axis=-1) 360 | 361 | from_scipy = scipy_sem(stack, axis=axis, ddof=ddof) 362 | from_isem = last(isem(source, axis=axis, ddof=ddof)) 363 | assert from_scipy.shape == from_isem.shape 364 | assert np.allclose(from_isem, from_scipy) 365 | 366 | 367 | @pytest.mark.skipif(not WITH_SCIPY, reason="SciPy is not installed/importable") 368 | @pytest.mark.parametrize("axis", (0, 1, 2, None)) 369 | @pytest.mark.parametrize("ddof", range(4)) 370 | def test_isem_against_scipy_with_nans(axis, ddof): 371 | """Test that isem outputs the same as scipy.stats.sem when NaNs are ignored.""" 372 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 373 | for arr in source: 374 | arr[randint(0, 15), randint(0, 11), randint(0, 4)] = np.nan 375 | stack = np.stack(source, axis=-1) 376 | 377 | from_scipy = scipy_sem(stack, axis=axis, ddof=ddof, nan_policy="omit") 378 | from_isem = last(isem(source, axis=axis, ddof=ddof, ignore_nan=True)) 379 | assert from_scipy.shape == from_isem.shape 380 | assert np.allclose(from_isem, from_scipy) 381 | 382 | 383 | def test_ihistogram_against_numpy_no_weights(): 384 | """Test ihistogram against numpy.histogram with no weights""" 385 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 386 | stack = np.stack(source, axis=-1) 387 | 388 | bins = np.linspace(0, 1, num=10) 389 | from_numpy = np.histogram(stack, bins=bins)[0] 390 | from_ihistogram = last(ihistogram(source, bins=bins)) 391 | 392 | # Since histogram output is int, cannot use allclose 393 | assert np.all(np.equal(from_numpy, from_ihistogram)) 394 | 395 | 396 | def test_ihistogram_trivial_weights(): 397 | """Test ihistogram with weights being all 1s vs. 
weights=None""" 398 | source = [np.random.random((16, 12, 5)) for _ in range(10)] 399 | weights = [np.array([1]) for _ in source] 400 | 401 | bins = np.linspace(0, 1, num=10) 402 | none_weights = last(ihistogram(source, bins=bins, weights=None)) 403 | trivial_weights = last(ihistogram(source, bins=bins, weights=weights)) 404 | 405 | assert np.all(np.equal(none_weights, trivial_weights)) 406 | -------------------------------------------------------------------------------- /npstreams/stats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Statistical functions 4 | --------------------- 5 | """ 6 | from functools import partial 7 | from itertools import count, repeat, starmap 8 | from operator import truediv 9 | from warnings import catch_warnings, simplefilter 10 | 11 | import numpy as np 12 | 13 | from .array_stream import array_stream 14 | from .array_utils import nan_to_num 15 | from .iter_utils import itercopy, last, peek 16 | from .numerics import isum 17 | 18 | 19 | @array_stream 20 | def _iaverage(arrays, axis=-1, weights=None, ignore_nan=False): 21 | """ 22 | Primitive version of weighted averaging that yields the running sum and running weights sum, 23 | but avoids the costly division at every step. 24 | """ 25 | # Special case: in the easiest case, no need to calculate 26 | # weights and ignore nans. 27 | # This case is pretty common 28 | if (weights is None) and (not ignore_nan) and (axis == -1): 29 | yield from zip(isum(arrays, axis=axis, dtype=float, ignore_nan=False), count(1)) 30 | return 31 | 32 | first, arrays = peek(arrays) 33 | 34 | # We make sure that weights is always an array 35 | # This simplifies the handling of NaNs. 36 | if weights is None: 37 | weights = repeat(1) 38 | weights = map(partial(np.broadcast_to, shape=first.shape), weights) 39 | 40 | # Need to know which array has NaNs, and modify the weights stream accordingly 41 | if ignore_nan: 42 | arrays, arrays2 = itercopy(arrays) 43 | weights = map( 44 | lambda arr, wgt: np.logical_not(np.isnan(arr)) * wgt, arrays2, weights 45 | ) 46 | 47 | weights1, weights2 = itercopy(weights) 48 | 49 | sum_of_weights = isum(weights1, axis=axis, dtype=float) 50 | weighted_arrays = map(lambda arr, wgt: arr * wgt, arrays, weights2) 51 | weighted_sum = isum(weighted_arrays, axis=axis, ignore_nan=ignore_nan, dtype=float) 52 | 53 | yield from zip(weighted_sum, sum_of_weights) 54 | 55 | 56 | @array_stream 57 | def average(arrays, axis=-1, weights=None, ignore_nan=False): 58 | """ 59 | Average (weighted) of a stream of arrays. This function consumes the 60 | entire stream. 61 | 62 | Parameters 63 | ---------- 64 | arrays : iterable of ndarrays 65 | Arrays to be averaged. This iterable can also a generator. 66 | axis : int, optional 67 | Reduction axis. Default is to average the arrays in the stream as if 68 | they had been stacked along a new axis, then average along this new axis. 69 | If None, arrays are flattened before averaging. If `axis` is an int larger that 70 | the number of dimensions in the arrays of the stream, arrays are averaged 71 | along the new axis. 72 | weights : iterable of ndarray, iterable of floats, or None, optional 73 | Iterable of weights associated with the values in each item of `arrays`. 74 | Each value in an element of `arrays` contributes to the average 75 | according to its associated weight. The weights array can either be a float 76 | or an array of the same shape as any element of `arrays`. 
If ``weights=None``, 77 | then all data in each element of `arrays` are assumed to have a weight equal to one. 78 | ignore_nan : bool, optional 79 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 80 | 81 | Returns 82 | ------- 83 | avg: `~numpy.ndarray`, dtype float 84 | Weighted average. 85 | 86 | See Also 87 | -------- 88 | iaverage : streaming (weighted) average. 89 | numpy.average : (weighted) average of dense arrays 90 | mean : non-weighted average of a stream. 91 | """ 92 | total_sum, total_weight = last(_iaverage(arrays, axis, weights, ignore_nan)) 93 | with catch_warnings(): 94 | simplefilter("ignore", category=RuntimeWarning) 95 | return np.true_divide(total_sum, total_weight) 96 | 97 | 98 | @array_stream 99 | def iaverage(arrays, axis=-1, weights=None, ignore_nan=False): 100 | """ 101 | Streaming (weighted) average of arrays. 102 | 103 | Parameters 104 | ---------- 105 | arrays : iterable of ndarrays 106 | Arrays to be averaged. This iterable can also be a generator. 107 | axis : int, optional 108 | Reduction axis. Default is to average the arrays in the stream as if 109 | they had been stacked along a new axis, then average along this new axis. 110 | If None, arrays are flattened before averaging. If `axis` is an int larger than 111 | the number of dimensions in the arrays of the stream, arrays are averaged 112 | along the new axis. 113 | weights : iterable of ndarray, iterable of floats, or None, optional 114 | Iterable of weights associated with the values in each item of `arrays`. 115 | Each value in an element of `arrays` contributes to the average 116 | according to its associated weight. The weights array can either be a float 117 | or an array of the same shape as any element of `arrays`. If weights=None, 118 | then all data in each element of `arrays` are assumed to have a weight equal to one. 119 | ignore_nan : bool, optional 120 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 121 | 122 | Yields 123 | ------ 124 | avg: `~numpy.ndarray`, dtype float 125 | Weighted average. 126 | 127 | See Also 128 | -------- 129 | imean : streaming array mean (non-weighted average). 130 | """ 131 | # Primitive stream is composed of tuples (running_sum, running_weights) 132 | primitive = _iaverage(arrays, axis, weights, ignore_nan) 133 | yield from map(lambda element: truediv(*element), primitive) 134 | 135 | 136 | @array_stream 137 | def mean(arrays, axis=-1, ignore_nan=False): 138 | """ 139 | Mean of a stream of arrays. This function consumes the 140 | entire stream. 141 | 142 | Parameters 143 | ---------- 144 | arrays : iterable of ndarrays 145 | Arrays to be averaged. This iterable can also be a generator. 146 | axis : int, optional 147 | Reduction axis. Default is to average the arrays in the stream as if 148 | they had been stacked along a new axis, then average along this new axis. 149 | If None, arrays are flattened before averaging. If `axis` is an int larger than 150 | the number of dimensions in the arrays of the stream, arrays are averaged 151 | along the new axis. 152 | ignore_nan : bool, optional 153 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 154 | 155 | Returns 156 | ------- 157 | mean: `~numpy.ndarray`, dtype float 158 | Total mean array.
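
Examples
--------
A small usage sketch: the streaming mean matches ``numpy.mean`` applied to
the equivalent dense stack:

>>> import numpy as np
>>> from npstreams import mean
>>> stream = [np.full((4, 4), fill_value=float(i)) for i in range(5)]
>>> bool(np.allclose(mean(stream), np.mean(np.stack(stream, axis=-1), axis=-1)))
True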
159 | """ 160 | total_sum, total_count = last( 161 | _iaverage(arrays, axis, weights=None, ignore_nan=ignore_nan) 162 | ) 163 | return total_sum / total_count 164 | 165 | 166 | @array_stream 167 | def imean(arrays, axis=-1, ignore_nan=False): 168 | """ 169 | Streaming mean of arrays. Equivalent to `iaverage(arrays, weights=None)`. 170 | 171 | Parameters 172 | ---------- 173 | arrays : iterable of ndarrays 174 | Arrays to be averaged. This iterable can also be a generator. 175 | axis : int, optional 176 | Reduction axis. Default is to average the arrays in the stream as if 177 | they had been stacked along a new axis, then average along this new axis. 178 | If None, arrays are flattened before averaging. If `axis` is an int larger than 179 | the number of dimensions in the arrays of the stream, arrays are averaged 180 | along the new axis. 181 | ignore_nan : bool, optional 182 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 183 | 184 | Yields 185 | ------ 186 | mean: `~numpy.ndarray`, dtype float 187 | Online mean array. 188 | """ 189 | # Primitive stream is composed of tuples (running_sum, running_count) 190 | primitive = _iaverage(arrays, axis, weights=None, ignore_nan=ignore_nan) 191 | yield from map(lambda element: truediv(*element), primitive) 192 | 193 | 194 | @array_stream 195 | def _ivar(arrays, axis=-1, weights=None, ignore_nan=False): 196 | """ 197 | Primitive version of weighted variance that yields the running average, running average of squares, 198 | and running sum of weights, deferring the final variance computation to the caller. 199 | """ 200 | first, arrays = peek(arrays) 201 | 202 | # We make sure that weights is always an array 203 | # This simplifies the handling of NaNs. 204 | if weights is None: 205 | weights = repeat(1) 206 | weights = map(partial(np.broadcast_to, shape=first.shape), weights) 207 | 208 | # Need to know which array has NaNs, and modify the weights stream accordingly 209 | if ignore_nan: 210 | arrays, arrays2 = itercopy(arrays) 211 | weights = map( 212 | lambda arr, wgt: np.logical_not(np.isnan(arr)) * wgt, arrays2, weights 213 | ) 214 | 215 | arrays, arrays2 = itercopy(arrays) 216 | weights, weights2, weights3 = itercopy(weights, 3) 217 | 218 | avgs = iaverage(arrays, axis=axis, weights=weights, ignore_nan=ignore_nan) 219 | avg_of_squares = iaverage( 220 | map(np.square, arrays2), axis=axis, weights=weights2, ignore_nan=ignore_nan 221 | ) 222 | sum_of_weights = isum(weights3, axis=axis, ignore_nan=ignore_nan) 223 | 224 | yield from zip(avgs, avg_of_squares, sum_of_weights) 225 | 226 | 227 | @array_stream 228 | def average_and_var(arrays, axis=-1, ddof=0, weights=None, ignore_nan=False): 229 | """ 230 | Calculate the simultaneous average and variance of a stream of arrays. This is done in 231 | a single iteration for maximum performance. 232 | 233 | .. versionadded:: 1.6.1 234 | 235 | Parameters 236 | ---------- 237 | arrays : iterable of ndarrays 238 | Arrays to be combined. This iterable can also be a generator. 239 | axis : int, optional 240 | Reduction axis. Default is to combine the arrays in the stream as if 241 | they had been stacked along a new axis, then compute the variance along this new axis. 242 | If None, arrays are flattened. If `axis` is an int larger than 243 | the number of dimensions in the arrays of the stream, variance is computed 244 | along the new axis. 245 | ddof : int, optional 246 | Means Delta Degrees of Freedom. The divisor used in calculations 247 | is ``N - ddof``, where ``N`` represents the number of elements. 248 | weights : iterable of ndarray, iterable of floats, or None, optional 249 | Iterable of weights associated with the values in each item of `arrays`. 250 | Each value in an element of `arrays` contributes to the variance 251 | according to its associated weight. Each weight can be either a float 252 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 253 | then all data in each element of `arrays` are assumed to have a weight equal to one. 254 | ignore_nan : bool, optional 255 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 256 | 257 | Returns 258 | ------- 259 | average : `~numpy.ndarray` 260 | Average, possibly weighted. 261 | var: `~numpy.ndarray` 262 | Variance, possibly weighted. 263 | 264 | Notes 265 | ----- 266 | Since the calculation of the variance requires knowledge of the average, computing both 267 | at once costs no more than computing the variance alone; `var` is in fact a thin wrapper around this function. 268 | 269 | References 270 | ---------- 271 | .. [#] D. H. D. West, Updating the mean and variance estimates: an improved method. 272 | Communications of the ACM Vol. 22, Issue 9, pp. 532 - 535 (1979)
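Examples
--------
A short sketch (top-level import assumed); both statistics come out of a single pass over the stream:

>>> import numpy as np
>>> from npstreams import average_and_var
>>> stream = [np.array([1.0]), np.array([2.0]), np.array([3.0]), np.array([4.0])]
>>> avg, variance = average_and_var(stream)
>>> avg
array([2.5])
>>> variance
array([1.25])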
273 | """ 274 | # Since the variance calculation requires knowing the average, 275 | # `average_and_var` runs in the exact same time as `var` 276 | avg, sq_avg, swgt = last( 277 | _ivar(arrays=arrays, axis=axis, weights=weights, ignore_nan=ignore_nan) 278 | ) 279 | variance = (sq_avg - avg**2) * (swgt / (swgt - ddof)) 280 | return avg, variance 281 | 282 | 283 | @array_stream 284 | def var(arrays, axis=-1, ddof=0, weights=None, ignore_nan=False): 285 | """ 286 | Total variance of a stream of arrays. Weights are also supported. This function 287 | consumes the input stream. 288 | 289 | Parameters 290 | ---------- 291 | arrays : iterable of ndarrays 292 | Arrays to be combined. This iterable can also be a generator. 293 | axis : int, optional 294 | Reduction axis. Default is to combine the arrays in the stream as if 295 | they had been stacked along a new axis, then compute the variance along this new axis. 296 | If None, arrays are flattened. If `axis` is an int larger than 297 | the number of dimensions in the arrays of the stream, variance is computed 298 | along the new axis. 299 | ddof : int, optional 300 | Means Delta Degrees of Freedom. The divisor used in calculations 301 | is ``N - ddof``, where ``N`` represents the number of elements. 302 | weights : iterable of ndarray, iterable of floats, or None, optional 303 | Iterable of weights associated with the values in each item of `arrays`. 304 | Each value in an element of `arrays` contributes to the variance 305 | according to its associated weight. Each weight can be either a float 306 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 307 | then all data in each element of `arrays` are assumed to have a weight equal to one. 308 | ignore_nan : bool, optional 309 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 310 | 311 | Returns 312 | ------- 313 | var: `~numpy.ndarray` 314 | Variance. 315 | 316 | See Also 317 | -------- 318 | ivar : streaming variance 319 | numpy.var : variance calculation for dense arrays. Weights are not supported. 320 | 321 | References 322 | ---------- 323 | .. [#] D. H. D. West, Updating the mean and variance estimates: an improved method. 324 | Communications of the ACM Vol. 22, Issue 9, pp. 532 - 535 (1979)
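Examples
--------
A short sketch showing the effect of `ddof` (top-level import assumed):

>>> import numpy as np
>>> from npstreams import var
>>> stream = [np.array([1.0]), np.array([2.0]), np.array([3.0]), np.array([4.0])]
>>> var(stream)          # biased estimator (ddof=0)
array([1.25])
>>> var(stream, ddof=1)  # unbiased estimator
array([1.66666667])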
325 | """ 326 | _, variance = average_and_var( 327 | arrays=arrays, axis=axis, ddof=ddof, weights=weights, ignore_nan=ignore_nan 328 | ) 329 | return variance 330 | 331 | 332 | @array_stream 333 | def ivar(arrays, axis=-1, ddof=0, weights=None, ignore_nan=False): 334 | """ 335 | Streaming variance of arrays. Weights are also supported. 336 | 337 | Parameters 338 | ---------- 339 | arrays : iterable of ndarrays 340 | Arrays to be combined. This iterable can also be a generator. 341 | axis : int, optional 342 | Reduction axis. Default is to combine the arrays in the stream as if 343 | they had been stacked along a new axis, then compute the variance along this new axis. 344 | If None, arrays are flattened. If `axis` is an int larger than 345 | the number of dimensions in the arrays of the stream, variance is computed 346 | along the new axis. 347 | ddof : int, optional 348 | Means Delta Degrees of Freedom. The divisor used in calculations 349 | is ``N - ddof``, where ``N`` represents the number of elements. 350 | weights : iterable of ndarray, iterable of floats, or None, optional 351 | Iterable of weights associated with the values in each item of `arrays`. 352 | Each value in an element of `arrays` contributes to the variance 353 | according to its associated weight. Each weight can be either a float 354 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 355 | then all data in each element of `arrays` are assumed to have a weight equal to one. 356 | ignore_nan : bool, optional 357 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 358 | 359 | Yields 360 | ------ 361 | var: `~numpy.ndarray` 362 | Variance. 363 | 364 | See Also 365 | -------- 366 | numpy.var : variance calculation for dense arrays. Weights are not supported. 367 | 368 | References 369 | ---------- 370 | .. [#] D. H. D. West, Updating the mean and variance estimates: an improved method. 371 | Communications of the ACM Vol. 22, Issue 9, pp. 532 - 535 (1979) 372 | """ 373 | primitive = _ivar(arrays=arrays, axis=axis, weights=weights, ignore_nan=ignore_nan) 374 | for avg, sq_avg, swgt in primitive: 375 | yield (sq_avg - avg**2) * (swgt / (swgt - ddof)) 376 | 377 | 378 | @array_stream 379 | def std(arrays, axis=-1, ddof=0, weights=None, ignore_nan=False): 380 | """ 381 | Total standard deviation of arrays. Weights are also supported. This function 382 | consumes the input stream. 383 | 384 | Parameters 385 | ---------- 386 | arrays : iterable of ndarrays 387 | Arrays to be combined. This iterable can also be a generator. 388 | axis : int, optional 389 | Reduction axis. Default is to combine the arrays in the stream as if 390 | they had been stacked along a new axis, then compute the standard deviation along this new axis. 391 | If None, arrays are flattened. If `axis` is an int larger than 392 | the number of dimensions in the arrays of the stream, standard deviation is computed 393 | along the new axis. 394 | ddof : int, optional 395 | Means Delta Degrees of Freedom. The divisor used in calculations 396 | is ``N - ddof``, where ``N`` represents the number of elements. 397 | weights : iterable of ndarray, iterable of floats, or None, optional 398 | Iterable of weights associated with the values in each item of `arrays`. 399 | Each value in an element of `arrays` contributes to the standard deviation 400 | according to its associated weight. Each weight can be either a float 401 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 402 | then all data in each element of `arrays` are assumed to have a weight equal to one. 403 | ignore_nan : bool, optional 404 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 405 | 406 | Returns 407 | ------- 408 | std: `~numpy.ndarray` 409 | Standard deviation. 410 | 411 | See Also 412 | -------- 413 | istd : streaming standard deviation. 414 | numpy.std : standard deviation calculation of dense arrays. Weights are not supported.
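Examples
--------
A short sketch (top-level import assumed):

>>> import numpy as np
>>> from npstreams import std
>>> stream = [np.array([float(x)]) for x in (2, 4, 4, 4, 5, 5, 7, 9)]
>>> std(stream)
array([2.])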
415 | """ 416 | return np.sqrt( 417 | var(arrays=arrays, axis=axis, ddof=ddof, weights=weights, ignore_nan=ignore_nan) 418 | ) 419 | 420 | 421 | @array_stream 422 | def istd(arrays, axis=-1, ddof=0, weights=None, ignore_nan=False): 423 | """ 424 | Streaming standard deviation of arrays. Weights are also supported. 425 | This is equivalent to calling `numpy.std(axis = 2)` on a stack of images. 426 | 427 | Parameters 428 | ---------- 429 | arrays : iterable of ndarrays 430 | Arrays to be combined. This iterable can also be a generator. 431 | axis : int, optional 432 | Reduction axis. Default is to combine the arrays in the stream as if 433 | they had been stacked along a new axis, then compute the standard deviation along this new axis. 434 | If None, arrays are flattened. If `axis` is an int larger than 435 | the number of dimensions in the arrays of the stream, standard deviation is computed 436 | along the new axis. 437 | ddof : int, optional 438 | Means Delta Degrees of Freedom. The divisor used in calculations 439 | is ``N - ddof``, where ``N`` represents the number of elements. 440 | weights : iterable of ndarray, iterable of floats, or None, optional 441 | Iterable of weights associated with the values in each item of `arrays`. 442 | Each value in an element of `arrays` contributes to the standard deviation 443 | according to its associated weight. Each weight can be either a float 444 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 445 | then all data in each element of `arrays` are assumed to have a weight equal to one. 446 | ignore_nan : bool, optional 447 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 448 | 449 | Yields 450 | ------ 451 | std: `~numpy.ndarray` 452 | Standard deviation. 453 | 454 | See Also 455 | -------- 456 | std : total standard deviation. 457 | numpy.std : standard deviation calculation of dense arrays. Weights are not supported. 458 | """ 459 | yield from map( 460 | np.sqrt, 461 | ivar( 462 | arrays=arrays, axis=axis, ddof=ddof, weights=weights, ignore_nan=ignore_nan 463 | ), 464 | ) 465 | 466 | 467 | @array_stream 468 | def sem(arrays, axis=-1, ddof=0, weights=None, ignore_nan=False): 469 | """ 470 | Standard error in the mean (SEM) of a stream of arrays. This function consumes 471 | the entire stream. 472 | 473 | Parameters 474 | ---------- 475 | arrays : iterable of ndarrays 476 | Arrays to be combined. This iterable can also be a generator. 477 | axis : int, optional 478 | Reduction axis. Default is to combine the arrays in the stream as if 479 | they had been stacked along a new axis, then compute the standard error along this new axis. 480 | If None, arrays are flattened. If `axis` is an int larger than 481 | the number of dimensions in the arrays of the stream, standard error is computed 482 | along the new axis. 483 | ddof : int, optional 484 | Means Delta Degrees of Freedom. The divisor used in calculations 485 | is ``N - ddof``, where ``N`` represents the number of elements. 486 | weights : iterable of ndarray, iterable of floats, or None, optional 487 | Iterable of weights associated with the values in each item of `arrays`. 488 | Each value in an element of `arrays` contributes to the standard error 489 | according to its associated weight. Each weight can be either a float 490 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 491 | then all data in each element of `arrays` are assumed to have a weight equal to one. 492 | ignore_nan : bool, optional 493 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 494 | 495 | Returns 496 | ------- 497 | sem: `~numpy.ndarray`, dtype float 498 | Standard error in the mean. 499 | 500 | See Also 501 | -------- 502 | scipy.stats.sem : standard error in the mean of dense arrays.
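Examples
--------
A short sketch (top-level import assumed). `ddof` is passed explicitly
because conventions differ; note that ``scipy.stats.sem`` defaults to ``ddof=1``:

>>> import numpy as np
>>> from npstreams import sem
>>> stream = [np.array([float(x)]) for x in (2, 4, 4, 4, 5, 5, 7, 9)]
>>> sem(stream, ddof=0)  # standard deviation (here 2.0) divided by sqrt(N)
array([0.70710678])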
503 | """ 504 | avg, sq_avg, swgt = last( 505 | _ivar(arrays=arrays, axis=axis, weights=weights, ignore_nan=ignore_nan) 506 | ) 507 | return np.sqrt((sq_avg - avg**2) * (1 / (swgt - ddof))) 508 | 509 | 510 | @array_stream 511 | def isem(arrays, axis=-1, ddof=1, weights=None, ignore_nan=False): 512 | """ 513 | Streaming standard error in the mean (SEM) of arrays. This is equivalent to 514 | calling `scipy.stats.sem(axis = 2)` on a stack of images. 515 | 516 | Parameters 517 | ---------- 518 | arrays : iterable of ndarrays 519 | Arrays to be combined. This iterable can also be a generator. 520 | axis : int, optional 521 | Reduction axis. Default is to combine the arrays in the stream as if 522 | they had been stacked along a new axis, then compute the standard error along this new axis. 523 | If None, arrays are flattened. If `axis` is an int larger than 524 | the number of dimensions in the arrays of the stream, standard error is computed 525 | along the new axis. 526 | ddof : int, optional 527 | Means Delta Degrees of Freedom. The divisor used in calculations 528 | is ``N - ddof``, where ``N`` represents the number of elements. 529 | weights : iterable of ndarray, iterable of floats, or None, optional 530 | Iterable of weights associated with the values in each item of `arrays`. 531 | Each value in an element of `arrays` contributes to the standard error 532 | according to its associated weight. Each weight can be either a float 533 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 534 | then all data in each element of `arrays` are assumed to have a weight equal to one. 535 | ignore_nan : bool, optional 536 | If True, NaNs are set to zero weight. Default is propagation of NaNs. 537 | 538 | Yields 539 | ------ 540 | sem: `~numpy.ndarray`, dtype float 541 | Standard error in the mean. 542 | 543 | See Also 544 | -------- 545 | scipy.stats.sem : standard error in the mean of dense arrays. 546 | """ 547 | primitive = _ivar(arrays=arrays, axis=axis, weights=weights, ignore_nan=ignore_nan) 548 | for avg, sq_avg, swgt in primitive: 549 | yield np.sqrt((sq_avg - avg**2) * (1 / (swgt - ddof))) 550 | 551 | 552 | @array_stream 553 | def ihistogram(arrays, bins, range=None, weights=None): 554 | """ 555 | Streaming histogram calculation. 556 | 557 | Parameters 558 | ---------- 559 | arrays : iterable of ndarrays 560 | Arrays to be combined. This iterable can also be a generator. Arrays in this stream 561 | can be of any shape; the histogram is computed over the flattened array. 562 | bins : iterable 563 | Bin edges, including the rightmost edge, allowing for non-uniform bin widths. 564 | To determine the appropriate bins automatically, see ``numpy.histogram_bin_edges``. 565 | weights : iterable of ndarray, iterable of floats, or None, optional 566 | Iterable of weights associated with the values in each item of `arrays`. 567 | Each value in an element of `arrays` only contributes its associated weight towards the 568 | bin count (instead of 1). Each weight can be either a float 569 | or an array of the same shape as any element of `arrays`. If ``weights=None``, 570 | then all data in each element of `arrays` are assumed to have a weight equal to one. 571 | 572 | .. versionadded:: 1.6.1 573 | 574 | Yields 575 | ------ 576 | hist : `~numpy.ndarray` 577 | Streamed histogram. 578 | 579 | See Also 580 | -------- 581 | numpy.histogram : 1D histogram of dense arrays. 582 | numpy.histogram_bin_edges : automatic selection of bins
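Examples
--------
A minimal sketch, mirroring the test suite above; ``ihistogram`` and ``last``
are assumed to be importable from the top-level ``npstreams`` namespace:

>>> import numpy as np
>>> from npstreams import ihistogram, last
>>> stream = [np.array([0.0, 1.0]), np.array([2.0, 3.0])]
>>> hist = last(ihistogram(stream, bins=[0, 1, 2, 3, 4]))  # cumulative over the whole stream
>>> np.array_equal(hist, [1, 1, 1, 1])
True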
583 | """ 584 | bins = np.asarray(bins) 585 | first, arrays = peek(arrays) 586 | 587 | if weights is None: 588 | weights = repeat(None) 589 | else: 590 | weights = map(partial(np.broadcast_to, shape=first.shape), weights) 591 | 592 | # np.histogram also returns the bin edges, which we ignore 593 | hist_func = lambda arr, wgt: np.histogram(arr, bins=bins, weights=wgt)[0] 594 | yield from isum(starmap(hist_func, zip(arrays, weights))) 595 | --------------------------------------------------------------------------------