├── .gitattributes
├── .github
│   └── workflows
│       ├── ci.yaml
│       └── pypi-release.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE.txt
├── README.md
├── ci
│   └── environment.yml
├── conftest.py
├── diagrams
│   ├── aggregate.png
│   ├── aggregate_dims.png
│   ├── aggregate_dims.svg
│   ├── diagram.docx
│   ├── label_contiguous_1d.png
│   ├── multi_arange.png
│   └── multi_cumsum.png
├── numpy_groupies
│   ├── __init__.py
│   ├── aggregate_numba.py
│   ├── aggregate_numpy.py
│   ├── aggregate_numpy_ufunc.py
│   ├── aggregate_pandas.py
│   ├── aggregate_purepy.py
│   ├── benchmarks
│   │   ├── __init__.py
│   │   ├── generic.py
│   │   └── simple.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_compare.py
│   │   ├── test_generic.py
│   │   ├── test_indices.py
│   │   └── test_utils.py
│   └── utils.py
└── pyproject.toml

/.gitattributes:
--------------------------------------------------------------------------------
1 | numpy_groupies/_version.py export-subst
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 |   push:
4 |     branches:
5 |       - "master"
6 |   pull_request:
7 |     branches:
8 |       - "*"
9 |   schedule:
10 |     - cron: "0 0 * * *" # Daily “At 00:00”
11 |   workflow_dispatch: # allows you to trigger manually
12 |
13 | concurrency:
14 |   group: ${{ github.workflow }}-${{ github.ref }}
15 |   cancel-in-progress: true
16 |
17 | jobs:
18 |   build:
19 |     name: Build (${{ matrix.python-version }}, ${{ matrix.os }})
20 |     runs-on: ${{ matrix.os }}
21 |     defaults:
22 |       run:
23 |         shell: bash -l {0}
24 |     strategy:
25 |       fail-fast: false
26 |       matrix:
27 |         os: ["ubuntu-latest"]
28 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
29 |     steps:
30 |       - uses: actions/checkout@v4
31 |         with:
32 |           fetch-depth: 1
33 |       - name: Set environment variables
34 |         run: |
35 |           echo "CONDA_ENV_FILE=ci/environment.yml" >> $GITHUB_ENV
36 |           echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV
37 |
38 |       - name: Setup micromamba
39 |         uses: mamba-org/setup-micromamba@v1
40 |         with:
41 |           environment-file: ${{ env.CONDA_ENV_FILE }}
42 |           environment-name: npg-tests
43 |           cache-environment: true
44 |           cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
45 |           create-args: >-
46 |             python=${{matrix.python-version}}
47 |             conda
48 |
49 |       # We only want to install this on one run, because otherwise we'll have
50 |       # duplicate annotations.
51 |       - name: Install error reporter
52 |         if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
53 |         run: |
54 |           python -m pip install pytest-github-actions-annotate-failures
55 |
56 |       - name: Set up conda environment
57 |         shell: bash -l {0}
58 |         run: |
59 |           python -m pip install -e .[dev]
60 |           conda list
61 |
62 |       - name: Run Tests
63 |         shell: bash -l {0}
64 |         run: |
65 |           pytest
66 |
--------------------------------------------------------------------------------
/.github/workflows/pypi-release.yaml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPI
2 | on:
3 |   workflow_dispatch:
4 |   release:
5 |     types:
6 |       - published
7 |
8 | jobs:
9 |   build-artifacts:
10 |     runs-on: ubuntu-latest
11 |     if: github.repository == 'ml31415/numpy-groupies'
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |         with:
15 |           fetch-depth: 1
16 |       - uses: actions/setup-python@v5
17 |         name: Install Python
18 |         with:
19 |           python-version: "3.11"
20 |       - name: Build tarball and wheels
21 |         run: |
22 |           git clean -xdf
23 |           git restore -SW .
24 |           pipx run build
25 |       - name: List contents of built dist
26 |         run: |
27 |           ls -ltrh
28 |           ls -ltrh dist
29 |       - name: Check built artifacts
30 |         run: |
31 |           pipx run twine check --strict dist/*
32 |           pwd
33 |           if [ -f dist/numpy_groupies-0.0.0.tar.gz ]; then
34 |             echo "❌ INVALID VERSION NUMBER"
35 |             exit 1
36 |           else
37 |             echo "✅ Looks good"
38 |           fi
39 |       - name: Test artifact installation
40 |         run: |
41 |           python -m pip install --upgrade pip
42 |           python -m pip install dist/*.tar.gz
43 |       - uses: actions/upload-artifact@v4
44 |         with:
45 |           name: release
46 |           path: dist
47 |
48 |   test-built-dist:
49 |     needs: build-artifacts
50 |     runs-on: ubuntu-latest
51 |     steps:
52 |       - uses: actions/download-artifact@v4
53 |         with:
54 |           name: release
55 |           path: dist
56 |       - name: Publish package to TestPyPI
57 |         uses: pypa/gh-action-pypi-publish@release/v1
58 |         with:
59 |           password: ${{ secrets.TESTPYPI_TOKEN }}
60 |           repository-url: https://test.pypi.org/legacy/
61 |           skip-existing: true
62 |
63 |   upload-to-pypi:
64 |     needs: test-built-dist
65 |     if: github.event_name == 'release'
66 |     runs-on: ubuntu-latest
67 |     steps:
68 |       - uses: actions/download-artifact@v4
69 |         with:
70 |           name: release
71 |           path: dist
72 |       - name: Publish package to PyPI
73 |         uses: pypa/gh-action-pypi-publish@release/v1
74 |         with:
75 |           password: ${{ secrets.PYPI_TOKEN }}
76 |           skip-existing: true
77 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 |
21 | # Installer logs
22 | pip-log.txt
23 |
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 |
29 | # Translations
30 | *.mo
31 |
32 | # Dev stuff
33 | .mr.developer.cfg
34 | .idea
35 | .project
36 | .pydevproject
37 | .settings/
38 | .cache/
39 | __pycache__/
40 | .eggs/
41 | .hypothesis/
42 | *~
43 | *.ini
44 |
45 | # Dynamic versioning
46 | numpy_groupies/_version.py
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
3 |     rev: v4.5.0
4
| hooks: 5 | - id: debug-statements 6 | - id: detect-private-key 7 | - id: check-builtin-literals 8 | - id: check-case-conflict 9 | - id: check-executables-have-shebangs 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-symlinks 13 | - id: check-toml 14 | - id: check-xml 15 | - id: check-yaml 16 | exclude: (.pre-commit-config\.yaml) 17 | 18 | - repo: https://github.com/astral-sh/ruff-pre-commit 19 | rev: v0.3.7 20 | hooks: 21 | - id: ruff 22 | - id: ruff-format 23 | 24 | - repo: https://github.com/codespell-project/codespell 25 | rev: v2.2.6 26 | hooks: 27 | - id: codespell 28 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, numpy-groupies developers 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GitHub Workflow CI Status](https://img.shields.io/github/actions/workflow/status/ml31415/numpy-groupies/ci.yaml?branch=master&logo=github&style=flat)](https://github.com/ml31415/numpy-groupies/actions) 2 | [![PyPI](https://img.shields.io/pypi/v/numpy-groupies.svg?style=flat)](https://pypi.org/project/numpy-groupies/) 3 | [![Conda-forge](https://img.shields.io/conda/vn/conda-forge/numpy_groupies.svg?style=flat)](https://anaconda.org/conda-forge/numpy_groupies) 4 | ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fml31415%2Fnumpy-groupies%2Fmaster%2Fpyproject.toml) 5 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/numpy-groupies) 6 | 7 | # numpy-groupies 8 | 9 | This package consists of a small library of optimised tools for doing things that can roughly 10 | be considered "group-indexing operations". The most prominent tool is `aggregate`, which is 11 | described in detail further down the page. 
12 |
13 |
14 | ## Installation
15 | If you have `pip`, then simply:
16 | ```
17 | pip install numpy_groupies
18 | ```
19 | Note that `numpy_groupies` doesn't have any compulsory dependencies (even `numpy` is optional)
20 | so you should be able to install it fairly easily even without a package manager. If you just
21 | want one particular implementation of `aggregate` (e.g. `aggregate_numpy.py`), you can download
22 | that one file, and copy-paste the contents of `utils.py` into the top of that file (replacing
23 | the `from .utils import (...)` line).
24 |
25 |
26 | ## aggregate
27 |
28 | ![aggregate_diagram](/diagrams/aggregate.png)
29 | ```python
30 | import numpy as np
31 | import numpy_groupies as npg
32 | group_idx = np.array([ 3, 0, 0, 1, 0, 3, 5, 5, 0, 4])
33 | a = np.array([13.2, 3.5, 3.5,-8.2, 3.0,13.4,99.2,-7.1, 0.0,53.7])
34 | npg.aggregate(group_idx, a, func='sum', fill_value=0)
35 | # >>> array([10.0, -8.2, 0.0, 26.6, 53.7, 92.1])
36 | ```
37 | `aggregate` takes an array of values, and an array giving the group number for each of those values.
38 | It then returns the sum (or mean, or std, or any, ...etc.) of the values in each group. You have
39 | probably come across this idea before - see [Matlab's `accumarray` function](http://uk.mathworks.com/help/matlab/ref/accumarray.html?refresh=true), or
40 | [`pandas` groupby concept](http://pandas.pydata.org/pandas-docs/dev/groupby.html), or
41 | [MapReduce paradigm](http://en.wikipedia.org/wiki/MapReduce), or simply the [basic histogram](https://en.wikipedia.org/wiki/Histogram).
42 |
43 | A few of the implemented functions do not reduce the data; instead they calculate values cumulatively
44 | while iterating over the data, or permute them. The output size matches the input size.
45 |
46 | ```python
47 | group_idx = np.array([4, 3, 3, 4, 4, 1, 1, 1, 7, 8, 7, 4, 3, 3, 1, 1])
48 | a = np.array([3, 4, 1, 3, 9, 9, 6, 7, 7, 0, 8, 2, 1, 8, 9, 8])
49 | npg.aggregate(group_idx, a, func='cumsum')
50 | # >>> array([3, 4, 5, 6,15, 9,15,22, 7, 0,15,17, 6,14,31,39])
51 | ```
52 |
53 |
54 | ### Inputs
55 | The function accepts several combinations of inputs, producing different shapes of output.
56 | We give a brief description of the general meaning of the inputs and then go over the different combinations
57 | in more detail:
58 |
59 | * `group_idx` - array of non-negative integers to be used as the "labels" with which to group the values in `a`.
60 | * `a` - array of values to be aggregated.
61 | * `func='sum'` - the function to use for aggregation. See the section below for more details.
62 | * `size=None` - the shape of the output array. If `None`, the maximum value in `group_idx` will set the size of the output.
63 | * `fill_value=0` - value to use for output groups that do not appear anywhere in the `group_idx` input array.
64 | * `order='C'` - for multidimensional output, this controls the layout in memory, can be `'F'` for fortran-style.
65 | * `dtype=None` - the `dtype` of the output. `None` means choose a sensible type for the given `a`, `func`, and `fill_value`.
66 | * `axis=None` - explained below.
67 | * `ddof=0` - passed through into calculations of variance and standard deviation (see section on functions).
68 |
69 | ![aggregate_dims_diagram](/diagrams/aggregate_dims.png)
70 |
71 | * Form 1 is the simplest, taking `group_idx` and `a` of matching 1D lengths, and producing a 1D output.
72 | * Form 2 is similar to Form 1, but takes a scalar `a`, which is broadcast out to the length of `group_idx`. Note that this is generally not that useful.
73 | * Form 3 is more complicated. `group_idx` is the same length as `a.shape[axis]`. The groups are broadcast out along the other axis/axes of `a`, thus the output is of shape `n_groups x a.shape[0] x ... x a.shape[axis-1] x a.shape[axis+1] x ... x a.shape[-1]`, i.e. the output has two or more dimensions.
74 | * Form 4 also produces output with two or more dimensions, but for very different reasons than Form 3. Here `a` is 1D and `group_idx` is exactly `2D`, whereas in Form 3 `a` is `ND`, `group_idx` is `1D`, and we provide a value for `axis`. The length of `a` must match `group_idx.shape[1]`, and the value of `group_idx.shape[0]` determines the number of dimensions in the output, i.e. `group_idx[:,99]` gives the `(x,y,z)` group indices for `a[99]`.
75 | * Form 5 is the same as Form 4 but with scalar `a`. As with Form 2, this is rarely that helpful.
76 |
77 | **Note on performance.** The `order` of the output is unlikely to affect the performance of `aggregate` (although it may affect your downstream usage of that output); however, the order of multidimensional `a` or `group_idx` can affect performance: in Form 4 it is best if columns are contiguous in memory within `group_idx`, i.e. `group_idx[:, 99]` corresponds to a contiguous chunk of memory; in Form 3 it's best if all the data in `a` for `group_idx[i]` is contiguous, e.g. if `axis=1` then we want `a[:, 55]` to be contiguous.
78 |
79 |
80 | ### Available functions
81 | By default, `aggregate` assumes you want to sum the values within each group; however, you can specify another
82 | function using the `func` kwarg. This `func` can be any custom callable, but you will likely want one of
83 | the following optimized functions. Note that not all functions are provided by all implementations.
84 |
85 | * `'sum'` - sum of items within each group (see example above).
86 | * `'prod'` - product of items within each group.
87 | * `'mean'` - mean of items within each group.
88 | * `'var'` - variance of items within each group. Use the `ddof` kwarg for degrees of freedom. The divisor used in calculations is `N - ddof`, where `N` represents the number of elements. By default `ddof` is zero.
89 | * `'std'` - standard deviation of items within each group. Use the `ddof` kwarg for degrees of freedom (see `var` above).
90 | * `'min'` - minimum value of items within each group.
91 | * `'max'` - maximum value of items within each group.
92 | * `'first'` - first item in `a` from each group.
93 | * `'last'` - last item in `a` from each group.
94 | * `'argmax'` - the index in `a` of the maximum value in each group.
95 | * `'argmin'` - the index in `a` of the minimum value in each group.
96 |
97 | The above functions also have a `nan`-form, which skips the `nan` values instead of propagating them to the result of the calculation:
98 | * `'nansum'`, `'nanprod'`, `'nanmean'`, `'nanvar'`, `'nanstd'`, `'nanmin'`, `'nanmax'`, `'nanfirst'`, `'nanlast'`, `'nanargmax'`, `'nanargmin'`
99 |
100 | The following functions are slightly different in that they always return boolean values. Their treatment of nans also differs from the above:
101 | * `'all'` - `True` if all items within a group are truthy. Note that `np.all(nan)` is `True`, i.e. `nan` is actually truthy.
102 | * `'any'` - `True` if any items within a group are truthy.
103 | * `'allnan'` - `True` if all items within a group are `nan`.
104 | * `'anynan'` - `True` if any items within a group are `nan`.
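
For example, here is a small illustration of the `nan`-handling described above (outputs shown as comments; note that an all-`nan` group falls back to `fill_value` for `'nansum'`):
```python
import numpy as np
import numpy_groupies as npg

group_idx = np.array([0, 0, 1, 1, 2])
a = np.array([1.0, np.nan, 2.0, 3.0, np.nan])
npg.aggregate(group_idx, a, func='sum')     # array([nan,  5., nan])
npg.aggregate(group_idx, a, func='nansum')  # array([1., 5., 0.])
npg.aggregate(group_idx, a, func='anynan')  # array([ True, False,  True])
```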
105 |
106 | The following functions don't reduce the data, but instead produce an output matching the size of the input:
107 | * `'cumsum'` - cumulative sum of items within each group.
108 | * `'cumprod'` - cumulative product of items within each group. (numba only)
109 | * `'cummin'` - cumulative minimum of items within each group. (numba only)
110 | * `'cummax'` - cumulative maximum of items within each group. (numba only)
111 | * `'sort'` - sort the items within each group in ascending order; use `reverse=True` to invert the order.
112 |
113 | Finally, there is one function which doesn't reduce each group to a single value, but instead returns the full
114 | set of items within the group:
115 | * `'array'` - simply returns the grouped items, using the same order as they appeared in `a`. (numpy only)
116 |
117 |
118 | ### Examples
119 | Compute sums of consecutive integers, and then compute products of those consecutive integers.
120 | ```python
121 | group_idx = np.arange(5).repeat(3)
122 | # group_idx: array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])
123 | a = np.arange(group_idx.size)
124 | # a: array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
125 | x = npg.aggregate(group_idx, a) # sum is default
126 | # x: array([ 3, 12, 21, 30, 39])
127 | x = npg.aggregate(group_idx, a, 'prod')
128 | # x: array([ 0, 60, 336, 990, 2184])
129 | ```
130 |
131 | Get the variance ignoring nans, setting all-nan groups to `nan`.
132 | ```python
133 | x = npg.aggregate(group_idx, a, func='nanvar', fill_value=np.nan)
134 | ```
135 |
136 | Count the number of elements in each group. Note that this is equivalent to doing `np.bincount(group_idx)`;
137 | indeed, that is how the numpy implementation does it.
138 | ```python
139 | x = npg.aggregate(group_idx, 1)
140 | ```
141 |
142 | Sum 1000 values into a three-dimensional cube of size 15x15x15. Note that in this example all three dimensions
143 | have the same size, but that doesn't have to be the case.
144 | ```python
145 | group_idx = np.random.randint(0, 15, size=(3, 1000))
146 | a = np.random.random(group_idx.shape[1])
147 | x = npg.aggregate(group_idx, a, func="sum", size=(15,15,15), order="F")
148 | # x.shape: (15, 15, 15)
149 | # np.isfortran(x): True
150 | ```
151 |
152 | Use a custom function to generate some strings.
153 | ```python
154 | group_idx = np.array([1, 0, 1, 4, 1])
155 | a = np.array([12.0, 3.2, -15, 88, 12.9])
156 | x = npg.aggregate(group_idx, a,
157 |                   func=lambda g: ' or maybe '.join(str(gg) for gg in g), fill_value='')
158 | # x: ['3.2', '12.0 or maybe -15.0 or maybe 12.9', '', '', '88.0']
159 | ```
160 |
161 | Use the `axis` arg in order to do a sum-aggregation on three rows simultaneously.
162 | ```python
163 | a = np.array([[99, 2, 11, 14, 20],
164 |               [33, 76, 12, 100, 71],
165 |               [67, 10, -8, 1, 9]])
166 | group_idx = np.array([[3, 3, 7, 0, 0]])
167 | x = npg.aggregate(group_idx, a, axis=1)
168 | # x : [[ 34, 0, 0, 101, 0, 0, 0, 11],
169 | #      [171, 0, 0, 109, 0, 0, 0, 12],
170 | #      [ 10, 0, 0, 77, 0, 0, 0, -8]]
171 | ```
172 |
173 |
174 | ### Multiple implementations
175 | There are multiple implementations of `aggregate` provided. If you use `from numpy_groupies import aggregate`,
176 | the best available implementation will automatically be selected. Otherwise, you can pick a specific version directly,
177 | like `from numpy_groupies import aggregate_nb as aggregate`, or by importing `aggregate` from the implementing module,
178 | e.g. `from numpy_groupies.aggregate_numpy import aggregate`.
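
For instance, to prefer the fast `numba` implementation while falling back gracefully when `numba` isn't installed, something like the following sketch works (it relies on `numpy_groupies/__init__.py` setting `aggregate_nb` to `None` in that case):
```python
import numpy_groupies as npg

# Use the numba implementation when it could be imported,
# otherwise fall back to whatever `npg.aggregate` resolved to.
aggregate = npg.aggregate_nb if npg.aggregate_nb is not None else npg.aggregate
```
When `numba` is importable, `npg.aggregate` already points at the numba version, so this pattern is mostly useful for making the preference explicit.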
179 |
180 | Currently the following implementations exist:
181 | * **numpy** - This is the default implementation. It uses plain `numpy`, mainly relying on `np.bincount` and basic indexing magic. It comes without other dependencies except `numpy` and shows reasonable performance for occasional usage.
182 | * **numba** - This is the most performant implementation, based on jit compilation provided by numba and LLVM.
183 | * **pure python** - This implementation has no dependencies and uses only the standard library. It's horribly slow and should only be used if numpy is not available.
184 | * **numpy ufunc** - *Only for benchmarking.* This implementation uses the `.at` method of numpy's `ufunc`s (e.g. `add.at`), which would appear to be designed for performing exactly the same calculation that `aggregate` executes; however, the numpy implementation is rather incomplete.
185 | * **pandas** - *Only for reference.* Pandas' `groupby` concept is the same as the task performed by `aggregate`. However, `pandas` is not actually faster than the default `numpy` implementation. Also, note that there may be room for improvement in the way that `pandas` is utilized here. Most notably, when computing multiple aggregations of the same data (e.g. `'min'` and `'max'`), pandas could potentially be used more efficiently.
186 |
187 | All implementations have the same calling syntax and produce the same outputs, to within some floating-point error.
188 | However, some implementations only support a subset of the valid inputs and will sometimes throw `NotImplementedError`.
189 |
190 |
191 | ### Benchmarks
192 | Scripts for testing and benchmarking are included in this repository. For benchmarking, run
193 | `python -m numpy_groupies.benchmarks.generic` from the root of this repository.
194 |
195 | Below we are using `500,000` indices uniformly picked from `[0, 1000)`. The values of `a` are uniformly picked from
196 | the interval `[0,1)`, with anything less than `0.2` then set to 0 (in order to serve as falsy values in boolean operations).
197 | For `nan-` operations another 20% of the values are set to `nan`, leaving the remainder on the interval `[0.2,0.8)`.
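
In code, this setup looks roughly as follows (a sketch for orientation only; the actual benchmark setup lives in `numpy_groupies/benchmarks/generic.py` and may differ in detail):
```python
import numpy as np

rng = np.random.default_rng(0)  # seed chosen arbitrarily for this sketch
group_idx = rng.integers(0, 1000, size=500_000)
a = rng.random(500_000)
a[a < 0.2] = 0  # falsy values for the boolean operations
a_nan = a.copy()
a_nan[a_nan > 0.8] = np.nan  # roughly another 20% of values, for the nan- operations
```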
198 |
199 | The benchmarking results are given in ms for an i7-7560U running at 2.40GHz:
200 |
201 | | function  | ufunc   | numpy   | numba   | pandas  |
202 | |-----------|---------|---------|---------|---------|
203 | | sum       | 1.950   | 1.728   | 0.708   | 11.832  |
204 | | prod      | 2.279   | 2.349   | 0.709   | 11.649  |
205 | | min       | 2.472   | 2.489   | 0.716   | 11.686  |
206 | | max       | 2.457   | 2.480   | 0.745   | 11.598  |
207 | | len       | 1.481   | 1.270   | 0.635   | 10.932  |
208 | | all       | 37.186  | 3.054   | 0.892   | 12.587  |
209 | | any       | 35.278  | 5.157   | 0.890   | 12.845  |
210 | | anynan    | 5.783   | 2.126   | 0.762   | 144.740 |
211 | | allnan    | 7.971   | 4.367   | 0.774   | 144.507 |
212 | | mean      | ----    | 2.500   | 0.825   | 13.284  |
213 | | std       | ----    | 4.528   | 0.965   | 12.193  |
214 | | var       | ----    | 4.269   | 0.969   | 12.657  |
215 | | first     | ----    | 1.847   | 0.811   | 11.584  |
216 | | last      | ----    | 1.309   | 0.581   | 11.842  |
217 | | argmax    | ----    | 3.504   | 1.411   | 293.640 |
218 | | argmin    | ----    | 6.996   | 1.347   | 290.977 |
219 | | nansum    | ----    | 5.388   | 1.569   | 15.239  |
220 | | nanprod   | ----    | 5.707   | 1.546   | 15.004  |
221 | | nanmin    | ----    | 5.831   | 1.700   | 14.292  |
222 | | nanmax    | ----    | 5.847   | 1.731   | 14.927  |
223 | | nanlen    | ----    | 3.170   | 1.529   | 14.529  |
224 | | nanall    | ----    | 6.499   | 1.640   | 15.931  |
225 | | nanany    | ----    | 8.041   | 1.656   | 15.839  |
226 | | nanmean   | ----    | 5.636   | 1.583   | 15.185  |
227 | | nanvar    | ----    | 7.514   | 1.682   | 15.643  |
228 | | nanstd    | ----    | 7.292   | 1.666   | 15.104  |
229 | | nanfirst  | ----    | 5.318   | 2.096   | 14.432  |
230 | | nanlast   | ----    | 4.943   | 1.473   | 14.637  |
231 | | nanargmin | ----    | 7.977   | 1.779   | 298.911 |
232 | | nanargmax | ----    | 5.869   | 1.802   | 301.022 |
233 | | cumsum    | ----    | 71.713  | 1.119   | 8.864   |
234 | | cumprod   | ----    | ----    | 1.123   | 12.100  |
235 | | cummax    | ----    | ----    | 1.062   | 12.133  |
236 | | cummin    | ----    | ----    | 0.973   | 11.908  |
237 | | arbitrary | ----    | 147.853 | 46.690  | 129.779 |
238 | | sort      | ----    | 167.699 | ----    | ----    |
239 |
240 | _Linux(x86_64), Python 3.10.12, Numpy 1.25.2, Numba 0.58.0, Pandas 2.0.2_
241 |
242 | ## Development
243 | This project was started by @ml31415, and the `numba` and `weave` implementations are by him. The pure
244 | python and `numpy` implementations were written by @d1manson.
245 |
246 | The authors hope that `numpy`'s `ufunc.at` methods or some other implementation of `aggregate` within
247 | `numpy` or `scipy` will eventually be fast enough to make this package redundant. Numpy 1.25 actually
248 | contained major [improvements on ufunc speed](https://numpy.org/doc/stable/release/1.25.0-notes.html),
249 | which reduced the speed gap between numpy and the numba implementation a lot.
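
To check how that gap looks on your own numpy version, here is a rough sum-only comparison of the two underlying approaches (a standalone sketch, not part of the package; absolute timings vary by machine):
```python
import timeit
import numpy as np

group_idx = np.random.randint(0, 1000, size=500_000)
a = np.random.random(500_000)

def with_ufunc_at():
    out = np.zeros(1000)
    np.add.at(out, group_idx, a)  # the route the "numpy ufunc" backend takes
    return out

def with_bincount():
    return np.bincount(group_idx, weights=a, minlength=1000)  # the numpy backend's route

print(timeit.timeit(with_ufunc_at, number=10))
print(timeit.timeit(with_bincount, number=10))
```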
250 | -------------------------------------------------------------------------------- /ci/environment.yml: -------------------------------------------------------------------------------- 1 | name: npg-tests 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - numpy 7 | - pandas 8 | - numba 9 | - pytest 10 | - numpy_groupies 11 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | pytest configuration to silently discard test items with invalid parameter combinations 3 | See: https://github.com/pytest-dev/pytest/issues/3730#issuecomment-567142496 4 | """ 5 | 6 | 7 | def pytest_configure(config): 8 | config.addinivalue_line( 9 | "markers", "deselect_if(func): function to deselect tests from parametrization" 10 | ) 11 | 12 | 13 | def pytest_collection_modifyitems(config, items): 14 | removed = [] 15 | kept = [] 16 | for item in items: 17 | m = item.get_closest_marker("deselect_if") 18 | if m: 19 | func = m.kwargs["func"] 20 | if func(**item.callspec.params): 21 | removed.append(item) 22 | continue 23 | kept.append(item) 24 | if removed: 25 | config.hook.pytest_deselected(items=removed) 26 | items[:] = kept 27 | -------------------------------------------------------------------------------- /diagrams/aggregate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/diagrams/aggregate.png -------------------------------------------------------------------------------- /diagrams/aggregate_dims.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/diagrams/aggregate_dims.png -------------------------------------------------------------------------------- /diagrams/aggregate_dims.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 22 | 24 | 29 | 34 | 39 | 44 | 49 | 54 | 59 | 64 | 69 | 76 | 81 | 86 | 91 | 96 | 101 | 106 | 111 | 116 | 121 | 126 | 131 | 136 | 141 | 146 | 151 | 156 | 164 | 171 | 178 | 179 | 180 | 202 | 204 | 205 | 207 | image/svg+xml 208 | 210 | 211 | 212 | 213 | 214 | 219 | 235 | 246 | 257 | group_idx a 295 | 303 | result 317 | 327 | 328 | 339 | 350 | group_idx a, axis=1 395 | result 421 | 427 | 435 | 445 | group_idx 472 | a result 507 | 518 | 530 | 542 | 543 | 544 | 550 | 558 | 568 | group_idx a result 619 | 630 | 642 | 654 | 655 | 666 | 667 | 673 | 683 | 694 | group_idx 721 | a 748 | result form 1 781 | form 2 form 3 form 4 form 5 note ndim(a) can be >2 857 | and ndim(result) = ndim(a) 869 | 870 | 871 | -------------------------------------------------------------------------------- /diagrams/diagram.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/diagrams/diagram.docx -------------------------------------------------------------------------------- /diagrams/label_contiguous_1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/diagrams/label_contiguous_1d.png -------------------------------------------------------------------------------- 
/diagrams/multi_arange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/diagrams/multi_arange.png -------------------------------------------------------------------------------- /diagrams/multi_cumsum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/diagrams/multi_cumsum.png -------------------------------------------------------------------------------- /numpy_groupies/__init__.py: -------------------------------------------------------------------------------- 1 | from .aggregate_purepy import aggregate as aggregate_py 2 | 3 | 4 | def dummy_no_impl(*args, **kwargs): 5 | raise NotImplementedError( 6 | "You may need to install another package (numpy or numba) to access a working implementation." 7 | ) 8 | 9 | 10 | aggregate = aggregate_py 11 | 12 | try: 13 | import numpy as np 14 | except ImportError: 15 | aggregate_np = aggregate_ufunc = dummy_no_impl 16 | multi_arange = multi_cumsum = label_contiguous_1d = dummy_no_impl 17 | else: 18 | from .aggregate_numpy import aggregate 19 | 20 | aggregate_np = aggregate 21 | from .aggregate_numpy_ufunc import aggregate as aggregate_ufunc 22 | from .utils import ( 23 | label_contiguous_1d, 24 | multi_arange, 25 | relabel_groups_masked, 26 | relabel_groups_unique, 27 | unpack, 28 | ) 29 | 30 | 31 | try: 32 | import numba 33 | except ImportError: 34 | aggregate_nb = None 35 | else: 36 | from .aggregate_numba import aggregate as aggregate_nb 37 | from .aggregate_numba import step_count, step_indices 38 | 39 | aggregate = aggregate_nb 40 | 41 | 42 | def uaggregate(group_idx, a, **kwargs): 43 | return unpack(group_idx, aggregate(group_idx, a, **kwargs)) 44 | 45 | 46 | try: 47 | # Version is added only when packaged 48 | from ._version import __version__ 49 | except ImportError: 50 | try: 51 | from setuptools_scm import get_version 52 | except ImportError: 53 | __version__ = "0.0.0" 54 | else: 55 | __version__ = get_version(root="..", relative_to=__file__) 56 | del get_version 57 | -------------------------------------------------------------------------------- /numpy_groupies/aggregate_numba.py: -------------------------------------------------------------------------------- 1 | import numba as nb 2 | import numpy as np 3 | 4 | from .utils import ( 5 | aggregate_common_doc, 6 | aliasing, 7 | check_dtype, 8 | check_fill_value, 9 | funcs_no_separate_nan, 10 | get_func, 11 | input_validation, 12 | ) 13 | 14 | 15 | class AggregateOp(object): 16 | """ 17 | Every subclass of AggregateOp handles a different aggregation operation. There are 18 | several private class methods that need to be overwritten by the subclasses 19 | in order to implement different functionality. 20 | 21 | On object instantiation, all necessary static methods are compiled together into 22 | two jitted callables, one for scalar arguments, and one for arrays. Calling the 23 | instantiated object picks the right cached callable, does some further preprocessing 24 | and then executes the actual aggregation operation. 
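    Subclasses override a small set of private hooks: ``_inner`` consumes one
    (group, value) pair per iteration, two-pass operations such as mean, std
    and var additionally implement ``_2pass_inner``, and N-to-N operations
    write per-input results through ``_outersetter``.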
25 | """ 26 | 27 | forced_fill_value = None 28 | counter_fill_value = 1 29 | counter_dtype = bool 30 | mean_fill_value = None 31 | mean_dtype = np.float64 32 | outer = False 33 | reverse = False 34 | nans = False 35 | 36 | def __init__(self, func=None, **kwargs): 37 | if func is None: 38 | func = type(self).__name__.lower() 39 | self.func = func 40 | self.__dict__.update(kwargs) 41 | # Cache the compiled functions, so they don't have to be recompiled on every call 42 | self._jit_scalar = self.callable(self.nans, self.reverse, scalar=True) 43 | self._jit_non_scalar = self.callable(self.nans, self.reverse, scalar=False) 44 | 45 | def __call__( 46 | self, 47 | group_idx, 48 | a, 49 | size=None, 50 | fill_value=0, 51 | order="C", 52 | dtype=None, 53 | axis=None, 54 | ddof=0, 55 | ): 56 | iv = input_validation( 57 | group_idx, 58 | a, 59 | size=size, 60 | order=order, 61 | axis=axis, 62 | check_bounds=False, 63 | func=self.func, 64 | ) 65 | group_idx, a, flat_size, ndim_idx, size, unravel_shape = iv 66 | 67 | # TODO: The typecheck should be done by the class itself, not by check_dtype 68 | dtype = check_dtype(dtype, self.func, a, len(group_idx)) 69 | check_fill_value(fill_value, dtype, func=self.func) 70 | input_dtype = type(a) if np.isscalar(a) else a.dtype 71 | ret, counter, mean, outer = self._initialize( 72 | flat_size, fill_value, dtype, input_dtype, group_idx.size 73 | ) 74 | group_idx = np.ascontiguousarray(group_idx) 75 | 76 | if not np.isscalar(a): 77 | a = np.ascontiguousarray(a) 78 | jitfunc = self._jit_non_scalar 79 | else: 80 | jitfunc = self._jit_scalar 81 | jitfunc(group_idx, a, ret, counter, mean, outer, fill_value, ddof) 82 | self._finalize(ret, counter, fill_value) 83 | 84 | if self.outer: 85 | ret = outer 86 | 87 | # Deal with ndimensional indexing 88 | if ndim_idx > 1: 89 | if unravel_shape is not None: 90 | # argreductions only 91 | mask = ret == fill_value 92 | ret[mask] = 0 93 | ret = np.unravel_index(ret, unravel_shape)[axis] 94 | ret[mask] = fill_value 95 | ret = ret.reshape(size, order=order) 96 | return ret 97 | 98 | @classmethod 99 | def _initialize(cls, flat_size, fill_value, dtype, input_dtype, input_size): 100 | if cls.forced_fill_value is None: 101 | ret = np.full(flat_size, fill_value, dtype=dtype) 102 | else: 103 | ret = np.full(flat_size, cls.forced_fill_value, dtype=dtype) 104 | 105 | counter = mean = outer = None 106 | if cls.counter_fill_value is not None: 107 | counter = np.full_like(ret, cls.counter_fill_value, dtype=cls.counter_dtype) 108 | if cls.mean_fill_value is not None: 109 | dtype = cls.mean_dtype if cls.mean_dtype else input_dtype 110 | mean = np.full_like(ret, cls.mean_fill_value, dtype=dtype) 111 | if cls.outer: 112 | outer = np.full(input_size, fill_value, dtype=dtype) 113 | 114 | return ret, counter, mean, outer 115 | 116 | @classmethod 117 | def _finalize(cls, ret, counter, fill_value): 118 | if cls.forced_fill_value is not None and fill_value != cls.forced_fill_value: 119 | if cls.counter_dtype == bool: 120 | ret[counter] = fill_value 121 | else: 122 | ret[~counter.astype(bool)] = fill_value 123 | 124 | @classmethod 125 | def callable(cls, nans=False, reverse=False, scalar=False): 126 | """Compile a jitted function doing the hard part of the job""" 127 | _valgetter = cls._valgetter_scalar if scalar else cls._valgetter 128 | valgetter = nb.njit(_valgetter) 129 | outersetter = nb.njit(cls._outersetter) 130 | 131 | if not nans: 132 | inner = nb.njit(cls._inner) 133 | else: 134 | cls_inner = nb.njit(cls._inner) 135 | cls_nan_check = 
nb.njit(cls._nan_check) 136 | 137 | @nb.njit 138 | def inner(ri, val, ret, counter, mean, fill_value): 139 | if not cls_nan_check(val): 140 | cls_inner(ri, val, ret, counter, mean, fill_value) 141 | 142 | @nb.njit 143 | def loop(group_idx, a, ret, counter, mean, outer, fill_value, ddof): 144 | # ddof needs to be present for being exchangeable with loop_2pass 145 | size = len(ret) 146 | rng = ( 147 | range(len(group_idx) - 1, -1, -1) if reverse else range(len(group_idx)) 148 | ) 149 | for i in rng: 150 | ri = group_idx[i] 151 | if ri < 0: 152 | raise ValueError("negative indices not supported") 153 | if ri >= size: 154 | raise ValueError("one or more indices in group_idx are too large") 155 | val = valgetter(a, i) 156 | inner(ri, val, ret, counter, mean, fill_value) 157 | outersetter(outer, i, ret[ri]) 158 | 159 | return loop 160 | 161 | @staticmethod 162 | def _valgetter(a, i): 163 | return a[i] 164 | 165 | @staticmethod 166 | def _valgetter_scalar(a, i): 167 | return a 168 | 169 | @staticmethod 170 | def _nan_check(val): 171 | return val != val 172 | 173 | @staticmethod 174 | def _inner(ri, val, ret, counter, mean, fill_value): 175 | raise NotImplementedError("subclasses need to overwrite _inner") 176 | 177 | @staticmethod 178 | def _outersetter(outer, i, val): 179 | pass 180 | 181 | 182 | class Aggregate2pass(AggregateOp): 183 | """Base class for everything that needs to process the data twice like mean, var and std.""" 184 | 185 | @classmethod 186 | def callable(cls, nans=False, reverse=False, scalar=False): 187 | # Careful, cls needs to be passed, so that the overwritten methods remain available in 188 | # AggregateOp.callable 189 | loop_1st = super().callable(nans=nans, reverse=reverse, scalar=scalar) 190 | 191 | _2pass_inner = nb.njit(cls._2pass_inner) 192 | 193 | @nb.njit 194 | def loop_2nd(ret, counter, mean, fill_value, ddof): 195 | for ri in range(len(ret)): 196 | if counter[ri] > ddof: 197 | ret[ri] = _2pass_inner(ri, ret, counter, mean, ddof) 198 | else: 199 | ret[ri] = fill_value 200 | 201 | @nb.njit 202 | def loop_2pass(group_idx, a, ret, counter, mean, outer, fill_value, ddof): 203 | loop_1st(group_idx, a, ret, counter, mean, outer, fill_value, ddof) 204 | loop_2nd(ret, counter, mean, fill_value, ddof) 205 | 206 | return loop_2pass 207 | 208 | @staticmethod 209 | def _2pass_inner(ri, ret, counter, mean, ddof): 210 | raise NotImplementedError("subclasses need to overwrite _2pass_inner") 211 | 212 | @classmethod 213 | def _finalize(cls, ret, counter, fill_value): 214 | """Copying the fill value is already done in the 2nd pass""" 215 | pass 216 | 217 | 218 | class AggregateNtoN(AggregateOp): 219 | """Base class for cumulative functions, where the output size matches the input size.""" 220 | 221 | outer = True 222 | 223 | @staticmethod 224 | def _outersetter(outer, i, val): 225 | outer[i] = val 226 | 227 | 228 | class AggregateGeneric(AggregateOp): 229 | """Base class for jitting arbitrary functions.""" 230 | 231 | counter_fill_value = None 232 | 233 | def __init__(self, func, **kwargs): 234 | self.func = func 235 | self.__dict__.update(kwargs) 236 | self._jitfunc = self.callable(self.nans) 237 | 238 | def __call__( 239 | self, 240 | group_idx, 241 | a, 242 | size=None, 243 | fill_value=0, 244 | order="C", 245 | dtype=None, 246 | axis=None, 247 | ddof=0, 248 | ): 249 | iv = input_validation( 250 | group_idx, a, size=size, order=order, axis=axis, check_bounds=False 251 | ) 252 | group_idx, a, flat_size, ndim_idx, size, _ = iv 253 | 254 | # TODO: The typecheck should be done by 
the class itself, not by check_dtype 255 | dtype = check_dtype(dtype, self.func, a, len(group_idx)) 256 | check_fill_value(fill_value, dtype, func=self.func) 257 | input_dtype = type(a) if np.isscalar(a) else a.dtype 258 | ret, _, _, _ = self._initialize( 259 | flat_size, fill_value, dtype, input_dtype, group_idx.size 260 | ) 261 | group_idx = np.ascontiguousarray(group_idx) 262 | 263 | sortidx = np.argsort(group_idx, kind="mergesort") 264 | self._jitfunc(sortidx, group_idx, a, ret) 265 | 266 | # Deal with ndimensional indexing 267 | if ndim_idx > 1: 268 | ret = ret.reshape(size, order=order) 269 | return ret 270 | 271 | def callable(self, nans=False): 272 | """Compile a jitted function and loop it over the sorted data.""" 273 | func = nb.njit(self.func) 274 | 275 | @nb.njit 276 | def loop(sortidx, group_idx, a, ret): 277 | size = len(ret) 278 | group_idx_srt = group_idx[sortidx] 279 | a_srt = a[sortidx] 280 | 281 | indices = step_indices(group_idx_srt) 282 | for i in range(len(indices) - 1): 283 | start_idx, stop_idx = indices[i], indices[i + 1] 284 | ri = group_idx_srt[start_idx] 285 | if ri < 0: 286 | raise ValueError("negative indices not supported") 287 | if ri >= size: 288 | raise ValueError("one or more indices in group_idx are too large") 289 | ret[ri] = func(a_srt[start_idx:stop_idx]) 290 | 291 | return loop 292 | 293 | 294 | class Sum(AggregateOp): 295 | forced_fill_value = 0 296 | 297 | @staticmethod 298 | def _inner(ri, val, ret, counter, mean, fill_value): 299 | counter[ri] = 0 300 | ret[ri] += val 301 | 302 | 303 | class Prod(AggregateOp): 304 | forced_fill_value = 1 305 | 306 | @staticmethod 307 | def _inner(ri, val, ret, counter, mean, fill_value): 308 | counter[ri] = 0 309 | ret[ri] *= val 310 | 311 | 312 | class Len(AggregateOp): 313 | forced_fill_value = 0 314 | 315 | @staticmethod 316 | def _inner(ri, val, ret, counter, mean, fill_value): 317 | counter[ri] = 0 318 | ret[ri] += 1 319 | 320 | 321 | class All(AggregateOp): 322 | forced_fill_value = 1 323 | 324 | @staticmethod 325 | def _inner(ri, val, ret, counter, mean, fill_value): 326 | counter[ri] = 0 327 | ret[ri] &= bool(val) 328 | 329 | 330 | class Any(AggregateOp): 331 | forced_fill_value = 0 332 | 333 | @staticmethod 334 | def _inner(ri, val, ret, counter, mean, fill_value): 335 | counter[ri] = 0 336 | ret[ri] |= bool(val) 337 | 338 | 339 | class Last(AggregateOp): 340 | counter_fill_value = None 341 | 342 | @staticmethod 343 | def _inner(ri, val, ret, counter, mean, fill_value): 344 | ret[ri] = val 345 | 346 | 347 | class First(Last): 348 | reverse = True 349 | 350 | 351 | class AllNan(AggregateOp): 352 | forced_fill_value = 1 353 | 354 | @staticmethod 355 | def _inner(ri, val, ret, counter, mean, fill_value): 356 | counter[ri] = 0 357 | ret[ri] &= val != val 358 | 359 | 360 | class AnyNan(AggregateOp): 361 | forced_fill_value = 0 362 | 363 | @staticmethod 364 | def _inner(ri, val, ret, counter, mean, fill_value): 365 | counter[ri] = 0 366 | ret[ri] |= val != val 367 | 368 | 369 | class Max(AggregateOp): 370 | @staticmethod 371 | def _inner(ri, val, ret, counter, mean, fill_value): 372 | if counter[ri]: 373 | ret[ri] = val 374 | counter[ri] = 0 375 | elif ret[ri] < val: 376 | ret[ri] = val 377 | 378 | 379 | class Min(AggregateOp): 380 | @staticmethod 381 | def _inner(ri, val, ret, counter, mean, fill_value): 382 | if counter[ri]: 383 | ret[ri] = val 384 | counter[ri] = 0 385 | elif ret[ri] > val: 386 | ret[ri] = val 387 | 388 | 389 | class ArgMax(AggregateOp): 390 | mean_fill_value = np.nan 391 | 392 | 
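    # Note: the `mean` working array is reused here to hold the running best
    # comparison value of each group, while `ret` holds that value's index.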
@staticmethod 393 | def _valgetter(a, i): 394 | return a[i], i 395 | 396 | @staticmethod 397 | def _nan_check(val): 398 | return val[0] != val[0] 399 | 400 | @staticmethod 401 | def _inner(ri, val, ret, counter, mean, fill_value): 402 | cmp_val, arg = val 403 | if counter[ri]: 404 | # start of a new group 405 | counter[ri] = 0 406 | mean[ri] = cmp_val 407 | if cmp_val == cmp_val: 408 | # Don't point on nans 409 | ret[ri] = arg 410 | elif mean[ri] < cmp_val: 411 | # larger valid value found 412 | mean[ri] = cmp_val 413 | ret[ri] = arg 414 | elif cmp_val != cmp_val: 415 | # nan found, reset group 416 | mean[ri] = cmp_val 417 | ret[ri] = fill_value 418 | 419 | 420 | class ArgMin(ArgMax): 421 | @staticmethod 422 | def _inner(ri, val, ret, counter, mean, fill_value): 423 | cmp_val, arg = val 424 | if counter[ri]: 425 | # start of a new group 426 | counter[ri] = 0 427 | mean[ri] = cmp_val 428 | if cmp_val == cmp_val: 429 | # Don't point on nans 430 | ret[ri] = arg 431 | elif mean[ri] > cmp_val: 432 | # larger valid value found 433 | mean[ri] = cmp_val 434 | ret[ri] = arg 435 | elif cmp_val != cmp_val: 436 | # nan found, reset group 437 | mean[ri] = cmp_val 438 | ret[ri] = fill_value 439 | 440 | 441 | class SumOfSquares(AggregateOp): 442 | forced_fill_value = 0 443 | 444 | @staticmethod 445 | def _inner(ri, val, ret, counter, mean, fill_value): 446 | counter[ri] = 0 447 | ret[ri] += val * val 448 | 449 | 450 | class Mean(Aggregate2pass): 451 | forced_fill_value = 0 452 | counter_fill_value = 0 453 | counter_dtype = int 454 | 455 | @staticmethod 456 | def _inner(ri, val, ret, counter, mean, fill_value): 457 | counter[ri] += 1 458 | ret[ri] += val 459 | 460 | @staticmethod 461 | def _2pass_inner(ri, ret, counter, mean, ddof): 462 | return ret[ri] / counter[ri] 463 | 464 | 465 | class Std(Mean): 466 | mean_fill_value = 0 467 | 468 | @staticmethod 469 | def _inner(ri, val, ret, counter, mean, fill_value): 470 | counter[ri] += 1 471 | mean[ri] += val 472 | ret[ri] += val * val 473 | 474 | @staticmethod 475 | def _2pass_inner(ri, ret, counter, mean, ddof): 476 | mean2 = mean[ri] * mean[ri] 477 | return np.sqrt((ret[ri] - mean2 / counter[ri]) / (counter[ri] - ddof)) 478 | 479 | 480 | class Var(Std): 481 | @staticmethod 482 | def _2pass_inner(ri, ret, counter, mean, ddof): 483 | mean2 = mean[ri] * mean[ri] 484 | return (ret[ri] - mean2 / counter[ri]) / (counter[ri] - ddof) 485 | 486 | 487 | class CumSum(AggregateNtoN, Sum): 488 | pass 489 | 490 | 491 | class CumProd(AggregateNtoN, Prod): 492 | pass 493 | 494 | 495 | class CumMax(AggregateNtoN, Max): 496 | pass 497 | 498 | 499 | class CumMin(AggregateNtoN, Min): 500 | pass 501 | 502 | 503 | def get_funcs(): 504 | funcs = {} 505 | for op in ( 506 | Sum, 507 | Prod, 508 | Len, 509 | All, 510 | Any, 511 | Last, 512 | First, 513 | AllNan, 514 | AnyNan, 515 | Min, 516 | Max, 517 | ArgMin, 518 | ArgMax, 519 | Mean, 520 | Std, 521 | Var, 522 | SumOfSquares, 523 | CumSum, 524 | CumProd, 525 | CumMax, 526 | CumMin, 527 | ): 528 | funcname = op.__name__.lower() 529 | funcs[funcname] = op(funcname) 530 | if funcname not in funcs_no_separate_nan: 531 | funcname = "nan" + funcname 532 | funcs[funcname] = op(funcname, nans=True) 533 | return funcs 534 | 535 | 536 | _impl_dict = get_funcs() 537 | _default_cache = {} 538 | 539 | 540 | def aggregate( 541 | group_idx, 542 | a, 543 | func="sum", 544 | size=None, 545 | fill_value=0, 546 | order="C", 547 | dtype=None, 548 | axis=None, 549 | cache=True, 550 | **kwargs, 551 | ): 552 | func = get_func(func, aliasing, _impl_dict) 
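    # `func` is now either the canonical name of a builtin operation (a str)
    # or the user-supplied callable, which gets jitted via AggregateGeneric.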
553 | if not isinstance(func, str): 554 | if cache in (None, False): 555 | # Keep None and False in order to accept empty dictionaries 556 | aggregate_op = AggregateGeneric(func) 557 | else: 558 | if cache is True: 559 | cache = _default_cache 560 | aggregate_op = cache.setdefault(func, AggregateGeneric(func)) 561 | return aggregate_op( 562 | group_idx, a, size, fill_value, order, dtype, axis, **kwargs 563 | ) 564 | else: 565 | func = _impl_dict[func] 566 | return func(group_idx, a, size, fill_value, order, dtype, axis, **kwargs) 567 | 568 | 569 | aggregate.__doc__ = ( 570 | """ 571 | This is the numba implementation of aggregate. 572 | """ 573 | + aggregate_common_doc 574 | ) 575 | 576 | 577 | @nb.njit 578 | def step_count(group_idx): 579 | """Return the amount of index changes within group_idx.""" 580 | cmp_pos = 0 581 | steps = 1 582 | if len(group_idx) < 1: 583 | return 0 584 | for i in range(len(group_idx)): 585 | if group_idx[cmp_pos] != group_idx[i]: 586 | cmp_pos = i 587 | steps += 1 588 | return steps 589 | 590 | 591 | @nb.njit 592 | def step_indices(group_idx): 593 | """Return the edges of areas within group_idx, which are filled with the same value.""" 594 | ilen = step_count(group_idx) + 1 595 | indices = np.empty(ilen, np.int64) 596 | indices[0] = 0 597 | indices[-1] = group_idx.size 598 | cmp_pos = 0 599 | ri = 1 600 | for i in range(len(group_idx)): 601 | if group_idx[cmp_pos] != group_idx[i]: 602 | cmp_pos = i 603 | indices[ri] = i 604 | ri += 1 605 | return indices 606 | -------------------------------------------------------------------------------- /numpy_groupies/aggregate_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .utils import ( 4 | aggregate_common_doc, 5 | aliasing, 6 | check_boolean, 7 | check_dtype, 8 | check_fill_value, 9 | funcs_no_separate_nan, 10 | get_func, 11 | input_validation, 12 | iscomplexobj, 13 | maxval, 14 | minimum_dtype, 15 | minimum_dtype_scalar, 16 | minval, 17 | ) 18 | 19 | 20 | def _sum(group_idx, a, size, fill_value, dtype=None): 21 | dtype = minimum_dtype_scalar(fill_value, dtype, a) 22 | 23 | if np.ndim(a) == 0: 24 | ret = np.bincount(group_idx, minlength=size).astype(dtype, copy=False) 25 | if a != 1: 26 | ret *= a 27 | else: 28 | if iscomplexobj(a): 29 | ret = np.empty(size, dtype=dtype) 30 | ret.real = np.bincount(group_idx, weights=a.real, minlength=size) 31 | ret.imag = np.bincount(group_idx, weights=a.imag, minlength=size) 32 | else: 33 | ret = np.bincount(group_idx, weights=a, minlength=size).astype( 34 | dtype, copy=False 35 | ) 36 | 37 | if fill_value != 0: 38 | _fill_untouched(group_idx, ret, fill_value) 39 | return ret 40 | 41 | 42 | def _prod(group_idx, a, size, fill_value, dtype=None): 43 | dtype = minimum_dtype_scalar(fill_value, dtype, a) 44 | ret = np.full(size, fill_value, dtype=dtype) 45 | if fill_value != 1: 46 | ret[group_idx] = 1 # product starts from 1 47 | np.multiply.at(ret, group_idx, a) 48 | return ret 49 | 50 | 51 | def _len(group_idx, a, size, fill_value, dtype=None): 52 | return _sum(group_idx, 1, size, fill_value, dtype=int) 53 | 54 | 55 | def _last(group_idx, a, size, fill_value, dtype=None): 56 | dtype = minimum_dtype(fill_value, dtype or a.dtype) 57 | ret = np.full(size, fill_value, dtype=dtype) 58 | # repeated indexing gives last value, see: 59 | # the phrase "leaving behind the last value" on this page: 60 | # http://wiki.scipy.org/Tentative_NumPy_Tutorial 61 | ret[group_idx] = a 62 | return ret 63 | 64 | 65 | def 
_first(group_idx, a, size, fill_value, dtype=None): 66 | dtype = minimum_dtype(fill_value, dtype or a.dtype) 67 | ret = np.full(size, fill_value, dtype=dtype) 68 | ret[group_idx[::-1]] = a[::-1] # same trick as _last, but in reverse 69 | return ret 70 | 71 | 72 | def _all(group_idx, a, size, fill_value, dtype=None): 73 | check_boolean(fill_value) 74 | ret = np.full(size, fill_value, dtype=bool) 75 | if not fill_value: 76 | ret[group_idx] = True 77 | ret[group_idx.compress(np.logical_not(a))] = False 78 | return ret 79 | 80 | 81 | def _any(group_idx, a, size, fill_value, dtype=None): 82 | check_boolean(fill_value) 83 | ret = np.full(size, fill_value, dtype=bool) 84 | if fill_value: 85 | ret[group_idx] = False 86 | ret[group_idx.compress(a)] = True 87 | return ret 88 | 89 | 90 | def _min(group_idx, a, size, fill_value, dtype=None): 91 | dtype = minimum_dtype(fill_value, dtype or a.dtype) 92 | dmax = maxval(fill_value, dtype) 93 | with np.errstate(invalid="ignore"): 94 | ret = np.full(size, fill_value, dtype=dtype) 95 | if fill_value != dmax: 96 | ret[group_idx] = dmax # min starts from maximum 97 | with np.errstate(invalid="ignore"): 98 | np.minimum.at(ret, group_idx, a) 99 | return ret 100 | 101 | 102 | def _max(group_idx, a, size, fill_value, dtype=None): 103 | dtype = minimum_dtype(fill_value, dtype or a.dtype) 104 | dmin = minval(fill_value, dtype) 105 | with np.errstate(invalid="ignore"): 106 | ret = np.full(size, fill_value, dtype=dtype) 107 | if fill_value != dmin: 108 | ret[group_idx] = dmin # max starts from minimum 109 | with np.errstate(invalid="ignore"): 110 | np.maximum.at(ret, group_idx, a) 111 | return ret 112 | 113 | 114 | def _argmax(group_idx, a, size, fill_value, dtype=int, _nansqueeze=False): 115 | a_ = np.where(np.isnan(a), -np.inf, a) if _nansqueeze else a 116 | group_max = _max(group_idx, a_, size, np.nan) 117 | # nan should never be maximum, so use a and not a_ 118 | is_max = a == group_max[group_idx] 119 | ret = np.full(size, fill_value, dtype=dtype) 120 | group_idx_max = group_idx[is_max] 121 | (argmax,) = is_max.nonzero() 122 | ret[group_idx_max[::-1]] = argmax[ 123 | ::-1 124 | ] # reverse to ensure first value for each group wins 125 | return ret 126 | 127 | 128 | def _argmin(group_idx, a, size, fill_value, dtype=int, _nansqueeze=False): 129 | a_ = np.where(np.isnan(a), np.inf, a) if _nansqueeze else a 130 | group_min = _min(group_idx, a_, size, np.nan) 131 | # nan should never be minimum, so use a and not a_ 132 | is_min = a == group_min[group_idx] 133 | ret = np.full(size, fill_value, dtype=dtype) 134 | group_idx_min = group_idx[is_min] 135 | (argmin,) = is_min.nonzero() 136 | ret[group_idx_min[::-1]] = argmin[ 137 | ::-1 138 | ] # reverse to ensure first value for each group wins 139 | return ret 140 | 141 | 142 | def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)): 143 | if np.ndim(a) == 0: 144 | raise ValueError("cannot take mean with scalar a") 145 | counts = np.bincount(group_idx, minlength=size) 146 | if iscomplexobj(a): 147 | dtype = a.dtype # TODO: this is a bit clumsy 148 | sums = np.empty(size, dtype=dtype) 149 | sums.real = np.bincount(group_idx, weights=a.real, minlength=size) 150 | sums.imag = np.bincount(group_idx, weights=a.imag, minlength=size) 151 | else: 152 | sums = np.bincount(group_idx, weights=a, minlength=size) 153 | 154 | with np.errstate(divide="ignore", invalid="ignore"): 155 | ret = sums / counts 156 | if not np.isnan(fill_value): 157 | ret[counts == 0] = fill_value 158 | if iscomplexobj(a): 159 | return ret 160 | 
else: 161 | return ret.astype(dtype, copy=False) 162 | 163 | 164 | def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)): 165 | ret = np.bincount(group_idx, weights=a * a, minlength=size) 166 | if fill_value != 0: 167 | counts = np.bincount(group_idx, minlength=size) 168 | ret[counts == 0] = fill_value 169 | if iscomplexobj(a): 170 | return ret 171 | else: 172 | return ret.astype(dtype, copy=False) 173 | 174 | 175 | def _var( 176 | group_idx, a, size, fill_value, dtype=np.dtype(np.float64), sqrt=False, ddof=0 177 | ): 178 | if np.ndim(a) == 0: 179 | raise ValueError("cannot take variance with scalar a") 180 | counts = np.bincount(group_idx, minlength=size) 181 | sums = np.bincount(group_idx, weights=a, minlength=size) 182 | with np.errstate(divide="ignore", invalid="ignore"): 183 | means = sums / counts 184 | counts = np.where(counts > ddof, counts - ddof, 0) 185 | ret = ( 186 | np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts 187 | ) 188 | if sqrt: 189 | ret = np.sqrt(ret) # this is now std not var 190 | if not np.isnan(fill_value): 191 | ret[counts == 0] = fill_value 192 | if iscomplexobj(a): 193 | return ret 194 | else: 195 | return ret.astype(dtype, copy=False) 196 | 197 | 198 | def _std(group_idx, a, size, fill_value, dtype=np.dtype(np.float64), ddof=0): 199 | return _var(group_idx, a, size, fill_value, dtype=dtype, sqrt=True, ddof=ddof) 200 | 201 | 202 | def _allnan(group_idx, a, size, fill_value, dtype=bool): 203 | return _all(group_idx, np.isnan(a), size, fill_value=fill_value, dtype=dtype) 204 | 205 | 206 | def _anynan(group_idx, a, size, fill_value, dtype=bool): 207 | return _any(group_idx, np.isnan(a), size, fill_value=fill_value, dtype=dtype) 208 | 209 | 210 | def _sort(group_idx, a, size=None, fill_value=None, dtype=None, reverse=False): 211 | sortidx = np.lexsort((-a if reverse else a, group_idx)) 212 | # Reverse sorting back to into grouped order, but preserving groupwise sorting 213 | revidx = np.argsort(np.argsort(group_idx, kind="mergesort"), kind="mergesort") 214 | return a[sortidx][revidx] 215 | 216 | 217 | def _array(group_idx, a, size, fill_value, dtype=None): 218 | """groups a into separate arrays, keeping the order intact.""" 219 | if fill_value is not None and not (np.isscalar(fill_value) or len(fill_value) == 0): 220 | raise ValueError("fill_value must be None, a scalar or an empty sequence") 221 | order_group_idx = np.argsort(group_idx, kind="mergesort") 222 | counts = np.bincount(group_idx, minlength=size) 223 | ret = np.split(a[order_group_idx], np.cumsum(counts)[:-1]) 224 | ret = np.asanyarray(ret, dtype="object") 225 | if fill_value is None or np.isscalar(fill_value): 226 | _fill_untouched(group_idx, ret, fill_value) 227 | return ret 228 | 229 | 230 | def _generic_callable( 231 | group_idx, a, size, fill_value, dtype=None, func=lambda g: g, **kwargs 232 | ): 233 | """groups a by inds, and then applies foo to each group in turn, placing 234 | the results in an array.""" 235 | groups = _array(group_idx, a, size, ()) 236 | ret = np.full(size, fill_value, dtype=dtype or np.float64) 237 | 238 | for i, grp in enumerate(groups): 239 | if np.ndim(grp) == 1 and len(grp) > 0: 240 | ret[i] = func(grp) 241 | return ret 242 | 243 | 244 | def _cumsum(group_idx, a, size, fill_value=None, dtype=None): 245 | """ 246 | N to N aggregate operation of cumsum. Perform cumulative sum for each group. 
247 | 248 | group_idx = np.array([4, 3, 3, 4, 4, 1, 1, 1, 7, 8, 7, 4, 3, 3, 1, 1]) 249 | a = np.array([3, 4, 1, 3, 9, 9, 6, 7, 7, 0, 8, 2, 1, 8, 9, 8]) 250 | _cumsum(group_idx, a, np.max(group_idx) + 1) 251 | >>> array([ 3, 4, 5, 6, 15, 9, 15, 22, 7, 0, 15, 17, 6, 14, 31, 39]) 252 | """ 253 | sortidx = np.argsort(group_idx, kind="mergesort") 254 | invsortidx = np.argsort(sortidx, kind="mergesort") 255 | group_idx_srt = group_idx[sortidx] 256 | 257 | a_srt = a[sortidx] 258 | a_srt_cumsum = np.cumsum(a_srt, dtype=dtype) 259 | 260 | increasing = np.arange(len(a), dtype=int) 261 | group_starts = _min(group_idx_srt, increasing, size, fill_value=0)[group_idx_srt] 262 | # First subtract large numbers 263 | a_srt_cumsum -= a_srt_cumsum[group_starts] 264 | # Then add potentially small numbers 265 | a_srt_cumsum += a_srt[group_starts] 266 | return a_srt_cumsum[invsortidx] 267 | 268 | 269 | def _nancumsum(group_idx, a, size, fill_value=None, dtype=None): 270 | a_nonans = np.where(np.isnan(a), 0, a) 271 | group_idx_nonans = np.where( 272 | np.isnan(group_idx), np.nanmax(group_idx) + 1, group_idx 273 | ) 274 | return _cumsum(group_idx_nonans, a_nonans, size, fill_value=fill_value, dtype=dtype) 275 | 276 | 277 | _impl_dict = dict( 278 | min=_min, 279 | max=_max, 280 | sum=_sum, 281 | prod=_prod, 282 | last=_last, 283 | first=_first, 284 | all=_all, 285 | any=_any, 286 | mean=_mean, 287 | std=_std, 288 | var=_var, 289 | anynan=_anynan, 290 | allnan=_allnan, 291 | sort=_sort, 292 | array=_array, 293 | argmax=_argmax, 294 | argmin=_argmin, 295 | len=_len, 296 | cumsum=_cumsum, 297 | sumofsquares=_sum_of_squres, 298 | generic=_generic_callable, 299 | ) 300 | _impl_dict.update( 301 | ("nan" + k, v) 302 | for k, v in list(_impl_dict.items()) 303 | if k not in funcs_no_separate_nan 304 | ) 305 | _impl_dict["nancumsum"] = _nancumsum 306 | 307 | 308 | def _aggregate_base( 309 | group_idx, 310 | a, 311 | func="sum", 312 | size=None, 313 | fill_value=0, 314 | order="C", 315 | dtype=None, 316 | axis=None, 317 | _impl_dict=_impl_dict, 318 | is_pandas=False, 319 | **kwargs, 320 | ): 321 | iv = input_validation(group_idx, a, size=size, order=order, axis=axis, func=func) 322 | group_idx, a, flat_size, ndim_idx, size, unravel_shape = iv 323 | 324 | if group_idx.dtype == np.dtype("uint64"): 325 | # Force conversion to signed int, to avoid issues with bincount etc later 326 | group_idx = group_idx.astype(int) 327 | 328 | func = get_func(func, aliasing, _impl_dict) 329 | if not isinstance(func, str): 330 | # do simple grouping and execute function in loop 331 | ret = _impl_dict.get("generic", _generic_callable)( 332 | group_idx, a, flat_size, fill_value, func=func, dtype=dtype, **kwargs 333 | ) 334 | else: 335 | # deal with nans and find the function 336 | if func.startswith("nan"): 337 | if np.ndim(a) == 0: 338 | raise ValueError("nan-version not supported for scalar input.") 339 | if "nan" in func: 340 | if "arg" in func: 341 | kwargs["_nansqueeze"] = True 342 | elif "cum" in func: 343 | pass 344 | else: 345 | good = ~np.isnan(a) 346 | if "len" not in func or is_pandas: 347 | # a is not needed for len, nanlen! 
348 | a = a[good] 349 | group_idx = group_idx[good] 350 | 351 | dtype = check_dtype(dtype, func, a, flat_size) 352 | check_fill_value(fill_value, dtype, func=func) 353 | func = _impl_dict[func] 354 | ret = func( 355 | group_idx, a, flat_size, fill_value=fill_value, dtype=dtype, **kwargs 356 | ) 357 | 358 | # deal with ndimensional indexing 359 | if ndim_idx > 1: 360 | if unravel_shape is not None: 361 | # A negative fill_value cannot, and should not, be unraveled. 362 | mask = ret == fill_value 363 | ret[mask] = 0 364 | ret = np.unravel_index(ret, unravel_shape)[axis] 365 | ret[mask] = fill_value 366 | ret = ret.reshape(size, order=order) 367 | return ret 368 | 369 | 370 | def aggregate( 371 | group_idx, 372 | a, 373 | func="sum", 374 | size=None, 375 | fill_value=0, 376 | order="C", 377 | dtype=None, 378 | axis=None, 379 | **kwargs, 380 | ): 381 | return _aggregate_base( 382 | group_idx, 383 | a, 384 | size=size, 385 | fill_value=fill_value, 386 | order=order, 387 | dtype=dtype, 388 | func=func, 389 | axis=axis, 390 | _impl_dict=_impl_dict, 391 | **kwargs, 392 | ) 393 | 394 | 395 | aggregate.__doc__ = ( 396 | """ 397 | This is the pure numpy implementation of aggregate. 398 | """ 399 | + aggregate_common_doc 400 | ) 401 | 402 | 403 | def _fill_untouched(idx, ret, fill_value): 404 | """any elements of ret not indexed by idx are set to fill_value.""" 405 | untouched = np.ones_like(ret, dtype=bool) 406 | untouched[idx] = False 407 | ret[untouched] = fill_value 408 | -------------------------------------------------------------------------------- /numpy_groupies/aggregate_numpy_ufunc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .aggregate_numpy import _aggregate_base 4 | from .utils import ( 5 | aggregate_common_doc, 6 | aliasing, 7 | check_boolean, 8 | get_func, 9 | maxval, 10 | minimum_dtype, 11 | minimum_dtype_scalar, 12 | minval, 13 | ) 14 | 15 | 16 | def _anynan(group_idx, a, size, fill_value, dtype=None): 17 | return _any(group_idx, np.isnan(a), size, fill_value=fill_value, dtype=dtype) 18 | 19 | 20 | def _allnan(group_idx, a, size, fill_value, dtype=None): 21 | return _all(group_idx, np.isnan(a), size, fill_value=fill_value, dtype=dtype) 22 | 23 | 24 | def _any(group_idx, a, size, fill_value, dtype=None): 25 | check_boolean(fill_value) 26 | ret = np.full(size, fill_value, dtype=bool) 27 | if fill_value: 28 | ret[group_idx] = False # any-test should start from False 29 | np.logical_or.at(ret, group_idx, a) 30 | return ret 31 | 32 | 33 | def _all(group_idx, a, size, fill_value, dtype=None): 34 | check_boolean(fill_value) 35 | ret = np.full(size, fill_value, dtype=bool) 36 | if not fill_value: 37 | ret[group_idx] = True # all-test should start from True 38 | np.logical_and.at(ret, group_idx, a) 39 | return ret 40 | 41 | 42 | def _sum(group_idx, a, size, fill_value, dtype=None): 43 | dtype = minimum_dtype_scalar(fill_value, dtype, a) 44 | ret = np.full(size, fill_value, dtype=dtype) 45 | if fill_value != 0: 46 | ret[group_idx] = 0 # sums should start at 0 47 | np.add.at(ret, group_idx, a) 48 | return ret 49 | 50 | 51 | def _len(group_idx, a, size, fill_value, dtype=None): 52 | return _sum(group_idx, 1, size, fill_value, dtype=int) 53 | 54 | 55 | def _prod(group_idx, a, size, fill_value, dtype=None): 56 | """Same as aggregate_numpy.py""" 57 | dtype = minimum_dtype_scalar(fill_value, dtype, a) 58 | ret = np.full(size, fill_value, dtype=dtype) 59 | if fill_value != 1: 60 | ret[group_idx] = 1 # product should start 
from 1
61 |     np.multiply.at(ret, group_idx, a)
62 |     return ret
63 | 
64 | 
65 | def _min(group_idx, a, size, fill_value, dtype=None):
66 |     """Same as aggregate_numpy.py"""
67 |     dtype = minimum_dtype(fill_value, dtype or a.dtype)
68 |     dmax = maxval(fill_value, dtype)
69 |     ret = np.full(size, fill_value, dtype=dtype)
70 |     if fill_value != dmax:
71 |         ret[group_idx] = dmax  # min starts from maximum
72 |     np.minimum.at(ret, group_idx, a)
73 |     return ret
74 | 
75 | 
76 | def _max(group_idx, a, size, fill_value, dtype=None):
77 |     """Same as aggregate_numpy.py"""
78 |     dtype = minimum_dtype(fill_value, dtype or a.dtype)
79 |     dmin = minval(fill_value, dtype)
80 |     ret = np.full(size, fill_value, dtype=dtype)
81 |     if fill_value != dmin:
82 |         ret[group_idx] = dmin  # max starts from minimum
83 |     np.maximum.at(ret, group_idx, a)
84 |     return ret
85 | 
86 | 
87 | _impl_dict = dict(
88 |     min=_min,
89 |     max=_max,
90 |     sum=_sum,
91 |     prod=_prod,
92 |     all=_all,
93 |     any=_any,
94 |     allnan=_allnan,
95 |     anynan=_anynan,
96 |     len=_len,
97 | )
98 | 
99 | 
100 | def aggregate(
101 |     group_idx,
102 |     a,
103 |     func="sum",
104 |     size=None,
105 |     fill_value=0,
106 |     order="C",
107 |     dtype=None,
108 |     axis=None,
109 |     **kwargs,
110 | ):
111 |     func = get_func(func, aliasing, _impl_dict)
112 |     if not isinstance(func, str):
113 |         raise NotImplementedError("No such ufunc available")
114 |     return _aggregate_base(
115 |         group_idx,
116 |         a,
117 |         size=size,
118 |         fill_value=fill_value,
119 |         order=order,
120 |         dtype=dtype,
121 |         func=func,
122 |         axis=axis,
123 |         _impl_dict=_impl_dict,
124 |         **kwargs,
125 |     )
126 | 
127 | 
128 | aggregate.__doc__ = (
129 |     """
130 |     Unlike ``aggregate_numpy``, which in most cases does some custom
131 |     optimisations, this version simply uses ``numpy``'s ``ufunc.at``.
132 | 
133 |     As of ``numpy`` version 1.14, this gives fairly poor performance. There
134 |     should normally be no need to use this version; it is intended only for
135 |     testing and benchmarking.
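
    A quick sanity check of this backend against the optimised one (an
    illustrative sketch; both entry points are re-exported by the package):

    >>> import numpy as np
    >>> from numpy_groupies import aggregate_np, aggregate_ufunc
    >>> idx = np.array([0, 0, 1, 1, 1])
    >>> vals = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    >>> bool(np.allclose(aggregate_ufunc(idx, vals, func="sum"),
    ...                  aggregate_np(idx, vals, func="sum")))
    True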
136 |     """
137 |     + aggregate_common_doc
138 | )
139 | 
--------------------------------------------------------------------------------
/numpy_groupies/aggregate_pandas.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | 
6 | from .aggregate_numpy import _aggregate_base
7 | from .utils import (
8 |     aggregate_common_doc,
9 |     allnan,
10 |     anynan,
11 |     check_dtype,
12 |     funcs_no_separate_nan,
13 | )
14 | 
15 | 
16 | def _wrapper(group_idx, a, size, fill_value, func="sum", dtype=None, ddof=0, **kwargs):
17 |     funcname = func.__name__ if callable(func) else func
18 |     kwargs = {}
19 |     if funcname in ("var", "std"):
20 |         kwargs["ddof"] = ddof
21 |     df = pd.DataFrame({"group_idx": group_idx, "a": a})
22 |     if func == "sort":
23 |         grouped = df.groupby("group_idx", sort=True)
24 |     else:
25 |         grouped = df.groupby("group_idx", sort=False).aggregate(func, **kwargs)
26 | 
27 |     dtype = check_dtype(dtype, getattr(func, "__name__", funcname), a, size)
28 |     if funcname.startswith("cum"):
29 |         ret = grouped.values[:, 0]
30 |     else:
31 |         ret = np.full(size, fill_value, dtype=dtype)
32 |         with np.errstate(invalid="ignore"):
33 |             ret[grouped.index] = grouped.values[:, 0]
34 |     return ret
35 | 
36 | 
37 | _supported_funcs = "sum prod all any min max mean var std first last cumsum cumprod cummax cummin".split()
38 | _impl_dict = {fn: partial(_wrapper, func=fn) for fn in _supported_funcs}
39 | _impl_dict.update(
40 |     ("nan" + fn, partial(_wrapper, func=fn))
41 |     for fn in _supported_funcs
42 |     if fn not in funcs_no_separate_nan
43 | )
44 | _impl_dict.update(
45 |     allnan=partial(_wrapper, func=allnan),
46 |     anynan=partial(_wrapper, func=anynan),
47 |     len=partial(_wrapper, func="count"),
48 |     nanlen=partial(_wrapper, func="count"),
49 |     argmax=partial(_wrapper, func="idxmax"),
50 |     argmin=partial(_wrapper, func="idxmin"),
51 |     nanargmax=partial(_wrapper, func="idxmax"),
52 |     nanargmin=partial(_wrapper, func="idxmin"),
53 |     generic=_wrapper,
54 | )
55 | 
56 | 
57 | def aggregate(
58 |     group_idx,
59 |     a,
60 |     func="sum",
61 |     size=None,
62 |     fill_value=0,
63 |     order="C",
64 |     dtype=None,
65 |     axis=None,
66 |     **kwargs,
67 | ):
68 |     return _aggregate_base(
69 |         group_idx,
70 |         a,
71 |         size=size,
72 |         fill_value=fill_value,
73 |         order=order,
74 |         dtype=dtype,
75 |         func=func,
76 |         axis=axis,
77 |         _impl_dict=_impl_dict,
78 |         is_pandas=True,
79 |         **kwargs,
80 |     )
81 | 
82 | 
83 | aggregate.__doc__ = (
84 |     """
85 |     This is the pandas implementation of aggregate. It makes use of
86 |     `pandas`'s groupby machinery and is mainly used for reference
87 |     and benchmarking.
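
    A minimal sketch of a call through this backend (illustrative):

    >>> import numpy as np
    >>> from numpy_groupies.aggregate_pandas import aggregate
    >>> bool(np.allclose(aggregate(np.array([0, 0, 1]),
    ...                            np.array([1.0, 3.0, 5.0]), func="mean"),
    ...                  [2.0, 5.0]))
    True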
88 | """ 89 | + aggregate_common_doc 90 | ) 91 | -------------------------------------------------------------------------------- /numpy_groupies/aggregate_purepy.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import math 3 | import operator 4 | 5 | from .utils import aggregate_common_doc 6 | from .utils import aliasing_py as aliasing 7 | from .utils import funcs_no_separate_nan, get_func 8 | 9 | # min, max, sum, all, any - builtin 10 | 11 | 12 | def _last(x): 13 | return x[-1] 14 | 15 | 16 | def _first(x): 17 | return x[0] 18 | 19 | 20 | def _array(x): 21 | return x 22 | 23 | 24 | def _mean(x): 25 | return sum(x) / len(x) 26 | 27 | 28 | def _var(x, ddof=0): 29 | mean = _mean(x) 30 | return sum((xx - mean) ** 2 for xx in x) / (len(x) - ddof) 31 | 32 | 33 | def _std(x, ddof=0): 34 | return math.sqrt(_var(x, ddof=ddof)) 35 | 36 | 37 | def _prod(x): 38 | r = x[0] 39 | for xx in x[1:]: 40 | r *= xx 41 | return r 42 | 43 | 44 | def _anynan(x): 45 | return any(math.isnan(xx) for xx in x) 46 | 47 | 48 | def _allnan(x): 49 | return all(math.isnan(xx) for xx in x) 50 | 51 | 52 | def _argmax(x_and_idx): 53 | return max(x_and_idx, key=operator.itemgetter(1))[0] 54 | 55 | 56 | _argmax.x_and_idx = True # tell aggregate what to use as first arg 57 | 58 | 59 | def _argmin(x_and_idx): 60 | return min(x_and_idx, key=operator.itemgetter(1))[0] 61 | 62 | 63 | _argmin.x_and_idx = True # tell aggregate what to use as first arg 64 | 65 | 66 | def _sort(group_idx, a, reverse=False): 67 | def _argsort(unordered): 68 | return sorted(range(len(unordered)), key=lambda k: unordered[k]) 69 | 70 | sortidx = _argsort( 71 | list((gi, aj) for gi, aj in zip(group_idx, -a if reverse else a)) 72 | ) 73 | revidx = _argsort(_argsort(group_idx)) 74 | a_srt = [a[si] for si in sortidx] 75 | return [a_srt[ri] for ri in revidx] 76 | 77 | 78 | _impl_dict = dict( 79 | min=min, 80 | max=max, 81 | sum=sum, 82 | prod=_prod, 83 | last=_last, 84 | first=_first, 85 | all=all, 86 | any=any, 87 | mean=_mean, 88 | std=_std, 89 | var=_var, 90 | anynan=_anynan, 91 | allnan=_allnan, 92 | sort=_sort, 93 | array=_array, 94 | argmax=_argmax, 95 | argmin=_argmin, 96 | len=len, 97 | ) 98 | _impl_dict.update( 99 | ("nan" + k, v) 100 | for k, v in list(_impl_dict.items()) 101 | if k not in funcs_no_separate_nan 102 | ) 103 | 104 | 105 | def aggregate( 106 | group_idx, 107 | a, 108 | func="sum", 109 | size=None, 110 | fill_value=0, 111 | order=None, 112 | dtype=None, 113 | axis=None, 114 | **kwargs, 115 | ): 116 | if axis is not None: 117 | raise NotImplementedError("axis arg not supported in purepy implementation.") 118 | 119 | # Check for 2d group_idx 120 | if size is None: 121 | try: 122 | size = 1 + int(max(group_idx)) 123 | except (TypeError, ValueError): 124 | raise NotImplementedError( 125 | "pure python implementation doesn't accept ndim idx input." 126 | ) 127 | 128 | for i in group_idx: 129 | try: 130 | i = int(i) 131 | except (TypeError, ValueError): 132 | if isinstance(i, (list, tuple)): 133 | raise NotImplementedError( 134 | "pure python implementation doesn't accept ndim idx input." 135 | ) 136 | else: 137 | try: 138 | len(i) 139 | except TypeError: 140 | raise ValueError(f"invalid value found in group_idx: {i}") 141 | else: 142 | raise NotImplementedError( 143 | "pure python implementation doesn't accept ndim indexed input." 
144 | ) 145 | else: 146 | if i < 0: 147 | raise ValueError("group_idx contains negative value") 148 | 149 | func = get_func(func, aliasing, _impl_dict) 150 | if isinstance(a, (int, float)): 151 | if func not in ("sum", "prod", "len"): 152 | raise ValueError( 153 | "scalar inputs are supported only for 'sum', 'prod' and 'len'" 154 | ) 155 | a = [a] * len(group_idx) 156 | elif len(group_idx) != len(a): 157 | raise ValueError("group_idx and a must be of the same length") 158 | 159 | if isinstance(func, str): 160 | if func.startswith("nan"): 161 | func = func[3:] 162 | # remove nans 163 | group_idx, a = zip( 164 | *((ix, val) for ix, val in zip(group_idx, a) if not math.isnan(val)) 165 | ) 166 | 167 | func = _impl_dict[func] 168 | if func is _sort: 169 | return _sort(group_idx, a, reverse=kwargs.get("reverse", False)) 170 | 171 | # sort data and evaluate function on groups 172 | ret = [fill_value] * size 173 | if not getattr(func, "x_and_idx", False): 174 | data = sorted(zip(group_idx, a), key=operator.itemgetter(0)) 175 | for ix, group in itertools.groupby(data, key=operator.itemgetter(0)): 176 | ret[ix] = func(list(val for _, val in group), **kwargs) 177 | else: 178 | data = sorted(zip(range(len(a)), group_idx, a), key=operator.itemgetter(1)) 179 | for ix, group in itertools.groupby(data, key=operator.itemgetter(1)): 180 | ret[ix] = func(list((val_idx, val) for val_idx, _, val in group), **kwargs) 181 | 182 | return ret 183 | 184 | 185 | aggregate.__doc__ = ( 186 | """ 187 | This is the pure python implementation of aggregate. It is terribly slow. 188 | Using the numpy version is highly recommended. 189 | """ 190 | + aggregate_common_doc 191 | ) 192 | -------------------------------------------------------------------------------- /numpy_groupies/benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml31415/numpy-groupies/1dd8c3dfcc55235ae3753ce0db264da53bae4a80/numpy_groupies/benchmarks/__init__.py -------------------------------------------------------------------------------- /numpy_groupies/benchmarks/generic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python -B 2 | 3 | import platform 4 | import sys 5 | import timeit 6 | from operator import itemgetter 7 | 8 | import numpy as np 9 | 10 | from numpy_groupies.tests import _implementations, aggregate_numpy 11 | from numpy_groupies.utils import allnan, anynan, nanfirst, nanlast 12 | 13 | 14 | def aggregate_grouploop(*args, **kwargs): 15 | """wraps func in lambda which prevents aggregate_numpy from 16 | recognising and optimising it. 
Instead it groups and loops.""" 17 | extrafuncs = { 18 | "allnan": allnan, 19 | "anynan": anynan, 20 | "first": itemgetter(0), 21 | "last": itemgetter(-1), 22 | "nanfirst": nanfirst, 23 | "nanlast": nanlast, 24 | } 25 | func = kwargs.pop("func") 26 | func = extrafuncs.get(func, func) 27 | if isinstance(func, str): 28 | raise NotImplementedError("Grouploop needs to be called with a function") 29 | return aggregate_numpy.aggregate(*args, func=lambda x: func(x), **kwargs) 30 | 31 | 32 | def arbitrary(iterator): 33 | tmp = 0 34 | for i, x in enumerate(iterator, 1): 35 | tmp += x**i 36 | return tmp 37 | 38 | 39 | func_list = ( 40 | np.sum, 41 | np.prod, 42 | np.min, 43 | np.max, 44 | len, 45 | np.all, 46 | np.any, 47 | "anynan", 48 | "allnan", 49 | np.mean, 50 | np.std, 51 | np.var, 52 | "first", 53 | "last", 54 | "argmax", 55 | "argmin", 56 | np.nansum, 57 | np.nanprod, 58 | np.nanmin, 59 | np.nanmax, 60 | "nanlen", 61 | "nanall", 62 | "nanany", 63 | np.nanmean, 64 | np.nanvar, 65 | np.nanstd, 66 | "nanfirst", 67 | "nanlast", 68 | "nanargmin", 69 | "nanargmax", 70 | "cumsum", 71 | "cumprod", 72 | "cummax", 73 | "cummin", 74 | arbitrary, 75 | "sort", 76 | ) 77 | 78 | 79 | def benchmark_data(size=5e5, seed=100): 80 | rnd = np.random.RandomState(seed=seed) 81 | group_idx = rnd.randint(0, int(1e3), int(size)) 82 | a = rnd.random_sample(group_idx.size) 83 | a[a > 0.8] = 0 84 | nana = a.copy() 85 | nana[(nana < 0.2) & (nana != 0)] = np.nan 86 | nan_share = np.mean(np.isnan(nana)) 87 | assert 0.15 < nan_share < 0.25, f"{nan_share * 100:3f}% nans" 88 | return a, nana, group_idx 89 | 90 | 91 | def benchmark(implementations, repeat=5, size=5e5, seed=100, raise_errors=False): 92 | a, nana, group_idx = benchmark_data(size=size, seed=seed) 93 | 94 | print( 95 | "function" 96 | + "".join(impl.__name__.rsplit("_", 1)[1].rjust(14) for impl in implementations) 97 | ) 98 | print("-" * (9 + 14 * len(implementations))) 99 | for func in func_list: 100 | func_name = getattr(func, "__name__", func) 101 | print(func_name.ljust(9), end="") 102 | results = [] 103 | used_a = nana if "nan" in func_name else a 104 | 105 | for impl in implementations: 106 | if impl is None: 107 | print("----".rjust(14), end="") 108 | continue 109 | aggregatefunc = impl.aggregate 110 | 111 | try: 112 | res = aggregatefunc(group_idx, used_a, func=func) 113 | except NotImplementedError: 114 | print("----".rjust(14), end="") 115 | continue 116 | except Exception: 117 | if raise_errors: 118 | raise 119 | print("ERROR".rjust(14), end="") 120 | else: 121 | results.append(res) 122 | try: 123 | np.testing.assert_array_almost_equal(res, results[0]) 124 | except AssertionError: 125 | print("FAIL".rjust(14), end="") 126 | else: 127 | t0 = min( 128 | timeit.Timer( 129 | lambda: aggregatefunc(group_idx, used_a, func=func) 130 | ).repeat(repeat=repeat, number=1) 131 | ) 132 | print(f"{t0 * 1000:.3f}".rjust(14), end="") 133 | sys.stdout.flush() 134 | print() 135 | 136 | implementation_names = [impl.__name__.rsplit("_", 1)[1] for impl in implementations] 137 | postfix = "" 138 | if "numba" in implementation_names: 139 | import numba 140 | 141 | postfix += f", Numba {numba.__version__}" 142 | if "pandas" in implementation_names: 143 | import pandas 144 | 145 | postfix += f", Pandas {pandas.__version__}" 146 | print( 147 | f"{platform.system()}({platform.machine()}), Python {sys.version.split()[0]}, Numpy {np.version.version}" 148 | f"{postfix}" 149 | ) 150 | 151 | 152 | if __name__ == "__main__": 153 | implementations = ( 154 | _implementations if 
"--purepy" in sys.argv else _implementations[1:] 155 | ) 156 | implementations = ( 157 | implementations if "--pandas" in sys.argv else implementations[:-1] 158 | ) 159 | benchmark(implementations, raise_errors=False) 160 | -------------------------------------------------------------------------------- /numpy_groupies/benchmarks/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python -B 2 | 3 | import timeit 4 | 5 | import numpy as np 6 | 7 | from numpy_groupies import aggregate_np, aggregate_py, aggregate_ufunc 8 | from numpy_groupies.aggregate_pandas import aggregate as aggregate_pd 9 | from numpy_groupies.utils import aliasing 10 | 11 | 12 | def aggregate_group_loop(*args, **kwargs): 13 | """wraps func in lambda which prevents aggregate_numpy from 14 | recognising and optimising it. Instead it groups and loops.""" 15 | func = kwargs["func"] 16 | del kwargs["func"] 17 | return aggregate_np(*args, func=lambda x: func(x), **kwargs) 18 | 19 | 20 | print("-----simple examples----------") 21 | test_a = np.array([12.0, 3.2, -15, 88, 12.9]) 22 | test_group_idx = np.array([1, 0, 1, 4, 1]) 23 | print("test_a: ", test_a) 24 | print("test_group_idx: ", test_group_idx) 25 | print("aggregate(test_group_idx, test_a):") 26 | print(aggregate_np(test_group_idx, test_a)) # group vals by idx and sum 27 | # array([3.2, 9.9, 0., 0., 88.]) 28 | print("aggregate(test_group_idx, test_a, sz=8, func='min', fill_value=np.nan):") 29 | print(aggregate_np(test_group_idx, test_a, size=8, func="min", fill_value=np.nan)) 30 | # array([3.2, -15., nan, 88., nan, nan, nan, nan]) 31 | print( 32 | "aggregate_py(test_group_idx, test_a, sz=5, func=lambda x: ' + '.join(str(xx) for xx in x),fill_value='')" 33 | ) 34 | print( 35 | aggregate_py( 36 | test_group_idx, 37 | test_a, 38 | size=5, 39 | func=lambda x: " + ".join(str(xx) for xx in x), 40 | fill_value="", 41 | ) 42 | ) 43 | 44 | 45 | print("") 46 | print("---------testing--------------") 47 | print("compare against group-and-loop with numpy") 48 | testable_funcs = { 49 | aliasing[f]: f 50 | for f in (np.sum, np.prod, np.any, np.all, np.min, np.max, np.std, np.var, np.mean) 51 | } 52 | test_group_idx = np.random.randint(0, int(1e3), int(1e5)) 53 | test_a = np.random.rand(int(1e5)) * 100 - 50 54 | test_a[test_a > 25] = 0 # for use with bool functions 55 | for name, f in testable_funcs.items(): 56 | numpy_loop_group = aggregate_group_loop(test_group_idx, test_a, func=f) 57 | 58 | for acc_func, acc_name in [ 59 | (aggregate_np, "np-optimised"), 60 | (aggregate_ufunc, "np-ufunc-at"), 61 | (aggregate_py, "purepy"), 62 | (aggregate_pd, "pandas"), 63 | ]: 64 | try: 65 | test_out = acc_func(test_group_idx, test_a, func=name) 66 | test_out = np.asarray(test_out) 67 | if not np.allclose(test_out, numpy_loop_group.astype(test_out.dtype)): 68 | print( 69 | name, 70 | acc_name, 71 | "FAILED test, output: [" + acc_name + "; correct]...", 72 | ) 73 | print(np.vstack((test_out, numpy_loop_group))) 74 | else: 75 | print(name, acc_name, "PASSED test") 76 | except NotImplementedError: 77 | print(name, acc_name, "NOT IMPLEMENTED") 78 | 79 | print("") 80 | print("----------benchmarking-------------") 81 | print( 82 | "Note that the actual observed speedup depends on a variety of properties of the input." 
83 | ) 84 | print("Here we are using 100,000 indices uniformly picked from [0, 1000).") 85 | print("Specifically, about 25% of the values are 0 (for use with bool operations),") 86 | print("the remainder are uniformly distributed on [-50,25).") 87 | print("Times are scaled to 10 repetitions (actual number of reps used may not be 10).") 88 | 89 | print( 90 | "".join( 91 | [ 92 | "function".rjust(8), 93 | "pure-py".rjust(14), 94 | "np-grouploop".rjust(14), 95 | "np-ufuncat".rjust(14), 96 | "np-optimised".rjust(14), 97 | "pandas".rjust(14), 98 | "ratio".rjust(15), 99 | ] 100 | ) 101 | ) 102 | 103 | for name, f in testable_funcs.items(): 104 | print(name.rjust(8), end="") 105 | times = [None] * 5 106 | for ii, acc_func in enumerate( 107 | [ 108 | aggregate_py, 109 | aggregate_group_loop, 110 | aggregate_ufunc, 111 | aggregate_np, 112 | aggregate_pd, 113 | ] 114 | ): 115 | try: 116 | func = f if acc_func is aggregate_group_loop else name 117 | reps = 3 if acc_func is aggregate_py else 20 118 | times[ii] = ( 119 | timeit.Timer( 120 | lambda: acc_func(test_group_idx, test_a, func=func) 121 | ).timeit(number=reps) 122 | / reps 123 | * 10 124 | ) 125 | print(f"{times[ii] * 1000:.1f}ms".rjust(13), end="") 126 | except NotImplementedError: 127 | print("no-impl".rjust(13), end="") 128 | 129 | denom = min(t for t in times if t is not None) 130 | ratios = [ 131 | ("-".center(4) if t is None else str(round(t / denom, 1))).center(5) 132 | for t in times 133 | ] 134 | print(" ", (":".join(ratios))) 135 | -------------------------------------------------------------------------------- /numpy_groupies/tests/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | import pytest 4 | 5 | from .. import aggregate_numpy, aggregate_numpy_ufunc, aggregate_purepy 6 | 7 | try: 8 | from .. import aggregate_numba 9 | except ImportError: 10 | aggregate_numba = None 11 | try: 12 | from .. 
import aggregate_pandas
13 | except ImportError:
14 |     aggregate_pandas = None
15 | 
16 | _implementations = [
17 |     aggregate_purepy,
18 |     aggregate_numpy_ufunc,
19 |     aggregate_numpy,
20 |     aggregate_numba,
21 |     aggregate_pandas,
22 | ]
23 | _implementations = [i for i in _implementations if i is not None]
24 | 
25 | 
26 | def _impl_name(impl):
27 |     if not impl or type(impl).__name__ == "NotSetType":
28 |         return
29 |     return impl.__name__.rsplit("aggregate_", 1)[1].rsplit("_", 1)[-1]
30 | 
31 | 
32 | _implemented_by_impl_name = {
33 |     "numpy": {"not_implemented": ("cumprod", "cummax", "cummin")},
34 |     "purepy": {
35 |         "not_implemented": ("cumsum", "cumprod", "cummax", "cummin", "sumofsquares")
36 |     },
37 |     "numba": {"not_implemented": ("array", "list", "sort")},
38 |     "pandas": {
39 |         "not_implemented": ("array", "list", "sort", "sumofsquares", "nansumofsquares")
40 |     },
41 |     "ufunc": {
42 |         "implemented": (
43 |             "sum",
44 |             "prod",
45 |             "min",
46 |             "max",
47 |             "len",
48 |             "all",
49 |             "any",
50 |             "anynan",
51 |             "allnan",
52 |         )
53 |     },
54 | }
55 | 
56 | 
57 | def _is_implemented(impl_name, funcname):
58 |     func_description = _implemented_by_impl_name[impl_name]
59 |     not_implemented = func_description.get("not_implemented", [])
60 |     implemented = func_description.get("implemented", [])
61 |     if impl_name == "purepy" and funcname.startswith("nan"):
62 |         return False
63 |     if funcname in not_implemented:
64 |         return False
65 |     if implemented and funcname not in implemented:
66 |         return False
67 |     return True
68 | 
69 | 
70 | def _wrap_notimplemented_skip(impl, name=None):
71 |     """Some implementations lack some functionality. That's ok, let's skip that instead of raising errors."""
72 | 
73 |     @wraps(impl)
74 |     def try_skip(*args, **kwargs):
75 |         try:
76 |             return impl(*args, **kwargs)
77 |         except NotImplementedError:
78 |             impl_name = impl.__module__.split("_")[-1]
79 |             func = kwargs.pop("func", None)
80 |             if callable(func):
81 |                 func = func.__name__
82 |             if not _is_implemented(impl_name, func):
83 |                 pytest.skip("Functionality not implemented")
84 | 
85 |     if name:
86 |         try_skip.__name__ = name
87 |     return try_skip
88 | 
89 | 
90 | func_list = (
91 |     "sum",
92 |     "prod",
93 |     "min",
94 |     "max",
95 |     "all",
96 |     "any",
97 |     "mean",
98 |     "std",
99 |     "var",
100 |     "len",
101 |     "argmin",
102 |     "argmax",
103 |     "anynan",
104 |     "allnan",
105 |     "cumsum",
106 |     "sumofsquares",
107 |     "nansum",
108 |     "nanprod",
109 |     "nanmin",
110 |     "nanmax",
111 |     "nanmean",
112 |     "nanstd",
113 |     "nanvar",
114 |     "nanlen",
115 |     "nanargmin",
116 |     "nanargmax",
117 |     "nansumofsquares",
118 | )
119 | 
--------------------------------------------------------------------------------
/numpy_groupies/tests/test_compare.py:
--------------------------------------------------------------------------------
1 | """
2 | In this test, aggregate_numpy is taken as a reference implementation and its
3 | results are compared against the results of the other implementations. Implementations
4 | may raise NotImplementedError in order to show missing functionality without causing
5 | test errors.
6 | """
7 | 
8 | from itertools import product
9 | 
10 | import numpy as np
11 | import pytest
12 | 
13 | from . import (
14 |     _impl_name,
15 |     _is_implemented,
16 |     aggregate_numba,
17 |     aggregate_numpy,
18 |     aggregate_numpy_ufunc,
19 |     aggregate_pandas,
20 |     aggregate_purepy,
21 |     func_list,
22 | )
23 | 
24 | 
25 | class AttrDict(dict):
26 |     __getattr__ = dict.__getitem__
27 | 
28 | 
29 | TEST_PAIRS = ["np/py", "ufunc/np", "numba/np", "pandas/np"]
30 | 
31 | 
32 | @pytest.fixture(params=TEST_PAIRS, scope="module")
33 | def aggregate_cmp(request, seed=100):
34 |     test_pair = request.param
35 |     if test_pair == "np/py":
36 |         # Some functions in purepy are not implemented
37 |         func_ref = aggregate_purepy.aggregate
38 |         func = aggregate_numpy.aggregate
39 |         group_cnt = 100
40 |     else:
41 |         group_cnt = 1000
42 |         func_ref = aggregate_numpy.aggregate
43 |         if "ufunc" in request.param:
44 |             impl = aggregate_numpy_ufunc
45 |         elif "numba" in request.param:
46 |             impl = aggregate_numba
47 |         elif "pandas" in request.param:
48 |             impl = aggregate_pandas
49 |         else:
50 |             impl = None
51 | 
52 |         if not impl:
53 |             pytest.skip("Implementation not available")
54 |         name = _impl_name(impl)
55 |         func = impl.aggregate
56 | 
57 |     rnd = np.random.RandomState(seed=seed)
58 | 
59 |     # Each group id occurs twice; repeating by 10 turns these into runs of 10 identical ids
60 |     group_idx = np.repeat(np.arange(group_cnt), 2)
61 |     rnd.shuffle(group_idx)
62 |     group_idx = np.repeat(group_idx, 10)
63 | 
64 |     a = rnd.randn(group_idx.size)
65 |     nana = a.copy()
66 |     nana[::3] = np.nan
67 |     nana[: (len(nana) // 2)] = np.nan
68 |     somea = a.copy()
69 |     somea[somea < 0.3] = 0
70 |     somea[::31] = np.nan
71 |     return AttrDict(locals())
72 | 
73 | 
74 | def _deselect_purepy(aggregate_cmp, *args, **kwargs):
75 |     # purepy implementation does not handle ndim arrays
76 |     # This is a won't-fix and should be deselected instead of skipped
77 |     return aggregate_cmp.endswith("py")
78 | 
79 | 
80 | def _deselect_not_implemented(aggregate_cmp, func, fill_value, *args, **kwargs):
81 |     impl_name = (
82 |         "purepy" if aggregate_cmp.endswith("py") else aggregate_cmp.split("/", 1)[0]
83 |     )
84 |     funcname = getattr(func, "__name__", func)
85 |     return not _is_implemented(impl_name, funcname)
86 | 
87 | 
88 | def func_arbitrary(iterator):
89 |     tmp = 0
90 |     for x in iterator:
91 |         tmp += x * x
92 |     return tmp
93 | 
94 | 
95 | def func_preserve_order(iterator):
96 |     tmp = 0
97 |     for i, x in enumerate(iterator, 1):
98 |         tmp += x**i
99 |     return tmp
100 | 
101 | 
102 | @pytest.mark.filterwarnings("ignore::FutureWarning")  # handled pandas deprecation
103 | @pytest.mark.filterwarnings("ignore:numpy.ufunc size changed")
104 | @pytest.mark.deselect_if(func=_deselect_not_implemented)
105 | @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
106 | @pytest.mark.parametrize("func", func_list, ids=lambda x: getattr(x, "__name__", x))
107 | def test_cmp(aggregate_cmp, func, fill_value, decimal=10):
108 |     is_nanfunc = "nan" in getattr(func, "__name__", func)
109 |     a = aggregate_cmp.nana if is_nanfunc else aggregate_cmp.a
110 |     try:
111 |         ref = aggregate_cmp.func_ref(
112 |             aggregate_cmp.group_idx, a, func=func, fill_value=fill_value
113 |         )
114 |     except ValueError:
115 |         with pytest.raises(ValueError):
116 |             aggregate_cmp.func(
117 |                 aggregate_cmp.group_idx, a, func=func, fill_value=fill_value
118 |             )
119 |     else:
120 |         try:
121 |             res = aggregate_cmp.func(
122 |                 aggregate_cmp.group_idx, a, func=func, fill_value=fill_value
123 |             )
124 |         except ValueError:
125 |             if np.isnan(fill_value) and aggregate_cmp.test_pair.endswith("py"):
126 |                 pytest.skip(
127 |                     "pure python version uses lists and does not raise ValueErrors when inserting nan into integers"
128 |                 )
129 |             elif aggregate_cmp.test_pair.startswith("pandas"):
130 |                 pytest.skip("pandas now raises ValueError on all-nan arrays")
131 | 
132 |             else:
133 |                 raise
134 |         if isinstance(ref, np.ndarray):
135 |             assert res.dtype == ref.dtype
136 |         try:
137 |             np.testing.assert_allclose(res, ref, rtol=10**-decimal)
138 |         except AssertionError:
139 |             if "arg" in func and aggregate_cmp.test_pair.startswith("pandas"):
140 |                 pytest.skip(
141 |                     "pandas doesn't fill indices for all-nan groups with fill_value, but with -inf instead"
142 |                 )
143 |             else:
144 |                 raise
145 | 
146 | 
147 | @pytest.mark.deselect_if(func=_deselect_purepy)
148 | @pytest.mark.parametrize(["ndim", "order"], product([2, 3], ["C", "F"]))
149 | def test_cmp_ndim(aggregate_cmp, ndim, order, outsize=100, decimal=14):
150 |     nindices = int(outsize**ndim)
151 |     outshape = tuple([outsize] * ndim)
152 |     group_idx = np.random.randint(0, outsize, size=(ndim, nindices))
153 |     a = np.random.random(group_idx.shape[1])
154 | 
155 |     res = aggregate_cmp.func(group_idx, a, size=outshape, order=order)
156 |     ref = aggregate_cmp.func_ref(group_idx, a, size=outshape, order=order)
157 |     if ndim > 1 and order == "F":
158 |         # 1d arrays always return False here
159 |         assert np.isfortran(res)
160 |     else:
161 |         assert not np.isfortran(res)
162 |     assert res.shape == outshape
163 |     np.testing.assert_array_almost_equal(res, ref, decimal=decimal)
164 | 
--------------------------------------------------------------------------------
/numpy_groupies/tests/test_generic.py:
--------------------------------------------------------------------------------
1 | """Tests that are run against all implemented versions of aggregate."""
2 | 
3 | import itertools
4 | import warnings
5 | 
6 | import numpy as np
7 | import pytest
8 | 
9 | from . import (
10 |     _impl_name,
11 |     _implementations,
12 |     _wrap_notimplemented_skip,
13 |     func_list,
14 |     _is_implemented,
15 | )
16 | 
17 | 
18 | @pytest.fixture(params=_implementations, ids=_impl_name)
19 | def aggregate_all(request):
20 |     impl = request.param
21 |     if impl is None:
22 |         pytest.skip("Implementation not available")
23 |     name = _impl_name(impl)
24 |     return _wrap_notimplemented_skip(impl.aggregate, "aggregate_" + name)
25 | 
26 | 
27 | def _deselect_purepy(aggregate_all, *args, **kwargs):
28 |     # The purepy implementation does not handle nan values and ndim correctly.
29 |     # So it needs to be excluded from several tests.
30 |     return aggregate_all.__name__.endswith("purepy")
31 | 
32 | 
33 | def _deselect_purepy_and_pandas(aggregate_all, *args, **kwargs):
34 |     # The purepy and pandas implementations handle some nan cases differently.
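    # (e.g. pandas unconditionally skips nan values, where the reference
    # implementation keeps them)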
35 |     # So they need to be excluded from several tests.
36 |     return aggregate_all.__name__.endswith(("pandas", "purepy"))
37 | 
38 | 
39 | def _deselect_purepy_and_invalid_axis(aggregate_all, func, size, axis):
40 |     impl_name = aggregate_all.__name__.split("_")[-1]
41 |     if impl_name == "purepy":
42 |         # purepy does not handle the axis parameter
43 |         return True
44 |     if axis >= len(size):
45 |         return True
46 |     if not _is_implemented(impl_name, func):
47 |         return True
48 |     return False
49 | 
50 | 
51 | def _deselect_not_implemented(aggregate_all, func, *args, **kwargs):
52 |     impl_name = aggregate_all.__name__.split("_")[-1]
53 |     return not _is_implemented(impl_name, func)
54 | 
55 | 
56 | def test_preserve_missing(aggregate_all):
57 |     res = aggregate_all(np.array([0, 1, 3, 1, 3]), np.arange(101, 106, dtype=int))
58 |     np.testing.assert_array_equal(res, np.array([101, 206, 0, 208]))
59 |     if not isinstance(res, list):
60 |         assert "int" in res.dtype.name
61 | 
62 | 
63 | @pytest.mark.parametrize("group_idx_type", [int, "uint32", "uint64"])
64 | def test_uint_group_idx(aggregate_all, group_idx_type):
65 |     group_idx = np.array([1, 1, 2, 2, 2, 2, 4, 4], dtype=group_idx_type)
66 |     res = aggregate_all(group_idx, np.ones(group_idx.size), dtype=int)
67 |     np.testing.assert_array_equal(res, np.array([0, 2, 4, 0, 2]))
68 |     if not isinstance(res, list):
69 |         assert "int" in res.dtype.name
70 | 
71 | 
72 | def test_start_with_offset(aggregate_all):
73 |     group_idx = np.array([1, 1, 2, 2, 2, 2, 4, 4])
74 |     res = aggregate_all(group_idx, np.ones(group_idx.size), dtype=int)
75 |     np.testing.assert_array_equal(res, np.array([0, 2, 4, 0, 2]))
76 |     if not isinstance(res, list):
77 |         assert "int" in res.dtype.name
78 | 
79 | 
80 | @pytest.mark.parametrize(
81 |     "floatfunc", [np.std, np.var, np.mean], ids=lambda x: x.__name__
82 | )
83 | def test_float_enforcement(aggregate_all, floatfunc):
84 |     group_idx = np.arange(10).repeat(3)
85 |     a = np.arange(group_idx.size)
86 |     res = aggregate_all(group_idx, a, floatfunc)
87 |     if not isinstance(res, list):
88 |         assert "float" in res.dtype.name
89 |     assert np.all(np.array(res) > 0)
90 | 
91 | 
92 | def test_start_with_offset_prod(aggregate_all):
93 |     group_idx = np.array([2, 2, 4, 4, 4, 7, 7, 7])
94 |     res = aggregate_all(group_idx, group_idx, func=np.prod, dtype=int)
95 |     np.testing.assert_array_equal(res, np.array([0, 0, 4, 0, 64, 0, 0, 343]))
96 | 
97 | 
98 | def test_no_negative_indices(aggregate_all):
99 |     for pos in (0, 10, -1):
100 |         group_idx = np.arange(5).repeat(5)
101 |         group_idx[pos] = -1
102 |         pytest.raises(ValueError, aggregate_all, group_idx, np.arange(len(group_idx)))
103 | 
104 | 
105 | def test_parameter_missing(aggregate_all):
106 |     pytest.raises(TypeError, aggregate_all, np.arange(5))
107 | 
108 | 
109 | def test_shape_mismatch(aggregate_all):
110 |     pytest.raises(ValueError, aggregate_all, np.array((1, 2, 3)), np.array((1, 2)))
111 | 
112 | 
113 | def test_create_lists(aggregate_all):
114 |     res = aggregate_all(
115 |         np.array([0, 1, 3, 1, 3]), np.arange(101, 106, dtype=int), func=list
116 |     )
117 |     np.testing.assert_array_equal(np.array(res[0]), np.array([101]))
118 |     assert res[2] == 0
119 |     np.testing.assert_array_equal(np.array(res[3]), np.array([103, 105]))
120 | 
121 | 
122 | def test_item_counting(aggregate_all):
123 |     group_idx = np.array([0, 1, 2, 3, 3, 3, 3, 4, 5, 5, 5, 6, 5, 4, 3, 8, 8])
124 |     a = np.arange(group_idx.size)
125 |     res = aggregate_all(group_idx, a, func=lambda x: len(x) > 1)
126 |     np.testing.assert_array_equal(res, np.array([0, 0, 0, 1, 1, 1, 0, 0, 1]))
127 | 
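# For orientation, the custom-callable dispatch exercised above boils down to
# calls like the following (an illustrative sketch against the numpy
# implementation, not an additional test):
#
#     import numpy as np
#     from numpy_groupies import aggregate_np
#     aggregate_np(np.array([0, 1, 1]), np.array([5, 6, 7]),
#                  func=lambda x: len(x) > 1)
#     # -> array([0., 1.])  (callables go through the generic float path)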
128 | 129 | @pytest.mark.parametrize( 130 | ["func", "fill_value"], [(np.array, None), (np.sum, -1)], ids=["array", "sum"] 131 | ) 132 | def test_fill_value(aggregate_all, func, fill_value): 133 | group_idx = np.array([0, 2, 2], dtype=int) 134 | res = aggregate_all( 135 | group_idx, 136 | np.arange(len(group_idx), dtype=int), 137 | func=func, 138 | fill_value=fill_value, 139 | ) 140 | assert res[1] == fill_value 141 | 142 | 143 | @pytest.mark.parametrize("order", ["C", "F"]) 144 | def test_array_ordering(aggregate_all, order, size=10): 145 | mat = np.zeros((size, size), order=order, dtype=float) 146 | mat.flat[:] = np.arange(size * size) 147 | assert aggregate_all(np.zeros(size, dtype=int), mat[0, :], order=order)[0] == sum( 148 | range(size) 149 | ) 150 | 151 | 152 | @pytest.mark.deselect_if(func=_deselect_purepy) 153 | @pytest.mark.parametrize("size", [None, (10, 2)]) 154 | def test_ndim_group_idx(aggregate_all, size): 155 | group_idx = np.vstack((np.repeat(np.arange(10), 10), np.repeat([0, 1], 50))) 156 | aggregate_all(group_idx, 1, size=size) 157 | 158 | 159 | @pytest.mark.deselect_if(func=_deselect_purepy) 160 | @pytest.mark.parametrize(["ndim", "order"], itertools.product([1, 2, 3], ["C", "F"])) 161 | def test_ndim_indexing(aggregate_all, ndim, order, outsize=10): 162 | nindices = int(outsize**ndim) 163 | outshape = tuple([outsize] * ndim) 164 | group_idx = np.random.randint(0, outsize, size=(ndim, nindices)) 165 | a = np.random.random(group_idx.shape[1]) 166 | res = aggregate_all(group_idx, a, size=outshape, order=order) 167 | if ndim > 1 and order == "F": 168 | # 1d arrays always return False here 169 | assert np.isfortran(res) 170 | else: 171 | assert not np.isfortran(res) 172 | assert res.shape == outshape 173 | 174 | 175 | def test_len(aggregate_all, group_size=5): 176 | group_idx = np.arange(0, 100, 2, dtype=int).repeat(group_size) 177 | a = np.arange(group_idx.size) 178 | res = aggregate_all(group_idx, a, func="len") 179 | ref = aggregate_all(group_idx, 1, func="sum") 180 | if isinstance(res, np.ndarray): 181 | assert issubclass(res.dtype.type, np.integer) 182 | else: 183 | assert isinstance(res[0], int) 184 | np.testing.assert_array_equal(res, ref) 185 | group_idx = np.arange(0, 100, dtype=int).repeat(group_size) 186 | a = np.arange(group_idx.size) 187 | res = aggregate_all(group_idx, a, func=len) 188 | if isinstance(res, np.ndarray): 189 | assert np.all(res == group_size) 190 | else: 191 | assert all(x == group_size for x in res) 192 | 193 | 194 | def test_nan_len(aggregate_all): 195 | group_idx = np.arange(0, 20, 2, dtype=int).repeat(5) 196 | a = np.random.random(group_idx.size) 197 | a[::4] = np.nan 198 | a[::5] = np.nan 199 | res = aggregate_all(group_idx, a, func="nanlen") 200 | ref = aggregate_all(group_idx[~np.isnan(a)], 1, func="sum") 201 | if isinstance(res, np.ndarray): 202 | assert issubclass(res.dtype.type, np.integer) 203 | else: 204 | assert isinstance(res[0], int) 205 | np.testing.assert_array_equal(res, ref) 206 | 207 | 208 | @pytest.mark.parametrize("first_last", ["first", "last"]) 209 | def test_first_last(aggregate_all, first_last): 210 | group_idx = np.arange(0, 100, 2, dtype=int).repeat(5) 211 | a = np.arange(group_idx.size) 212 | res = aggregate_all(group_idx, a, func=first_last, fill_value=-1) 213 | ref = np.zeros(np.max(group_idx) + 1) 214 | ref.fill(-1) 215 | ref[::2] = np.arange( 216 | 0 if first_last == "first" else 4, group_idx.size, 5, dtype=int 217 | ) 218 | np.testing.assert_array_equal(res, ref) 219 | 220 | 221 | @pytest.mark.parametrize( 222 | 
["first_last", "nanoffset"], itertools.product(["nanfirst", "nanlast"], [0, 2, 4]) 223 | ) 224 | def test_nan_first_last(aggregate_all, first_last, nanoffset): 225 | group_idx = np.arange(0, 100, 2, dtype=int).repeat(5) 226 | a = np.arange(group_idx.size, dtype=float) 227 | 228 | a[nanoffset::5] = np.nan 229 | res = aggregate_all(group_idx, a, func=first_last, fill_value=-1) 230 | ref = np.zeros(np.max(group_idx) + 1) 231 | ref.fill(-1) 232 | 233 | if first_last == "nanfirst": 234 | ref_offset = 1 if nanoffset == 0 else 0 235 | else: 236 | ref_offset = 3 if nanoffset == 4 else 4 237 | ref[::2] = np.arange(ref_offset, group_idx.size, 5, dtype=int) 238 | np.testing.assert_array_equal(res, ref) 239 | 240 | 241 | @pytest.mark.parametrize(["func", "ddof"], itertools.product(["var", "std"], [0, 1, 2])) 242 | def test_ddof(aggregate_all, func, ddof, size=20): 243 | group_idx = np.zeros(20, dtype=int) 244 | a = np.random.random(group_idx.size) 245 | res = aggregate_all(group_idx, a, func, ddof=ddof) 246 | ref_func = {"std": np.std, "var": np.var}.get(func) 247 | ref = ref_func(a, ddof=ddof) 248 | assert abs(res[0] - ref) < 1e-10 249 | 250 | 251 | @pytest.mark.parametrize("func", ["sum", "prod", "mean", "var", "std"]) 252 | def test_scalar_input(aggregate_all, func): 253 | group_idx = np.arange(0, 100, dtype=int).repeat(5) 254 | if func not in ("sum", "prod"): 255 | pytest.raises( 256 | (ValueError, NotImplementedError), aggregate_all, group_idx, 1, func=func 257 | ) 258 | else: 259 | res = aggregate_all(group_idx, 1, func=func) 260 | ref = aggregate_all(group_idx, np.ones_like(group_idx, dtype=int), func=func) 261 | np.testing.assert_array_equal(res, ref) 262 | 263 | 264 | @pytest.mark.parametrize("func", ["sum", "prod", "mean", "var", "std", "all", "any"]) 265 | def test_nan_input(aggregate_all, func, groups=100): 266 | if aggregate_all.__name__.endswith("pandas"): 267 | pytest.skip("pandas always skips nan values") 268 | group_idx = np.arange(0, groups, dtype=int).repeat(5) 269 | a = np.random.random(group_idx.size) 270 | a[::2] = np.nan 271 | 272 | if func in ("all", "any"): 273 | ref = np.ones(groups, dtype=bool) 274 | else: 275 | ref = np.full(groups, np.nan, dtype=float) 276 | res = aggregate_all(group_idx, a, func=func) 277 | np.testing.assert_array_equal(res, ref) 278 | 279 | 280 | def test_nan_input_len(aggregate_all, groups=100, group_size=5): 281 | if aggregate_all.__name__.endswith("pandas"): 282 | pytest.skip("pandas always skips nan values") 283 | group_idx = np.arange(0, groups, dtype=int).repeat(group_size) 284 | a = np.random.random(len(group_idx)) 285 | a[::2] = np.nan 286 | ref = np.full(groups, group_size, dtype=int) 287 | res = aggregate_all(group_idx, a, func=len) 288 | np.testing.assert_array_equal(res, ref) 289 | 290 | 291 | def test_argmin_argmax_nonans(aggregate_all): 292 | group_idx = np.array([0, 0, 0, 0, 3, 3, 3, 3]) 293 | a = np.array([4, 4, 3, 1, 10, 9, 9, 11]) 294 | 295 | res = aggregate_all(group_idx, a, func="argmax", fill_value=-1) 296 | np.testing.assert_array_equal(res, [0, -1, -1, 7]) 297 | 298 | res = aggregate_all(group_idx, a, func="argmin", fill_value=-1) 299 | np.testing.assert_array_equal(res, [3, -1, -1, 5]) 300 | 301 | 302 | @pytest.mark.deselect_if(func=_deselect_purepy) 303 | def test_argmin_argmax_nans(aggregate_all): 304 | if aggregate_all.__name__.endswith("pandas"): 305 | pytest.skip("pandas always ignores nans") 306 | 307 | group_idx = np.array([0, 0, 0, 0, 3, 3, 3, 3]) 308 | a = np.array([4, 4, 3, 1, np.nan, 1, 2, 3]) 309 | 310 | res = 
aggregate_all(group_idx, a, func="argmax", fill_value=-1) 311 | np.testing.assert_array_equal(res, [0, -1, -1, -1]) 312 | 313 | res = aggregate_all(group_idx, a, func="argmin", fill_value=-1) 314 | np.testing.assert_array_equal(res, [3, -1, -1, -1]) 315 | 316 | 317 | @pytest.mark.deselect_if(func=_deselect_purepy) 318 | def test_nanargmin_nanargmax_nans(aggregate_all): 319 | if aggregate_all.__name__.endswith("pandas"): 320 | pytest.skip( 321 | "pandas doesn't fill indices for all-nan groups with fill_value but with -inf instead" 322 | ) 323 | 324 | group_idx = np.array([0, 0, 0, 0, 3, 3, 3, 3]) 325 | a = np.array([4, 4, np.nan, 1, np.nan, np.nan, np.nan, np.nan]) 326 | 327 | res = aggregate_all(group_idx, a, func="nanargmax", fill_value=-1) 328 | np.testing.assert_array_equal(res, [0, -1, -1, -1]) 329 | 330 | res = aggregate_all(group_idx, a, func="nanargmin", fill_value=-1) 331 | np.testing.assert_array_equal(res, [3, -1, -1, -1]) 332 | 333 | 334 | def test_nanargmin_nanargmax_nonans(aggregate_all): 335 | group_idx = np.array([0, 0, 0, 0, 3, 3, 3, 3]) 336 | a = np.array([4, 4, 3, 1, 10, 9, 9, 11]) 337 | 338 | res = aggregate_all(group_idx, a, func="nanargmax", fill_value=-1) 339 | np.testing.assert_array_equal(res, [0, -1, -1, 7]) 340 | 341 | res = aggregate_all(group_idx, a, func="nanargmin", fill_value=-1) 342 | np.testing.assert_array_equal(res, [3, -1, -1, 5]) 343 | 344 | 345 | def test_min_max_inf(aggregate_all): 346 | # https://github.com/ml31415/numpy-groupies/issues/40 347 | res = aggregate_all( 348 | np.array([0, 1, 2, 0, 1, 2]), 349 | np.array([-np.inf, 0, -np.inf, -np.inf, 0, 0]), 350 | func="max", 351 | ) 352 | np.testing.assert_array_equal(res, [-np.inf, 0, 0]) 353 | 354 | res = aggregate_all( 355 | np.array([0, 1, 2, 0, 1, 2]), 356 | np.array([np.inf, 0, np.inf, np.inf, 0, 0]), 357 | func="min", 358 | ) 359 | np.testing.assert_array_equal(res, [np.inf, 0, 0]) 360 | 361 | 362 | def test_argmin_argmax_inf(aggregate_all): 363 | # https://github.com/ml31415/numpy-groupies/issues/40 364 | res = aggregate_all( 365 | np.array([0, 1, 2, 0, 1, 2]), 366 | np.array([-np.inf, 0, -np.inf, -np.inf, 0, 0]), 367 | func="argmax", 368 | fill_value=-1, 369 | ) 370 | np.testing.assert_array_equal(res, [0, 1, 5]) 371 | 372 | res = aggregate_all( 373 | np.array([0, 1, 2, 0, 1, 2]), 374 | np.array([np.inf, 0, np.inf, np.inf, 0, 0]), 375 | func="argmin", 376 | fill_value=-1, 377 | ) 378 | np.testing.assert_array_equal(res, [0, 1, 5]) 379 | 380 | 381 | def test_mean(aggregate_all): 382 | group_idx = np.array([0, 0, 0, 0, 3, 3, 3, 3]) 383 | a = np.arange(len(group_idx)) 384 | 385 | res = aggregate_all(group_idx, a, func="mean") 386 | np.testing.assert_array_equal(res, [1.5, 0, 0, 5.5]) 387 | 388 | 389 | def test_cumsum(aggregate_all): 390 | group_idx = np.array([4, 3, 3, 4, 4, 1, 1, 1, 7, 8, 7, 4, 3, 3, 1, 1]) 391 | a = np.array([3, 4, 1, 3, 9, 9, 6, 7, 7, 0, 8, 2, 1, 8, 9, 8]) 392 | ref = np.array([3, 4, 5, 6, 15, 9, 15, 22, 7, 0, 15, 17, 6, 14, 31, 39]) 393 | 394 | res = aggregate_all(group_idx, a, func="cumsum") 395 | np.testing.assert_array_equal(res, ref) 396 | 397 | 398 | @pytest.mark.deselect_if(func=_deselect_purepy_and_pandas) 399 | def test_nancumsum(aggregate_all): 400 | # https://github.com/ml31415/numpy-groupies/issues/79 401 | group_idx = [0, 0, 0, 1, 1, 0, 0] 402 | a = [2, 2, np.nan, 2, 2, 2, 2] 403 | ref = [2.0, 4.0, 4.0, 2.0, 4.0, 6.0, 8.0] 404 | 405 | res = aggregate_all(group_idx, a, func="nancumsum") 406 | np.testing.assert_array_equal(res, ref) 407 | 408 | 409 | def 
test_cummax(aggregate_all): 410 | group_idx = np.array([4, 3, 3, 4, 4, 1, 1, 1, 7, 8, 7, 4, 3, 3, 1, 1]) 411 | a = np.array([3, 4, 1, 3, 9, 9, 6, 7, 7, 0, 8, 2, 1, 8, 9, 8]) 412 | ref = np.array([3, 4, 4, 3, 9, 9, 9, 9, 7, 0, 8, 9, 4, 8, 9, 9]) 413 | 414 | res = aggregate_all(group_idx, a, func="cummax") 415 | np.testing.assert_array_equal(res, ref) 416 | 417 | 418 | @pytest.mark.parametrize("order", ["normal", "reverse"]) 419 | def test_list_ordering(aggregate_all, order): 420 | group_idx = np.repeat(np.arange(5), 4) 421 | a = np.arange(group_idx.size) 422 | if order == "reverse": 423 | a = a[::-1] 424 | ref = a[:4] 425 | 426 | res = aggregate_all(group_idx, a, func=list) 427 | np.testing.assert_array_equal(np.array(res[0]), ref) 428 | 429 | 430 | @pytest.mark.parametrize("order", ["normal", "reverse"]) 431 | def test_sort(aggregate_all, order): 432 | group_idx = np.array([3, 3, 3, 2, 2, 2, 1, 1, 1]) 433 | a = np.array([3, 2, 1, 3, 4, 5, 5, 10, 1]) 434 | ref_normal = np.array([1, 2, 3, 3, 4, 5, 1, 5, 10]) 435 | ref_reverse = np.array([3, 2, 1, 5, 4, 3, 10, 5, 1]) 436 | reverse = order == "reverse" 437 | ref = ref_reverse if reverse else ref_normal 438 | 439 | res = aggregate_all(group_idx, a, func="sort", reverse=reverse) 440 | np.testing.assert_array_equal(res, ref) 441 | 442 | 443 | @pytest.mark.deselect_if(func=_deselect_purepy_and_invalid_axis) 444 | @pytest.mark.parametrize("axis", (0, 1)) 445 | @pytest.mark.parametrize("size", ((12,), (12, 5))) 446 | @pytest.mark.parametrize("func", func_list) 447 | def test_along_axis(aggregate_all, func, size, axis): 448 | group_idx = np.zeros(size[axis], dtype=int) 449 | a = np.random.randn(*size) 450 | 451 | # add some NaNs to test out nan-skipping 452 | if "nan" in func and "nanarg" not in func: 453 | a[[1, 4, 5], ...] = np.nan 454 | elif "nanarg" in func and a.ndim > 1: 455 | a[[1, 4, 5], 1] = np.nan 456 | if func in ["any", "all"]: 457 | a = a > 0.5 458 | 459 | # construct expected values for all cases 460 | if func == "len": 461 | expected = np.array(size[axis]) 462 | elif func == "nanlen": 463 | expected = np.array((~np.isnan(a)).sum(axis=axis)) 464 | elif func == "anynan": 465 | expected = np.isnan(a).any(axis=axis) 466 | elif func == "allnan": 467 | expected = np.isnan(a).all(axis=axis) 468 | elif func == "sumofsquares": 469 | expected = np.sum(a * a, axis=axis) 470 | elif func == "nansumofsquares": 471 | expected = np.nansum(a * a, axis=axis) 472 | else: 473 | with warnings.catch_warnings(): 474 | # Filter expected warnings: 475 | # - RuntimeWarning: All-NaN slice encountered 476 | # - RuntimeWarning: Mean of empty slice 477 | # - RuntimeWarning: Degrees of freedom <= 0 for slice. 478 | warnings.simplefilter("ignore", RuntimeWarning) 479 | expected = getattr(np, func)(a, axis=axis) 480 | 481 | # The default fill_value is 0, the following makes the output match numpy 482 | fill_value = { 483 | "nanprod": 1, 484 | "nanvar": np.nan, 485 | "nanstd": np.nan, 486 | "nanmax": np.nan, 487 | "nanmin": np.nan, 488 | "nanmean": np.nan, 489 | }.get(func, 0) 490 | 491 | actual = aggregate_all(group_idx, a, axis=axis, func=func, fill_value=fill_value) 492 | assert actual.ndim == a.ndim 493 | 494 | # argmin, argmax don't support keepdims, so we can't use that to construct expected 495 | # instead we squeeze out the extra dims in actual. 
496 | np.testing.assert_allclose(actual.squeeze(), expected) 497 | 498 | 499 | @pytest.mark.deselect_if(func=_deselect_purepy) 500 | def test_not_last_axis_reduction(aggregate_all): 501 | group_idx = np.array([1, 2, 2, 0, 1]) 502 | a = np.array([[1.0, 2.0], [4.0, 4.0], [5.0, 2.0], [np.nan, 3.0], [8.0, 7.0]]) 503 | func = "nanmax" 504 | fill_value = np.nan 505 | axis = 0 506 | actual = aggregate_all(group_idx, a, axis=axis, func=func, fill_value=fill_value) 507 | expected = np.array([[np.nan, 3.0], [8.0, 7.0], [5.0, 4.0]]) 508 | np.testing.assert_allclose(expected, actual) 509 | 510 | 511 | @pytest.mark.deselect_if(func=_deselect_purepy) 512 | def test_custom_callable(aggregate_all): 513 | def custom_callable(x): 514 | return x.sum() 515 | 516 | size = (10,) 517 | axis = -1 518 | 519 | group_idx = np.zeros(size, dtype=int) 520 | a = np.random.randn(*size) 521 | 522 | expected = a.sum(axis=axis, keepdims=True) 523 | actual = aggregate_all(group_idx, a, axis=axis, func=custom_callable, fill_value=0) 524 | assert actual.ndim == a.ndim 525 | 526 | np.testing.assert_allclose(actual, expected) 527 | 528 | 529 | @pytest.mark.deselect_if(func=_deselect_purepy) 530 | def test_argreduction_nD_array_1D_idx(aggregate_all): 531 | # https://github.com/ml31415/numpy-groupies/issues/41 532 | group_idx = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0], dtype=int) 533 | a = np.array([[1] * 12, [1] * 12]) 534 | actual = aggregate_all(group_idx, a, axis=-1, func="argmax") 535 | expected = np.array([[0, 5, 2], [0, 5, 2]]) 536 | np.testing.assert_equal(actual, expected) 537 | 538 | 539 | @pytest.mark.deselect_if(func=_deselect_purepy) 540 | def test_argreduction_negative_fill_value(aggregate_all): 541 | if aggregate_all.__name__.endswith("pandas"): 542 | pytest.skip("pandas always skips nan values") 543 | 544 | group_idx = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0], dtype=int) 545 | a = np.array([[1] * 12, [np.nan] * 12]) 546 | actual = aggregate_all(group_idx, a, axis=-1, fill_value=-1, func="argmax") 547 | expected = np.array([[0, 5, 2], [-1, -1, -1]]) 548 | np.testing.assert_equal(actual, expected) 549 | 550 | 551 | @pytest.mark.deselect_if(func=_deselect_not_implemented) 552 | @pytest.mark.parametrize( 553 | "nan_inds", (None, tuple([[1, 4, 5], Ellipsis]), tuple((1, (0, 1, 2, 3)))) 554 | ) 555 | @pytest.mark.parametrize("ddof", (0, 1)) 556 | @pytest.mark.parametrize("func", ("nanvar", "nanstd")) 557 | def test_var_with_nan_fill_value(aggregate_all, ddof, nan_inds, func): 558 | a = np.ones((12, 5)) 559 | group_idx = np.zeros(a.shape[-1:], dtype=int) 560 | 561 | if nan_inds is not None: 562 | a[nan_inds] = np.nan 563 | 564 | with warnings.catch_warnings(): 565 | # Filter RuntimeWarning: Degrees of freedom <= 0 for slice. 
566 | warnings.simplefilter("ignore", RuntimeWarning) 567 | expected = getattr(np, func)(a, keepdims=True, axis=-1, ddof=ddof) 568 | 569 | actual = aggregate_all( 570 | group_idx, a, axis=-1, fill_value=np.nan, func=func, ddof=ddof 571 | ) 572 | np.testing.assert_equal(actual, expected) 573 | 574 | 575 | def test_cumsum_accuracy(aggregate_all): 576 | array = np.array( 577 | [0.00000000e00, 0.00000000e00, 0.00000000e00, 3.27680000e04, 9.99999975e-06] 578 | ) 579 | group_idx = np.array([0, 0, 0, 0, 1]) 580 | 581 | actual = aggregate_all(group_idx, array, axis=-1, func="cumsum") 582 | expected = array 583 | np.testing.assert_allclose(actual, expected) 584 | -------------------------------------------------------------------------------- /numpy_groupies/tests/test_indices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from . import _impl_name, aggregate_numba 5 | 6 | _implementations = [aggregate_numba] 7 | _implementations = [i for i in _implementations if i is not None] 8 | 9 | 10 | @pytest.fixture(params=_implementations, ids=_impl_name) 11 | def aggregate_nb_wv(request): 12 | if request.param is None: 13 | pytest.skip("Implementation not available") 14 | return request.param 15 | 16 | 17 | def test_step_indices_length(aggregate_nb_wv): 18 | group_idx = np.array([1, 1, 1, 2, 2, 3, 3, 4, 4, 2, 2], dtype=int) 19 | for _ in range(20): 20 | np.random.shuffle(group_idx) 21 | step_cnt_ref = np.count_nonzero(np.diff(group_idx)) 22 | assert aggregate_nb_wv.step_count(group_idx) == step_cnt_ref + 1 23 | assert len(aggregate_nb_wv.step_indices(group_idx)) == step_cnt_ref + 2 24 | 25 | 26 | def test_step_indices_fields(aggregate_nb_wv): 27 | group_idx = np.array([1, 1, 1, 2, 2, 3, 3, 4, 5, 2, 2], dtype=int) 28 | steps = aggregate_nb_wv.step_indices(group_idx) 29 | np.testing.assert_array_equal(steps, np.array([0, 3, 5, 7, 8, 9, 11])) 30 | -------------------------------------------------------------------------------- /numpy_groupies/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..utils import check_dtype, unpack 4 | 5 | 6 | def test_check_dtype(): 7 | dtype = check_dtype(None, "mean", np.arange(10, dtype=int), 10) 8 | assert np.issubdtype(dtype, np.floating) 9 | 10 | 11 | def test_unpack(): 12 | """Keep this test, in case unpack might get reimplemented again at some point.""" 13 | group_idx = np.arange(10) 14 | np.random.shuffle(group_idx) 15 | group_idx = np.repeat(group_idx, 3) 16 | vals = np.random.randn(np.max(group_idx) + 1) 17 | np.testing.assert_array_equal(unpack(group_idx, vals), vals[group_idx]) 18 | 19 | 20 | def test_unpack_long(): 21 | group_idx = np.repeat(np.arange(10000), 20) 22 | vals = np.random.randn(np.max(group_idx) + 1) 23 | np.testing.assert_array_equal(unpack(group_idx, vals), vals[group_idx]) 24 | -------------------------------------------------------------------------------- /numpy_groupies/utils.py: -------------------------------------------------------------------------------- 1 | """Common functionality for all aggregate implementations.""" 2 | 3 | import platform 4 | import numpy as np 5 | 6 | aggregate_common_doc = """ 7 | See readme file at https://github.com/ml31415/numpy-groupies for a full 8 | description. Below we reproduce the "Full description of inputs" 9 | section from that readme, note that the text below makes references to 10 | other portions of the readme that are not shown here. 
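
A minimal usage sketch (illustrative; the package re-exports ``aggregate``
at the top level):

    >>> import numpy as np
    >>> from numpy_groupies import aggregate
    >>> aggregate(np.array([0, 0, 1, 1]), np.array([1, 2, 3, 4]), func="sum")
    array([3, 7])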
11 | 
12 | group_idx:
13 |     this is an array of non-negative integers, to be used as the "labels"
14 |     with which to group the values in ``a``. Although we have so far
15 |     assumed that ``group_idx`` is one-dimensional, and the same length as
16 |     ``a``, it can in fact be two-dimensional (or some form of nested
17 |     sequences that can be converted to 2D). When ``group_idx`` is 2D, the
18 |     size of the 0th dimension corresponds to the number of dimensions in
19 |     the output, i.e. ``group_idx[i,j]`` gives the index into the ith
20 |     dimension in the output
21 |     for ``a[j]``. Note that ``a`` should still be 1D (or scalar), with
22 |     length matching ``group_idx.shape[1]``.
23 | a:
24 |     this is the array of values to be aggregated. See above for a
25 |     simple demonstration of what this means. ``a`` will normally be a
26 |     one-dimensional array, however it can also be a scalar in some cases.
27 | func: default='sum'
28 |     the function to use for aggregation. See the section above for
29 |     details. Note that the simplest way to specify the function is using a
30 |     string (e.g. ``func='max'``); however, a number of aliases are also
31 |     defined (e.g. you can use ``func=np.max``, or even ``func=max``,
32 |     where ``max`` is the
33 |     builtin function). To check the available aliases see ``utils.py``.
34 | size: default=None
35 |     the shape of the output array. If ``None``, the maximum value in
36 |     ``group_idx`` will set the size of the output. Note that for
37 |     multidimensional output you need to list the size of each dimension
38 |     here, or give ``None``.
39 | fill_value: default=0
40 |     in the example above, group 2 does not have any data, so requires some
41 |     kind of filling value - in this case the default of ``0`` is used. If
42 |     you had set ``fill_value=nan`` or something else, that value would
43 |     appear instead of ``0`` for element 2 in the output. Note that
44 |     there are some subtle interactions between what is permitted for
45 |     ``fill_value`` and the input/output ``dtype`` - exceptions should be
46 |     raised in most cases to alert the programmer if issues arise.
47 | order: default='C'
48 |     this is relevant only for multidimensional output. It controls the
49 |     layout of the output array in memory, can be ``'F'`` for Fortran-style.
50 | dtype: default=None
51 |     the ``dtype`` of the output. By default something sensible is chosen
52 |     based on the input, aggregation function, and ``fill_value``.
53 | ddof: default=0
54 |     passed through into calculations of variance and standard deviation
55 |     (see above).
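
For example (a sketch; group 2 below receives no data and therefore gets the
fill value):

    import numpy as np
    from numpy_groupies import aggregate
    aggregate(np.array([0, 0, 1, 3]), np.array([1.0, 2.0, 4.0, 8.0]),
              fill_value=np.nan)
    # -> [3., 4., nan, 8.]  (element 2 holds the fill_value)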
56 | """ 57 | 58 | funcs_common = "first last len mean var std allnan anynan max min argmax argmin sumofsquares cumsum cumprod cummax cummin".split() 59 | funcs_no_separate_nan = frozenset(["sort", "rsort", "array", "allnan", "anynan"]) 60 | 61 | 62 | _alias_str = { 63 | "or": "any", 64 | "and": "all", 65 | "add": "sum", 66 | "count": "len", 67 | "plus": "sum", 68 | "multiply": "prod", 69 | "product": "prod", 70 | "times": "prod", 71 | "amax": "max", 72 | "maximum": "max", 73 | "amin": "min", 74 | "minimum": "min", 75 | "split": "array", 76 | "splice": "array", 77 | "sorted": "sort", 78 | "asort": "sort", 79 | "asorted": "sort", 80 | "rsorted": "sort", 81 | "dsort": "sort", 82 | "dsorted": "rsort", 83 | } 84 | 85 | _alias_builtin = { 86 | all: "all", 87 | any: "any", 88 | len: "len", 89 | max: "max", 90 | min: "min", 91 | sum: "sum", 92 | sorted: "sort", 93 | slice: "array", 94 | list: "array", 95 | } 96 | 97 | 98 | _alias_numpy = { 99 | np.add: "sum", 100 | np.sum: "sum", 101 | np.any: "any", 102 | np.all: "all", 103 | np.multiply: "prod", 104 | np.prod: "prod", 105 | np.amin: "min", 106 | np.min: "min", 107 | np.minimum: "min", 108 | np.amax: "max", 109 | np.max: "max", 110 | np.maximum: "max", 111 | np.argmax: "argmax", 112 | np.argmin: "argmin", 113 | np.mean: "mean", 114 | np.std: "std", 115 | np.var: "var", 116 | np.array: "array", 117 | np.asarray: "array", 118 | np.sort: "sort", 119 | np.cumsum: "cumsum", 120 | np.cumprod: "cumprod", 121 | np.nansum: "nansum", 122 | np.nanprod: "nanprod", 123 | np.nanmean: "nanmean", 124 | np.nanvar: "nanvar", 125 | np.nanmax: "nanmax", 126 | np.nanmin: "nanmin", 127 | np.nanstd: "nanstd", 128 | np.nanargmax: "nanargmax", 129 | np.nanargmin: "nanargmin", 130 | np.nancumsum: "nancumsum", 131 | } 132 | 133 | 134 | def get_aliasing(*extra): 135 | """ 136 | Assembles a dictionary that maps both strings and functions to a list of supported function names. 137 | 138 | Examples: 139 | alias['add'] = 'sum' 140 | alias[sorted] = 'sort' 141 | 142 | This function should only be called during import. 
143 | """ 144 | alias = dict((k, k) for k in funcs_common) 145 | alias.update(_alias_str) 146 | alias.update((fn, fn) for fn in _alias_builtin.values()) 147 | alias.update(_alias_builtin) 148 | for d in extra: 149 | alias.update(d) 150 | alias.update((k, k) for k in set(alias.values())) 151 | # Treat nan-functions as firstclass member and add them directly 152 | for key in set(alias.values()): 153 | if key not in funcs_no_separate_nan and not key.startswith("nan"): 154 | key = "nan" + key 155 | alias[key] = key 156 | return alias 157 | 158 | 159 | aliasing_py = get_aliasing() 160 | aliasing = get_aliasing(_alias_numpy) 161 | 162 | 163 | def get_func(func, aliasing, implementations): 164 | """Return the key of a found implementation or the func itself""" 165 | try: 166 | func_str = aliasing[func] 167 | except KeyError: 168 | if callable(func): 169 | return func 170 | else: 171 | if func_str in implementations: 172 | return func_str 173 | if func_str.startswith("nan") and func_str[3:] in funcs_no_separate_nan: 174 | raise ValueError(f"{func_str[3:]} does not have a nan-version") 175 | else: 176 | raise NotImplementedError("No such function available") 177 | raise ValueError( 178 | f"func {func} is neither a valid function string nor a callable object" 179 | ) 180 | 181 | 182 | def check_boolean(x): 183 | if x not in (0, 1): 184 | raise ValueError("Value not boolean") 185 | 186 | 187 | _next_int_dtype = dict( 188 | bool=np.int8, 189 | uint8=np.int16, 190 | int8=np.int16, 191 | uint16=np.int32, 192 | int16=np.int32, 193 | uint32=np.int64, 194 | int32=np.int64, 195 | ) 196 | 197 | _next_float_dtype = dict( 198 | float16=np.float32, 199 | float32=np.float64, 200 | float64=np.complex64, 201 | complex64=np.complex128, 202 | ) 203 | 204 | 205 | def minimum_dtype(x, dtype=np.bool_): 206 | """ 207 | Returns the "most basic" dtype which represents `x` properly, which provides at least the same 208 | value range as the specified dtype. 
209 | """ 210 | 211 | def check_type(x, dtype): 212 | try: 213 | with np.errstate(invalid="ignore"): 214 | converted = np.array(x).astype(dtype) 215 | except (ValueError, OverflowError, RuntimeWarning): 216 | return False 217 | # False if some overflow has happened 218 | return converted == x or np.isnan(x) 219 | 220 | def type_loop(x, dtype, dtype_dict, default=None): 221 | while True: 222 | try: 223 | dtype = np.dtype(dtype_dict[dtype.name]) 224 | if check_type(x, dtype): 225 | return np.dtype(dtype) 226 | except KeyError: 227 | if default is not None: 228 | return np.dtype(default) 229 | raise ValueError(f"Can not determine dtype of {x!r}") 230 | 231 | dtype = np.dtype(dtype) 232 | if check_type(x, dtype): 233 | return dtype 234 | 235 | if np.issubdtype(dtype, np.inexact): 236 | return type_loop(x, dtype, _next_float_dtype) 237 | else: 238 | return type_loop(x, dtype, _next_int_dtype, default=np.float32) 239 | 240 | 241 | def minimum_dtype_scalar(x, dtype, a): 242 | if dtype is None: 243 | dtype = np.dtype(type(a)) if isinstance(a, (int, float)) else a.dtype 244 | return minimum_dtype(x, dtype) 245 | 246 | 247 | _forced_types = { 248 | "array": object, 249 | "all": bool, 250 | "any": bool, 251 | "nanall": bool, 252 | "nanany": bool, 253 | "len": np.int64, 254 | "nanlen": np.int64, 255 | "allnan": bool, 256 | "anynan": bool, 257 | "argmax": np.int64, 258 | "argmin": np.int64, 259 | "nanargmin": np.int64, 260 | "nanargmax": np.int64, 261 | } 262 | if platform.architecture()[0] == "32bit": 263 | _forced_types = { 264 | "array": object, 265 | "all": bool, 266 | "any": bool, 267 | "nanall": bool, 268 | "nanany": bool, 269 | "len": np.int32, 270 | "nanlen": np.int32, 271 | "allnan": bool, 272 | "anynan": bool, 273 | "argmax": np.int32, 274 | "argmin": np.int32, 275 | "nanargmin": np.int32, 276 | "nanargmax": np.int32, 277 | } 278 | _forced_float_types = {"mean", "var", "std", "nanmean", "nanvar", "nanstd"} 279 | _forced_same_type = { 280 | "min", 281 | "max", 282 | "first", 283 | "last", 284 | "nanmin", 285 | "nanmax", 286 | "nanfirst", 287 | "nanlast", 288 | } 289 | 290 | 291 | def check_dtype(dtype, func_str, a, n): 292 | if np.isscalar(a) or not a.shape: 293 | if func_str not in ("sum", "prod", "len"): 294 | raise ValueError( 295 | "scalar inputs are supported only for 'sum', 'prod' and 'len'" 296 | ) 297 | a_dtype = np.dtype(type(a)) 298 | else: 299 | a_dtype = a.dtype 300 | 301 | if dtype is not None: 302 | # dtype set by the user 303 | # Careful here: np.bool != np.bool_ ! 
304 |         if np.issubdtype(dtype, np.bool_) and not (
305 |             "all" in func_str or "any" in func_str
306 |         ):
307 |             raise TypeError(
308 |                 f"function {func_str} requires a more complex datatype than bool"
309 |             )
310 |         if not np.issubdtype(dtype, np.integer) and func_str in ("len", "nanlen"):
311 |             raise TypeError(f"function {func_str} requires an integer datatype")
312 |         # TODO: Maybe have some more checks here
313 |         return np.dtype(dtype)
314 |     else:
315 |         try:
316 |             return np.dtype(_forced_types[func_str])
317 |         except KeyError:
318 |             if func_str in _forced_float_types:
319 |                 if np.issubdtype(a_dtype, np.floating):
320 |                     return a_dtype
321 |                 else:
322 |                     return np.dtype(np.float64)
323 |             else:
324 |                 if func_str == "sum":
325 |                     # Try to guess the minimally required int size
326 |                     if np.issubdtype(a_dtype, np.int64):
327 |                         # It's not getting bigger anymore
328 |                         # TODO: strictly speaking it might need float
329 |                         return np.dtype(np.int64)
330 |                     elif np.issubdtype(a_dtype, np.integer):
331 |                         maxval = np.iinfo(a_dtype).max * n
332 |                         return minimum_dtype(maxval, a_dtype)
333 |                     elif np.issubdtype(a_dtype, np.bool_):
334 |                         return minimum_dtype(n, a_dtype)
335 |                     else:
336 |                         # floating, inexact, whatever
337 |                         return a_dtype
338 |                 elif func_str in _forced_same_type:
339 |                     return a_dtype
340 |                 else:
341 |                     if np.issubdtype(a_dtype, np.integer):
342 |                         return np.dtype(np.int64)
343 |                     else:
344 |                         return a_dtype
345 | 
346 | 
347 | def minval(fill_value, dtype):
348 |     dtype = minimum_dtype(fill_value, dtype)
349 |     if issubclass(dtype.type, np.floating):
350 |         return -np.inf
351 |     if issubclass(dtype.type, np.integer):
352 |         return np.iinfo(dtype).min
353 |     return np.finfo(dtype).min
354 | 
355 | 
356 | def maxval(fill_value, dtype):
357 |     dtype = minimum_dtype(fill_value, dtype)
358 |     if issubclass(dtype.type, np.floating):
359 |         return np.inf
360 |     if issubclass(dtype.type, np.integer):
361 |         return np.iinfo(dtype).max
362 |     return np.finfo(dtype).max
363 | 
364 | 
365 | def check_fill_value(fill_value, dtype, func=None):
366 |     if func in ("all", "any", "allnan", "anynan"):
367 |         check_boolean(fill_value)
368 |     else:
369 |         try:
370 |             return dtype.type(fill_value)
371 |         except ValueError:
372 |             raise ValueError(
373 |                 f"fill_value must be convertible into {dtype.type.__name__}"
374 |             )
375 | 
376 | 
377 | def check_group_idx(group_idx, a=None, check_min=True):
378 |     if a is not None and group_idx.size != a.size:
379 |         raise ValueError("The size of group_idx must be the same as a.size")
380 |     if not issubclass(group_idx.dtype.type, np.integer):
381 |         raise TypeError("group_idx must be of integer type")
382 |     if check_min and np.min(group_idx) < 0:
383 |         raise ValueError("group_idx contains negative indices")
384 | 
385 | 
386 | def _ravel_group_idx(group_idx, a, axis, size, order, method="ravel"):
387 |     ndim_a = a.ndim
388 |     # Create the broadcast-ready multidimensional indexing.
389 |     # Note the user could do this themselves, so this is
390 |     # very much just a convenience.
391 |     size_in = int(np.max(group_idx)) + 1 if size is None else size
392 |     group_idx_in = group_idx
393 |     group_idx = []
394 |     size = []
395 |     for ii, s in enumerate(a.shape):
396 |         if method == "ravel":
397 |             ii_idx = group_idx_in if ii == axis else np.arange(s)
398 |             ii_shape = [1] * ndim_a
399 |             ii_shape[ii] = s
400 |             group_idx.append(ii_idx.reshape(ii_shape))
401 |         size.append(size_in if ii == axis else s)
402 |     # Use the indexing, and return. It's a bit simpler than
403 |     # trying to keep all the logic below happy.
404 |     if method == "ravel":
405 |         group_idx = np.ravel_multi_index(group_idx, size, order=order, mode="raise")
406 |     elif method == "offset":
407 |         group_idx = offset_labels(group_idx_in, a.shape, axis, order, size_in)
408 |     return group_idx, size
409 | 
410 | 
411 | def offset_labels(group_idx, inshape, axis, order, size):
412 |     """
413 |     Offset group labels by dimension. This is used when we reduce over a subset of the dimensions of
414 |     ``group_idx``. It assumes that the reduction dimensions have been flattened into the last dimension.
415 |     Copied from
416 |     https://stackoverflow.com/questions/46256279/bin-elements-per-row-vectorized-2d-bincount-for-numpy
417 |     """
418 | 
419 |     newaxes = tuple(ax for ax in range(len(inshape)) if ax != axis)
420 |     group_idx = np.broadcast_to(np.expand_dims(group_idx, newaxes), inshape)
421 |     if axis not in (-1, len(inshape) - 1):
422 |         group_idx = np.moveaxis(group_idx, axis, -1)
423 |     newshape = group_idx.shape[:-1] + (-1,)
424 | 
425 |     group_idx = (
426 |         group_idx
427 |         + np.arange(np.prod(newshape[:-1]), dtype=int).reshape(newshape) * size
428 |     )
429 |     if axis not in (-1, len(inshape) - 1):
430 |         return np.moveaxis(group_idx, -1, axis)
431 |     else:
432 |         return group_idx
433 | 
434 | 
435 | def input_validation(
436 |     group_idx,
437 |     a,
438 |     size=None,
439 |     order="C",
440 |     axis=None,
441 |     ravel_group_idx=True,
442 |     check_bounds=True,
443 |     func=None,
444 | ):
445 |     """
446 |     Do some fairly extensive checking of group_idx and a, trying to give the user as much help as
447 |     possible with what is wrong. Also, convert ndim-indexing to 1d indexing.
448 |     """
449 |     if not isinstance(a, (int, float, complex)) and not is_duck_array(a):
450 |         a = np.asanyarray(a)
451 |     if not is_duck_array(group_idx):
452 |         group_idx = np.asanyarray(group_idx)
453 | 
454 |     if not np.issubdtype(group_idx.dtype, np.integer):
455 |         raise TypeError("group_idx must be of integer type")
456 | 
457 |     # This check works for multidimensional indexing as well
458 |     if check_bounds and np.any(group_idx < 0):
459 |         raise ValueError("negative indices not supported")
460 | 
461 |     ndim_idx = np.ndim(group_idx)
462 |     ndim_a = np.ndim(a)
463 | 
464 |     # Deal with the axis arg: if present, then turn 1d indexing into
465 |     # multi-dimensional indexing along the specified axis.
466 |     if axis is None:
467 |         if ndim_a > 1:
468 |             raise ValueError(
469 |                 "a must be scalar or 1 dimensional, use .ravel to flatten. Alternatively specify axis."
470 |             )
471 |     elif axis >= ndim_a or axis < -ndim_a:
472 |         raise ValueError("axis arg too large for np.ndim(a)")
473 |     else:
474 |         axis = axis if axis >= 0 else ndim_a + axis  # negative indexing
475 |         if ndim_idx > 1:
476 |             # TODO: we could support a sequence of axis values for multiple
477 |             # dimensions of group_idx.
478 |             raise NotImplementedError(
479 |                 "only 1d indexing currently supported with axis arg."
480 |             )
481 |         elif a.shape[axis] != len(group_idx):
482 |             raise ValueError("a.shape[axis] doesn't match length of group_idx.")
483 |         elif size is not None and not np.isscalar(size):
484 |             raise NotImplementedError(
485 |                 "when using axis arg, size must be None or scalar."
486 |             )
487 |     else:
488 |         is_form_3 = group_idx.ndim == 1 and a.ndim > 1 and axis is not None
489 |         orig_shape = a.shape if is_form_3 else group_idx.shape
490 |         if isinstance(func, str) and "arg" in func:
491 |             unravel_shape = orig_shape
492 |         else:
493 |             unravel_shape = None
494 | 
495 |         method = "offset" if axis == ndim_a - 1 else "ravel"
496 |         group_idx, size = _ravel_group_idx(
497 |             group_idx, a, axis, size, order, method=method
498 |         )
499 |         flat_size = np.prod(size)
500 |         ndim_idx = ndim_a
501 |         size = (
502 |             orig_shape
503 |             if is_form_3 and not callable(func) and "cum" in func
504 |             else size
505 |         )
506 |         return (
507 |             group_idx.ravel(),
508 |             a.ravel(),
509 |             flat_size,
510 |             ndim_idx,
511 |             size,
512 |             unravel_shape,
513 |         )
514 | 
515 |     if ndim_idx == 1:
516 |         if size is None:
517 |             size = int(np.max(group_idx)) + 1
518 |         else:
519 |             if not np.isscalar(size):
520 |                 raise ValueError("output size must be scalar or None")
521 |             if check_bounds and np.any(group_idx > size - 1):
522 |                 raise ValueError(f"one or more indices are too large for size {size}")
523 |         flat_size = size
524 |     else:
525 |         if size is None:
526 |             size = np.max(group_idx, axis=1).astype(int) + 1
527 |         elif np.isscalar(size):
528 |             raise ValueError(f"output size must be of length {len(group_idx)}")
529 |         elif len(size) != len(group_idx):
530 |             raise ValueError(
531 |                 f"{len(size)} sizes given, but {len(group_idx)} output dimensions specified in index"
532 |             )
533 |         if ravel_group_idx:
534 |             group_idx = np.ravel_multi_index(group_idx, size, order=order, mode="raise")
535 |         flat_size = np.prod(size)
536 | 
537 |     if not (np.ndim(a) == 0 or len(a) == group_idx.size):
538 |         raise ValueError(
539 |             "group_idx and a must be of the same length, or a can be scalar"
540 |         )
541 | 
542 |     return group_idx, a, flat_size, ndim_idx, size, None
543 | 
544 | 
545 | # General tools
546 | 
547 | 
548 | def unpack(group_idx, ret):
549 |     """
550 |     Take an aggregate packed array and uncompress it to the size of group_idx. This is equivalent to
551 |     ret[group_idx].
552 |     """
553 |     return ret[group_idx]
554 | 
555 | 
556 | def allnan(x):
557 |     return np.all(np.isnan(x))
558 | 
559 | 
560 | def anynan(x):
561 |     return np.any(np.isnan(x))
562 | 
563 | 
564 | def nanfirst(x):
565 |     return x[~np.isnan(x)][0]
566 | 
567 | 
568 | def nanlast(x):
569 |     return x[~np.isnan(x)][-1]
570 | 
571 | 
572 | def multi_arange(n):
573 |     """By example:
574 | 
575 |     #      0  1  2  3  4  5  6  7  8
576 |     n =   [0, 0, 3, 0, 0, 2, 0, 2, 1]
577 |     res = [0, 1, 2, 0, 1, 0, 1, 0]
578 | 
579 |     That is, it is equivalent to something like this:
580 | 
581 |         hstack((arange(n_i) for n_i in n))
582 | 
583 |     This version seems quite a bit faster, at least for some possible inputs, and at any rate it
584 |     encapsulates a task in a function.
585 |     """
586 |     if n.ndim != 1:
587 |         raise ValueError("n is supposed to be 1d array.")
588 | 
589 |     n_mask = n.astype(bool)
590 |     n_cumsum = np.cumsum(n)
591 |     ret = np.ones(n_cumsum[-1] + 1, dtype=int)
592 |     ret[n_cumsum[n_mask]] -= n[n_mask]
593 |     ret[0] -= 1
594 |     return np.cumsum(ret)[:-1]
595 | 
596 | 
597 | def label_contiguous_1d(X):
598 |     """
599 |     WARNING: the API for this function is liable to change!!!
600 | 
601 |     By example:
602 | 
603 |         X =      [F T T F F T F F F T T T]
604 |         result = [0 1 1 0 0 2 0 0 0 3 3 3]
605 | 
606 |     Or:
607 |         X =      [0 3 3 0 0 5 5 5 1 1 0 2]
608 |         result = [0 1 1 0 0 2 2 2 3 3 0 4]
609 | 
610 |     The ``0`` or ``False`` elements of ``X`` are labeled as ``0`` in the output. If ``X`` is a boolean
611 |     array, each contiguous block of ``True`` is given an integer label; if ``X`` is not boolean, then
612 |     each contiguous block of identical values is given an integer label. Integer labels are 1, 2, 3,
613 |     ... (i.e. they start at 1 and increase by 1 for each block, with no skipped numbers).
614 |     """
615 | 
616 |     if X.ndim != 1:
617 |         raise ValueError("this is for 1d masks only.")
618 | 
619 |     is_start = np.empty(len(X), dtype=bool)
620 |     is_start[0] = X[0]  # True if X[0] is True or non-zero
621 | 
622 |     if X.dtype.kind == "b":
623 |         is_start[1:] = ~X[:-1] & X[1:]
624 |         M = X
625 |     else:
626 |         M = X.astype(bool)
627 |         is_start[1:] = X[:-1] != X[1:]
628 |     is_start[~M] = False
629 | 
630 |     L = np.cumsum(is_start)
631 |     L[~M] = 0
632 |     return L
633 | 
634 | 
635 | def relabel_groups_unique(group_idx):
636 |     """
637 |     See also ``relabel_groups_masked``.
638 | 
639 |     group_idx: [0 3 3 3 0 2 5 2 0 1 1 0 3 5 5]
640 |     ret:       [0 3 3 3 0 2 4 2 0 1 1 0 3 4 4]
641 | 
642 |     Description of above: unique groups in the input were ``1, 2, 3, 5``, i.e.
643 |     ``4`` was missing, so group 5 was relabeled to be ``4``.
644 |     Relabeling maintains order, just "compressing" the higher numbers
645 |     to fill gaps.
646 |     """
647 | 
648 |     keep_group = np.zeros(np.max(group_idx) + 1, dtype=bool)
649 |     keep_group[0] = True
650 |     keep_group[group_idx] = True
651 |     return relabel_groups_masked(group_idx, keep_group)
652 | 
653 | 
654 | def relabel_groups_masked(group_idx, keep_group):
655 |     """
656 |     group_idx:  [0 3 3 3 0 2 5 2 0 1 1 0 3 5 5]
657 | 
658 |                  0 1 2 3 4 5
659 |     keep_group: [0 1 0 1 1 1]
660 | 
661 |     ret:        [0 2 2 2 0 0 4 0 0 1 1 0 2 4 4]
662 | 
663 |     Description of above in words: remove group 2, and relabel groups 3, 4, and 5 to be 2, 3 and 4
664 |     respectively, in order to fill the gap. Note that group 4 was never used in the input group_idx,
665 |     but the user-supplied mask said to keep group 4, so group 5 is only moved up by one place to fill
666 |     the gap created by removing group 2.
667 | 
668 |     That is, the mask describes which groups to remove; the remaining groups are relabeled to remove
669 |     the gaps created by the falsy elements in ``keep_group``. Note that ``keep_group[0]`` has no
670 |     particular meaning because it refers to the zero group which cannot be "removed".
671 | 
672 |     ``keep_group`` should be bool and ``group_idx`` int. Values in ``group_idx`` can be in any order.
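
    A doctest-style sketch of exactly the arrays above (added for
    illustration):

        >>> group_idx = np.array([0, 3, 3, 3, 0, 2, 5, 2, 0, 1, 1, 0, 3, 5, 5])
        >>> keep_group = np.array([0, 1, 0, 1, 1, 1], dtype=bool)
        >>> relabel_groups_masked(group_idx, keep_group)
        array([0, 2, 2, 2, 0, 0, 4, 0, 0, 1, 1, 0, 2, 4, 4])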
673 | """ 674 | 675 | keep_group = keep_group.astype(bool, copy=not keep_group[0]) 676 | if not keep_group[0]: # ensuring keep_group[0] is True makes life easier 677 | keep_group[0] = True 678 | 679 | relabel = np.zeros(keep_group.size, dtype=group_idx.dtype) 680 | relabel[keep_group] = np.arange(np.count_nonzero(keep_group)) 681 | return relabel[group_idx] 682 | 683 | 684 | def is_duck_array(value): 685 | """This function was copied from xarray/core/utils.py under the terms of Xarray's Apache-2 license.""" 686 | 687 | if isinstance(value, np.ndarray): 688 | return True 689 | return ( 690 | hasattr(value, "ndim") 691 | and hasattr(value, "shape") 692 | and hasattr(value, "dtype") 693 | and hasattr(value, "__array_function__") 694 | and hasattr(value, "__array_ufunc__") 695 | ) 696 | 697 | 698 | def iscomplexobj(x): 699 | """Copied from np.iscomplexobj so that we place fewer requirements on duck array types.""" 700 | 701 | try: 702 | dtype = x.dtype 703 | type_ = dtype.type 704 | except AttributeError: 705 | type_ = np.asarray(x).dtype.type 706 | return issubclass(type_, np.complexfloating) 707 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "numpy-groupies" 7 | description = "Optimised tools for group-indexing operations: aggregated sum and more." 8 | dynamic = ["version"] 9 | readme = {file = "README.md", content-type = "text/markdown"} 10 | license = {file = "LICENSE.txt"} 11 | authors = [ 12 | {name = "Michael Löffler", email = "ml@occam.com.ua"}, 13 | {name = "Daniel Manson", email = "danielmanson.uk@gmail.com"} 14 | ] 15 | maintainers = [ 16 | {name = "Deepak Cherian", email = "dcherian@ucar.edu"} 17 | ] 18 | classifiers = [ 19 | "Development Status :: 4 - Beta", 20 | "Intended Audience :: Science/Research", 21 | "Intended Audience :: Developers", 22 | "Operating System :: OS Independent", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.9", 25 | "Programming Language :: Python :: 3.10", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Topic :: Scientific/Engineering", 29 | "Topic :: Software Development :: Libraries", 30 | "License :: OSI Approved :: BSD License", 31 | ] 32 | keywords = ["accumarray", "aggregate", "groupby", "grouping", "indexing"] 33 | requires-python = ">=3.9" 34 | dependencies = ["numpy"] 35 | 36 | [project.optional-dependencies] 37 | fast = [ 38 | "numba", 39 | ] 40 | dev = [ 41 | "pytest", 42 | "numba", 43 | "pandas", 44 | ] 45 | 46 | [project.urls] 47 | source = "https://github.com/ml31415/numpy-groupies" 48 | tracker = "https://github.com/ml31415/numpy-groupies/issues" 49 | 50 | [tool.black] 51 | line-length = 120 52 | 53 | [tool.isort] 54 | profile = "black" 55 | honor_noqa = true 56 | 57 | [tool.setuptools.packages.find] 58 | include = ["numpy_groupies*"] 59 | 60 | [tool.setuptools_scm] 61 | write_to = "numpy_groupies/_version.py" 62 | 63 | [tool.ruff.lint.per-file-ignores] 64 | "__init__.py" = ["F401"] 65 | 66 | [tool.codespell] 67 | ignore-words-list = "nd," 68 | ignore-regex = ".*codespell-ignore$" 69 | --------------------------------------------------------------------------------