├── tests
├── __init__.py
├── RunningStats.h
├── RunningRegression.h
├── main.cpp
├── RunningRegression.cpp
├── RunningStats.cpp
├── benchmark.py
├── __main__.py
└── test_runstats.py
├── MANIFEST.in
├── mypy.ini
├── docs
├── index.rst
├── _static
│ └── gj-logo.png
├── _templates
│ └── gumroad.html
├── api.rst
├── Makefile
├── make.bat
└── conf.py
├── requirements.txt
├── .gitignore
├── runstats
├── __init__.py
├── core.pxd
└── core.py
├── LICENSE
├── .github
└── workflows
│ ├── release.yml
│ └── integration.yml
├── tox.ini
├── setup.py
├── README.rst
└── .pylintrc
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst LICENSE
2 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 |
3 | [mypy-runstats._core]
4 | ignore_missing_imports = True
5 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 | .. toctree::
4 | :hidden:
5 |
6 | api
7 |
--------------------------------------------------------------------------------
/docs/_static/gj-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grantjenks/python-runstats/HEAD/docs/_static/gj-logo.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | blue
3 | coverage
4 | cython
5 | doc8
6 | flake8
7 | isort
8 | mypy
9 | pylint
10 | pytest
11 | pytest-cov
12 | pytest-xdist
13 | rstcheck
14 | sphinx
15 | tox
16 | twine
17 | wheel
18 |
--------------------------------------------------------------------------------
/docs/_templates/gumroad.html:
--------------------------------------------------------------------------------
1 | <h3 class="donation">Give Support</h3>
2 | <p>If you or your organization uses RunStats, consider financial support:</p>
3 |
4 | <a href="https://gum.co/runstats">Give to Python RunStats</a>
5 |
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated files
2 | *.py[co]
3 | *.c
4 | *.so
5 |
6 | # Virtual environment directories
7 | /env*/
8 |
9 | # Test files and directories
10 | .coverage
11 | .pytest_cache/
12 | /.tox/
13 |
14 | # Setup directories
15 | /build/
16 | /dist/
17 | /runstats.egg-info/
18 | /docs/_build/
19 |
20 | # macOS metadata
21 | .DS_Store
22 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: runstats
2 |
3 |
4 | Statistics
5 | ..........
6 |
7 | .. autoclass:: runstats.Statistics
8 | :members:
9 | :special-members:
10 |
11 |
12 | Regression
13 | ..........
14 |
15 | .. autoclass:: runstats.Regression
16 | :members:
17 | :special-members:
18 |
19 |
20 | ExponentialStatistics
21 | .....................
22 |
23 | .. autoclass:: runstats.ExponentialStatistics
24 | :members:
25 | :special-members:
26 |
--------------------------------------------------------------------------------
/runstats/__init__.py:
--------------------------------------------------------------------------------
"""
Python RunStats API
===================

Online statistics and regression.

"""

# Prefer the Cython-compiled extension module (``_core``) when it was built
# by setup.py; otherwise fall back to the pure-Python implementation
# (``core``) with the same public classes.
try:
    from ._core import ExponentialStatistics, Regression, Statistics
except ImportError:  # pragma: no cover
    from .core import ExponentialStatistics, Regression, Statistics

__all__ = ['Statistics', 'Regression', 'ExponentialStatistics']
__title__ = 'runstats'
__version__ = '2.0.0'
__author__ = 'Grant Jenks'
__license__ = 'Apache 2.0'
__copyright__ = '2013-2021, Grant Jenks'
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2013-2021 Grant Jenks
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use
4 | this file except in compliance with the License. You may obtain a copy of the
5 | License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software distributed
10 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11 | CONDITIONS OF ANY KIND, either express or implied. See the License for the
12 | specific language governing permissions and limitations under the License.
13 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/tests/RunningStats.h:
--------------------------------------------------------------------------------
1 | // Copied from https://www.johndcook.com/blog/skewness_kurtosis/
2 |
3 | #ifndef RUNNINGSTATS_H
4 | #define RUNNINGSTATS_H
5 |
6 | class RunningStats
7 | {
8 | public:
9 | RunningStats();
10 | void Clear();
11 | void Push(double x);
12 | long long NumDataValues() const;
13 | double Mean() const;
14 | double Variance() const;
15 | double StandardDeviation() const;
16 | double Skewness() const;
17 | double Kurtosis() const;
18 |
19 | friend RunningStats operator+(const RunningStats a, const RunningStats b);
20 | RunningStats& operator+=(const RunningStats &rhs);
21 |
22 | private:
23 | long long n;
24 | double M1, M2, M3, M4;
25 | };
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/tests/RunningRegression.h:
--------------------------------------------------------------------------------
1 | // Copied from https://www.johndcook.com/blog/running_regression/
2 |
3 | #ifndef RUNNINGREGRESSION
4 | #define RUNNINGREGRESSION
5 |
6 | #include "RunningStats.h"
7 |
8 | class RunningRegression
9 | {
10 | public:
11 | RunningRegression();
12 | void Clear();
13 | void Push(double x, double y);
14 | long long NumDataValues() const;
15 | double Slope() const;
16 | double Intercept() const;
17 | double Correlation() const;
18 |
19 | friend RunningRegression operator+(
20 | const RunningRegression a, const RunningRegression b);
21 | RunningRegression& operator+=(const RunningRegression &rhs);
22 |
23 | private:
24 | RunningStats x_stats;
25 | RunningStats y_stats;
26 | double S_xy;
27 | long long n;
28 | };
29 |
30 | #endif
31 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/tests/main.cpp:
--------------------------------------------------------------------------------
1 | // Test program for C++ counterpart.
2 | //
3 | // Compile with:
4 | //
5 | // $ g++ main.cpp RunningStats.cpp RunningRegression.cpp
6 | //
7 | // Test with:
8 | //
9 | // $ ./a.out 5 4 3 2 1
10 | // $ python -m tests 5 4 3 2 1
11 | //
12 |
13 | #include <string>
14 | #include <cstdio>
15 |
16 | #include "RunningStats.h"
17 | #include "RunningRegression.h"
18 |
19 | int main(int argc, char ** argv)
20 | {
21 | RunningStats stats = RunningStats();
22 |
23 | for (int index = 1; index < argc; index += 1)
24 | {
25 | double value = std::stod(std::string(argv[index]));
26 | stats.Push(value);
27 | }
28 |
29 | printf("Statistics\n");
30 | printf("Count: %lld\n", stats.NumDataValues());
31 | printf("Mean: %f\n", stats.Mean());
32 | printf("Variance: %f\n", stats.Variance());
33 | printf("StdDev: %f\n", stats.StandardDeviation());
34 | printf("Skewness: %f\n", stats.Skewness());
35 | printf("Kurtosis: %f\n", stats.Kurtosis());
36 |
37 | RunningRegression regr = RunningRegression();
38 |
39 | for (int index = 1; index < argc; index += 1)
40 | {
41 | double value = std::stod(std::string(argv[index]));
42 | regr.Push(index, value);
43 | }
44 |
45 | printf("\n");
46 | printf("Regression\n");
47 | printf("Count: %lld\n", regr.NumDataValues());
48 | printf("Slope: %f\n", regr.Slope());
49 | printf("Intercept: %f\n", regr.Intercept());
50 | printf("Correlation: %f\n", regr.Correlation());
51 |
52 | return 0;
53 | }
54 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 |
3 | on:
4 | push:
5 | tags:
6 | - v*
7 |
8 | jobs:
9 |
10 | builds:
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | matrix:
14 | os: [ubuntu-20.04, windows-2019, macos-10.15]
15 |
16 | steps:
17 | - uses: actions/checkout@v2
18 |
19 | - uses: actions/setup-python@v2
20 |
21 | - name: Install cibuildwheel
22 | run: python -m pip install cibuildwheel==1.11.1.post1
23 |
24 | - name: Build wheels
25 | run: python -m cibuildwheel --output-dir wheelhouse
26 | env:
27 | CIBW_BEFORE_BUILD: pip install cython
28 | CIBW_TEST_REQUIRES: pytest pytest-cov pytest-xdist
29 | CIBW_TEST_COMMAND: pytest {project}/tests && python {project}/tests/benchmark.py
30 | CIBW_SKIP: pp*
31 |
32 | - uses: actions/upload-artifact@v2
33 | with:
34 | name: wheelhouse
35 | path: ./wheelhouse/*.whl
36 |
37 | upload:
38 | needs: builds
39 | runs-on: ubuntu-latest
40 |
41 | steps:
42 | - uses: actions/checkout@v2
43 |
44 | - name: Set up Python
45 | uses: actions/setup-python@v2
46 | with:
47 | python-version: 3.9
48 |
49 | - name: Install dependencies
50 | run: |
51 | pip install --upgrade pip
52 | pip install -r requirements.txt
53 |
54 | - name: Create source dist
55 | run: python setup.py sdist
56 |
57 | - name: Stage wheels
58 | uses: actions/download-artifact@v2
59 | with:
60 | name: wheelhouse
61 | path: wheelhouse
62 | - run: mv -v wheelhouse/* dist/
63 |
64 | - name: Publish package
65 | uses: pypa/gh-action-pypi-publish@release/v1
66 | with:
67 | user: __token__
68 | password: ${{ secrets.PYPI_API_TOKEN }}
69 |
--------------------------------------------------------------------------------
/tests/RunningRegression.cpp:
--------------------------------------------------------------------------------
1 | // Copied from https://www.johndcook.com/blog/running_regression/
2 |
3 | #include "RunningRegression.h"
4 |
5 | RunningRegression::RunningRegression()
6 | {
7 | Clear();
8 | }
9 |
10 | void RunningRegression::Clear()
11 | {
12 | x_stats.Clear();
13 | y_stats.Clear();
14 | S_xy = 0.0;
15 | n = 0;
16 | }
17 |
18 | void RunningRegression::Push(double x, double y)
19 | {
20 | S_xy += (x_stats.Mean() -x)*(y_stats.Mean() - y)*double(n)/double(n+1);
21 |
22 | x_stats.Push(x);
23 | y_stats.Push(y);
24 | n++;
25 | }
26 |
27 | long long RunningRegression::NumDataValues() const
28 | {
29 | return n;
30 | }
31 |
32 | double RunningRegression::Slope() const
33 | {
34 | double S_xx = x_stats.Variance()*(n - 1.0);
35 |
36 | return S_xy / S_xx;
37 | }
38 |
39 | double RunningRegression::Intercept() const
40 | {
41 | return y_stats.Mean() - Slope()*x_stats.Mean();
42 | }
43 |
44 | double RunningRegression::Correlation() const
45 | {
46 | double t = x_stats.StandardDeviation() * y_stats.StandardDeviation();
47 | return S_xy / ( (n-1) * t );
48 | }
49 |
50 | RunningRegression operator+(const RunningRegression a, const RunningRegression b)
51 | {
52 | RunningRegression combined;
53 |
54 | combined.x_stats = a.x_stats + b.x_stats;
55 | combined.y_stats = a.y_stats + b.y_stats;
56 | combined.n = a.n + b.n;
57 |
58 | double delta_x = b.x_stats.Mean() - a.x_stats.Mean();
59 | double delta_y = b.y_stats.Mean() - a.y_stats.Mean();
60 | combined.S_xy = a.S_xy + b.S_xy +
61 | double(a.n*b.n)*delta_x*delta_y/double(combined.n);
62 |
63 | return combined;
64 | }
65 |
66 | RunningRegression& RunningRegression::operator+=(const RunningRegression &rhs)
67 | {
68 | RunningRegression combined = *this + rhs;
69 | *this = combined;
70 | return *this;
71 | }
72 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist=bluecheck,doc8,docs,isortcheck,flake8,mypy,pylint,rstcheck,py36,py37,py38,py39
3 | skip_missing_interpreters=True
4 |
5 | [testenv]
6 | commands=pytest
7 | deps=
8 | pytest
9 | pytest-cov
10 | pytest-xdist
11 |
12 | [testenv:blue]
13 | commands=blue {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests
14 | deps=blue
15 |
16 | [testenv:bluecheck]
17 | commands=blue --check {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests
18 | deps=blue
19 |
20 | [testenv:doc8]
21 | deps=doc8
22 | commands=doc8 docs --ignore-path docs/_build
23 |
24 | [testenv:docs]
25 | allowlist_externals=make
26 | changedir=docs
27 | commands=make html
28 | deps=
29 | sphinx
30 |
31 | [testenv:flake8]
32 | commands=flake8 {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests
33 | deps=flake8
34 |
35 | [testenv:isort]
36 | commands=isort {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests
37 | deps=isort
38 |
39 | [testenv:isortcheck]
40 | commands=isort --check {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests
41 | deps=isort
42 |
43 | [testenv:mypy]
44 | commands=mypy {toxinidir}/runstats
45 | deps=mypy
46 |
47 | [testenv:pylint]
48 | commands=pylint {toxinidir}/runstats
49 | deps=
50 | pylint
51 |
52 | [testenv:rstcheck]
53 | commands=rstcheck {toxinidir}/README.rst
54 | deps=rstcheck
55 |
56 | [testenv:uploaddocs]
57 | allowlist_externals=rsync
58 | changedir=docs
59 | commands=
60 | rsync -azP --stats --delete _build/html/ \
61 | grantjenks.com:/srv/www/www.grantjenks.com/public/docs/runstats/
62 |
63 | [isort]
64 | multi_line_output = 3
65 | include_trailing_comma = True
66 | force_grid_wrap = 0
67 | use_parentheses = True
68 | ensure_newline_before_comments = True
69 | line_length = 79
70 |
71 | [pytest]
72 | addopts=
73 | -n auto
74 | --cov-branch
75 | --cov-fail-under=100
76 | --cov-report=term-missing
77 | --cov=runstats
78 | --doctest-glob="*.rst"
79 | testpaths=docs runstats tests README.rst
80 |
81 | [doc8]
82 | # ignore=D000
83 |
84 | [flake8]
85 | max-line-length=120
86 |
--------------------------------------------------------------------------------
/.github/workflows/integration.yml:
--------------------------------------------------------------------------------
1 | name: integration
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | branches:
9 | - master
10 |
11 | jobs:
12 |
13 | checks:
14 | runs-on: ubuntu-latest
15 | strategy:
16 | max-parallel: 6
17 | matrix:
18 | check: [bluecheck, doc8, docs, isortcheck, flake8, mypy, pylint, rstcheck]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python
23 | uses: actions/setup-python@v2
24 | with:
25 | python-version: 3.9
26 | - name: Install dependencies
27 | run: |
28 | pip install --upgrade pip
29 | pip install tox
30 | - name: Run checks with tox
31 | run: |
32 | tox -e ${{ matrix.check }}
33 |
34 | tests:
35 | needs: checks
36 | runs-on: ${{ matrix.os }}
37 | strategy:
38 | max-parallel: 4
39 | matrix:
40 | os: [ubuntu-latest]
41 | python-version: [3.6, 3.7, 3.8, 3.9]
42 |
43 | steps:
44 | - uses: actions/checkout@v2
45 |
46 | - uses: actions/setup-python@v2
47 | with:
48 | python-version: ${{ matrix.python-version }}
49 |
50 | - name: Install tox
51 | run: |
52 | pip install --upgrade pip
53 | pip install tox
54 |
55 | - name: Test with tox
56 | run: tox -e py
57 |
58 | - name: Install cython and pytest
59 | run: pip install cython pytest pytest-cov pytest-xdist
60 |
61 | - name: Build Cython extension
62 | run: python setup.py build_ext --inplace
63 |
64 | - name: Test with pytest
65 | run: pytest
66 |
67 | - name: Run benchmark
68 | if: matrix.os == 'ubuntu-latest'
69 | run: |
70 | pip install -e .
71 | python tests/benchmark.py
72 |
73 | builds:
74 | needs: tests
75 | runs-on: ${{ matrix.os }}
76 | strategy:
77 | matrix:
78 | os: [ubuntu-20.04, windows-2019, macos-10.15]
79 |
80 | steps:
81 | - uses: actions/checkout@v2
82 |
83 | - uses: actions/setup-python@v2
84 |
85 | - name: Install cibuildwheel
86 | run: python -m pip install cibuildwheel==1.11.1.post1
87 |
88 | - name: Build wheels
89 | run: python -m cibuildwheel --output-dir wheelhouse
90 | env:
91 | CIBW_BEFORE_BUILD: pip install cython
92 | CIBW_TEST_REQUIRES: pytest pytest-cov pytest-xdist
93 | CIBW_TEST_COMMAND: pytest {project}/tests && python {project}/tests/benchmark.py
94 | CIBW_SKIP: pp*
95 |
--------------------------------------------------------------------------------
/tests/RunningStats.cpp:
--------------------------------------------------------------------------------
1 | // Copied from https://www.johndcook.com/blog/skewness_kurtosis/
2 |
3 | #include "RunningStats.h"
4 | #include <cmath>
5 | #include <vector>
6 |
7 | RunningStats::RunningStats()
8 | {
9 | Clear();
10 | }
11 |
12 | void RunningStats::Clear()
13 | {
14 | n = 0;
15 | M1 = M2 = M3 = M4 = 0.0;
16 | }
17 |
18 | void RunningStats::Push(double x)
19 | {
20 | double delta, delta_n, delta_n2, term1;
21 |
22 | long long n1 = n;
23 | n++;
24 | delta = x - M1;
25 | delta_n = delta / n;
26 | delta_n2 = delta_n * delta_n;
27 | term1 = delta * delta_n * n1;
28 | M1 += delta_n;
29 | M4 += term1 * delta_n2 * (n*n - 3*n + 3) + 6 * delta_n2 * M2 - 4 * delta_n * M3;
30 | M3 += term1 * delta_n * (n - 2) - 3 * delta_n * M2;
31 | M2 += term1;
32 | }
33 |
34 | long long RunningStats::NumDataValues() const
35 | {
36 | return n;
37 | }
38 |
39 | double RunningStats::Mean() const
40 | {
41 | return M1;
42 | }
43 |
44 | double RunningStats::Variance() const
45 | {
46 | return M2/(n-1.0);
47 | }
48 |
49 | double RunningStats::StandardDeviation() const
50 | {
51 | return sqrt( Variance() );
52 | }
53 |
54 | double RunningStats::Skewness() const
55 | {
56 | return sqrt(double(n)) * M3/ pow(M2, 1.5);
57 | }
58 |
59 | double RunningStats::Kurtosis() const
60 | {
61 | return double(n)*M4 / (M2*M2) - 3.0;
62 | }
63 |
64 | RunningStats operator+(const RunningStats a, const RunningStats b)
65 | {
66 | RunningStats combined;
67 |
68 | combined.n = a.n + b.n;
69 |
70 | double delta = b.M1 - a.M1;
71 | double delta2 = delta*delta;
72 | double delta3 = delta*delta2;
73 | double delta4 = delta2*delta2;
74 |
75 | combined.M1 = (a.n*a.M1 + b.n*b.M1) / combined.n;
76 |
77 | combined.M2 = a.M2 + b.M2 +
78 | delta2 * a.n * b.n / combined.n;
79 |
80 | combined.M3 = a.M3 + b.M3 +
81 | delta3 * a.n * b.n * (a.n - b.n)/(combined.n*combined.n);
82 | combined.M3 += 3.0*delta * (a.n*b.M2 - b.n*a.M2) / combined.n;
83 |
84 | combined.M4 = a.M4 + b.M4 + delta4*a.n*b.n * (a.n*a.n - a.n*b.n + b.n*b.n) /
85 | (combined.n*combined.n*combined.n);
86 | combined.M4 += 6.0*delta2 * (a.n*a.n*b.M2 + b.n*b.n*a.M2)/(combined.n*combined.n) +
87 | 4.0*delta*(a.n*b.M3 - b.n*a.M3) / combined.n;
88 |
89 | return combined;
90 | }
91 |
92 | RunningStats& RunningStats::operator+=(const RunningStats& rhs)
93 | {
94 | RunningStats combined = *this + rhs;
95 | *this = combined;
96 | return *this;
97 | }
98 |
--------------------------------------------------------------------------------
/tests/benchmark.py:
--------------------------------------------------------------------------------
1 | """Benchmark core versus fast implementations.
2 |
3 | """
4 |
5 | from __future__ import print_function
6 |
7 | import random
8 | import timeit
9 |
10 | random.seed(0)
11 | VALUES = [random.random() for _ in range(int(1e4))]
12 | PAIRS = [(pos, pos + (val * 2 - 1)) for pos, val in enumerate(VALUES)]
13 |
14 |
def _bench(setup, stmt):
    """Time *stmt* (after running *setup*) and return one sample.

    Mirrors the original measurement scheme exactly: each sample runs the
    statement once (``number=1``), seven samples are collected
    (``repeat=7``), and the third sample (index 2) is reported.
    """
    return timeit.repeat(setup=setup, stmt=stmt, number=1, repeat=7)[2]


def main():
    """Benchmark runstats.core (pure Python) vs runstats._core (Cython).

    Each pair of measurements differs only in which module supplies the
    class, so the setup snippets are templates parameterized on the
    module name.
    """
    stats_setup = '''
from __main__ import VALUES
from runstats.{mod} import Statistics
'''
    stats_stmt = '''
stats = Statistics(VALUES)
stats.mean()
'''
    core_stats = _bench(stats_setup.format(mod='core'), stats_stmt)
    fast_stats = _bench(stats_setup.format(mod='_core'), stats_stmt)
    speedup_stats = core_stats / fast_stats - 1

    exp_setup = '''
from __main__ import VALUES
from runstats.{mod} import ExponentialStatistics
exp_stats = ExponentialStatistics()
'''
    exp_stmt = '''
for value in VALUES:
    exp_stats.push(value)
exp_stats.mean()
'''
    core_exp_stats = _bench(exp_setup.format(mod='core'), exp_stmt)
    fast_exp_stats = _bench(exp_setup.format(mod='_core'), exp_stmt)
    speedup_exp_stats = core_exp_stats / fast_exp_stats - 1

    regr_setup = '''
from __main__ import PAIRS
from runstats.{mod} import Regression
regr = Regression()
'''
    regr_stmt = '''
for pos, val in PAIRS:
    regr.push(pos, val)
regr.slope()
'''
    core_regr = _bench(regr_setup.format(mod='core'), regr_stmt)
    fast_regr = _bench(regr_setup.format(mod='_core'), regr_stmt)
    speedup_regr = core_regr / fast_regr - 1

    print('core.Statistics:', core_stats)
    print('_core.Statistics:', fast_stats)
    print(' Stats Speedup: %.2fx faster' % speedup_stats)

    print('core.ExponentialStatistics:', core_exp_stats)
    print('_core.ExponentialStatistics:', fast_exp_stats)
    print(' ExpStats Speedup: %.2fx faster' % speedup_exp_stats)

    print('core.Regression:', core_regr)
    print('_core.Regression:', fast_regr)
    print(' Regr Speedup: %.2fx faster' % speedup_regr)


if __name__ == '__main__':
    main()
123 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Package Setup for RunStats
2 |
3 | Build binary extension in-place for testing with:
4 |
5 | $ python setup.py build_ext --inplace
6 |
7 | Create annotations for optimization:
8 |
9 | $ cython -3 -a runstats/core.py
10 | $ python3 -m http.server
11 | # Open runstats/core.html in browser.
12 |
13 | """
14 |
15 | import os
16 | import shutil
17 |
18 | from setuptools import Extension, setup
19 | from setuptools.command.test import test as TestCommand
20 |
21 | import runstats
22 |
23 |
class Tox(TestCommand):
    """setuptools ``test`` command that delegates to tox.

    Registered below via ``cmdclass={'test': Tox}`` so that
    ``python setup.py test`` runs the full tox environment suite.
    """

    def finalize_options(self):
        # Clear the default test discovery options; tox drives everything.
        TestCommand.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        # Imported here so tox is only required when tests actually run.
        import sys

        import tox

        errno = tox.cmdline(self.test_args)
        # Use sys.exit rather than the site-provided exit() builtin, which
        # is intended for interactive sessions and not guaranteed in all
        # runtime configurations.
        sys.exit(errno)
35 |
36 |
37 | with open('README.rst') as reader:
38 | readme = reader.read()
39 |
40 | args = dict(
41 | name=runstats.__title__,
42 | version=runstats.__version__,
43 | description='Compute statistics and regression in one pass',
44 | long_description=readme,
45 | long_description_content_type='text/x-rst',
46 | author='Grant Jenks',
47 | author_email='contact@grantjenks.com',
48 | url='http://www.grantjenks.com/docs/runstats/',
49 | license='Apache 2.0',
50 | packages=['runstats'],
51 | python_requires='>=3.6',
52 | tests_require=['tox'],
53 | cmdclass={'test': Tox},
54 | install_requires=[],
55 | project_urls={
56 | 'Documentation': 'http://www.grantjenks.com/docs/runstats/',
57 | 'Funding': 'http://gum.co/runstats',
58 | 'Source': 'https://github.com/grantjenks/python-runstats',
59 | 'Tracker': 'https://github.com/grantjenks/python-runstats/issues',
60 | },
61 | classifiers=[
62 | 'Development Status :: 5 - Production/Stable',
63 | 'Intended Audience :: Developers',
64 | 'License :: OSI Approved :: Apache Software License',
65 | 'Natural Language :: English',
66 | 'Programming Language :: Python',
67 | 'Programming Language :: Python :: 3',
68 | 'Programming Language :: Python :: 3.6',
69 | 'Programming Language :: Python :: 3.7',
70 | 'Programming Language :: Python :: 3.8',
71 | 'Programming Language :: Python :: 3.9',
72 | 'Programming Language :: Python :: Implementation :: CPython',
73 | ],
74 | )
75 |
76 | try:
77 | from Cython.Build import cythonize
78 |
79 | # Copy files to build binary.
80 |
81 | shutil.copy2('runstats/core.py', 'runstats/_core.py')
82 | shutil.copy2('runstats/core.pxd', 'runstats/_core.pxd')
83 |
84 | # Build binary extension.
85 |
86 | ext_modules = [Extension('runstats._core', ['runstats/_core.py'])]
87 | setup(
88 | ext_modules=cythonize(ext_modules, language_level='3'),
89 | **args,
90 | )
91 |
92 | # Remove copied files for static analysis and tests.
93 |
94 | os.remove('runstats/_core.py')
95 | os.remove('runstats/_core.pxd')
96 | except Exception as exception:
97 | print('*' * 79)
98 | print(exception)
99 | print('*' * 79)
100 | print('Failed to setup runstats with Cython. See error message above.')
101 | print('Falling back to pure-Python implementation.')
102 | print('*' * 79)
103 | setup(**args)
104 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 |
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('..'))
16 | import runstats
17 |
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = 'RunStats'
22 | copyright = runstats.__copyright__
23 | author = runstats.__author__
24 |
25 | # The full version, including alpha/beta/rc tags
26 | release = runstats.__version__
27 |
28 |
29 | # -- General configuration ---------------------------------------------------
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [
35 | 'sphinx.ext.autodoc',
36 | 'sphinx.ext.todo',
37 | 'sphinx.ext.viewcode',
38 | ]
39 |
40 | # Add any paths that contain templates here, relative to this directory.
41 | templates_path = ['_templates']
42 |
43 | # List of patterns, relative to source directory, that match files and
44 | # directories to ignore when looking for source files.
45 | # This pattern also affects html_static_path and html_extra_path.
46 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
47 |
48 |
49 | # -- Options for HTML output -------------------------------------------------
50 |
51 | # The theme to use for HTML and HTML Help pages. See the documentation for
52 | # a list of builtin themes.
53 | html_theme = 'alabaster'
54 |
55 | # Theme options are theme-specific and customize the look and feel of a theme
56 | # further. For a list of options available for each theme, see the
57 | # documentation.
58 | html_theme_options = {
59 | 'logo': 'gj-logo.png',
60 | 'logo_name': True,
61 | 'logo_text_align': 'center',
62 | 'analytics_id': 'UA-19364636-2',
63 | 'show_powered_by': False,
64 | 'show_related': True,
65 | 'github_user': 'grantjenks',
66 | 'github_repo': 'python-runstats',
67 | 'github_type': 'star',
68 | }
69 |
70 | # Add any paths that contain custom static files (such as style sheets) here,
71 | # relative to this directory. They are copied after the builtin static files,
72 | # so a file named "default.css" will overwrite the builtin "default.css".
73 | html_static_path = ['_static']
74 |
75 | # Custom sidebar templates, must be a dictionary that maps document names
76 | # to template names.
77 | #
78 | # The default sidebars (for documents that don't match any pattern) are
79 | # defined by theme itself. Builtin themes are using these templates by
80 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
81 | # 'searchbox.html']``.
82 | html_sidebars = {
83 | '**': [
84 | 'about.html',
85 | 'gumroad.html',
86 | 'localtoc.html',
87 | 'relations.html',
88 | 'searchbox.html',
89 | ]
90 | }
91 |
92 |
93 | # -- Options for todo extension ----------------------------------------------
94 |
95 | # If true, `todo` and `todoList` produce output, else they produce nothing.
96 | todo_include_todos = True
97 |
--------------------------------------------------------------------------------
/tests/__main__.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import sys
4 |
5 | from runstats import ExponentialStatistics as FastExponentialStatistics
6 | from runstats import Regression as FastRegression
7 | from runstats import Statistics as FastStatistics
8 | from runstats.core import ExponentialStatistics as CoreExponentialStatistics
9 | from runstats.core import Regression as CoreRegression
10 | from runstats.core import Statistics as CoreStatistics
11 |
12 | from .test_runstats import kurtosis, mean, skewness, stddev, variance
13 |
14 |
def _print_statistics(title, stats):
    """Print the summary measures of a Statistics-like object."""
    print()
    print(title)
    print('Count:', len(stats))
    print('Mean:', stats.mean())
    print('Variance:', stats.variance())
    print('StdDev:', stats.stddev())
    print('Skewness:', stats.skewness())
    print('Kurtosis:', stats.kurtosis())


def _print_exponential_statistics(title, exp_stats):
    """Print the summary measures of an ExponentialStatistics object."""
    print()
    print(title)
    # Bug fix: both implementations expose a `decay` property; there is no
    # `get_decay()` method (see runstats/core.py and core.pxd).
    print('Decay Rate (default):', exp_stats.decay)
    print('Exponential Mean:', exp_stats.mean())
    print('Exponential Variance:', exp_stats.variance())
    print('Exponential StdDev:', exp_stats.stddev())


def _print_regression(title, regr):
    """Print the summary measures of a Regression object."""
    print()
    print(title)
    print('Count:', len(regr))
    print('Slope:', regr.slope())
    print('Intercept:', regr.intercept())
    print('Correlation:', regr.correlation())


def main():
    """Summarize command-line numbers with every runstats implementation.

    Parses each `sys.argv` argument as a float, then prints the summary
    measures computed by the plain test functions, the preferred
    (Cython-optimized when available) "Fast" classes, and the pure-Python
    "Core" classes.
    """
    args = list(map(float, sys.argv[1:]))

    print('Statistics Functions')
    print('Count:', len(args))
    print('Mean:', mean(args))
    print('Variance:', variance(args))
    print('StdDev:', stddev(args))
    print('Skewness:', skewness(args))
    print('Kurtosis:', kurtosis(args))

    # Statistics constructors accept an iterable and push each value.
    _print_statistics('FastStatistics', FastStatistics(args))
    _print_statistics('CoreStatistics', CoreStatistics(args))

    fast_exp_stats = FastExponentialStatistics(iterable=args)
    _print_exponential_statistics('FastExponentialStatistics', fast_exp_stats)

    core_exp_stats = CoreExponentialStatistics(iterable=args)
    _print_exponential_statistics('CoreExponentialStatistics', core_exp_stats)

    # Regress the values against their 1-based position.
    pairs = list(enumerate(args, 1))
    _print_regression('FastRegression', FastRegression(pairs))
    _print_regression('CoreRegression', CoreRegression(pairs))


if __name__ == '__main__':
    main()
105 |
--------------------------------------------------------------------------------
/runstats/core.pxd:
--------------------------------------------------------------------------------
1 | import cython
2 |
3 |
4 | cdef public double NAN
5 |
6 |
cdef class Statistics:

    # Running moments: count, mean (_eta), and the 2nd/3rd/4th central
    # moment accumulators (_rho/_tau/_phi), plus observed min and max.
    cdef public double _count, _eta, _rho, _tau, _phi, _min, _max

    cpdef clear(self)

    cpdef get_state(self)

    cpdef set_state(self, state)

    cpdef __reduce__(self)

    cpdef Statistics copy(self, _=*)

    # Locals used by the one-pass moment update in core.py `push`.
    @cython.locals(
        delta=double,
        delta_n=double,
        delta_n2=double,
        term=double,
    )
    cpdef push(self, double value)

    cpdef double minimum(self)

    cpdef double maximum(self)

    cpdef double mean(self)

    cpdef double variance(self, double ddof=*)

    cpdef double stddev(self, double ddof=*)

    cpdef double skewness(self)

    cpdef double kurtosis(self)

    @cython.locals(sigma=Statistics)
    cpdef Statistics _add(self, Statistics that)

    # Locals used when merging two summaries in core.py `_iadd`.
    @cython.locals(
        sum_count=double,
        delta=double,
        delta2=double,
        delta3=double,
        delta4=double,
        sum_eta=double,
        sum_rho=double,
        sum_tau=double,
        sum_phi=double,
    )
    cpdef Statistics _iadd(self, Statistics that)

    @cython.locals(sigma=Statistics)
    cpdef Statistics _mul(self, double that)

    cpdef Statistics _imul(self, double that)


# Pickle helper: rebuild a Statistics instance from `get_state()` output.
cpdef Statistics make_statistics(state)
66 |
67 |
cdef class ExponentialStatistics:
    # Decay rate plus the exponentially-weighted mean and variance.
    cdef public double _decay, _mean, _variance

    cpdef _set_decay(self, double value)

    cpdef clear(self, double mean=*, double variance=*, decay=*)

    cpdef get_state(self)

    cpdef set_state(self, state)

    cpdef __reduce__(self)

    cpdef ExponentialStatistics copy(self, _=*)

    # Locals used by the exponential update in core.py `push`.
    @cython.locals(
        alpha=double,
        diff=double,
        incr=double,
    )
    cpdef push(self, double value)

    cpdef double mean(self)

    cpdef double variance(self)

    cpdef double stddev(self)

    @cython.locals(sigma=ExponentialStatistics)
    cpdef ExponentialStatistics _add(self, ExponentialStatistics that)

    cpdef ExponentialStatistics _iadd(self, ExponentialStatistics that)

    @cython.locals(
        sigma=ExponentialStatistics,
    )
    cpdef ExponentialStatistics _mul(self, double that)

    cpdef ExponentialStatistics _imul(self, double that)


# Pickle helper: rebuild an ExponentialStatistics from `get_state()` output.
cpdef ExponentialStatistics make_exponential_statistics(state)
110 |
111 |
cdef class Regression:
    # Per-axis summaries plus the pair count and co-moment accumulator.
    cdef public Statistics _xstats, _ystats
    cdef public double _count, _sxy

    cpdef clear(self)

    cpdef get_state(self)

    cpdef set_state(self, state)

    cpdef __reduce__(self)

    cpdef Regression copy(self, _=*)

    cpdef push(self, double xcoord, double ycoord)

    @cython.locals(sxx=double)
    cpdef double slope(self, double ddof=*)

    cpdef double intercept(self, double ddof=*)

    @cython.locals(term=double)
    cpdef double correlation(self, double ddof=*)

    @cython.locals(sigma=Regression)
    cpdef Regression _add(self, Regression that)

    # Locals used when merging two regressions in core.py `_iadd`.
    @cython.locals(
        sum_count=double,
        sum_xstats=Statistics,
        sum_ystats=Statistics,
        deltax=double,
        deltay=double,
        sum_sxy=double,
    )
    cpdef Regression _iadd(self, Regression that)


# Pickle helper: rebuild a Regression instance from `get_state()` output.
cpdef Regression make_regression(state)
151 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | RunStats: Computing Statistics and Regression in One Pass
2 | =========================================================
3 |
4 | `RunStats`_ is an Apache2 licensed Python module for online statistics and
5 | online regression. Statistics and regression summaries are computed in a single
6 | pass. Previous values are not recorded in summaries.
7 |
8 | Long running systems often generate numbers summarizing performance. It could
9 | be the latency of a response or the time between requests. It's often useful to
10 | use these numbers in summary statistics like the arithmetic mean, minimum,
11 | standard deviation, etc. When many values are generated, computing these
12 | summaries can be computationally intensive. It may even be infeasible to keep
13 | every recorded value. In such cases computing online statistics and online
14 | regression is necessary.
15 |
16 | In other cases, you may only have one opportunity to observe all the recorded
17 | values. Python's generators work exactly this way. Traditional methods for
18 | calculating the variance and other higher moments requires multiple passes over
19 | the data. With generators, this is not possible and so computing statistics in
20 | a single pass is necessary.
21 |
22 | There are also scenarios where a user is not interested in a complete summary
23 | of the entire stream of data but rather wants to observe the current state of
24 | the system based on the recent past. In these cases exponential statistics are
25 | used. Instead of weighting all values uniformly in the statistics computation,
26 | an exponential decay weight is applied to older values. The decay rate is
27 | configurable and provides a mechanism for balancing recent values with past
28 | values.
29 |
30 | The Python `RunStats`_ module was designed for these cases by providing classes
31 | for computing online summary statistics and online linear regression in a
32 | single pass. Summary objects work on sequences which may be larger than memory
33 | or disk space permit. They may also be efficiently combined together to create
34 | aggregate summaries.
35 |
36 |
37 | Features
38 | --------
39 |
40 | - Pure-Python
41 | - Fully Documented
42 | - 100% Test Coverage
43 | - Numerically Stable
44 | - Optional Cython-optimized Extension (5-100 times faster)
45 | - Statistics summary computes mean, variance, standard deviation, skewness,
46 | kurtosis, minimum and maximum.
47 | - Regression summary computes slope, intercept and correlation.
48 | - Developed on Python 3.9
49 | - Tested on CPython 3.6, 3.7, 3.8, 3.9
50 | - Tested on Linux, Mac OS X, and Windows
51 | - Tested using GitHub Actions
52 |
53 | .. image:: https://github.com/grantjenks/python-runstats/workflows/integration/badge.svg
54 | :target: http://www.grantjenks.com/docs/runstats/
55 |
56 |
57 | Quickstart
58 | ----------
59 |
Installing `RunStats`_ is simple with `pip <https://pypi.org/project/pip/>`_::
61 |
62 | $ pip install runstats
63 |
64 | You can access documentation in the interpreter with Python's built-in help
65 | function:
66 |
67 | .. code-block:: python
68 |
69 | >>> import runstats
70 | >>> help(runstats) # doctest: +SKIP
71 | >>> help(runstats.Statistics) # doctest: +SKIP
72 | >>> help(runstats.Regression) # doctest: +SKIP
73 | >>> help(runstats.ExponentialStatistics) # doctest: +SKIP
74 |
75 |
76 | Tutorial
77 | --------
78 |
79 | The Python `RunStats`_ module provides three types for computing running
statistics: Statistics, ExponentialStatistics and Regression. The Regression
81 | object leverages Statistics internally for its calculations. Each can be
82 | initialized without arguments:
83 |
84 | .. code-block:: python
85 |
86 | >>> from runstats import Statistics, Regression, ExponentialStatistics
87 | >>> stats = Statistics()
88 | >>> regr = Regression()
89 | >>> exp_stats = ExponentialStatistics()
90 |
91 | Statistics objects support four methods for modification. Use `push` to add
92 | values to the summary, `clear` to reset the summary, sum to combine Statistics
93 | summaries and multiply to weight summary Statistics by a scalar.
94 |
95 | .. code-block:: python
96 |
97 | >>> for num in range(10):
98 | ... stats.push(float(num))
99 | >>> stats.mean()
100 | 4.5
101 | >>> stats.maximum()
102 | 9.0
103 | >>> stats += stats
104 | >>> stats.mean()
105 | 4.5
106 | >>> stats.variance()
107 | 8.68421052631579
108 | >>> len(stats)
109 | 20
110 | >>> stats *= 2
111 | >>> len(stats)
112 | 40
113 | >>> stats.clear()
114 | >>> len(stats)
115 | 0
116 | >>> stats.minimum()
117 | nan
118 |
119 | Use the Python built-in `len` for the number of pushed values. Unfortunately
120 | the Python `min` and `max` built-ins may not be used for the minimum and
121 | maximum as sequences are expected instead. Therefore, there are `minimum` and
122 | `maximum` methods provided for that purpose:
123 |
124 | .. code-block:: python
125 |
126 | >>> import random
127 | >>> random.seed(0)
128 | >>> for __ in range(1000):
129 | ... stats.push(random.random())
130 | >>> len(stats)
131 | 1000
132 | >>> min(stats)
133 | Traceback (most recent call last):
134 | ...
135 | TypeError: ...
136 | >>> stats.minimum()
137 | 0.00024069652516689466
138 | >>> stats.maximum()
139 | 0.9996851255769114
140 |
141 | Statistics summaries provide five measures of a series: mean, variance,
142 | standard deviation, skewness and kurtosis:
143 |
144 | .. code-block:: python
145 |
146 | >>> stats = Statistics([1, 2, 5, 12, 5, 2, 1])
147 | >>> stats.mean()
148 | 4.0
149 | >>> stats.variance()
150 | 15.33333333333333
151 | >>> stats.stddev()
152 | 3.915780041490243
153 | >>> stats.skewness()
154 | 1.33122127314735
155 | >>> stats.kurtosis()
156 | 0.5496219281663506
157 |
158 | All internal calculations use Python's `float` type.
159 |
160 | Like Statistics, the Regression type supports some methods for modification:
161 | `push`, `clear` and sum:
162 |
163 | .. code-block:: python
164 |
165 | >>> regr.clear()
166 | >>> len(regr)
167 | 0
168 | >>> for num in range(10):
169 | ... regr.push(num, num + 5)
170 | >>> len(regr)
171 | 10
172 | >>> regr.slope()
173 | 1.0
174 | >>> more = Regression((num, num + 5) for num in range(10, 20))
175 | >>> total = regr + more
176 | >>> len(total)
177 | 20
178 | >>> total.slope()
179 | 1.0
180 | >>> total.intercept()
181 | 5.0
182 | >>> total.correlation()
183 | 1.0
184 |
185 | Regression summaries provide three measures of a series of pairs: slope,
186 | intercept and correlation. Note that, as a regression, the points need not
187 | exactly lie on a line:
188 |
189 | .. code-block:: python
190 |
191 | >>> regr = Regression([(1.2, 1.9), (3, 5.1), (4.9, 8.1), (7, 11)])
192 | >>> regr.slope()
193 | 1.5668320150154176
194 | >>> regr.intercept()
195 | 0.21850113956294415
196 | >>> regr.correlation()
197 | 0.9983810791694997
198 |
199 | Both constructors accept an optional iterable that is consumed and pushed into
200 | the summary. Note that you may pass a generator as an iterable and the
201 | generator will be entirely consumed.
202 |
203 | The ExponentialStatistics are constructed by providing a decay rate, initial
204 | mean, and initial variance. The decay rate has default 0.9 and must be between
205 | 0 and 1. The initial mean and variance default to zero.
206 |
207 | .. code-block:: python
208 |
209 | >>> exp_stats = ExponentialStatistics()
210 | >>> exp_stats.decay
211 | 0.9
212 | >>> exp_stats.mean()
213 | 0.0
214 | >>> exp_stats.variance()
215 | 0.0
216 |
The decay rate is the weight by which the current statistics are discounted.
Consequently, (1 - decay) is the weight of the new value. Like the `Statistics` class,
219 | there are four methods for modification: `push`, `clear`, sum and
220 | multiply.
221 |
222 | .. code-block:: python
223 |
224 | >>> for num in range(10):
225 | ... exp_stats.push(num)
226 | >>> exp_stats.mean()
227 | 3.486784400999999
228 | >>> exp_stats.variance()
229 | 11.593430921943071
230 | >>> exp_stats.stddev()
231 | 3.4049127627507683
232 |
233 | The decay of the exponential statistics can also be changed. The value must be
234 | between 0 and 1.
235 |
236 | .. code-block:: python
237 |
238 | >>> exp_stats.decay
239 | 0.9
240 | >>> exp_stats.decay = 0.5
241 | >>> exp_stats.decay
242 | 0.5
243 | >>> exp_stats.decay = 10
244 | Traceback (most recent call last):
245 | ...
246 | ValueError: decay must be between 0 and 1
247 |
The clear method optionally sets a new mean, new variance, and new decay.
If none are provided, the mean and variance reset to zero, while the decay
is not changed.
251 |
252 | .. code-block:: python
253 |
254 | >>> exp_stats.clear()
255 | >>> exp_stats.decay
256 | 0.5
257 | >>> exp_stats.mean()
258 | 0.0
259 | >>> exp_stats.variance()
260 | 0.0
261 |
262 | Combining `ExponentialStatistics` is done by adding them together. The mean and
263 | variance are simply added to create a new object. To weight each
264 | `ExponentialStatistics`, multiply them by a constant factor. If two
265 | `ExponentialStatistics` are added then the leftmost decay is used for the new
266 | object. The `len` method is not supported.
267 |
268 | .. code-block:: python
269 |
270 | >>> alpha_stats = ExponentialStatistics(iterable=range(10))
271 | >>> beta_stats = ExponentialStatistics(decay=0.1)
272 | >>> for num in range(10):
273 | ... beta_stats.push(num)
274 | >>> exp_stats = beta_stats * 0.5 + alpha_stats * 0.5
275 | >>> exp_stats.decay
276 | 0.1
277 | >>> exp_stats.mean()
278 | 6.187836645
279 |
280 | All internal calculations of the Statistics and Regression classes are based
281 | entirely on the C++ code by John Cook as posted in a couple of articles:
282 |
283 | * `Computing Skewness and Kurtosis in One Pass`_
284 | * `Computing Linear Regression in One Pass`_
285 |
286 | .. _`Computing Skewness and Kurtosis in One Pass`: http://www.johndcook.com/blog/skewness_kurtosis/
287 | .. _`Computing Linear Regression in One Pass`: http://www.johndcook.com/blog/running_regression/
288 |
289 | The ExponentialStatistics implementation is based on:
290 |
291 | * Finch, 2009, Incremental Calculation of Weighted Mean and Variance
292 |
293 | The pure-Python version of `RunStats`_ is directly available if preferred.
294 |
295 | .. code-block:: python
296 |
297 | >>> import runstats.core # Pure-Python
>>> runstats.core.Statistics
<class 'runstats.core.Statistics'>
300 |
301 | When importing from `runstats` the Cython-optimized version `_core` is
302 | preferred and the `core` version is used as fallback. Micro-benchmarking
303 | Statistics and Regression by calling `push` repeatedly shows the
304 | Cython-optimized extension as 20-40 times faster than the pure-Python
305 | extension.
306 |
307 | .. _`RunStats`: http://www.grantjenks.com/docs/runstats/
308 |
309 |
310 | Reference and Indices
311 | ---------------------
312 |
313 | * `RunStats Documentation`_
314 | * `RunStats API Reference`_
315 | * `RunStats at PyPI`_
316 | * `RunStats at GitHub`_
317 | * `RunStats Issue Tracker`_
318 |
319 | .. _`RunStats Documentation`: http://www.grantjenks.com/docs/runstats/
320 | .. _`RunStats API Reference`: http://www.grantjenks.com/docs/runstats/api.html
321 | .. _`RunStats at PyPI`: https://pypi.python.org/pypi/runstats/
322 | .. _`RunStats at GitHub`: https://github.com/grantjenks/python-runstats/
323 | .. _`RunStats Issue Tracker`: https://github.com/grantjenks/python-runstats/issues/
324 |
325 |
326 | License
327 | -------
328 |
329 | Copyright 2013-2021 Grant Jenks
330 |
331 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use
332 | this file except in compliance with the License. You may obtain a copy of the
333 | License at
334 |
335 | http://www.apache.org/licenses/LICENSE-2.0
336 |
337 | Unless required by applicable law or agreed to in writing, software distributed
338 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
339 | CONDITIONS OF ANY KIND, either express or implied. See the License for the
340 | specific language governing permissions and limitations under the License.
341 |
--------------------------------------------------------------------------------
/runstats/core.py:
--------------------------------------------------------------------------------
1 | """Python RunStats
2 |
3 | Compute Statistics, Exponential Statistics and Regression in a single pass.
4 |
5 | """
6 |
7 | from __future__ import division
8 |
9 | NAN = float('nan')
10 |
11 |
class Statistics:
    """Compute statistics in a single pass.

    Computes the minimum, maximum, mean, variance, standard deviation,
    skewness, and kurtosis.
    Statistics objects may also be added together and copied.

    Internal state is the count plus running moment accumulators: `_eta`
    is the mean, and `_rho`, `_tau`, `_phi` accumulate the second, third,
    and fourth central moments (scaled by the count).

    Based entirely on the C++ code by John D Cook at
    http://www.johndcook.com/skewness_kurtosis.html
    """

    def __init__(self, iterable=()):
        """Initialize Statistics object.

        Iterates optional parameter `iterable` and pushes each value into the
        statistics summary.
        """
        self.clear()
        for value in iterable:
            self.push(value)

    def clear(self):
        """Clear Statistics object."""
        # Moments reset to zero; min/max are NaN until a value is pushed.
        self._count = self._eta = self._rho = self._tau = self._phi = 0.0
        self._min = self._max = NAN

    def __eq__(self, that):
        # Summaries are equal when their full internal states match.
        return self.get_state() == that.get_state()

    def __ne__(self, that):
        return self.get_state() != that.get_state()

    def get_state(self):
        """Get internal state."""
        return (
            self._count,
            self._eta,
            self._rho,
            self._tau,
            self._phi,
            self._min,
            self._max,
        )

    def set_state(self, state):
        """Set internal state."""
        (
            self._count,
            self._eta,
            self._rho,
            self._tau,
            self._phi,
            self._min,
            self._max,
        ) = state

    @classmethod
    def fromstate(cls, state):
        """Return Statistics object from state."""
        stats = cls()
        stats.set_state(state)
        return stats

    def __reduce__(self):
        # Pickle support via the module-level factory function.
        return make_statistics, (self.get_state(),)

    def copy(self, _=None):
        """Copy Statistics object."""
        # The ignored argument lets `copy` double as `__deepcopy__`,
        # which is called with a memo dict.
        return self.fromstate(self.get_state())

    def __copy__(self, _=None):
        """Copy Statistics object."""
        return self.copy(_)

    __deepcopy__ = __copy__

    def __len__(self):
        """Number of values that have been pushed."""
        return int(self._count)

    def push(self, value):
        """Add `value` to the Statistics summary."""
        if self._count == 0.0:
            self._min = value
            self._max = value
        else:
            self._min = min(self._min, value)
            self._max = max(self._max, value)

        # One-pass update of the first four moments; `delta` is measured
        # against the mean *before* this value is included.
        delta = value - self._eta
        delta_n = delta / (self._count + 1)
        delta_n2 = delta_n * delta_n
        term = delta * delta_n * self._count

        # Update order matters: `_phi` and `_tau` read the old `_rho` and
        # `_tau`, so `_rho` is updated last.
        self._count += 1
        self._eta += delta_n
        self._phi += (
            term * delta_n2 * (self._count ** 2 - 3 * self._count + 3)
            + 6 * delta_n2 * self._rho
            - 4 * delta_n * self._tau
        )
        self._tau += (
            term * delta_n * (self._count - 2) - 3 * delta_n * self._rho
        )
        self._rho += term

    def minimum(self):
        """Minimum of values."""
        return self._min

    def maximum(self):
        """Maximum of values."""
        return self._max

    def mean(self):
        """Mean of values."""
        return self._eta

    def variance(self, ddof=1.0):
        """Variance of values (with `ddof` degrees of freedom)."""
        # Default ddof=1.0 yields the Bessel-corrected sample variance.
        return self._rho / (self._count - ddof)

    def stddev(self, ddof=1.0):
        """Standard deviation of values (with `ddof` degrees of freedom)."""
        return self.variance(ddof) ** 0.5

    def skewness(self):
        """Skewness of values."""
        return (self._count ** 0.5) * self._tau / (self._rho ** 1.5)

    def kurtosis(self):
        """Kurtosis of values (excess kurtosis; -3.0 offset applied)."""
        return self._count * self._phi / (self._rho * self._rho) - 3.0

    def _add(self, that):
        """Add two Statistics objects together."""
        sigma = self.copy()
        sigma._iadd(that)
        return sigma

    def __add__(self, that):
        """Add two Statistics objects together."""
        return self._add(that)

    def _iadd(self, that):
        """Add another Statistics object to this one."""
        sum_count = self._count + that._count
        if sum_count == 0:
            # Both summaries empty: nothing to merge.
            return self

        # Pairwise merge of the first four moments (Cook's formulas).
        delta = that._eta - self._eta
        delta2 = delta ** 2
        delta3 = delta ** 3
        delta4 = delta ** 4

        sum_eta = (
            self._count * self._eta + that._count * that._eta
        ) / sum_count

        sum_rho = (
            self._rho
            + that._rho
            + delta2 * self._count * that._count / sum_count
        )

        sum_tau = (
            self._tau
            + that._tau
            + delta3
            * self._count
            * that._count
            * (self._count - that._count)
            / (sum_count ** 2)
            + 3.0
            * delta
            * (self._count * that._rho - that._count * self._rho)
            / sum_count
        )

        sum_phi = (
            self._phi
            + that._phi
            + delta4
            * self._count
            * that._count
            * (self._count ** 2 - self._count * that._count + that._count ** 2)
            / (sum_count ** 3)
            + 6.0
            * delta2
            * (
                self._count * self._count * that._rho
                + that._count * that._count * self._rho
            )
            / (sum_count ** 2)
            + 4.0
            * delta
            * (self._count * that._tau - that._count * self._tau)
            / sum_count
        )

        # Min/max merge only considers non-empty sides (an empty summary
        # holds NaN for both).
        if self._count == 0.0:
            self._min = that._min
            self._max = that._max
        elif that._count != 0.0:
            self._min = min(self._min, that._min)
            self._max = max(self._max, that._max)

        self._count = sum_count
        self._eta = sum_eta
        self._rho = sum_rho
        self._tau = sum_tau
        self._phi = sum_phi

        return self

    def __iadd__(self, that):
        """Add another Statistics object to this one."""
        return self._iadd(that)

    def _mul(self, that):
        """Multiply by a scalar to change Statistics weighting."""
        sigma = self.copy()
        sigma._imul(that)
        return sigma

    def __mul__(self, that):
        """Multiply by a scalar to change Statistics weighting."""
        # Under Cython, `__mul__` may be invoked with swapped operands
        # (scalar * stats), hence the isinstance check.
        if isinstance(self, Statistics):
            return self._mul(that)
        # https://stackoverflow.com/q/33218006/232571
        return that._mul(self)  # pragma: no cover

    __rmul__ = __mul__

    def _imul(self, that):
        """Multiply by a scalar to change Statistics weighting in-place."""
        # Scaling the count and moment accumulators re-weights the summary;
        # the mean (`_eta`) and min/max are unaffected.
        self._count *= that
        self._rho *= that
        self._tau *= that
        self._phi *= that
        return self

    def __imul__(self, that):
        """Multiply by a scalar to change Statistics weighting in-place."""
        return self._imul(that)
257 |
258 |
def make_statistics(state):
    """Recreate a Statistics summary from a `get_state()` tuple.

    Used as the pickle factory by `Statistics.__reduce__`.
    """
    stats = Statistics()
    stats.set_state(state)
    return stats
262 |
263 |
class ExponentialStatistics:
    """Compute exponential mean and variance in a single pass.

    ExponentialStatistics objects may also be added together, scaled by a
    constant, and copied.

    Based on
    "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at
    https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf

    For an explanation of these statistics refer to e.g.:
    https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html

    """

    def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()):
        """Initialize ExponentialStatistics object.

        Incrementally tracks mean and variance and exponentially discounts
        old values.

        Requires a `decay` rate between 0 and 1 inclusive (as enforced by
        `_set_decay`) for discounting previous statistics; values outside
        that range raise ValueError.

        Optionally allows setting initial mean and variance. Default 0.

        Iterates optional parameter `iterable` and pushes each value into the
        statistics summary.

        """
        self.clear(mean, variance, decay)
        for value in iterable:
            self.push(value)

    @property
    def decay(self):
        """Exponential decay rate of old values."""
        return self._decay

    @decay.setter
    def decay(self, value):
        self._set_decay(value)

    def _set_decay(self, value):
        # Validate before assignment so an invalid value leaves the
        # previous decay rate intact.
        if not 0 <= value <= 1:
            raise ValueError('decay must be between 0 and 1')
        self._decay = value

    def clear(self, mean=0.0, variance=0.0, decay=None):
        """Clear ExponentialStatistics object.

        Resets mean and variance (default 0). The decay rate is preserved
        unless a new `decay` is given.
        """
        self._mean = mean
        self._variance = variance
        if decay is not None:
            self._set_decay(decay)

    def __eq__(self, that):
        # Equal when decay, mean, and variance all match.
        return self.get_state() == that.get_state()

    def __ne__(self, that):
        return self.get_state() != that.get_state()

    def get_state(self):
        """Get internal state."""
        return self._decay, self._mean, self._variance

    def set_state(self, state):
        """Set internal state."""
        (
            self._decay,
            self._mean,
            self._variance,
        ) = state

    @classmethod
    def fromstate(cls, state):
        """Return ExponentialStatistics object from state."""
        stats = cls()
        stats.set_state(state)
        return stats

    def __reduce__(self):
        # Pickle support via the module-level factory function.
        return make_exponential_statistics, (self.get_state(),)

    def copy(self, _=None):
        """Copy ExponentialStatistics object."""
        # The ignored argument lets `copy` double as `__deepcopy__`,
        # which is called with a memo dict.
        return self.fromstate(self.get_state())

    def __copy__(self, _=None):
        """Copy ExponentialStatistics object."""
        return self.copy(_)

    __deepcopy__ = __copy__

    def push(self, value):
        """Add `value` to the ExponentialStatistics summary."""
        # Finch (2009): the mean moves toward `value` by weight alpha;
        # the variance update uses the pre-update difference.
        alpha = 1.0 - self._decay
        diff = value - self._mean
        incr = alpha * diff
        self._variance += alpha * (self._decay * diff ** 2 - self._variance)
        self._mean += incr

    def mean(self):
        """Exponential mean of values."""
        return self._mean

    def variance(self):
        """Exponential variance of values."""
        return self._variance

    def stddev(self):
        """Exponential standard deviation of values."""
        return self.variance() ** 0.5

    def _add(self, that):
        """Add two ExponentialStatistics objects together."""
        sigma = self.copy()
        sigma._iadd(that)
        return sigma

    def __add__(self, that):
        """Add two ExponentialStatistics objects together."""
        return self._add(that)

    def _iadd(self, that):
        """Add another ExponentialStatistics object to this one.

        The left-hand decay rate is kept; means and variances are summed.
        """
        self._mean += that.mean()
        self._variance += that.variance()
        return self

    def __iadd__(self, that):
        """Add another ExponentialStatistics object to this one."""
        return self._iadd(that)

    def _mul(self, that):
        """Multiply by a scalar to change ExponentialStatistics weighting."""
        sigma = self.copy()
        sigma._imul(that)
        return sigma

    def __mul__(self, that):
        """Multiply by a scalar to change ExponentialStatistics weighting."""
        if isinstance(self, ExponentialStatistics):
            return self._mul(that)
        # https://stackoverflow.com/q/33218006/232571
        return that._mul(self)  # pragma: no cover

    # Bug fix: reflected multiplication so `scalar * exp_stats` works too,
    # matching the behavior of Statistics (which defines __rmul__).
    __rmul__ = __mul__

    def _imul(self, that):
        """Multiply by a scalar to change ExponentialStatistics weighting
        in-place.

        """
        self._mean *= that
        self._variance *= that
        return self

    def __imul__(self, that):
        """Multiply by a scalar to change ExponentialStatistics weighting
        in-place.

        """
        return self._imul(that)
425 |
def make_exponential_statistics(state):
    """Recreate an ExponentialStatistics summary from a `get_state()` tuple.

    Used as the pickle factory by `ExponentialStatistics.__reduce__`.
    """
    exp_stats = ExponentialStatistics()
    exp_stats.set_state(state)
    return exp_stats
429 |
430 |
class Regression:
    """
    Single-pass simple linear regression.

    Maintains enough state to report the slope, intercept, and
    correlation of pushed (x, y) pairs. Regression objects support
    addition and copying.

    Based entirely on the C++ code by John D Cook at
    http://www.johndcook.com/running_regression.html
    """

    def __init__(self, iterable=()):
        """Initialize Regression object.

        Each (x, y) pair from the optional parameter `iterable` is
        pushed into the regression summary.
        """
        self._xstats = Statistics()
        self._ystats = Statistics()
        self._count = self._sxy = 0.0

        for xval, yval in iterable:
            self.push(xval, yval)

    def __eq__(self, that):
        return self.get_state() == that.get_state()

    def __ne__(self, that):
        return self.get_state() != that.get_state()

    def clear(self):
        """Reset the regression summary to its empty state."""
        self._count = self._sxy = 0.0
        self._xstats.clear()
        self._ystats.clear()

    def get_state(self):
        """Return the internal state as a tuple."""
        xstate = self._xstats.get_state()
        ystate = self._ystats.get_state()
        return (self._count, self._sxy, xstate, ystate)

    def set_state(self, state):
        """Restore internal state from a `get_state` tuple."""
        count, sxy, xstate, ystate = state
        self._count = count
        self._sxy = sxy
        self._xstats.set_state(xstate)
        self._ystats.set_state(ystate)

    @classmethod
    def fromstate(cls, state):
        """Construct a Regression object from `state`."""
        instance = cls()
        instance.set_state(state)
        return instance

    def __reduce__(self):
        return make_regression, (self.get_state(),)

    def copy(self, _=None):
        """Return a copy of this Regression object."""
        return self.fromstate(self.get_state())

    def __copy__(self, _=None):
        """Return a copy of this Regression object."""
        return self.copy(_)

    __deepcopy__ = __copy__

    def __len__(self):
        """Number of (x, y) pairs that have been pushed."""
        return int(self._count)

    def push(self, xcoord, ycoord):
        """Add the pair `(xcoord, ycoord)` to the Regression summary."""
        # Update the co-moment first: the means used here must be those
        # of the previously pushed pairs, before this pair is included.
        xdelta = self._xstats.mean() - xcoord
        ydelta = self._ystats.mean() - ycoord
        self._sxy += xdelta * ydelta * self._count / (self._count + 1)
        self._xstats.push(xcoord)
        self._ystats.push(ycoord)
        self._count += 1

    def slope(self, ddof=1.0):
        """Slope of values (with `ddof` degrees of freedom)."""
        sum_xx = self._xstats.variance(ddof) * (self._count - ddof)
        return self._sxy / sum_xx

    def intercept(self, ddof=1.0):
        """Intercept of values (with `ddof` degrees of freedom)."""
        return self._ystats.mean() - self.slope(ddof) * self._xstats.mean()

    def correlation(self, ddof=1.0):
        """Correlation of values (with `ddof` degrees of freedom)."""
        spread = self._xstats.stddev(ddof) * self._ystats.stddev(ddof)
        return self._sxy / ((self._count - ddof) * spread)

    def _add(self, that):
        """Return a new Regression combining `self` and `that`."""
        result = self.copy()
        result._iadd(that)
        return result

    def __add__(self, that):
        """Add two Regression objects together."""
        return self._add(that)

    def _iadd(self, that):
        """Merge another Regression object into this one, in-place."""
        total = self._count + that._count
        if total == 0:
            return self

        merged_xstats = self._xstats._add(that._xstats)
        merged_ystats = self._ystats._add(that._ystats)

        xdiff = that._xstats.mean() - self._xstats.mean()
        ydiff = that._ystats.mean() - self._ystats.mean()
        cross = self._count * that._count * xdiff * ydiff / total
        merged_sxy = self._sxy + that._sxy + cross

        self._count = total
        self._xstats = merged_xstats
        self._ystats = merged_ystats
        self._sxy = merged_sxy

        return self

    def __iadd__(self, that):
        """Merge another Regression object into this one, in-place."""
        return self._iadd(that)
571 |
572 |
def make_regression(state):
    """Build a Regression object from `state` (used by `__reduce__`)."""
    return Regression.fromstate(state)
576 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 |
3 | # A comma-separated list of package or module names from where C extensions may
4 | # be loaded. Extensions are loading into the active Python interpreter and may
5 | # run arbitrary code.
6 | extension-pkg-whitelist=
7 |
8 | # Specify a score threshold to be exceeded before program exits with error.
9 | fail-under=10.0
10 |
11 | # Add files or directories to the blacklist. They should be base names, not
12 | # paths.
13 | ignore=CVS
14 |
15 | # Add files or directories matching the regex patterns to the blacklist. The
16 | # regex matches against base names, not paths.
17 | ignore-patterns=
18 |
19 | # Python code to execute, usually for sys.path manipulation such as
20 | # pygtk.require().
21 | #init-hook=
22 |
23 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
24 | # number of processors available to use.
25 | jobs=1
26 |
27 | # Control the amount of potential inferred values when inferring a single
28 | # object. This can help the performance when dealing with large functions or
29 | # complex, nested conditions.
30 | limit-inference-results=100
31 |
32 | # List of plugins (as comma separated values of python module names) to load,
33 | # usually to register additional checkers.
34 | load-plugins=
35 |
36 | # Pickle collected data for later comparisons.
37 | persistent=yes
38 |
39 | # When enabled, pylint would attempt to guess common misconfiguration and emit
40 | # user-friendly hints instead of false-positive error messages.
41 | suggestion-mode=yes
42 |
43 | # Allow loading of arbitrary C extensions. Extensions are imported into the
44 | # active Python interpreter and may run arbitrary code.
45 | unsafe-load-any-extension=no
46 |
47 |
48 | [MESSAGES CONTROL]
49 |
50 | # Only show warnings with the listed confidence levels. Leave empty to show
51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52 | confidence=
53 |
54 | # Disable the message, report, category or checker with the given id(s). You
55 | # can either give multiple identifiers separated by comma (,) or put this
56 | # option multiple times (only on the command line, not in the configuration
57 | # file where it should appear only once). You can also use "--disable=all" to
58 | # disable everything first and then reenable specific checks. For example, if
59 | # you want to run only the similarities checker, you can use "--disable=all
60 | # --enable=similarities". If you want to run only the classes checker, but have
61 | # no Warning level messages displayed, use "--disable=all --enable=classes
62 | # --disable=W".
63 | disable=print-statement,
64 | parameter-unpacking,
65 | unpacking-in-except,
66 | old-raise-syntax,
67 | backtick,
68 | long-suffix,
69 | old-ne-operator,
70 | old-octal-literal,
71 | import-star-module-level,
72 | non-ascii-bytes-literal,
73 | raw-checker-failed,
74 | bad-inline-option,
75 | locally-disabled,
76 | file-ignored,
77 | suppressed-message,
78 | useless-suppression,
79 | deprecated-pragma,
80 | use-symbolic-message-instead,
81 | apply-builtin,
82 | basestring-builtin,
83 | buffer-builtin,
84 | cmp-builtin,
85 | coerce-builtin,
86 | execfile-builtin,
87 | file-builtin,
88 | long-builtin,
89 | raw_input-builtin,
90 | reduce-builtin,
91 | standarderror-builtin,
92 | unicode-builtin,
93 | xrange-builtin,
94 | coerce-method,
95 | delslice-method,
96 | getslice-method,
97 | setslice-method,
98 | no-absolute-import,
99 | old-division,
100 | dict-iter-method,
101 | dict-view-method,
102 | next-method-called,
103 | metaclass-assignment,
104 | indexing-exception,
105 | raising-string,
106 | reload-builtin,
107 | oct-method,
108 | hex-method,
109 | nonzero-method,
110 | cmp-method,
111 | input-builtin,
112 | round-builtin,
113 | intern-builtin,
114 | unichr-builtin,
115 | map-builtin-not-iterating,
116 | zip-builtin-not-iterating,
117 | range-builtin-not-iterating,
118 | filter-builtin-not-iterating,
119 | using-cmp-argument,
120 | eq-without-hash,
121 | div-method,
122 | idiv-method,
123 | rdiv-method,
124 | exception-message-attribute,
125 | invalid-str-codec,
126 | sys-max-int,
127 | bad-python3-import,
128 | deprecated-string-function,
129 | deprecated-str-translate-call,
130 | deprecated-itertools-function,
131 | deprecated-types-field,
132 | next-method-defined,
133 | dict-items-not-iterating,
134 | dict-keys-not-iterating,
135 | dict-values-not-iterating,
136 | deprecated-operator-function,
137 | deprecated-urllib-function,
138 | xreadlines-attribute,
139 | deprecated-sys-function,
140 | exception-escape,
141 | comprehension-escape,
142 | protected-access,
143 | attribute-defined-outside-init,
144 |
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by comma (,) or put this option
# multiple times (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
149 | enable=c-extension-no-member
150 |
151 |
152 | [REPORTS]
153 |
154 | # Python expression which should return a score less than or equal to 10. You
155 | # have access to the variables 'error', 'warning', 'refactor', and 'convention'
156 | # which contain the number of messages in each category, as well as 'statement'
157 | # which is the total number of statements analyzed. This score is used by the
158 | # global evaluation report (RP0004).
159 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
160 |
161 | # Template used to display messages. This is a python new-style format string
162 | # used to format the message information. See doc for all details.
163 | #msg-template=
164 |
165 | # Set the output format. Available formats are text, parseable, colorized, json
166 | # and msvs (visual studio). You can also give a reporter class, e.g.
167 | # mypackage.mymodule.MyReporterClass.
168 | output-format=text
169 |
170 | # Tells whether to display a full report or only the messages.
171 | reports=no
172 |
173 | # Activate the evaluation score.
174 | score=yes
175 |
176 |
177 | [REFACTORING]
178 |
179 | # Maximum number of nested blocks for function / method body
180 | max-nested-blocks=5
181 |
182 | # Complete name of functions that never returns. When checking for
183 | # inconsistent-return-statements if a never returning function is called then
184 | # it will be considered as an explicit return statement and no message will be
185 | # printed.
186 | never-returning-functions=sys.exit
187 |
188 |
189 | [LOGGING]
190 |
191 | # The type of string formatting that logging methods do. `old` means using %
192 | # formatting, `new` is for `{}` formatting.
193 | logging-format-style=old
194 |
195 | # Logging modules to check that the string format arguments are in logging
196 | # function parameter format.
197 | logging-modules=logging
198 |
199 |
200 | [SPELLING]
201 |
202 | # Limits count of emitted suggestions for spelling mistakes.
203 | max-spelling-suggestions=4
204 |
205 | # Spelling dictionary name. Available dictionaries: none. To make it work,
206 | # install the python-enchant package.
207 | spelling-dict=
208 |
209 | # List of comma separated words that should not be checked.
210 | spelling-ignore-words=
211 |
212 | # A path to a file that contains the private dictionary; one word per line.
213 | spelling-private-dict-file=
214 |
215 | # Tells whether to store unknown words to the private dictionary (see the
216 | # --spelling-private-dict-file option) instead of raising a message.
217 | spelling-store-unknown-words=no
218 |
219 |
220 | [MISCELLANEOUS]
221 |
222 | # List of note tags to take in consideration, separated by a comma.
223 | notes=FIXME,
224 | XXX,
225 | TODO
226 |
227 | # Regular expression of note tags to take in consideration.
228 | #notes-rgx=
229 |
230 |
231 | [TYPECHECK]
232 |
233 | # List of decorators that produce context managers, such as
234 | # contextlib.contextmanager. Add to this list to register other decorators that
235 | # produce valid context managers.
236 | contextmanager-decorators=contextlib.contextmanager
237 |
238 | # List of members which are set dynamically and missed by pylint inference
239 | # system, and so shouldn't trigger E1101 when accessed. Python regular
240 | # expressions are accepted.
241 | generated-members=
242 |
243 | # Tells whether missing members accessed in mixin class should be ignored. A
244 | # mixin class is detected if its name ends with "mixin" (case insensitive).
245 | ignore-mixin-members=yes
246 |
247 | # Tells whether to warn about missing members when the owner of the attribute
248 | # is inferred to be None.
249 | ignore-none=yes
250 |
251 | # This flag controls whether pylint should warn about no-member and similar
252 | # checks whenever an opaque object is returned when inferring. The inference
253 | # can return multiple potential results while evaluating a Python object, but
254 | # some branches might not be evaluated, which results in partial inference. In
255 | # that case, it might be useful to still emit no-member and other checks for
256 | # the rest of the inferred objects.
257 | ignore-on-opaque-inference=yes
258 |
259 | # List of class names for which member attributes should not be checked (useful
260 | # for classes with dynamically set attributes). This supports the use of
261 | # qualified names.
262 | ignored-classes=optparse.Values,thread._local,_thread._local
263 |
264 | # List of module names for which member attributes should not be checked
265 | # (useful for modules/projects where namespaces are manipulated during runtime
266 | # and thus existing member attributes cannot be deduced by static analysis). It
267 | # supports qualified module names, as well as Unix pattern matching.
268 | ignored-modules=
269 |
270 | # Show a hint with possible names when a member name was not found. The aspect
271 | # of finding the hint is based on edit distance.
272 | missing-member-hint=yes
273 |
274 | # The minimum edit distance a name should have in order to be considered a
275 | # similar match for a missing member name.
276 | missing-member-hint-distance=1
277 |
278 | # The total number of similar names that should be taken in consideration when
279 | # showing a hint for a missing member.
280 | missing-member-max-choices=1
281 |
282 | # List of decorators that change the signature of a decorated function.
283 | signature-mutators=
284 |
285 |
286 | [VARIABLES]
287 |
288 | # List of additional names supposed to be defined in builtins. Remember that
289 | # you should avoid defining new builtins when possible.
290 | additional-builtins=
291 |
292 | # Tells whether unused global variables should be treated as a violation.
293 | allow-global-unused-variables=yes
294 |
295 | # List of strings which can identify a callback function by name. A callback
296 | # name must start or end with one of those strings.
297 | callbacks=cb_,
298 | _cb
299 |
300 | # A regular expression matching the name of dummy variables (i.e. expected to
301 | # not be used).
302 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
303 |
304 | # Argument names that match this expression will be ignored. Default to name
305 | # with leading underscore.
306 | ignored-argument-names=_.*|^ignored_|^unused_
307 |
308 | # Tells whether we should check for unused import in __init__ files.
309 | init-import=no
310 |
311 | # List of qualified module names which can have objects that can redefine
312 | # builtins.
313 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
314 |
315 |
316 | [FORMAT]
317 |
318 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
319 | expected-line-ending-format=
320 |
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
323 |
324 | # Number of spaces of indent required inside a hanging or continued line.
325 | indent-after-paren=4
326 |
327 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
328 | # tab).
329 | indent-string=' '
330 |
331 | # Maximum number of characters on a single line.
332 | max-line-length=100
333 |
334 | # Maximum number of lines in a module.
335 | max-module-lines=1000
336 |
337 | # Allow the body of a class to be on the same line as the declaration if body
338 | # contains single statement.
339 | single-line-class-stmt=no
340 |
341 | # Allow the body of an if to be on the same line as the test if there is no
342 | # else.
343 | single-line-if-stmt=no
344 |
345 |
346 | [SIMILARITIES]
347 |
348 | # Ignore comments when computing similarities.
349 | ignore-comments=yes
350 |
351 | # Ignore docstrings when computing similarities.
352 | ignore-docstrings=yes
353 |
354 | # Ignore imports when computing similarities.
355 | ignore-imports=no
356 |
357 | # Minimum lines number of a similarity.
358 | min-similarity-lines=4
359 |
360 |
361 | [BASIC]
362 |
363 | # Naming style matching correct argument names.
364 | argument-naming-style=snake_case
365 |
366 | # Regular expression matching correct argument names. Overrides argument-
367 | # naming-style.
368 | #argument-rgx=
369 |
370 | # Naming style matching correct attribute names.
371 | attr-naming-style=snake_case
372 |
373 | # Regular expression matching correct attribute names. Overrides attr-naming-
374 | # style.
375 | #attr-rgx=
376 |
377 | # Bad variable names which should always be refused, separated by a comma.
378 | bad-names=foo,
379 | bar,
380 | baz,
381 | toto,
382 | tutu,
383 | tata
384 |
385 | # Bad variable names regexes, separated by a comma. If names match any regex,
386 | # they will always be refused
387 | bad-names-rgxs=
388 |
389 | # Naming style matching correct class attribute names.
390 | class-attribute-naming-style=any
391 |
392 | # Regular expression matching correct class attribute names. Overrides class-
393 | # attribute-naming-style.
394 | #class-attribute-rgx=
395 |
396 | # Naming style matching correct class names.
397 | class-naming-style=PascalCase
398 |
399 | # Regular expression matching correct class names. Overrides class-naming-
400 | # style.
401 | #class-rgx=
402 |
403 | # Naming style matching correct constant names.
404 | const-naming-style=UPPER_CASE
405 |
406 | # Regular expression matching correct constant names. Overrides const-naming-
407 | # style.
408 | #const-rgx=
409 |
410 | # Minimum line length for functions/classes that require docstrings, shorter
411 | # ones are exempt.
412 | docstring-min-length=-1
413 |
414 | # Naming style matching correct function names.
415 | function-naming-style=snake_case
416 |
417 | # Regular expression matching correct function names. Overrides function-
418 | # naming-style.
419 | #function-rgx=
420 |
421 | # Good variable names which should always be accepted, separated by a comma.
422 | good-names=i,
423 | j,
424 | k,
425 | ex,
426 | Run,
427 | _
428 |
429 | # Good variable names regexes, separated by a comma. If names match any regex,
430 | # they will always be accepted
431 | good-names-rgxs=
432 |
433 | # Include a hint for the correct naming format with invalid-name.
434 | include-naming-hint=no
435 |
436 | # Naming style matching correct inline iteration names.
437 | inlinevar-naming-style=any
438 |
439 | # Regular expression matching correct inline iteration names. Overrides
440 | # inlinevar-naming-style.
441 | #inlinevar-rgx=
442 |
443 | # Naming style matching correct method names.
444 | method-naming-style=snake_case
445 |
446 | # Regular expression matching correct method names. Overrides method-naming-
447 | # style.
448 | #method-rgx=
449 |
450 | # Naming style matching correct module names.
451 | module-naming-style=snake_case
452 |
453 | # Regular expression matching correct module names. Overrides module-naming-
454 | # style.
455 | #module-rgx=
456 |
457 | # Colon-delimited sets of names that determine each other's naming style when
458 | # the name regexes allow several styles.
459 | name-group=
460 |
461 | # Regular expression which should only match function or class names that do
462 | # not require a docstring.
463 | no-docstring-rgx=^_
464 |
465 | # List of decorators that produce properties, such as abc.abstractproperty. Add
466 | # to this list to register other decorators that produce valid properties.
467 | # These decorators are taken in consideration only for invalid-name.
468 | property-classes=abc.abstractproperty
469 |
470 | # Naming style matching correct variable names.
471 | variable-naming-style=snake_case
472 |
473 | # Regular expression matching correct variable names. Overrides variable-
474 | # naming-style.
475 | #variable-rgx=
476 |
477 |
478 | [STRING]
479 |
480 | # This flag controls whether inconsistent-quotes generates a warning when the
481 | # character used as a quote delimiter is used inconsistently within a module.
482 | check-quote-consistency=no
483 |
484 | # This flag controls whether the implicit-str-concat should generate a warning
485 | # on implicit string concatenation in sequences defined over several lines.
486 | check-str-concat-over-line-jumps=no
487 |
488 |
489 | [IMPORTS]
490 |
491 | # List of modules that can be imported at any level, not just the top level
492 | # one.
493 | allow-any-import-level=
494 |
495 | # Allow wildcard imports from modules that define __all__.
496 | allow-wildcard-with-all=no
497 |
498 | # Analyse import fallback blocks. This can be used to support both Python 2 and
499 | # 3 compatible code, which means that the block might have code that exists
500 | # only in one or another interpreter, leading to false positives when analysed.
501 | analyse-fallback-blocks=no
502 |
503 | # Deprecated modules which should not be used, separated by a comma.
504 | deprecated-modules=optparse,tkinter.tix
505 |
506 | # Create a graph of external dependencies in the given file (report RP0402 must
507 | # not be disabled).
508 | ext-import-graph=
509 |
510 | # Create a graph of every (i.e. internal and external) dependencies in the
511 | # given file (report RP0402 must not be disabled).
512 | import-graph=
513 |
514 | # Create a graph of internal dependencies in the given file (report RP0402 must
515 | # not be disabled).
516 | int-import-graph=
517 |
518 | # Force import order to recognize a module as part of the standard
519 | # compatibility libraries.
520 | known-standard-library=
521 |
522 | # Force import order to recognize a module as part of a third party library.
523 | known-third-party=enchant
524 |
525 | # Couples of modules and preferred modules, separated by a comma.
526 | preferred-modules=
527 |
528 |
529 | [CLASSES]
530 |
531 | # List of method names used to declare (i.e. assign) instance attributes.
532 | defining-attr-methods=__init__,
533 | __new__,
534 | setUp,
535 | __post_init__
536 |
537 | # List of member names, which should be excluded from the protected access
538 | # warning.
539 | exclude-protected=_asdict,
540 | _fields,
541 | _replace,
542 | _source,
543 | _make
544 |
545 | # List of valid names for the first argument in a class method.
546 | valid-classmethod-first-arg=cls
547 |
548 | # List of valid names for the first argument in a metaclass class method.
549 | valid-metaclass-classmethod-first-arg=cls
550 |
551 |
552 | [DESIGN]
553 |
554 | # Maximum number of arguments for function / method.
555 | max-args=5
556 |
557 | # Maximum number of attributes for a class (see R0902).
558 | max-attributes=7
559 |
560 | # Maximum number of boolean expressions in an if statement (see R0916).
561 | max-bool-expr=5
562 |
563 | # Maximum number of branch for function / method body.
564 | max-branches=12
565 |
566 | # Maximum number of locals for function / method body.
567 | max-locals=15
568 |
569 | # Maximum number of parents for a class (see R0901).
570 | max-parents=7
571 |
572 | # Maximum number of public methods for a class (see R0904).
573 | max-public-methods=20
574 |
575 | # Maximum number of return / yield for function / method body.
576 | max-returns=6
577 |
578 | # Maximum number of statements in function / method body.
579 | max-statements=50
580 |
581 | # Minimum number of public methods for a class (see R0903).
582 | min-public-methods=2
583 |
584 |
585 | [EXCEPTIONS]
586 |
587 | # Exceptions that will emit a warning when being caught. Defaults to
588 | # "BaseException, Exception".
589 | overgeneral-exceptions=BaseException,
590 | Exception
591 |
--------------------------------------------------------------------------------
/tests/test_runstats.py:
--------------------------------------------------------------------------------
1 | """Test runstats module.
2 |
3 | """
4 |
5 | import copy
6 | import math
7 | import pickle
8 | import random
9 |
10 | import pytest
11 |
12 | from runstats import ExponentialStatistics as FastExponentialStatistics
13 | from runstats import Regression as FastRegression
14 | from runstats import Statistics as FastStatistics
15 | from runstats.core import ExponentialStatistics as CoreExponentialStatistics
16 | from runstats.core import Regression as CoreRegression
17 | from runstats.core import Statistics as CoreStatistics
18 |
# Relative-error tolerance used by the assertions below.
limit = 1e-2
# Number of random samples generated for each statistical test.
count = 1000
21 |
22 |
def mean(values):
    """Arithmetic mean of `values`."""
    total = sum(values)
    return total / len(values)
25 |
26 |
def variance(values, ddof=1.0):
    """Variance of `values` with `ddof` degrees of freedom."""
    center = mean(values)
    squared_diffs = sum((value - center) ** 2 for value in values)
    return squared_diffs / (len(values) - ddof)
30 |
31 |
def stddev(values, ddof=1.0):
    """Standard deviation of `values` with `ddof` degrees of freedom."""
    var = variance(values, ddof)
    return var ** 0.5
34 |
35 |
def skewness(values):
    """Skewness of `values` (population moments)."""
    center = mean(values)
    size = len(values)
    third_moment = sum((value - center) ** 3 for value in values) / size
    second_moment = sum((value - center) ** 2 for value in values) / size
    return third_moment / second_moment ** 1.5
43 |
44 |
def kurtosis(values):
    """Excess kurtosis of `values` (population moments, minus 3)."""
    center = mean(values)
    size = len(values)
    fourth_moment = sum((value - center) ** 4 for value in values) / size
    second_moment = sum((value - center) ** 2 for value in values) / size
    return fourth_moment / second_moment ** 2 - 3
51 |
52 |
def error(value, test):
    """Relative error of `test` against the reference `value`."""
    difference = test - value
    return abs(difference / value)
55 |
56 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_statistics(Statistics, Regression):
    """Statistics agrees with the naive helpers; clear, `+`, and `+=` work."""
    random.seed(0)
    alpha = [random.random() for _ in range(count)]

    alpha_stats = Statistics()

    for val in alpha:
        alpha_stats.push(val)

    # The pushed summary must agree with the naive formulas above.
    assert len(alpha_stats) == count
    assert error(mean(alpha), alpha_stats.mean()) < limit
    assert error(variance(alpha, 0.0), alpha_stats.variance(0.0)) < limit
    assert error(variance(alpha, 1.0), alpha_stats.variance(1.0)) < limit
    assert error(stddev(alpha, 0.0), alpha_stats.stddev(0.0)) < limit
    assert error(stddev(alpha, 1.0), alpha_stats.stddev(1.0)) < limit
    assert error(skewness(alpha), alpha_stats.skewness()) < limit
    assert error(kurtosis(alpha), alpha_stats.kurtosis()) < limit
    assert alpha_stats.minimum() == min(alpha)
    assert alpha_stats.maximum() == max(alpha)

    # clear() resets the summary to empty.
    alpha_stats.clear()

    assert len(alpha_stats) == 0

    alpha_stats = Statistics(alpha)

    beta = [random.random() for _ in range(count)]

    beta_stats = Statistics()

    for val in beta:
        beta_stats.push(val)

    # `+` combines summaries as if all values went into one summary.
    gamma_stats = alpha_stats + beta_stats

    assert len(beta_stats) != len(gamma_stats)
    assert error(mean(alpha + beta), gamma_stats.mean()) < limit
    assert (
        error(variance(alpha + beta, 1.0), gamma_stats.variance(1.0)) < limit
    )
    assert (
        error(variance(alpha + beta, 0.0), gamma_stats.variance(0.0)) < limit
    )
    assert error(stddev(alpha + beta, 1.0), gamma_stats.stddev(1.0)) < limit
    assert error(stddev(alpha + beta, 0.0), gamma_stats.stddev(0.0)) < limit
    assert error(skewness(alpha + beta), gamma_stats.skewness()) < limit
    assert error(kurtosis(alpha + beta), gamma_stats.kurtosis()) < limit
    assert gamma_stats.minimum() == min(alpha + beta)
    assert gamma_stats.maximum() == max(alpha + beta)

    # `+=` on a copy must match the `+` result.
    delta_stats = beta_stats.copy()
    delta_stats += alpha_stats

    assert len(beta_stats) != len(delta_stats)
    assert error(mean(alpha + beta), delta_stats.mean()) < limit
    assert (
        error(variance(alpha + beta, 1.0), delta_stats.variance(1.0)) < limit
    )
    assert (
        error(variance(alpha + beta, 0.0), delta_stats.variance(0.0)) < limit
    )
    assert error(stddev(alpha + beta, 1.0), delta_stats.stddev(1.0)) < limit
    assert error(stddev(alpha + beta, 0.0), delta_stats.stddev(0.0)) < limit
    assert error(skewness(alpha + beta), delta_stats.skewness()) < limit
    assert error(kurtosis(alpha + beta), delta_stats.kurtosis()) < limit
    assert delta_stats.minimum() == min(alpha + beta)
    assert delta_stats.maximum() == max(alpha + beta)
131 |
132 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_exponential_statistics(ExponentialStatistics):
    """Exercise ExponentialStatistics: convergence, clear, equality, decay."""
    random.seed(0)
    alpha = [random.random() for _ in range(count)]
    big_alpha = [random.random() for _ in range(count * 100)]

    # With a slow decay (0.9999), both a zero-initialized summary and a
    # pre-seeded one converge toward the plain sample statistics.
    alpha_exp_stats_zero = ExponentialStatistics(0.9999)
    alpha_exp_stats_init = ExponentialStatistics(
        decay=0.9999,
        mean=mean(alpha),
        variance=variance(alpha, 0),
    )

    for val in big_alpha:
        alpha_exp_stats_zero.push(val)
        alpha_exp_stats_init.push(val)

    assert error(mean(big_alpha), alpha_exp_stats_zero.mean()) < limit
    assert error(mean(big_alpha), alpha_exp_stats_init.mean()) < limit
    assert (
        error(variance(big_alpha, 0), alpha_exp_stats_zero.variance()) < limit
    )
    assert (
        error(variance(big_alpha, 0), alpha_exp_stats_init.variance()) < limit
    )
    assert error(stddev(big_alpha, 0), alpha_exp_stats_zero.stddev()) < limit
    assert error(stddev(big_alpha, 0), alpha_exp_stats_init.stddev()) < limit

    # After clearing (with different initial states) and re-pushing the
    # same data at decay 0.1, the two summaries must agree with each other.
    alpha_exp_stats_zero.clear()
    alpha_exp_stats_zero.decay = 0.1
    alpha_exp_stats_init.clear(
        decay=0.1, mean=mean(alpha), variance=variance(alpha, 0)
    )

    for val in big_alpha:
        alpha_exp_stats_zero.push(val)
        alpha_exp_stats_init.push(val)

    assert (
        error(alpha_exp_stats_zero.mean(), alpha_exp_stats_init.mean()) < limit
    )
    assert (
        error(alpha_exp_stats_zero.variance(), alpha_exp_stats_init.variance())
        < limit
    )
    assert (
        error(alpha_exp_stats_zero.stddev(), alpha_exp_stats_init.stddev())
        < limit
    )

    # Equality compares current state; two summaries fed the same recent
    # values at the same decay compare equal despite different histories.
    alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha)
    beta = [random.random() * 2 for _ in range(count)]
    beta_exp_stats = ExponentialStatistics(0.1)

    assert alpha_exp_stats != beta_exp_stats

    for val in beta:
        alpha_exp_stats.push(val)
        beta_exp_stats.push(val)

    assert alpha_exp_stats == beta_exp_stats

    for val in alpha:
        alpha_exp_stats.push(val)
        beta_exp_stats.push(val)

    assert alpha_exp_stats == beta_exp_stats

    # With decay very close to 1.0, a few new values barely move the summary.
    current_mean = alpha_exp_stats.mean()
    current_variance = alpha_exp_stats.variance()
    alpha_exp_stats.decay = 0.99999999

    for val in range(10):
        alpha_exp_stats.push(val)

    assert (error(current_mean, alpha_exp_stats.mean())) < limit
    assert (error(current_variance, alpha_exp_stats.variance())) < limit

    # With a small decay, the same few values move it substantially.
    alpha_exp_stats.decay = 0.1

    for val in range(10):
        alpha_exp_stats.push(val)

    assert (error(current_mean, alpha_exp_stats.mean())) > limit
    assert (error(current_variance, alpha_exp_stats.variance())) > limit
221 |
222 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_bad_decay(ExponentialStatistics):
    """Out-of-range decay values raise ValueError at construction."""
    for bad_decay in (2.0, -1.0):
        with pytest.raises(ValueError):
            ExponentialStatistics(decay=bad_decay)
232 |
233 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_add_statistics(Statistics, Regression):
    """Adding an empty Statistics is an identity; `+=` also works."""
    empty = Statistics()
    loaded = Statistics(range(10))
    assert (empty + loaded) == loaded
    assert (loaded + empty) == loaded
    empty += loaded
247 |
248 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_add_exponential_statistics(ExponentialStatistics):
    """Adding an empty summary is an identity; `*` rejects non-numbers."""
    empty = ExponentialStatistics(0.9)
    loaded = ExponentialStatistics(0.9, iterable=range(10))
    assert (empty + loaded) == loaded
    assert (loaded + empty) == loaded
    empty += loaded
    empty *= 2
    with pytest.raises(TypeError):
        empty * object()
    with pytest.raises(TypeError):
        object() * empty
264 |
265 |
def correlation(values):
    """Return the Pearson correlation coefficient of (x, y) pairs.

    Reference implementation used to cross-check ``Regression.correlation``.
    """
    size = len(values)
    xs, ys = zip(*values)
    mean_x = sum(xs) / size
    mean_y = sum(ys) / size
    mean_xy = sum(x_val * y_val for x_val, y_val in values) / size
    mean_x2 = sum(x_val ** 2 for x_val in xs) / size
    mean_y2 = sum(y_val ** 2 for y_val in ys) / size
    covariance = mean_xy - mean_x * mean_y
    spread = ((mean_x2 - mean_x ** 2) * (mean_y2 - mean_y ** 2)) ** 0.5
    return covariance / spread
275 |
276 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_regression(Statistics, Regression):
    """Fit a noisy line and verify slope, intercept, and correlation,
    then exercise copy, addition, in-place addition, and clear."""
    random.seed(0)
    true_slope, true_intercept, noise = 5.0, 10.0, 1.0

    samples = [
        (num, true_slope * num + true_intercept + noise * (0.5 - random.random()))
        for num in range(count)
    ]

    fitted = Regression()
    for x_val, y_val in samples:
        fitted.push(x_val, y_val)

    # The fit recovers the generating parameters within tolerance.
    assert error(true_slope, fitted.slope()) < limit
    assert error(true_intercept, fitted.intercept()) < limit
    assert error(correlation(samples), fitted.correlation()) < limit

    fitted_copy = fitted.copy()

    extra_samples = [
        (num, true_slope * num + true_intercept + noise * (0.5 - random.random()))
        for num in range(count, 2 * count)
    ]
    for x_val, y_val in extra_samples:
        fitted_copy.push(x_val, y_val)

    fitted_extra = Regression(extra_samples)

    # Pushing extra points and adding regressions must agree.
    fitted_sum = fitted + fitted_extra

    assert len(fitted_copy) == len(fitted_sum) == (2 * count)
    assert error(fitted_copy.slope(), fitted_sum.slope()) < limit
    assert error(fitted_copy.intercept(), fitted_sum.intercept()) < limit
    assert error(fitted_copy.correlation(), fitted_sum.correlation()) < limit

    # In-place addition matches the binary operator behavior.
    fitted += fitted_extra

    assert len(fitted) == len(fitted_copy) == (2 * count)
    assert error(fitted.slope(), fitted_copy.slope()) < limit
    assert error(fitted.intercept(), fitted_copy.intercept()) < limit
    assert error(fitted.correlation(), fitted_copy.correlation()) < limit

    # Clearing resets the accumulated count.
    fitted.clear()

    assert len(fitted) == 0
331 |
332 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_get_set_state_statistics(Statistics, Regression):
    """A get_state/set_state snapshot replays to identical moments."""
    random.seed(0)
    tail = -10
    numbers = [random.random() for _ in range(count)]

    # Accumulate all but the last ten values, then snapshot.
    original = Statistics(numbers[:tail])
    snapshot = original.get_state()

    for value in numbers[tail:]:
        original.push(value)

    # Restore from the snapshot and replay the same trailing values.
    restored = Statistics()
    restored.set_state(snapshot)

    for value in numbers[tail:]:
        restored.push(value)

    assert original.mean() == restored.mean()
    assert original.variance() == restored.variance()
    assert original.minimum() == restored.minimum()
    assert original.maximum() == restored.maximum()
    assert original.kurtosis() == restored.kurtosis()
    assert original.skewness() == restored.skewness()

    # fromstate is the classmethod counterpart of set_state.
    assert original == Statistics.fromstate(original.get_state())
365 |
366 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_get_set_state_exponential_statistics(ExponentialStatistics):
    """set_state restores both moments and decay; decay stays mutable."""
    random.seed(0)
    numbers = [random.random() for _ in range(count)]
    exp_stats = ExponentialStatistics(iterable=numbers)
    saved_state = exp_stats.get_state()

    restored = ExponentialStatistics(0.8)
    assert exp_stats != restored
    assert restored.decay == 0.8
    restored.set_state(saved_state)
    # The snapshot carries the source's decay (the 0.9 default here).
    assert restored.decay == 0.9
    assert exp_stats == restored
    restored.decay = 0.1
    assert exp_stats != restored
    # Changing decay alone leaves the accumulated moments untouched.
    assert exp_stats.mean() == restored.mean()
    assert exp_stats.variance() == restored.variance()
    assert restored.decay == 0.1

    # fromstate is the classmethod counterpart of set_state.
    assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state())
390 |
391 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_get_set_state_regression(Statistics, Regression):
    """A regression state snapshot replays to identical fit results."""
    random.seed(0)
    tail = -10
    slope, intercept, noise = 5.0, 10.0, 20.0
    samples = [
        (num, slope * num + intercept + noise * (0.5 - random.random()))
        for num in range(count)
    ]

    # Accumulate all but the last ten points, then snapshot.
    original = Regression(samples[:tail])
    snapshot = original.get_state()

    for x_val, y_val in samples[tail:]:
        original.push(x_val, y_val)

    # Restore from the snapshot and replay the same trailing points.
    restored = Regression()
    restored.set_state(snapshot)

    for x_val, y_val in samples[tail:]:
        restored.push(x_val, y_val)

    assert original.slope() == restored.slope()
    assert original.intercept() == restored.intercept()
    assert original.correlation() == restored.correlation()

    # fromstate is the classmethod counterpart of set_state.
    assert original == Regression.fromstate(original.get_state())
425 |
426 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_pickle_statistics(Statistics, Regression):
    """Statistics survive a pickle round-trip on every protocol.

    Fix: ``range(pickle.HIGHEST_PROTOCOL)`` was off by one and never
    exercised the highest protocol itself; ``+ 1`` includes it.
    """
    stats = Statistics(range(10))
    for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
        pickled_stats = pickle.dumps(stats, protocol=protocol)
        unpickled_stats = pickle.loads(pickled_stats)
        assert stats == unpickled_stats, 'protocol: %s' % protocol
440 |
441 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_pickle_exponential_statistics(ExponentialStatistics):
    """Exponential statistics survive a pickle round-trip on every protocol.

    Fix: ``range(pickle.HIGHEST_PROTOCOL)`` was off by one and never
    exercised the highest protocol itself; ``+ 1`` includes it.
    """
    exp_stats = ExponentialStatistics(0.9, iterable=range(10))
    for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
        pickled_exp_stats = pickle.dumps(exp_stats, protocol=protocol)
        unpickled_exp_stats = pickle.loads(pickled_exp_stats)
        assert exp_stats == unpickled_exp_stats, 'protocol: %s' % protocol
452 |
453 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_pickle_regression(Statistics, Regression):
    """Regressions survive a pickle round-trip on every protocol.

    Fix: ``range(pickle.HIGHEST_PROTOCOL)`` was off by one and never
    exercised the highest protocol itself; ``+ 1`` includes it.
    """
    regr = Regression(enumerate(range(10)))
    for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
        pickled_regr = pickle.dumps(regr, protocol=protocol)
        unpickled_regr = pickle.loads(pickled_regr)
        assert regr == unpickled_regr, 'protocol: %s' % protocol
467 |
468 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_copy_statistics(Statistics, Regression):
    """Shallow and deep copies compare equal to the source statistics."""
    stats = Statistics(range(10))
    shallow = copy.copy(stats)
    deep = copy.deepcopy(stats)
    assert stats == shallow
    assert stats == deep
482 |
483 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_copy_exponential_statistics(ExponentialStatistics):
    """Shallow and deep copies compare equal to the source object."""
    exp_stats = ExponentialStatistics(0.9, iterable=range(10))
    shallow = copy.copy(exp_stats)
    deep = copy.deepcopy(exp_stats)
    assert exp_stats == shallow
    assert exp_stats == deep
494 |
495 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_copy_regression(Statistics, Regression):
    """Shallow and deep copies compare equal to the source regression."""
    regr = Regression(enumerate(range(10)))
    shallow = copy.copy(regr)
    deep = copy.deepcopy(regr)
    assert regr == shallow
    assert regr == deep
509 |
510 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_equality_statistics(Statistics, Regression):
    """Identical pushes give equal objects; one extra push breaks equality."""
    left = Statistics(range(10))
    right = Statistics(range(10))
    assert left == right
    right.push(42)
    assert left != right
524 |
525 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_equality_exponential_statistics(ExponentialStatistics):
    """Identical pushes give equal objects; one extra push breaks equality."""
    left = ExponentialStatistics(0.9, iterable=range(10))
    right = ExponentialStatistics(0.9, iterable=range(10))
    assert left == right
    right.push(42)
    assert left != right
536 |
537 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_equality_regression(Statistics, Regression):
    """Identical pushes give equal objects; one extra push breaks equality."""
    left = Regression(enumerate(range(10)))
    right = Regression(enumerate(range(10)))
    assert left == right
    right.push(42, 42)
    assert left != right
551 |
552 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_sum_stats_count0(Statistics, Regression):
    """Adding two empty statistics objects yields an empty result."""
    combined = Statistics() + Statistics()
    assert len(combined) == 0
565 |
566 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_sum_regr_count0(Statistics, Regression):
    """Adding two empty regression objects yields an empty result."""
    combined = Regression() + Regression()
    assert len(combined) == 0
579 |
580 |
@pytest.mark.parametrize(
    'Statistics,Regression',
    [
        (CoreStatistics, CoreRegression),
        (FastStatistics, FastRegression),
    ],
)
def test_multiply(Statistics, Regression):
    """Scalar multiplication scales the weight but preserves the moments."""
    base = Statistics(range(10))
    doubled = Statistics(range(10)) * 2
    quadrupled = 2 * doubled
    # Multiplication scales the effective count ...
    assert len(doubled) == 2 * len(base)
    assert len(quadrupled) == 2 * len(doubled)
    # ... while mean, minimum, and maximum are unchanged.
    assert base.mean() == doubled.mean()
    assert base.mean() == quadrupled.mean()
    assert base.minimum() == doubled.minimum()
    assert base.maximum() == doubled.maximum()
    assert base.minimum() == quadrupled.minimum()
    assert base.maximum() == quadrupled.maximum()
    # Doubling is equivalent to adding the object to itself.
    assert (base + base).variance() == doubled.variance()
    assert (base + base).kurtosis() == doubled.kurtosis()
    assert (base + base).skewness() == doubled.skewness()
    assert (doubled + doubled).variance() == quadrupled.variance()
    assert (doubled + doubled).kurtosis() == quadrupled.kurtosis()
    assert (doubled + doubled).skewness() == quadrupled.skewness()
    assert (2 * doubled) == quadrupled
    # In-place multiplication matches the binary operator.
    base *= 4
    assert base == quadrupled
    # Non-integer scalars are accepted; the mean is still preserved.
    scaled = math.e * base
    assert base.mean() == scaled.mean()
    # Non-numeric operands raise on either side.
    with pytest.raises(TypeError):
        base * object()
    with pytest.raises(TypeError):
        object() * base
615 |
616 |
@pytest.mark.parametrize(
    'ExponentialStatistics',
    [CoreExponentialStatistics, FastExponentialStatistics],
)
def test_expoential_batch(ExponentialStatistics):
    # NOTE(review): "expoential" is a typo for "exponential"; the name is
    # kept so the test id (and any ``-k`` selections) stays stable.
    """A weighted sum of exponential statistics blends mean and variance
    and keeps the left-hand operand's decay."""
    random.seed(0)

    first_vals = [random.random() for _ in range(count)]
    second_vals = [random.random() * 2 for _ in range(count)]

    first_stats = ExponentialStatistics(0.1, iterable=first_vals)
    second_stats = ExponentialStatistics(0.9, iterable=second_vals)

    blended = first_stats * 0.3 + second_stats * 0.7

    expected_mean = first_stats.mean() * 0.3 + second_stats.mean() * 0.7
    assert expected_mean == blended.mean()

    expected_variance = (
        first_stats.variance() * 0.3 + second_stats.variance() * 0.7
    )
    assert expected_variance == blended.variance()
    # The blend inherits the decay of the left-hand operand only.
    assert first_stats._decay == blended._decay
    assert second_stats._decay != blended._decay
641 |
--------------------------------------------------------------------------------