├── tests ├── __init__.py ├── RunningStats.h ├── RunningRegression.h ├── main.cpp ├── RunningRegression.cpp ├── RunningStats.cpp ├── benchmark.py ├── __main__.py └── test_runstats.py ├── MANIFEST.in ├── mypy.ini ├── docs ├── index.rst ├── _static │ └── gj-logo.png ├── _templates │ └── gumroad.html ├── api.rst ├── Makefile ├── make.bat └── conf.py ├── requirements.txt ├── .gitignore ├── runstats ├── __init__.py ├── core.pxd └── core.py ├── LICENSE ├── .github └── workflows │ ├── release.yml │ └── integration.yml ├── tox.ini ├── setup.py ├── README.rst └── .pylintrc /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst LICENSE 2 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-runstats._core] 4 | ignore_missing_imports = True 5 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :hidden: 5 | 6 | api 7 | -------------------------------------------------------------------------------- /docs/_static/gj-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grantjenks/python-runstats/HEAD/docs/_static/gj-logo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 
2 | blue 3 | coverage 4 | cython 5 | doc8 6 | flake8 7 | isort 8 | mypy 9 | pylint 10 | pytest 11 | pytest-cov 12 | pytest-xdist 13 | rstcheck 14 | sphinx 15 | tox 16 | twine 17 | wheel 18 | -------------------------------------------------------------------------------- /docs/_templates/gumroad.html: -------------------------------------------------------------------------------- 1 |

Give Support

2 |

If you or your organization uses RunStats, consider financial support:

3 |

4 | Give to Python RunStats 5 |

6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated files 2 | *.py[co] 3 | *.c 4 | *.so 5 | 6 | # Virtual environment directories 7 | /env*/ 8 | 9 | # Test files and directories 10 | .coverage 11 | .pytest_cache/ 12 | /.tox/ 13 | 14 | # Setup directories 15 | /build/ 16 | /dist/ 17 | /runstats.egg-info/ 18 | /docs/_build/ 19 | 20 | # macOS metadata 21 | .DS_Store 22 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: runstats 2 | 3 | 4 | Statistics 5 | .......... 6 | 7 | .. autoclass:: runstats.Statistics 8 | :members: 9 | :special-members: 10 | 11 | 12 | Regression 13 | .......... 14 | 15 | .. autoclass:: runstats.Regression 16 | :members: 17 | :special-members: 18 | 19 | 20 | ExponentialStatistics 21 | ..................... 22 | 23 | .. autoclass:: runstats.ExponentialStatistics 24 | :members: 25 | :special-members: 26 | -------------------------------------------------------------------------------- /runstats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python RunStats API 3 | =================== 4 | 5 | Online statistics and regression. 
6 | 7 | """ 8 | 9 | try: 10 | from ._core import ExponentialStatistics, Regression, Statistics 11 | except ImportError: # pragma: no cover 12 | from .core import ExponentialStatistics, Regression, Statistics 13 | 14 | __all__ = ['Statistics', 'Regression', 'ExponentialStatistics'] 15 | __title__ = 'runstats' 16 | __version__ = '2.0.0' 17 | __author__ = 'Grant Jenks' 18 | __license__ = 'Apache 2.0' 19 | __copyright__ = '2013-2021, Grant Jenks' 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2013-2021 Grant Jenks 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use 4 | this file except in compliance with the License. You may obtain a copy of the 5 | License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software distributed 10 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 11 | CONDITIONS OF ANY KIND, either express or implied. See the License for the 12 | specific language governing permissions and limitations under the License. 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tests/RunningStats.h: -------------------------------------------------------------------------------- 1 | // Copied from https://www.johndcook.com/blog/skewness_kurtosis/ 2 | 3 | #ifndef RUNNINGSTATS_H 4 | #define RUNNINGSTATS_H 5 | 6 | class RunningStats 7 | { 8 | public: 9 | RunningStats(); 10 | void Clear(); 11 | void Push(double x); 12 | long long NumDataValues() const; 13 | double Mean() const; 14 | double Variance() const; 15 | double StandardDeviation() const; 16 | double Skewness() const; 17 | double Kurtosis() const; 18 | 19 | friend RunningStats operator+(const RunningStats a, const RunningStats b); 20 | RunningStats& operator+=(const RunningStats &rhs); 21 | 22 | private: 23 | long long n; 24 | double M1, M2, M3, M4; 25 | }; 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /tests/RunningRegression.h: -------------------------------------------------------------------------------- 1 | // Copied from https://www.johndcook.com/blog/running_regression/ 2 | 3 | #ifndef RUNNINGREGRESSION 4 | #define RUNNINGREGRESSION 5 | 6 | #include "RunningStats.h" 7 | 8 | class RunningRegression 9 | { 10 | public: 11 | RunningRegression(); 12 | void Clear(); 13 | void Push(double x, double y); 14 | long long NumDataValues() const; 15 | double Slope() const; 16 | double Intercept() const; 17 | double Correlation() const; 18 | 19 | friend RunningRegression operator+( 20 | const RunningRegression a, const RunningRegression b); 21 | RunningRegression& operator+=(const RunningRegression &rhs); 22 | 23 | private: 24 | RunningStats x_stats; 25 | RunningStats y_stats; 26 | double S_xy; 27 | long long n; 28 | }; 29 | 30 | #endif 31 | 
-------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /tests/main.cpp: -------------------------------------------------------------------------------- 1 | // Test program for C++ counterpart. 
2 | // 3 | // Compile with: 4 | // 5 | // $ g++ main.cpp RunningStats.cpp RunningRegression.cpp 6 | // 7 | // Test with: 8 | // 9 | // $ ./a.out 5 4 3 2 1 10 | // $ python -m tests 5 4 3 2 1 11 | // 12 | 13 | #include 14 | #include 15 | 16 | #include "RunningStats.h" 17 | #include "RunningRegression.h" 18 | 19 | int main(int argc, char ** argv) 20 | { 21 | RunningStats stats = RunningStats(); 22 | 23 | for (int index = 1; index < argc; index += 1) 24 | { 25 | double value = std::stod(std::string(argv[index])); 26 | stats.Push(value); 27 | } 28 | 29 | printf("Statistics\n"); 30 | printf("Count: %lld\n", stats.NumDataValues()); 31 | printf("Mean: %f\n", stats.Mean()); 32 | printf("Variance: %f\n", stats.Variance()); 33 | printf("StdDev: %f\n", stats.StandardDeviation()); 34 | printf("Skewness: %f\n", stats.Skewness()); 35 | printf("Kurtosis: %f\n", stats.Kurtosis()); 36 | 37 | RunningRegression regr = RunningRegression(); 38 | 39 | for (int index = 1; index < argc; index += 1) 40 | { 41 | double value = std::stod(std::string(argv[index])); 42 | regr.Push(index, value); 43 | } 44 | 45 | printf("\n"); 46 | printf("Regression\n"); 47 | printf("Count: %lld\n", regr.NumDataValues()); 48 | printf("Slope: %f\n", regr.Slope()); 49 | printf("Intercept: %f\n", regr.Intercept()); 50 | printf("Correlation: %f\n", regr.Correlation()); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | 10 | builds: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-20.04, windows-2019, macos-10.15] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: actions/setup-python@v2 20 | 21 | - name: Install cibuildwheel 22 | run: python -m pip install cibuildwheel==1.11.1.post1 23 | 24 | - name: Build wheels 25 | run: 
python -m cibuildwheel --output-dir wheelhouse 26 | env: 27 | CIBW_BEFORE_BUILD: pip install cython 28 | CIBW_TEST_REQUIRES: pytest pytest-cov pytest-xdist 29 | CIBW_TEST_COMMAND: pytest {project}/tests && python {project}/tests/benchmark.py 30 | CIBW_SKIP: pp* 31 | 32 | - uses: actions/upload-artifact@v2 33 | with: 34 | name: wheelhouse 35 | path: ./wheelhouse/*.whl 36 | 37 | upload: 38 | needs: builds 39 | runs-on: ubuntu-latest 40 | 41 | steps: 42 | - uses: actions/checkout@v2 43 | 44 | - name: Set up Python 45 | uses: actions/setup-python@v2 46 | with: 47 | python-version: 3.9 48 | 49 | - name: Install dependencies 50 | run: | 51 | pip install --upgrade pip 52 | pip install -r requirements.txt 53 | 54 | - name: Create source dist 55 | run: python setup.py sdist 56 | 57 | - name: Stage wheels 58 | uses: actions/download-artifact@v2 59 | with: 60 | name: wheelhouse 61 | path: wheelhouse 62 | - run: mv -v wheelhouse/* dist/ 63 | 64 | - name: Publish package 65 | uses: pypa/gh-action-pypi-publish@release/v1 66 | with: 67 | user: __token__ 68 | password: ${{ secrets.PYPI_API_TOKEN }} 69 | -------------------------------------------------------------------------------- /tests/RunningRegression.cpp: -------------------------------------------------------------------------------- 1 | // Copied from https://www.johndcook.com/blog/running_regression/ 2 | 3 | #include "RunningRegression.h" 4 | 5 | RunningRegression::RunningRegression() 6 | { 7 | Clear(); 8 | } 9 | 10 | void RunningRegression::Clear() 11 | { 12 | x_stats.Clear(); 13 | y_stats.Clear(); 14 | S_xy = 0.0; 15 | n = 0; 16 | } 17 | 18 | void RunningRegression::Push(double x, double y) 19 | { 20 | S_xy += (x_stats.Mean() -x)*(y_stats.Mean() - y)*double(n)/double(n+1); 21 | 22 | x_stats.Push(x); 23 | y_stats.Push(y); 24 | n++; 25 | } 26 | 27 | long long RunningRegression::NumDataValues() const 28 | { 29 | return n; 30 | } 31 | 32 | double RunningRegression::Slope() const 33 | { 34 | double S_xx = 
x_stats.Variance()*(n - 1.0); 35 | 36 | return S_xy / S_xx; 37 | } 38 | 39 | double RunningRegression::Intercept() const 40 | { 41 | return y_stats.Mean() - Slope()*x_stats.Mean(); 42 | } 43 | 44 | double RunningRegression::Correlation() const 45 | { 46 | double t = x_stats.StandardDeviation() * y_stats.StandardDeviation(); 47 | return S_xy / ( (n-1) * t ); 48 | } 49 | 50 | RunningRegression operator+(const RunningRegression a, const RunningRegression b) 51 | { 52 | RunningRegression combined; 53 | 54 | combined.x_stats = a.x_stats + b.x_stats; 55 | combined.y_stats = a.y_stats + b.y_stats; 56 | combined.n = a.n + b.n; 57 | 58 | double delta_x = b.x_stats.Mean() - a.x_stats.Mean(); 59 | double delta_y = b.y_stats.Mean() - a.y_stats.Mean(); 60 | combined.S_xy = a.S_xy + b.S_xy + 61 | double(a.n*b.n)*delta_x*delta_y/double(combined.n); 62 | 63 | return combined; 64 | } 65 | 66 | RunningRegression& RunningRegression::operator+=(const RunningRegression &rhs) 67 | { 68 | RunningRegression combined = *this + rhs; 69 | *this = combined; 70 | return *this; 71 | } 72 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=bluecheck,doc8,docs,isortcheck,flake8,mypy,pylint,rstcheck,py36,py37,py38,py39 3 | skip_missing_interpreters=True 4 | 5 | [testenv] 6 | commands=pytest 7 | deps= 8 | pytest 9 | pytest-cov 10 | pytest-xdist 11 | 12 | [testenv:blue] 13 | commands=blue {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests 14 | deps=blue 15 | 16 | [testenv:bluecheck] 17 | commands=blue --check {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests 18 | deps=blue 19 | 20 | [testenv:doc8] 21 | deps=doc8 22 | commands=doc8 docs --ignore-path docs/_build 23 | 24 | [testenv:docs] 25 | allowlist_externals=make 26 | changedir=docs 27 | commands=make html 28 | deps= 29 | sphinx 30 | 31 | [testenv:flake8] 32 | commands=flake8 
{toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests 33 | deps=flake8 34 | 35 | [testenv:isort] 36 | commands=isort {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests 37 | deps=isort 38 | 39 | [testenv:isortcheck] 40 | commands=isort --check {toxinidir}/setup.py {toxinidir}/runstats {toxinidir}/tests 41 | deps=isort 42 | 43 | [testenv:mypy] 44 | commands=mypy {toxinidir}/runstats 45 | deps=mypy 46 | 47 | [testenv:pylint] 48 | commands=pylint {toxinidir}/runstats 49 | deps= 50 | pylint 51 | 52 | [testenv:rstcheck] 53 | commands=rstcheck {toxinidir}/README.rst 54 | deps=rstcheck 55 | 56 | [testenv:uploaddocs] 57 | allowlist_externals=rsync 58 | changedir=docs 59 | commands= 60 | rsync -azP --stats --delete _build/html/ \ 61 | grantjenks.com:/srv/www/www.grantjenks.com/public/docs/runstats/ 62 | 63 | [isort] 64 | multi_line_output = 3 65 | include_trailing_comma = True 66 | force_grid_wrap = 0 67 | use_parentheses = True 68 | ensure_newline_before_comments = True 69 | line_length = 79 70 | 71 | [pytest] 72 | addopts= 73 | -n auto 74 | --cov-branch 75 | --cov-fail-under=100 76 | --cov-report=term-missing 77 | --cov=runstats 78 | --doctest-glob="*.rst" 79 | testpaths=docs runstats tests README.rst 80 | 81 | [doc8] 82 | # ignore=D000 83 | 84 | [flake8] 85 | max-line-length=120 86 | -------------------------------------------------------------------------------- /.github/workflows/integration.yml: -------------------------------------------------------------------------------- 1 | name: integration 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | 13 | checks: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | max-parallel: 6 17 | matrix: 18 | check: [bluecheck, doc8, docs, isortcheck, flake8, mypy, pylint, rstcheck] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: 3.9 26 | - name: Install dependencies 
27 | run: | 28 | pip install --upgrade pip 29 | pip install tox 30 | - name: Run checks with tox 31 | run: | 32 | tox -e ${{ matrix.check }} 33 | 34 | tests: 35 | needs: checks 36 | runs-on: ${{ matrix.os }} 37 | strategy: 38 | max-parallel: 4 39 | matrix: 40 | os: [ubuntu-latest] 41 | python-version: [3.6, 3.7, 3.8, 3.9] 42 | 43 | steps: 44 | - uses: actions/checkout@v2 45 | 46 | - uses: actions/setup-python@v2 47 | with: 48 | python-version: ${{ matrix.python-version }} 49 | 50 | - name: Install tox 51 | run: | 52 | pip install --upgrade pip 53 | pip install tox 54 | 55 | - name: Test with tox 56 | run: tox -e py 57 | 58 | - name: Install cython and pytest 59 | run: pip install cython pytest pytest-cov pytest-xdist 60 | 61 | - name: Build Cython extension 62 | run: python setup.py build_ext --inplace 63 | 64 | - name: Test with pytest 65 | run: pytest 66 | 67 | - name: Run benchmark 68 | if: matrix.os == 'ubuntu-latest' 69 | run: | 70 | pip install -e . 71 | python tests/benchmark.py 72 | 73 | builds: 74 | needs: tests 75 | runs-on: ${{ matrix.os }} 76 | strategy: 77 | matrix: 78 | os: [ubuntu-20.04, windows-2019, macos-10.15] 79 | 80 | steps: 81 | - uses: actions/checkout@v2 82 | 83 | - uses: actions/setup-python@v2 84 | 85 | - name: Install cibuildwheel 86 | run: python -m pip install cibuildwheel==1.11.1.post1 87 | 88 | - name: Build wheels 89 | run: python -m cibuildwheel --output-dir wheelhouse 90 | env: 91 | CIBW_BEFORE_BUILD: pip install cython 92 | CIBW_TEST_REQUIRES: pytest pytest-cov pytest-xdist 93 | CIBW_TEST_COMMAND: pytest {project}/tests && python {project}/tests/benchmark.py 94 | CIBW_SKIP: pp* 95 | -------------------------------------------------------------------------------- /tests/RunningStats.cpp: -------------------------------------------------------------------------------- 1 | // Copied from https://www.johndcook.com/blog/skewness_kurtosis/ 2 | 3 | #include "RunningStats.h" 4 | #include 5 | #include 6 | 7 | RunningStats::RunningStats() 8 
| { 9 | Clear(); 10 | } 11 | 12 | void RunningStats::Clear() 13 | { 14 | n = 0; 15 | M1 = M2 = M3 = M4 = 0.0; 16 | } 17 | 18 | void RunningStats::Push(double x) 19 | { 20 | double delta, delta_n, delta_n2, term1; 21 | 22 | long long n1 = n; 23 | n++; 24 | delta = x - M1; 25 | delta_n = delta / n; 26 | delta_n2 = delta_n * delta_n; 27 | term1 = delta * delta_n * n1; 28 | M1 += delta_n; 29 | M4 += term1 * delta_n2 * (n*n - 3*n + 3) + 6 * delta_n2 * M2 - 4 * delta_n * M3; 30 | M3 += term1 * delta_n * (n - 2) - 3 * delta_n * M2; 31 | M2 += term1; 32 | } 33 | 34 | long long RunningStats::NumDataValues() const 35 | { 36 | return n; 37 | } 38 | 39 | double RunningStats::Mean() const 40 | { 41 | return M1; 42 | } 43 | 44 | double RunningStats::Variance() const 45 | { 46 | return M2/(n-1.0); 47 | } 48 | 49 | double RunningStats::StandardDeviation() const 50 | { 51 | return sqrt( Variance() ); 52 | } 53 | 54 | double RunningStats::Skewness() const 55 | { 56 | return sqrt(double(n)) * M3/ pow(M2, 1.5); 57 | } 58 | 59 | double RunningStats::Kurtosis() const 60 | { 61 | return double(n)*M4 / (M2*M2) - 3.0; 62 | } 63 | 64 | RunningStats operator+(const RunningStats a, const RunningStats b) 65 | { 66 | RunningStats combined; 67 | 68 | combined.n = a.n + b.n; 69 | 70 | double delta = b.M1 - a.M1; 71 | double delta2 = delta*delta; 72 | double delta3 = delta*delta2; 73 | double delta4 = delta2*delta2; 74 | 75 | combined.M1 = (a.n*a.M1 + b.n*b.M1) / combined.n; 76 | 77 | combined.M2 = a.M2 + b.M2 + 78 | delta2 * a.n * b.n / combined.n; 79 | 80 | combined.M3 = a.M3 + b.M3 + 81 | delta3 * a.n * b.n * (a.n - b.n)/(combined.n*combined.n); 82 | combined.M3 += 3.0*delta * (a.n*b.M2 - b.n*a.M2) / combined.n; 83 | 84 | combined.M4 = a.M4 + b.M4 + delta4*a.n*b.n * (a.n*a.n - a.n*b.n + b.n*b.n) / 85 | (combined.n*combined.n*combined.n); 86 | combined.M4 += 6.0*delta2 * (a.n*a.n*b.M2 + b.n*b.n*a.M2)/(combined.n*combined.n) + 87 | 4.0*delta*(a.n*b.M3 - b.n*a.M3) / combined.n; 88 | 89 | return 
combined; 90 | } 91 | 92 | RunningStats& RunningStats::operator+=(const RunningStats& rhs) 93 | { 94 | RunningStats combined = *this + rhs; 95 | *this = combined; 96 | return *this; 97 | } 98 | -------------------------------------------------------------------------------- /tests/benchmark.py: -------------------------------------------------------------------------------- 1 | """Benchmark core versus fast implementations. 2 | 3 | """ 4 | 5 | from __future__ import print_function 6 | 7 | import random 8 | import timeit 9 | 10 | random.seed(0) 11 | VALUES = [random.random() for _ in range(int(1e4))] 12 | PAIRS = [(pos, pos + (val * 2 - 1)) for pos, val in enumerate(VALUES)] 13 | 14 | 15 | def main(): 16 | core_stats = timeit.repeat( 17 | setup=''' 18 | from __main__ import VALUES 19 | from runstats.core import Statistics 20 | ''', 21 | stmt=''' 22 | stats = Statistics(VALUES) 23 | stats.mean() 24 | ''', 25 | number=1, 26 | repeat=7, 27 | )[2] 28 | 29 | fast_stats = timeit.repeat( 30 | setup=''' 31 | from __main__ import VALUES 32 | from runstats._core import Statistics 33 | ''', 34 | stmt=''' 35 | stats = Statistics(VALUES) 36 | stats.mean() 37 | ''', 38 | number=1, 39 | repeat=7, 40 | )[2] 41 | 42 | speedup_stats = core_stats / fast_stats - 1 43 | 44 | core_exp_stats = timeit.repeat( 45 | setup=''' 46 | from __main__ import VALUES 47 | from runstats.core import ExponentialStatistics 48 | exp_stats = ExponentialStatistics() 49 | ''', 50 | stmt=''' 51 | for value in VALUES: 52 | exp_stats.push(value) 53 | exp_stats.mean() 54 | ''', 55 | number=1, 56 | repeat=7, 57 | )[2] 58 | 59 | fast_exp_stats = timeit.repeat( 60 | setup=''' 61 | from __main__ import VALUES 62 | from runstats._core import ExponentialStatistics 63 | exp_stats = ExponentialStatistics() 64 | ''', 65 | stmt=''' 66 | for value in VALUES: 67 | exp_stats.push(value) 68 | exp_stats.mean() 69 | ''', 70 | number=1, 71 | repeat=7, 72 | )[2] 73 | 74 | speedup_exp_stats = core_exp_stats / fast_exp_stats - 1 75 
| 76 | core_regr = timeit.repeat( 77 | setup=''' 78 | from __main__ import PAIRS 79 | from runstats.core import Regression 80 | regr = Regression() 81 | ''', 82 | stmt=''' 83 | for pos, val in PAIRS: 84 | regr.push(pos, val) 85 | regr.slope() 86 | ''', 87 | number=1, 88 | repeat=7, 89 | )[2] 90 | 91 | fast_regr = timeit.repeat( 92 | setup=''' 93 | from __main__ import PAIRS 94 | from runstats._core import Regression 95 | regr = Regression() 96 | ''', 97 | stmt=''' 98 | for pos, val in PAIRS: 99 | regr.push(pos, val) 100 | regr.slope() 101 | ''', 102 | number=1, 103 | repeat=7, 104 | )[2] 105 | 106 | speedup_regr = core_regr / fast_regr - 1 107 | 108 | print('core.Statistics:', core_stats) 109 | print('_core.Statistics:', fast_stats) 110 | print(' Stats Speedup: %.2fx faster' % speedup_stats) 111 | 112 | print('core.ExponentialStatistics:', core_exp_stats) 113 | print('_core.ExponentialStatistics:', fast_exp_stats) 114 | print(' ExpStats Speedup: %.2fx faster' % speedup_exp_stats) 115 | 116 | print('core.Regression:', core_regr) 117 | print('_core.Regression:', fast_regr) 118 | print(' Regr Speedup: %.2fx faster' % speedup_regr) 119 | 120 | 121 | if __name__ == '__main__': 122 | main() 123 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Package Setup for RunStats 2 | 3 | Build binary extension in-place for testing with: 4 | 5 | $ python setup.py build_ext --inplace 6 | 7 | Create annotations for optimization: 8 | 9 | $ cython -3 -a runstats/core.py 10 | $ python3 -m http.server 11 | # Open runstats/core.html in browser. 
12 | 13 | """ 14 | 15 | import os 16 | import shutil 17 | 18 | from setuptools import Extension, setup 19 | from setuptools.command.test import test as TestCommand 20 | 21 | import runstats 22 | 23 | 24 | class Tox(TestCommand): 25 | def finalize_options(self): 26 | TestCommand.finalize_options(self) 27 | self.test_args = [] 28 | self.test_suite = True 29 | 30 | def run_tests(self): 31 | import tox 32 | 33 | errno = tox.cmdline(self.test_args) 34 | exit(errno) 35 | 36 | 37 | with open('README.rst') as reader: 38 | readme = reader.read() 39 | 40 | args = dict( 41 | name=runstats.__title__, 42 | version=runstats.__version__, 43 | description='Compute statistics and regression in one pass', 44 | long_description=readme, 45 | long_description_content_type='text/x-rst', 46 | author='Grant Jenks', 47 | author_email='contact@grantjenks.com', 48 | url='http://www.grantjenks.com/docs/runstats/', 49 | license='Apache 2.0', 50 | packages=['runstats'], 51 | python_requires='>=3.6', 52 | tests_require=['tox'], 53 | cmdclass={'test': Tox}, 54 | install_requires=[], 55 | project_urls={ 56 | 'Documentation': 'http://www.grantjenks.com/docs/runstats/', 57 | 'Funding': 'http://gum.co/runstats', 58 | 'Source': 'https://github.com/grantjenks/python-runstats', 59 | 'Tracker': 'https://github.com/grantjenks/python-runstats/issues', 60 | }, 61 | classifiers=[ 62 | 'Development Status :: 5 - Production/Stable', 63 | 'Intended Audience :: Developers', 64 | 'License :: OSI Approved :: Apache Software License', 65 | 'Natural Language :: English', 66 | 'Programming Language :: Python', 67 | 'Programming Language :: Python :: 3', 68 | 'Programming Language :: Python :: 3.6', 69 | 'Programming Language :: Python :: 3.7', 70 | 'Programming Language :: Python :: 3.8', 71 | 'Programming Language :: Python :: 3.9', 72 | 'Programming Language :: Python :: Implementation :: CPython', 73 | ], 74 | ) 75 | 76 | try: 77 | from Cython.Build import cythonize 78 | 79 | # Copy files to build binary. 
80 | 81 | shutil.copy2('runstats/core.py', 'runstats/_core.py') 82 | shutil.copy2('runstats/core.pxd', 'runstats/_core.pxd') 83 | 84 | # Build binary extension. 85 | 86 | ext_modules = [Extension('runstats._core', ['runstats/_core.py'])] 87 | setup( 88 | ext_modules=cythonize(ext_modules, language_level='3'), 89 | **args, 90 | ) 91 | 92 | # Remove copied files for static analysis and tests. 93 | 94 | os.remove('runstats/_core.py') 95 | os.remove('runstats/_core.pxd') 96 | except Exception as exception: 97 | print('*' * 79) 98 | print(exception) 99 | print('*' * 79) 100 | print('Failed to setup runstats with Cython. See error message above.') 101 | print('Falling back to pure-Python implementation.') 102 | print('*' * 79) 103 | setup(**args) 104 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | import runstats 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'RunStats' 22 | copyright = runstats.__copyright__ 23 | author = runstats.__author__ 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = runstats.__version__ 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.todo', 37 | 'sphinx.ext.viewcode', 38 | ] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # List of patterns, relative to source directory, that match files and 44 | # directories to ignore when looking for source files. 45 | # This pattern also affects html_static_path and html_extra_path. 46 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 47 | 48 | 49 | # -- Options for HTML output ------------------------------------------------- 50 | 51 | # The theme to use for HTML and HTML Help pages. See the documentation for 52 | # a list of builtin themes. 53 | html_theme = 'alabaster' 54 | 55 | # Theme options are theme-specific and customize the look and feel of a theme 56 | # further. For a list of options available for each theme, see the 57 | # documentation. 
58 | html_theme_options = { 59 | 'logo': 'gj-logo.png', 60 | 'logo_name': True, 61 | 'logo_text_align': 'center', 62 | 'analytics_id': 'UA-19364636-2', 63 | 'show_powered_by': False, 64 | 'show_related': True, 65 | 'github_user': 'grantjenks', 66 | 'github_repo': 'python-runstats', 67 | 'github_type': 'star', 68 | } 69 | 70 | # Add any paths that contain custom static files (such as style sheets) here, 71 | # relative to this directory. They are copied after the builtin static files, 72 | # so a file named "default.css" will overwrite the builtin "default.css". 73 | html_static_path = ['_static'] 74 | 75 | # Custom sidebar templates, must be a dictionary that maps document names 76 | # to template names. 77 | # 78 | # The default sidebars (for documents that don't match any pattern) are 79 | # defined by theme itself. Builtin themes are using these templates by 80 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 81 | # 'searchbox.html']``. 82 | html_sidebars = { 83 | '**': [ 84 | 'about.html', 85 | 'gumroad.html', 86 | 'localtoc.html', 87 | 'relations.html', 88 | 'searchbox.html', 89 | ] 90 | } 91 | 92 | 93 | # -- Options for todo extension ---------------------------------------------- 94 | 95 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
"""Command-line demonstration of the runstats summary types.

Usage: python -m tests <number> [<number> ...]

Parses the command-line arguments as floats and prints the summary
statistics, exponential statistics, and regression results computed by
both the Cython-optimized (`runstats`) and pure-Python (`runstats.core`)
implementations, alongside batch reference values from the test module.
"""

import sys

from runstats import ExponentialStatistics as FastExponentialStatistics
from runstats import Regression as FastRegression
from runstats import Statistics as FastStatistics
from runstats.core import ExponentialStatistics as CoreExponentialStatistics
from runstats.core import Regression as CoreRegression
from runstats.core import Statistics as CoreStatistics

from .test_runstats import kurtosis, mean, skewness, stddev, variance


def _report_statistics(title, stats):
    """Print the summary measures of a Statistics object."""
    print()
    print(title)
    print('Count:', len(stats))
    print('Mean:', stats.mean())
    print('Variance:', stats.variance())
    print('StdDev:', stats.stddev())
    print('Skewness:', stats.skewness())
    print('Kurtosis:', stats.kurtosis())


def _report_exponential(title, exp_stats):
    """Print the summary measures of an ExponentialStatistics object."""
    print()
    print(title)
    # BUGFIX: the classes expose a `decay` property, not a `get_decay()`
    # method, so the previous `exp_stats.get_decay()` call raised
    # AttributeError at runtime.
    print('Decay Rate (default):', exp_stats.decay)
    print('Exponential Mean:', exp_stats.mean())
    print('Exponential Variance:', exp_stats.variance())
    print('Exponential StdDev:', exp_stats.stddev())


def _report_regression(title, regr):
    """Print the summary measures of a Regression object."""
    print()
    print(title)
    print('Count:', len(regr))
    print('Slope:', regr.slope())
    print('Intercept:', regr.intercept())
    print('Correlation:', regr.correlation())


def main():
    """Parse command-line floats and print all runstats summaries."""
    args = list(map(float, sys.argv[1:]))

    # Batch reference implementations from the test module.
    print('Statistics Functions')
    print('Count:', len(args))
    print('Mean:', mean(args))
    print('Variance:', variance(args))
    print('StdDev:', stddev(args))
    print('Skewness:', skewness(args))
    print('Kurtosis:', kurtosis(args))

    # Fast (Cython) first, then the pure-Python core, matching the
    # original output order.
    for title, make_stats in [
        ('FastStatistics', FastStatistics),
        ('CoreStatistics', CoreStatistics),
    ]:
        stats = make_stats()
        for arg in args:
            stats.push(arg)
        _report_statistics(title, stats)

    for title, make_exp_stats in [
        ('FastExponentialStatistics', FastExponentialStatistics),
        ('CoreExponentialStatistics', CoreExponentialStatistics),
    ]:
        exp_stats = make_exp_stats()
        for arg in args:
            exp_stats.push(arg)
        _report_exponential(title, exp_stats)

    for title, make_regr in [
        ('FastRegression', FastRegression),
        ('CoreRegression', CoreRegression),
    ]:
        regr = make_regr()
        # Regress the values against their 1-based position.
        for index, arg in enumerate(args, 1):
            regr.push(index, arg)
        _report_regression(title, regr)


if __name__ == '__main__':
    main()
delta=double, 23 | delta_n=double, 24 | delta_n2=double, 25 | term=double, 26 | ) 27 | cpdef push(self, double value) 28 | 29 | cpdef double minimum(self) 30 | 31 | cpdef double maximum(self) 32 | 33 | cpdef double mean(self) 34 | 35 | cpdef double variance(self, double ddof=*) 36 | 37 | cpdef double stddev(self, double ddof=*) 38 | 39 | cpdef double skewness(self) 40 | 41 | cpdef double kurtosis(self) 42 | 43 | @cython.locals(sigma=Statistics) 44 | cpdef Statistics _add(self, Statistics that) 45 | 46 | @cython.locals( 47 | sum_count=double, 48 | delta=double, 49 | delta2=double, 50 | delta3=double, 51 | delta4=double, 52 | sum_eta=double, 53 | sum_rho=double, 54 | sum_tau=double, 55 | sum_phi=double, 56 | ) 57 | cpdef Statistics _iadd(self, Statistics that) 58 | 59 | @cython.locals(sigma=Statistics) 60 | cpdef Statistics _mul(self, double that) 61 | 62 | cpdef Statistics _imul(self, double that) 63 | 64 | 65 | cpdef Statistics make_statistics(state) 66 | 67 | 68 | cdef class ExponentialStatistics: 69 | cdef public double _decay, _mean, _variance 70 | 71 | cpdef _set_decay(self, double value) 72 | 73 | cpdef clear(self, double mean=*, double variance=*, decay=*) 74 | 75 | cpdef get_state(self) 76 | 77 | cpdef set_state(self, state) 78 | 79 | cpdef __reduce__(self) 80 | 81 | cpdef ExponentialStatistics copy(self, _=*) 82 | 83 | @cython.locals( 84 | alpha=double, 85 | diff=double, 86 | incr=double, 87 | ) 88 | cpdef push(self, double value) 89 | 90 | cpdef double mean(self) 91 | 92 | cpdef double variance(self) 93 | 94 | cpdef double stddev(self) 95 | 96 | @cython.locals(sigma=ExponentialStatistics) 97 | cpdef ExponentialStatistics _add(self, ExponentialStatistics that) 98 | 99 | cpdef ExponentialStatistics _iadd(self, ExponentialStatistics that) 100 | 101 | @cython.locals( 102 | sigma=ExponentialStatistics, 103 | ) 104 | cpdef ExponentialStatistics _mul(self, double that) 105 | 106 | cpdef ExponentialStatistics _imul(self, double that) 107 | 108 | 109 | cpdef 
ExponentialStatistics make_exponential_statistics(state) 110 | 111 | 112 | cdef class Regression: 113 | cdef public Statistics _xstats, _ystats 114 | cdef public double _count, _sxy 115 | 116 | cpdef clear(self) 117 | 118 | cpdef get_state(self) 119 | 120 | cpdef set_state(self, state) 121 | 122 | cpdef __reduce__(self) 123 | 124 | cpdef Regression copy(self, _=*) 125 | 126 | cpdef push(self, double xcoord, double ycoord) 127 | 128 | @cython.locals(sxx=double) 129 | cpdef double slope(self, double ddof=*) 130 | 131 | cpdef double intercept(self, double ddof=*) 132 | 133 | @cython.locals(term=double) 134 | cpdef double correlation(self, double ddof=*) 135 | 136 | @cython.locals(sigma=Regression) 137 | cpdef Regression _add(self, Regression that) 138 | 139 | @cython.locals( 140 | sum_count=double, 141 | sum_xstats=Statistics, 142 | sum_ystats=Statistics, 143 | deltax=double, 144 | deltay=double, 145 | sum_sxy=double, 146 | ) 147 | cpdef Regression _iadd(self, Regression that) 148 | 149 | 150 | cpdef Regression make_regression(state) 151 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | RunStats: Computing Statistics and Regression in One Pass 2 | ========================================================= 3 | 4 | `RunStats`_ is an Apache2 licensed Python module for online statistics and 5 | online regression. Statistics and regression summaries are computed in a single 6 | pass. Previous values are not recorded in summaries. 7 | 8 | Long running systems often generate numbers summarizing performance. It could 9 | be the latency of a response or the time between requests. It's often useful to 10 | use these numbers in summary statistics like the arithmetic mean, minimum, 11 | standard deviation, etc. When many values are generated, computing these 12 | summaries can be computationally intensive. 
It may even be infeasible to keep 13 | every recorded value. In such cases computing online statistics and online 14 | regression is necessary. 15 | 16 | In other cases, you may only have one opportunity to observe all the recorded 17 | values. Python's generators work exactly this way. Traditional methods for 18 | calculating the variance and other higher moments requires multiple passes over 19 | the data. With generators, this is not possible and so computing statistics in 20 | a single pass is necessary. 21 | 22 | There are also scenarios where a user is not interested in a complete summary 23 | of the entire stream of data but rather wants to observe the current state of 24 | the system based on the recent past. In these cases exponential statistics are 25 | used. Instead of weighting all values uniformly in the statistics computation, 26 | an exponential decay weight is applied to older values. The decay rate is 27 | configurable and provides a mechanism for balancing recent values with past 28 | values. 29 | 30 | The Python `RunStats`_ module was designed for these cases by providing classes 31 | for computing online summary statistics and online linear regression in a 32 | single pass. Summary objects work on sequences which may be larger than memory 33 | or disk space permit. They may also be efficiently combined together to create 34 | aggregate summaries. 35 | 36 | 37 | Features 38 | -------- 39 | 40 | - Pure-Python 41 | - Fully Documented 42 | - 100% Test Coverage 43 | - Numerically Stable 44 | - Optional Cython-optimized Extension (5-100 times faster) 45 | - Statistics summary computes mean, variance, standard deviation, skewness, 46 | kurtosis, minimum and maximum. 47 | - Regression summary computes slope, intercept and correlation. 48 | - Developed on Python 3.9 49 | - Tested on CPython 3.6, 3.7, 3.8, 3.9 50 | - Tested on Linux, Mac OS X, and Windows 51 | - Tested using GitHub Actions 52 | 53 | .. 
image:: https://github.com/grantjenks/python-runstats/workflows/integration/badge.svg 54 | :target: http://www.grantjenks.com/docs/runstats/ 55 | 56 | 57 | Quickstart 58 | ---------- 59 | 60 | Installing `RunStats`_ is simple with `pip `_:: 61 | 62 | $ pip install runstats 63 | 64 | You can access documentation in the interpreter with Python's built-in help 65 | function: 66 | 67 | .. code-block:: python 68 | 69 | >>> import runstats 70 | >>> help(runstats) # doctest: +SKIP 71 | >>> help(runstats.Statistics) # doctest: +SKIP 72 | >>> help(runstats.Regression) # doctest: +SKIP 73 | >>> help(runstats.ExponentialStatistics) # doctest: +SKIP 74 | 75 | 76 | Tutorial 77 | -------- 78 | 79 | The Python `RunStats`_ module provides three types for computing running 80 | statistics: Statistics, ExponentialStatistics and Regression.The Regression 81 | object leverages Statistics internally for its calculations. Each can be 82 | initialized without arguments: 83 | 84 | .. code-block:: python 85 | 86 | >>> from runstats import Statistics, Regression, ExponentialStatistics 87 | >>> stats = Statistics() 88 | >>> regr = Regression() 89 | >>> exp_stats = ExponentialStatistics() 90 | 91 | Statistics objects support four methods for modification. Use `push` to add 92 | values to the summary, `clear` to reset the summary, sum to combine Statistics 93 | summaries and multiply to weight summary Statistics by a scalar. 94 | 95 | .. code-block:: python 96 | 97 | >>> for num in range(10): 98 | ... stats.push(float(num)) 99 | >>> stats.mean() 100 | 4.5 101 | >>> stats.maximum() 102 | 9.0 103 | >>> stats += stats 104 | >>> stats.mean() 105 | 4.5 106 | >>> stats.variance() 107 | 8.68421052631579 108 | >>> len(stats) 109 | 20 110 | >>> stats *= 2 111 | >>> len(stats) 112 | 40 113 | >>> stats.clear() 114 | >>> len(stats) 115 | 0 116 | >>> stats.minimum() 117 | nan 118 | 119 | Use the Python built-in `len` for the number of pushed values. 
Unfortunately 120 | the Python `min` and `max` built-ins may not be used for the minimum and 121 | maximum as sequences are expected instead. Therefore, there are `minimum` and 122 | `maximum` methods provided for that purpose: 123 | 124 | .. code-block:: python 125 | 126 | >>> import random 127 | >>> random.seed(0) 128 | >>> for __ in range(1000): 129 | ... stats.push(random.random()) 130 | >>> len(stats) 131 | 1000 132 | >>> min(stats) 133 | Traceback (most recent call last): 134 | ... 135 | TypeError: ... 136 | >>> stats.minimum() 137 | 0.00024069652516689466 138 | >>> stats.maximum() 139 | 0.9996851255769114 140 | 141 | Statistics summaries provide five measures of a series: mean, variance, 142 | standard deviation, skewness and kurtosis: 143 | 144 | .. code-block:: python 145 | 146 | >>> stats = Statistics([1, 2, 5, 12, 5, 2, 1]) 147 | >>> stats.mean() 148 | 4.0 149 | >>> stats.variance() 150 | 15.33333333333333 151 | >>> stats.stddev() 152 | 3.915780041490243 153 | >>> stats.skewness() 154 | 1.33122127314735 155 | >>> stats.kurtosis() 156 | 0.5496219281663506 157 | 158 | All internal calculations use Python's `float` type. 159 | 160 | Like Statistics, the Regression type supports some methods for modification: 161 | `push`, `clear` and sum: 162 | 163 | .. code-block:: python 164 | 165 | >>> regr.clear() 166 | >>> len(regr) 167 | 0 168 | >>> for num in range(10): 169 | ... regr.push(num, num + 5) 170 | >>> len(regr) 171 | 10 172 | >>> regr.slope() 173 | 1.0 174 | >>> more = Regression((num, num + 5) for num in range(10, 20)) 175 | >>> total = regr + more 176 | >>> len(total) 177 | 20 178 | >>> total.slope() 179 | 1.0 180 | >>> total.intercept() 181 | 5.0 182 | >>> total.correlation() 183 | 1.0 184 | 185 | Regression summaries provide three measures of a series of pairs: slope, 186 | intercept and correlation. Note that, as a regression, the points need not 187 | exactly lie on a line: 188 | 189 | .. 
code-block:: python 190 | 191 | >>> regr = Regression([(1.2, 1.9), (3, 5.1), (4.9, 8.1), (7, 11)]) 192 | >>> regr.slope() 193 | 1.5668320150154176 194 | >>> regr.intercept() 195 | 0.21850113956294415 196 | >>> regr.correlation() 197 | 0.9983810791694997 198 | 199 | Both constructors accept an optional iterable that is consumed and pushed into 200 | the summary. Note that you may pass a generator as an iterable and the 201 | generator will be entirely consumed. 202 | 203 | The ExponentialStatistics are constructed by providing a decay rate, initial 204 | mean, and initial variance. The decay rate has default 0.9 and must be between 205 | 0 and 1. The initial mean and variance default to zero. 206 | 207 | .. code-block:: python 208 | 209 | >>> exp_stats = ExponentialStatistics() 210 | >>> exp_stats.decay 211 | 0.9 212 | >>> exp_stats.mean() 213 | 0.0 214 | >>> exp_stats.variance() 215 | 0.0 216 | 217 | The decay rate is the weight by which the current statistics are discounted 218 | by. Consequently, (1 - decay) is the weight of the new value. Like the `Statistics` class, 219 | there are four methods for modification: `push`, `clear`, sum and 220 | multiply. 221 | 222 | .. code-block:: python 223 | 224 | >>> for num in range(10): 225 | ... exp_stats.push(num) 226 | >>> exp_stats.mean() 227 | 3.486784400999999 228 | >>> exp_stats.variance() 229 | 11.593430921943071 230 | >>> exp_stats.stddev() 231 | 3.4049127627507683 232 | 233 | The decay of the exponential statistics can also be changed. The value must be 234 | between 0 and 1. 235 | 236 | .. code-block:: python 237 | 238 | >>> exp_stats.decay 239 | 0.9 240 | >>> exp_stats.decay = 0.5 241 | >>> exp_stats.decay 242 | 0.5 243 | >>> exp_stats.decay = 10 244 | Traceback (most recent call last): 245 | ... 246 | ValueError: decay must be between 0 and 1 247 | 248 | The clear method allows to optionally set a new mean, new variance and new 249 | decay. 
If none are provided, the mean and variance reset to zero, while the decay is
not changed.
code-block:: python 296 | 297 | >>> import runstats.core # Pure-Python 298 | >>> runstats.core.Statistics 299 | 300 | 301 | When importing from `runstats` the Cython-optimized version `_core` is 302 | preferred and the `core` version is used as fallback. Micro-benchmarking 303 | Statistics and Regression by calling `push` repeatedly shows the 304 | Cython-optimized extension as 20-40 times faster than the pure-Python 305 | extension. 306 | 307 | .. _`RunStats`: http://www.grantjenks.com/docs/runstats/ 308 | 309 | 310 | Reference and Indices 311 | --------------------- 312 | 313 | * `RunStats Documentation`_ 314 | * `RunStats API Reference`_ 315 | * `RunStats at PyPI`_ 316 | * `RunStats at GitHub`_ 317 | * `RunStats Issue Tracker`_ 318 | 319 | .. _`RunStats Documentation`: http://www.grantjenks.com/docs/runstats/ 320 | .. _`RunStats API Reference`: http://www.grantjenks.com/docs/runstats/api.html 321 | .. _`RunStats at PyPI`: https://pypi.python.org/pypi/runstats/ 322 | .. _`RunStats at GitHub`: https://github.com/grantjenks/python-runstats/ 323 | .. _`RunStats Issue Tracker`: https://github.com/grantjenks/python-runstats/issues/ 324 | 325 | 326 | License 327 | ------- 328 | 329 | Copyright 2013-2021 Grant Jenks 330 | 331 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use 332 | this file except in compliance with the License. You may obtain a copy of the 333 | License at 334 | 335 | http://www.apache.org/licenses/LICENSE-2.0 336 | 337 | Unless required by applicable law or agreed to in writing, software distributed 338 | under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 339 | CONDITIONS OF ANY KIND, either express or implied. See the License for the 340 | specific language governing permissions and limitations under the License. 
"""Python RunStats

Compute Statistics, Exponential Statistics and Regression in a single pass.

"""

# Sentinel for "no values pushed yet" in minimum/maximum.
NAN = float('nan')


class Statistics:
    """Compute statistics in a single pass.

    Computes the minimum, maximum, mean, variance, standard deviation,
    skewness, and kurtosis.
    Statistics objects may also be added together and copied.

    Based entirely on the C++ code by John D Cook at
    http://www.johndcook.com/skewness_kurtosis.html
    """

    def __init__(self, iterable=()):
        """Initialize Statistics object.

        Iterates optional parameter `iterable` and pushes each value into
        the statistics summary.
        """
        self.clear()
        for value in iterable:
            self.push(value)

    def clear(self):
        """Clear Statistics object."""
        # _count is a float so scalar weighting (`stats * 0.5`) works.
        # _eta is the running mean; _rho, _tau, _phi accumulate the
        # second, third, and fourth central moment sums respectively.
        self._count = self._eta = self._rho = self._tau = self._phi = 0.0
        self._min = self._max = NAN

    def __eq__(self, that):
        # Compare by internal state. Objects without a compatible
        # `get_state` (e.g. plain numbers) defer to the other operand
        # instead of raising AttributeError; duck-typing keeps equality
        # working across the Cython and pure-Python implementations.
        try:
            return self.get_state() == that.get_state()
        except AttributeError:
            return NotImplemented

    def __ne__(self, that):
        try:
            return self.get_state() != that.get_state()
        except AttributeError:
            return NotImplemented

    def get_state(self):
        """Get internal state."""
        return (
            self._count,
            self._eta,
            self._rho,
            self._tau,
            self._phi,
            self._min,
            self._max,
        )

    def set_state(self, state):
        """Set internal state."""
        (
            self._count,
            self._eta,
            self._rho,
            self._tau,
            self._phi,
            self._min,
            self._max,
        ) = state

    @classmethod
    def fromstate(cls, state):
        """Return Statistics object from state."""
        stats = cls()
        stats.set_state(state)
        return stats

    def __reduce__(self):
        # Pickle via the module-level factory so the Cython and
        # pure-Python classes share one protocol.
        return make_statistics, (self.get_state(),)

    def copy(self, _=None):
        """Copy Statistics object."""
        return self.fromstate(self.get_state())

    def __copy__(self, _=None):
        """Copy Statistics object."""
        return self.copy(_)

    __deepcopy__ = __copy__

    def __len__(self):
        """Number of values that have been pushed."""
        return int(self._count)

    def push(self, value):
        """Add `value` to the Statistics summary."""
        if self._count == 0.0:
            self._min = value
            self._max = value
        else:
            self._min = min(self._min, value)
            self._max = max(self._max, value)

        # Single-pass update of the first four moments (Welford-style).
        delta = value - self._eta
        delta_n = delta / (self._count + 1)
        delta_n2 = delta_n * delta_n
        term = delta * delta_n * self._count

        self._count += 1
        self._eta += delta_n
        # Order matters: _phi and _tau use the *previous* values of
        # _rho and _tau, so _rho is updated last.
        self._phi += (
            term * delta_n2 * (self._count ** 2 - 3 * self._count + 3)
            + 6 * delta_n2 * self._rho
            - 4 * delta_n * self._tau
        )
        self._tau += (
            term * delta_n * (self._count - 2) - 3 * delta_n * self._rho
        )
        self._rho += term

    def minimum(self):
        """Minimum of values (NAN when empty)."""
        return self._min

    def maximum(self):
        """Maximum of values (NAN when empty)."""
        return self._max

    def mean(self):
        """Mean of values."""
        return self._eta

    def variance(self, ddof=1.0):
        """Variance of values (with `ddof` degrees of freedom).

        Requires more than `ddof` pushed values; raises
        ZeroDivisionError when the count equals `ddof`.
        """
        return self._rho / (self._count - ddof)

    def stddev(self, ddof=1.0):
        """Standard deviation of values (with `ddof` degrees of freedom)."""
        return self.variance(ddof) ** 0.5

    def skewness(self):
        """Skewness of values."""
        return (self._count ** 0.5) * self._tau / (self._rho ** 1.5)

    def kurtosis(self):
        """Kurtosis of values (excess kurtosis; normal distribution is 0)."""
        return self._count * self._phi / (self._rho * self._rho) - 3.0

    def _add(self, that):
        """Add two Statistics objects together."""
        sigma = self.copy()
        sigma._iadd(that)
        return sigma

    def __add__(self, that):
        """Add two Statistics objects together."""
        return self._add(that)

    def _iadd(self, that):
        """Add another Statistics object to this one.

        Merges the moment sums using the pairwise-combination formulas so
        the result equals pushing both value streams into one summary.
        """
        sum_count = self._count + that._count
        if sum_count == 0:
            # Both summaries empty; nothing to merge.
            return self

        delta = that._eta - self._eta
        delta2 = delta ** 2
        delta3 = delta ** 3
        delta4 = delta ** 4

        sum_eta = (
            self._count * self._eta + that._count * that._eta
        ) / sum_count

        sum_rho = (
            self._rho
            + that._rho
            + delta2 * self._count * that._count / sum_count
        )

        sum_tau = (
            self._tau
            + that._tau
            + delta3
            * self._count
            * that._count
            * (self._count - that._count)
            / (sum_count ** 2)
            + 3.0
            * delta
            * (self._count * that._rho - that._count * self._rho)
            / sum_count
        )

        sum_phi = (
            self._phi
            + that._phi
            + delta4
            * self._count
            * that._count
            * (self._count ** 2 - self._count * that._count + that._count ** 2)
            / (sum_count ** 3)
            + 6.0
            * delta2
            * (
                self._count * self._count * that._rho
                + that._count * that._count * self._rho
            )
            / (sum_count ** 2)
            + 4.0
            * delta
            * (self._count * that._tau - that._count * self._tau)
            / sum_count
        )

        # NAN never compares as min/max, so an empty side must be
        # handled explicitly.
        if self._count == 0.0:
            self._min = that._min
            self._max = that._max
        elif that._count != 0.0:
            self._min = min(self._min, that._min)
            self._max = max(self._max, that._max)

        self._count = sum_count
        self._eta = sum_eta
        self._rho = sum_rho
        self._tau = sum_tau
        self._phi = sum_phi

        return self

    def __iadd__(self, that):
        """Add another Statistics object to this one."""
        return self._iadd(that)

    def _mul(self, that):
        """Multiply by a scalar to change Statistics weighting."""
        sigma = self.copy()
        sigma._imul(that)
        return sigma

    def __mul__(self, that):
        """Multiply by a scalar to change Statistics weighting."""
        # Cython routes reflected multiplication through __mul__ with the
        # operands swapped, so `self` may be the scalar here.
        if isinstance(self, Statistics):
            return self._mul(that)
        # https://stackoverflow.com/q/33218006/232571
        return that._mul(self)  # pragma: no cover

    __rmul__ = __mul__

    def _imul(self, that):
        """Multiply by a scalar to change Statistics weighting in-place.

        Scales the count and moment sums; mean, minimum, and maximum are
        unaffected by re-weighting.
        """
        self._count *= that
        self._rho *= that
        self._tau *= that
        self._phi *= that
        return self

    def __imul__(self, that):
        """Multiply by a scalar to change Statistics weighting in-place."""
        return self._imul(that)


def make_statistics(state):
    """Make Statistics object from state."""
    return Statistics.fromstate(state)
"""Multiply by a scalar to change Statistics weighting.""" 233 | sigma = self.copy() 234 | sigma._imul(that) 235 | return sigma 236 | 237 | def __mul__(self, that): 238 | """Multiply by a scalar to change Statistics weighting.""" 239 | if isinstance(self, Statistics): 240 | return self._mul(that) 241 | # https://stackoverflow.com/q/33218006/232571 242 | return that._mul(self) # pragma: no cover 243 | 244 | __rmul__ = __mul__ 245 | 246 | def _imul(self, that): 247 | """Multiply by a scalar to change Statistics weighting in-place.""" 248 | self._count *= that 249 | self._rho *= that 250 | self._tau *= that 251 | self._phi *= that 252 | return self 253 | 254 | def __imul__(self, that): 255 | """Multiply by a scalar to change Statistics weighting in-place.""" 256 | return self._imul(that) 257 | 258 | 259 | def make_statistics(state): 260 | """Make Statistics object from state.""" 261 | return Statistics.fromstate(state) 262 | 263 | 264 | class ExponentialStatistics: 265 | """Compute exponential mean and variance in a single pass. 266 | 267 | ExponentialStatistics objects may also be copied. 268 | 269 | Based on 270 | "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at 271 | https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf 272 | 273 | For an explanation of these statistics refer to e.g.: 274 | https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html 275 | 276 | """ 277 | 278 | def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): 279 | """Initialize ExponentialStatistics object. 280 | 281 | Incrementally tracks mean and variance and exponentially discounts old 282 | values. 283 | 284 | Requires a `decay` rate in exclusive range (0, 1) for discounting 285 | previous statistics. 286 | 287 | Optionally allows setting initial mean and variance. Default 0. 288 | 289 | Iterates optional parameter `iterable` and pushes each value into the 290 | statistics summary. 
291 | 292 | """ 293 | self.clear(mean, variance, decay) 294 | for value in iterable: 295 | self.push(value) 296 | 297 | @property 298 | def decay(self): 299 | """Exponential decay rate of old values.""" 300 | return self._decay 301 | 302 | @decay.setter 303 | def decay(self, value): 304 | self._set_decay(value) 305 | 306 | def _set_decay(self, value): 307 | if not 0 <= value <= 1: 308 | raise ValueError('decay must be between 0 and 1') 309 | self._decay = value 310 | 311 | def clear(self, mean=0.0, variance=0.0, decay=None): 312 | """Clear ExponentialStatistics object.""" 313 | self._mean = mean 314 | self._variance = variance 315 | if decay is not None: 316 | self._set_decay(decay) 317 | 318 | def __eq__(self, that): 319 | return self.get_state() == that.get_state() 320 | 321 | def __ne__(self, that): 322 | return self.get_state() != that.get_state() 323 | 324 | def get_state(self): 325 | """Get internal state.""" 326 | return self._decay, self._mean, self._variance 327 | 328 | def set_state(self, state): 329 | """Set internal state.""" 330 | ( 331 | self._decay, 332 | self._mean, 333 | self._variance, 334 | ) = state 335 | 336 | @classmethod 337 | def fromstate(cls, state): 338 | """Return ExponentialStatistics object from state.""" 339 | stats = cls() 340 | stats.set_state(state) 341 | return stats 342 | 343 | def __reduce__(self): 344 | return make_exponential_statistics, (self.get_state(),) 345 | 346 | def copy(self, _=None): 347 | """Copy ExponentialStatistics object.""" 348 | return self.fromstate(self.get_state()) 349 | 350 | def __copy__(self, _=None): 351 | """Copy ExponentialStatistics object.""" 352 | return self.copy(_) 353 | 354 | __deepcopy__ = __copy__ 355 | 356 | def push(self, value): 357 | """Add `value` to the ExponentialStatistics summary.""" 358 | alpha = 1.0 - self._decay 359 | diff = value - self._mean 360 | incr = alpha * diff 361 | self._variance += alpha * (self._decay * diff ** 2 - self._variance) 362 | self._mean += incr 363 | 364 | def 
mean(self): 365 | """Exponential mean of values.""" 366 | return self._mean 367 | 368 | def variance(self): 369 | """Exponential variance of values.""" 370 | return self._variance 371 | 372 | def stddev(self): 373 | """Exponential standard deviation of values.""" 374 | return self.variance() ** 0.5 375 | 376 | def _add(self, that): 377 | """Add two ExponentialStatistics objects together.""" 378 | sigma = self.copy() 379 | sigma._iadd(that) 380 | return sigma 381 | 382 | def __add__(self, that): 383 | """Add two ExponentialStatistics objects together.""" 384 | return self._add(that) 385 | 386 | def _iadd(self, that): 387 | """Add another ExponentialStatistics object to this one.""" 388 | self._mean += that.mean() 389 | self._variance += that.variance() 390 | return self 391 | 392 | def __iadd__(self, that): 393 | """Add another ExponentialStatistics object to this one.""" 394 | return self._iadd(that) 395 | 396 | def _mul(self, that): 397 | """Multiply by a scalar to change ExponentialStatistics weighting.""" 398 | sigma = self.copy() 399 | sigma._imul(that) 400 | return sigma 401 | 402 | def __mul__(self, that): 403 | """Multiply by a scalar to change ExponentialStatistics weighting.""" 404 | if isinstance(self, ExponentialStatistics): 405 | return self._mul(that) 406 | # https://stackoverflow.com/q/33218006/232571 407 | return that._mul(self) # pragma: no cover 408 | 409 | def _imul(self, that): 410 | """Multiply by a scalar to change ExponentialStatistics weighting 411 | in-place. 412 | 413 | """ 414 | self._mean *= that 415 | self._variance *= that 416 | return self 417 | 418 | def __imul__(self, that): 419 | """Multiply by a scalar to change ExponentialStatistics weighting 420 | in-place. 
421 | 422 | """ 423 | return self._imul(that) 424 | 425 | 426 | def make_exponential_statistics(state): 427 | """Make ExponentialStatistics object from state.""" 428 | return ExponentialStatistics.fromstate(state) 429 | 430 | 431 | class Regression: 432 | """ 433 | Compute simple linear regression in a single pass. 434 | 435 | Computes the slope, intercept, and correlation. 436 | Regression objects may also be added together and copied. 437 | 438 | Based entirely on the C++ code by John D Cook at 439 | http://www.johndcook.com/running_regression.html 440 | """ 441 | 442 | def __init__(self, iterable=()): 443 | """Initialize Regression object. 444 | 445 | Iterates optional parameter `iterable` and pushes each pair into the 446 | regression summary. 447 | """ 448 | self._xstats = Statistics() 449 | self._ystats = Statistics() 450 | self._count = self._sxy = 0.0 451 | 452 | for xcoord, ycoord in iterable: 453 | self.push(xcoord, ycoord) 454 | 455 | def __eq__(self, that): 456 | return self.get_state() == that.get_state() 457 | 458 | def __ne__(self, that): 459 | return self.get_state() != that.get_state() 460 | 461 | def clear(self): 462 | """Clear Regression object.""" 463 | self._xstats.clear() 464 | self._ystats.clear() 465 | self._count = self._sxy = 0.0 466 | 467 | def get_state(self): 468 | """Get internal state.""" 469 | return ( 470 | self._count, 471 | self._sxy, 472 | self._xstats.get_state(), 473 | self._ystats.get_state(), 474 | ) 475 | 476 | def set_state(self, state): 477 | """Set internal state.""" 478 | count, sxy, xstats, ystats = state 479 | self._count = count 480 | self._sxy = sxy 481 | self._xstats.set_state(xstats) 482 | self._ystats.set_state(ystats) 483 | 484 | @classmethod 485 | def fromstate(cls, state): 486 | """Return Regression object from state.""" 487 | regr = cls() 488 | regr.set_state(state) 489 | return regr 490 | 491 | def __reduce__(self): 492 | return make_regression, (self.get_state(),) 493 | 494 | def copy(self, _=None): 495 | 
"""Copy Regression object.""" 496 | return self.fromstate(self.get_state()) 497 | 498 | def __copy__(self, _=None): 499 | """Copy Regression object.""" 500 | return self.copy(_) 501 | 502 | __deepcopy__ = __copy__ 503 | 504 | def __len__(self): 505 | """Number of values that have been pushed.""" 506 | return int(self._count) 507 | 508 | def push(self, xcoord, ycoord): 509 | """Add a pair `(x, y)` to the Regression summary.""" 510 | self._sxy += ( 511 | (self._xstats.mean() - xcoord) 512 | * (self._ystats.mean() - ycoord) 513 | * self._count 514 | / (self._count + 1) 515 | ) 516 | self._xstats.push(xcoord) 517 | self._ystats.push(ycoord) 518 | self._count += 1 519 | 520 | def slope(self, ddof=1.0): 521 | """Slope of values (with `ddof` degrees of freedom).""" 522 | sxx = self._xstats.variance(ddof) * (self._count - ddof) 523 | return self._sxy / sxx 524 | 525 | def intercept(self, ddof=1.0): 526 | """Intercept of values (with `ddof` degrees of freedom).""" 527 | return self._ystats.mean() - self.slope(ddof) * self._xstats.mean() 528 | 529 | def correlation(self, ddof=1.0): 530 | """Correlation of values (with `ddof` degrees of freedom).""" 531 | term = self._xstats.stddev(ddof) * self._ystats.stddev(ddof) 532 | return self._sxy / ((self._count - ddof) * term) 533 | 534 | def _add(self, that): 535 | """Add two Regression objects together.""" 536 | sigma = self.copy() 537 | sigma._iadd(that) 538 | return sigma 539 | 540 | def __add__(self, that): 541 | """Add two Regression objects together.""" 542 | return self._add(that) 543 | 544 | def _iadd(self, that): 545 | """Add another Regression object to this one.""" 546 | sum_count = self._count + that._count 547 | if sum_count == 0: 548 | return self 549 | 550 | sum_xstats = self._xstats._add(that._xstats) 551 | sum_ystats = self._ystats._add(that._ystats) 552 | 553 | deltax = that._xstats.mean() - self._xstats.mean() 554 | deltay = that._ystats.mean() - self._ystats.mean() 555 | sum_sxy = ( 556 | self._sxy 557 | + 
that._sxy 558 | + self._count * that._count * deltax * deltay / sum_count 559 | ) 560 | 561 | self._count = sum_count 562 | self._xstats = sum_xstats 563 | self._ystats = sum_ystats 564 | self._sxy = sum_sxy 565 | 566 | return self 567 | 568 | def __iadd__(self, that): 569 | """Add another Regression object to this one.""" 570 | return self._iadd(that) 571 | 572 | 573 | def make_regression(state): 574 | """Make Regression object from state.""" 575 | return Regression.fromstate(state) 576 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist= 7 | 8 | # Specify a score threshold to be exceeded before program exits with error. 9 | fail-under=10.0 10 | 11 | # Add files or directories to the blacklist. They should be base names, not 12 | # paths. 13 | ignore=CVS 14 | 15 | # Add files or directories matching the regex patterns to the blacklist. The 16 | # regex matches against base names, not paths. 17 | ignore-patterns= 18 | 19 | # Python code to execute, usually for sys.path manipulation such as 20 | # pygtk.require(). 21 | #init-hook= 22 | 23 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 24 | # number of processors available to use. 25 | jobs=1 26 | 27 | # Control the amount of potential inferred values when inferring a single 28 | # object. This can help the performance when dealing with large functions or 29 | # complex, nested conditions. 30 | limit-inference-results=100 31 | 32 | # List of plugins (as comma separated values of python module names) to load, 33 | # usually to register additional checkers. 
34 | load-plugins= 35 | 36 | # Pickle collected data for later comparisons. 37 | persistent=yes 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 
63 | disable=print-statement, 64 | parameter-unpacking, 65 | unpacking-in-except, 66 | old-raise-syntax, 67 | backtick, 68 | long-suffix, 69 | old-ne-operator, 70 | old-octal-literal, 71 | import-star-module-level, 72 | non-ascii-bytes-literal, 73 | raw-checker-failed, 74 | bad-inline-option, 75 | locally-disabled, 76 | file-ignored, 77 | suppressed-message, 78 | useless-suppression, 79 | deprecated-pragma, 80 | use-symbolic-message-instead, 81 | apply-builtin, 82 | basestring-builtin, 83 | buffer-builtin, 84 | cmp-builtin, 85 | coerce-builtin, 86 | execfile-builtin, 87 | file-builtin, 88 | long-builtin, 89 | raw_input-builtin, 90 | reduce-builtin, 91 | standarderror-builtin, 92 | unicode-builtin, 93 | xrange-builtin, 94 | coerce-method, 95 | delslice-method, 96 | getslice-method, 97 | setslice-method, 98 | no-absolute-import, 99 | old-division, 100 | dict-iter-method, 101 | dict-view-method, 102 | next-method-called, 103 | metaclass-assignment, 104 | indexing-exception, 105 | raising-string, 106 | reload-builtin, 107 | oct-method, 108 | hex-method, 109 | nonzero-method, 110 | cmp-method, 111 | input-builtin, 112 | round-builtin, 113 | intern-builtin, 114 | unichr-builtin, 115 | map-builtin-not-iterating, 116 | zip-builtin-not-iterating, 117 | range-builtin-not-iterating, 118 | filter-builtin-not-iterating, 119 | using-cmp-argument, 120 | eq-without-hash, 121 | div-method, 122 | idiv-method, 123 | rdiv-method, 124 | exception-message-attribute, 125 | invalid-str-codec, 126 | sys-max-int, 127 | bad-python3-import, 128 | deprecated-string-function, 129 | deprecated-str-translate-call, 130 | deprecated-itertools-function, 131 | deprecated-types-field, 132 | next-method-defined, 133 | dict-items-not-iterating, 134 | dict-keys-not-iterating, 135 | dict-values-not-iterating, 136 | deprecated-operator-function, 137 | deprecated-urllib-function, 138 | xreadlines-attribute, 139 | deprecated-sys-function, 140 | exception-escape, 141 | comprehension-escape, 142 | 
protected-access, 143 | attribute-defined-outside-init, 144 | 145 | # Enable the message, report, category or checker with the given id(s). You can 146 | # either give multiple identifier separated by comma (,) or put this option 147 | # multiple time (only on the command line, not in the configuration file where 148 | # it should appear only once). See also the "--disable" option for examples. 149 | enable=c-extension-no-member 150 | 151 | 152 | [REPORTS] 153 | 154 | # Python expression which should return a score less than or equal to 10. You 155 | # have access to the variables 'error', 'warning', 'refactor', and 'convention' 156 | # which contain the number of messages in each category, as well as 'statement' 157 | # which is the total number of statements analyzed. This score is used by the 158 | # global evaluation report (RP0004). 159 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 160 | 161 | # Template used to display messages. This is a python new-style format string 162 | # used to format the message information. See doc for all details. 163 | #msg-template= 164 | 165 | # Set the output format. Available formats are text, parseable, colorized, json 166 | # and msvs (visual studio). You can also give a reporter class, e.g. 167 | # mypackage.mymodule.MyReporterClass. 168 | output-format=text 169 | 170 | # Tells whether to display a full report or only the messages. 171 | reports=no 172 | 173 | # Activate the evaluation score. 174 | score=yes 175 | 176 | 177 | [REFACTORING] 178 | 179 | # Maximum number of nested blocks for function / method body 180 | max-nested-blocks=5 181 | 182 | # Complete name of functions that never returns. When checking for 183 | # inconsistent-return-statements if a never returning function is called then 184 | # it will be considered as an explicit return statement and no message will be 185 | # printed. 
186 | never-returning-functions=sys.exit 187 | 188 | 189 | [LOGGING] 190 | 191 | # The type of string formatting that logging methods do. `old` means using % 192 | # formatting, `new` is for `{}` formatting. 193 | logging-format-style=old 194 | 195 | # Logging modules to check that the string format arguments are in logging 196 | # function parameter format. 197 | logging-modules=logging 198 | 199 | 200 | [SPELLING] 201 | 202 | # Limits count of emitted suggestions for spelling mistakes. 203 | max-spelling-suggestions=4 204 | 205 | # Spelling dictionary name. Available dictionaries: none. To make it work, 206 | # install the python-enchant package. 207 | spelling-dict= 208 | 209 | # List of comma separated words that should not be checked. 210 | spelling-ignore-words= 211 | 212 | # A path to a file that contains the private dictionary; one word per line. 213 | spelling-private-dict-file= 214 | 215 | # Tells whether to store unknown words to the private dictionary (see the 216 | # --spelling-private-dict-file option) instead of raising a message. 217 | spelling-store-unknown-words=no 218 | 219 | 220 | [MISCELLANEOUS] 221 | 222 | # List of note tags to take in consideration, separated by a comma. 223 | notes=FIXME, 224 | XXX, 225 | TODO 226 | 227 | # Regular expression of note tags to take in consideration. 228 | #notes-rgx= 229 | 230 | 231 | [TYPECHECK] 232 | 233 | # List of decorators that produce context managers, such as 234 | # contextlib.contextmanager. Add to this list to register other decorators that 235 | # produce valid context managers. 236 | contextmanager-decorators=contextlib.contextmanager 237 | 238 | # List of members which are set dynamically and missed by pylint inference 239 | # system, and so shouldn't trigger E1101 when accessed. Python regular 240 | # expressions are accepted. 241 | generated-members= 242 | 243 | # Tells whether missing members accessed in mixin class should be ignored. 
A 244 | # mixin class is detected if its name ends with "mixin" (case insensitive). 245 | ignore-mixin-members=yes 246 | 247 | # Tells whether to warn about missing members when the owner of the attribute 248 | # is inferred to be None. 249 | ignore-none=yes 250 | 251 | # This flag controls whether pylint should warn about no-member and similar 252 | # checks whenever an opaque object is returned when inferring. The inference 253 | # can return multiple potential results while evaluating a Python object, but 254 | # some branches might not be evaluated, which results in partial inference. In 255 | # that case, it might be useful to still emit no-member and other checks for 256 | # the rest of the inferred objects. 257 | ignore-on-opaque-inference=yes 258 | 259 | # List of class names for which member attributes should not be checked (useful 260 | # for classes with dynamically set attributes). This supports the use of 261 | # qualified names. 262 | ignored-classes=optparse.Values,thread._local,_thread._local 263 | 264 | # List of module names for which member attributes should not be checked 265 | # (useful for modules/projects where namespaces are manipulated during runtime 266 | # and thus existing member attributes cannot be deduced by static analysis). It 267 | # supports qualified module names, as well as Unix pattern matching. 268 | ignored-modules= 269 | 270 | # Show a hint with possible names when a member name was not found. The aspect 271 | # of finding the hint is based on edit distance. 272 | missing-member-hint=yes 273 | 274 | # The minimum edit distance a name should have in order to be considered a 275 | # similar match for a missing member name. 276 | missing-member-hint-distance=1 277 | 278 | # The total number of similar names that should be taken in consideration when 279 | # showing a hint for a missing member. 280 | missing-member-max-choices=1 281 | 282 | # List of decorators that change the signature of a decorated function. 
283 | signature-mutators= 284 | 285 | 286 | [VARIABLES] 287 | 288 | # List of additional names supposed to be defined in builtins. Remember that 289 | # you should avoid defining new builtins when possible. 290 | additional-builtins= 291 | 292 | # Tells whether unused global variables should be treated as a violation. 293 | allow-global-unused-variables=yes 294 | 295 | # List of strings which can identify a callback function by name. A callback 296 | # name must start or end with one of those strings. 297 | callbacks=cb_, 298 | _cb 299 | 300 | # A regular expression matching the name of dummy variables (i.e. expected to 301 | # not be used). 302 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 303 | 304 | # Argument names that match this expression will be ignored. Default to name 305 | # with leading underscore. 306 | ignored-argument-names=_.*|^ignored_|^unused_ 307 | 308 | # Tells whether we should check for unused import in __init__ files. 309 | init-import=no 310 | 311 | # List of qualified module names which can have objects that can redefine 312 | # builtins. 313 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 314 | 315 | 316 | [FORMAT] 317 | 318 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 319 | expected-line-ending-format= 320 | 321 | # Regexp for a line that is allowed to be longer than the limit. 322 | ignore-long-lines=^\s*(# )??$ 323 | 324 | # Number of spaces of indent required inside a hanging or continued line. 325 | indent-after-paren=4 326 | 327 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 328 | # tab). 329 | indent-string=' ' 330 | 331 | # Maximum number of characters on a single line. 332 | max-line-length=100 333 | 334 | # Maximum number of lines in a module. 335 | max-module-lines=1000 336 | 337 | # Allow the body of a class to be on the same line as the declaration if body 338 | # contains single statement. 
339 | single-line-class-stmt=no 340 | 341 | # Allow the body of an if to be on the same line as the test if there is no 342 | # else. 343 | single-line-if-stmt=no 344 | 345 | 346 | [SIMILARITIES] 347 | 348 | # Ignore comments when computing similarities. 349 | ignore-comments=yes 350 | 351 | # Ignore docstrings when computing similarities. 352 | ignore-docstrings=yes 353 | 354 | # Ignore imports when computing similarities. 355 | ignore-imports=no 356 | 357 | # Minimum lines number of a similarity. 358 | min-similarity-lines=4 359 | 360 | 361 | [BASIC] 362 | 363 | # Naming style matching correct argument names. 364 | argument-naming-style=snake_case 365 | 366 | # Regular expression matching correct argument names. Overrides argument- 367 | # naming-style. 368 | #argument-rgx= 369 | 370 | # Naming style matching correct attribute names. 371 | attr-naming-style=snake_case 372 | 373 | # Regular expression matching correct attribute names. Overrides attr-naming- 374 | # style. 375 | #attr-rgx= 376 | 377 | # Bad variable names which should always be refused, separated by a comma. 378 | bad-names=foo, 379 | bar, 380 | baz, 381 | toto, 382 | tutu, 383 | tata 384 | 385 | # Bad variable names regexes, separated by a comma. If names match any regex, 386 | # they will always be refused 387 | bad-names-rgxs= 388 | 389 | # Naming style matching correct class attribute names. 390 | class-attribute-naming-style=any 391 | 392 | # Regular expression matching correct class attribute names. Overrides class- 393 | # attribute-naming-style. 394 | #class-attribute-rgx= 395 | 396 | # Naming style matching correct class names. 397 | class-naming-style=PascalCase 398 | 399 | # Regular expression matching correct class names. Overrides class-naming- 400 | # style. 401 | #class-rgx= 402 | 403 | # Naming style matching correct constant names. 404 | const-naming-style=UPPER_CASE 405 | 406 | # Regular expression matching correct constant names. Overrides const-naming- 407 | # style. 
408 | #const-rgx= 409 | 410 | # Minimum line length for functions/classes that require docstrings, shorter 411 | # ones are exempt. 412 | docstring-min-length=-1 413 | 414 | # Naming style matching correct function names. 415 | function-naming-style=snake_case 416 | 417 | # Regular expression matching correct function names. Overrides function- 418 | # naming-style. 419 | #function-rgx= 420 | 421 | # Good variable names which should always be accepted, separated by a comma. 422 | good-names=i, 423 | j, 424 | k, 425 | ex, 426 | Run, 427 | _ 428 | 429 | # Good variable names regexes, separated by a comma. If names match any regex, 430 | # they will always be accepted 431 | good-names-rgxs= 432 | 433 | # Include a hint for the correct naming format with invalid-name. 434 | include-naming-hint=no 435 | 436 | # Naming style matching correct inline iteration names. 437 | inlinevar-naming-style=any 438 | 439 | # Regular expression matching correct inline iteration names. Overrides 440 | # inlinevar-naming-style. 441 | #inlinevar-rgx= 442 | 443 | # Naming style matching correct method names. 444 | method-naming-style=snake_case 445 | 446 | # Regular expression matching correct method names. Overrides method-naming- 447 | # style. 448 | #method-rgx= 449 | 450 | # Naming style matching correct module names. 451 | module-naming-style=snake_case 452 | 453 | # Regular expression matching correct module names. Overrides module-naming- 454 | # style. 455 | #module-rgx= 456 | 457 | # Colon-delimited sets of names that determine each other's naming style when 458 | # the name regexes allow several styles. 459 | name-group= 460 | 461 | # Regular expression which should only match function or class names that do 462 | # not require a docstring. 463 | no-docstring-rgx=^_ 464 | 465 | # List of decorators that produce properties, such as abc.abstractproperty. Add 466 | # to this list to register other decorators that produce valid properties. 
467 | # These decorators are taken in consideration only for invalid-name. 468 | property-classes=abc.abstractproperty 469 | 470 | # Naming style matching correct variable names. 471 | variable-naming-style=snake_case 472 | 473 | # Regular expression matching correct variable names. Overrides variable- 474 | # naming-style. 475 | #variable-rgx= 476 | 477 | 478 | [STRING] 479 | 480 | # This flag controls whether inconsistent-quotes generates a warning when the 481 | # character used as a quote delimiter is used inconsistently within a module. 482 | check-quote-consistency=no 483 | 484 | # This flag controls whether the implicit-str-concat should generate a warning 485 | # on implicit string concatenation in sequences defined over several lines. 486 | check-str-concat-over-line-jumps=no 487 | 488 | 489 | [IMPORTS] 490 | 491 | # List of modules that can be imported at any level, not just the top level 492 | # one. 493 | allow-any-import-level= 494 | 495 | # Allow wildcard imports from modules that define __all__. 496 | allow-wildcard-with-all=no 497 | 498 | # Analyse import fallback blocks. This can be used to support both Python 2 and 499 | # 3 compatible code, which means that the block might have code that exists 500 | # only in one or another interpreter, leading to false positives when analysed. 501 | analyse-fallback-blocks=no 502 | 503 | # Deprecated modules which should not be used, separated by a comma. 504 | deprecated-modules=optparse,tkinter.tix 505 | 506 | # Create a graph of external dependencies in the given file (report RP0402 must 507 | # not be disabled). 508 | ext-import-graph= 509 | 510 | # Create a graph of every (i.e. internal and external) dependencies in the 511 | # given file (report RP0402 must not be disabled). 512 | import-graph= 513 | 514 | # Create a graph of internal dependencies in the given file (report RP0402 must 515 | # not be disabled). 
516 | int-import-graph= 517 | 518 | # Force import order to recognize a module as part of the standard 519 | # compatibility libraries. 520 | known-standard-library= 521 | 522 | # Force import order to recognize a module as part of a third party library. 523 | known-third-party=enchant 524 | 525 | # Couples of modules and preferred modules, separated by a comma. 526 | preferred-modules= 527 | 528 | 529 | [CLASSES] 530 | 531 | # List of method names used to declare (i.e. assign) instance attributes. 532 | defining-attr-methods=__init__, 533 | __new__, 534 | setUp, 535 | __post_init__ 536 | 537 | # List of member names, which should be excluded from the protected access 538 | # warning. 539 | exclude-protected=_asdict, 540 | _fields, 541 | _replace, 542 | _source, 543 | _make 544 | 545 | # List of valid names for the first argument in a class method. 546 | valid-classmethod-first-arg=cls 547 | 548 | # List of valid names for the first argument in a metaclass class method. 549 | valid-metaclass-classmethod-first-arg=cls 550 | 551 | 552 | [DESIGN] 553 | 554 | # Maximum number of arguments for function / method. 555 | max-args=5 556 | 557 | # Maximum number of attributes for a class (see R0902). 558 | max-attributes=7 559 | 560 | # Maximum number of boolean expressions in an if statement (see R0916). 561 | max-bool-expr=5 562 | 563 | # Maximum number of branch for function / method body. 564 | max-branches=12 565 | 566 | # Maximum number of locals for function / method body. 567 | max-locals=15 568 | 569 | # Maximum number of parents for a class (see R0901). 570 | max-parents=7 571 | 572 | # Maximum number of public methods for a class (see R0904). 573 | max-public-methods=20 574 | 575 | # Maximum number of return / yield for function / method body. 576 | max-returns=6 577 | 578 | # Maximum number of statements in function / method body. 579 | max-statements=50 580 | 581 | # Minimum number of public methods for a class (see R0903). 
582 | min-public-methods=2 583 | 584 | 585 | [EXCEPTIONS] 586 | 587 | # Exceptions that will emit a warning when being caught. Defaults to 588 | # "BaseException, Exception". 589 | overgeneral-exceptions=BaseException, 590 | Exception 591 | -------------------------------------------------------------------------------- /tests/test_runstats.py: -------------------------------------------------------------------------------- 1 | """Test runstats module. 2 | 3 | """ 4 | 5 | import copy 6 | import math 7 | import pickle 8 | import random 9 | 10 | import pytest 11 | 12 | from runstats import ExponentialStatistics as FastExponentialStatistics 13 | from runstats import Regression as FastRegression 14 | from runstats import Statistics as FastStatistics 15 | from runstats.core import ExponentialStatistics as CoreExponentialStatistics 16 | from runstats.core import Regression as CoreRegression 17 | from runstats.core import Statistics as CoreStatistics 18 | 19 | limit = 1e-2 20 | count = 1000 21 | 22 | 23 | def mean(values): 24 | return sum(values) / len(values) 25 | 26 | 27 | def variance(values, ddof=1.0): 28 | temp = mean(values) 29 | return sum((value - temp) ** 2 for value in values) / (len(values) - ddof) 30 | 31 | 32 | def stddev(values, ddof=1.0): 33 | return variance(values, ddof) ** 0.5 34 | 35 | 36 | def skewness(values): 37 | temp = mean(values) 38 | numerator = sum((value - temp) ** 3 for value in values) / len(values) 39 | denominator = ( 40 | sum((value - temp) ** 2 for value in values) / len(values) 41 | ) ** 1.5 42 | return numerator / denominator 43 | 44 | 45 | def kurtosis(values): 46 | temp = mean(values) 47 | numerator = sum((value - temp) ** 4 for value in values) / len(values) 48 | sum_diff_2 = sum((value - temp) ** 2 for value in values) 49 | denominator = (sum_diff_2 / len(values)) ** 2 50 | return (numerator / denominator) - 3 51 | 52 | 53 | def error(value, test): 54 | return abs((test - value) / value) 55 | 56 | 57 | @pytest.mark.parametrize( 58 
| 'Statistics,Regression', 59 | [ 60 | (CoreStatistics, CoreRegression), 61 | (FastStatistics, FastRegression), 62 | ], 63 | ) 64 | def test_statistics(Statistics, Regression): 65 | random.seed(0) 66 | alpha = [random.random() for _ in range(count)] 67 | 68 | alpha_stats = Statistics() 69 | 70 | for val in alpha: 71 | alpha_stats.push(val) 72 | 73 | assert len(alpha_stats) == count 74 | assert error(mean(alpha), alpha_stats.mean()) < limit 75 | assert error(variance(alpha, 0.0), alpha_stats.variance(0.0)) < limit 76 | assert error(variance(alpha, 1.0), alpha_stats.variance(1.0)) < limit 77 | assert error(stddev(alpha, 0.0), alpha_stats.stddev(0.0)) < limit 78 | assert error(stddev(alpha, 1.0), alpha_stats.stddev(1.0)) < limit 79 | assert error(skewness(alpha), alpha_stats.skewness()) < limit 80 | assert error(kurtosis(alpha), alpha_stats.kurtosis()) < limit 81 | assert alpha_stats.minimum() == min(alpha) 82 | assert alpha_stats.maximum() == max(alpha) 83 | 84 | alpha_stats.clear() 85 | 86 | assert len(alpha_stats) == 0 87 | 88 | alpha_stats = Statistics(alpha) 89 | 90 | beta = [random.random() for _ in range(count)] 91 | 92 | beta_stats = Statistics() 93 | 94 | for val in beta: 95 | beta_stats.push(val) 96 | 97 | gamma_stats = alpha_stats + beta_stats 98 | 99 | assert len(beta_stats) != len(gamma_stats) 100 | assert error(mean(alpha + beta), gamma_stats.mean()) < limit 101 | assert ( 102 | error(variance(alpha + beta, 1.0), gamma_stats.variance(1.0)) < limit 103 | ) 104 | assert ( 105 | error(variance(alpha + beta, 0.0), gamma_stats.variance(0.0)) < limit 106 | ) 107 | assert error(stddev(alpha + beta, 1.0), gamma_stats.stddev(1.0)) < limit 108 | assert error(stddev(alpha + beta, 0.0), gamma_stats.stddev(0.0)) < limit 109 | assert error(skewness(alpha + beta), gamma_stats.skewness()) < limit 110 | assert error(kurtosis(alpha + beta), gamma_stats.kurtosis()) < limit 111 | assert gamma_stats.minimum() == min(alpha + beta) 112 | assert gamma_stats.maximum() == 
max(alpha + beta) 113 | 114 | delta_stats = beta_stats.copy() 115 | delta_stats += alpha_stats 116 | 117 | assert len(beta_stats) != len(delta_stats) 118 | assert error(mean(alpha + beta), delta_stats.mean()) < limit 119 | assert ( 120 | error(variance(alpha + beta, 1.0), delta_stats.variance(1.0)) < limit 121 | ) 122 | assert ( 123 | error(variance(alpha + beta, 0.0), delta_stats.variance(0.0)) < limit 124 | ) 125 | assert error(stddev(alpha + beta, 1.0), delta_stats.stddev(1.0)) < limit 126 | assert error(stddev(alpha + beta, 0.0), delta_stats.stddev(0.0)) < limit 127 | assert error(skewness(alpha + beta), delta_stats.skewness()) < limit 128 | assert error(kurtosis(alpha + beta), delta_stats.kurtosis()) < limit 129 | assert delta_stats.minimum() == min(alpha + beta) 130 | assert delta_stats.maximum() == max(alpha + beta) 131 | 132 | 133 | @pytest.mark.parametrize( 134 | 'ExponentialStatistics', 135 | [CoreExponentialStatistics, FastExponentialStatistics], 136 | ) 137 | def test_exponential_statistics(ExponentialStatistics): 138 | random.seed(0) 139 | alpha = [random.random() for _ in range(count)] 140 | big_alpha = [random.random() for _ in range(count * 100)] 141 | 142 | alpha_exp_stats_zero = ExponentialStatistics(0.9999) 143 | alpha_exp_stats_init = ExponentialStatistics( 144 | decay=0.9999, 145 | mean=mean(alpha), 146 | variance=variance(alpha, 0), 147 | ) 148 | 149 | for val in big_alpha: 150 | alpha_exp_stats_zero.push(val) 151 | alpha_exp_stats_init.push(val) 152 | 153 | assert error(mean(big_alpha), alpha_exp_stats_zero.mean()) < limit 154 | assert error(mean(big_alpha), alpha_exp_stats_init.mean()) < limit 155 | assert ( 156 | error(variance(big_alpha, 0), alpha_exp_stats_zero.variance()) < limit 157 | ) 158 | assert ( 159 | error(variance(big_alpha, 0), alpha_exp_stats_init.variance()) < limit 160 | ) 161 | assert error(stddev(big_alpha, 0), alpha_exp_stats_zero.stddev()) < limit 162 | assert error(stddev(big_alpha, 0), alpha_exp_stats_init.stddev()) < 
limit 163 | 164 | alpha_exp_stats_zero.clear() 165 | alpha_exp_stats_zero.decay = 0.1 166 | alpha_exp_stats_init.clear( 167 | decay=0.1, mean=mean(alpha), variance=variance(alpha, 0) 168 | ) 169 | 170 | for val in big_alpha: 171 | alpha_exp_stats_zero.push(val) 172 | alpha_exp_stats_init.push(val) 173 | 174 | assert ( 175 | error(alpha_exp_stats_zero.mean(), alpha_exp_stats_init.mean()) < limit 176 | ) 177 | assert ( 178 | error(alpha_exp_stats_zero.variance(), alpha_exp_stats_init.variance()) 179 | < limit 180 | ) 181 | assert ( 182 | error(alpha_exp_stats_zero.stddev(), alpha_exp_stats_init.stddev()) 183 | < limit 184 | ) 185 | 186 | alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) 187 | beta = [random.random() * 2 for _ in range(count)] 188 | beta_exp_stats = ExponentialStatistics(0.1) 189 | 190 | assert alpha_exp_stats != beta_exp_stats 191 | 192 | for val in beta: 193 | alpha_exp_stats.push(val) 194 | beta_exp_stats.push(val) 195 | 196 | assert alpha_exp_stats == beta_exp_stats 197 | 198 | for val in alpha: 199 | alpha_exp_stats.push(val) 200 | beta_exp_stats.push(val) 201 | 202 | assert alpha_exp_stats == beta_exp_stats 203 | 204 | current_mean = alpha_exp_stats.mean() 205 | current_variance = alpha_exp_stats.variance() 206 | alpha_exp_stats.decay = 0.99999999 207 | 208 | for val in range(10): 209 | alpha_exp_stats.push(val) 210 | 211 | assert (error(current_mean, alpha_exp_stats.mean())) < limit 212 | assert (error(current_variance, alpha_exp_stats.variance())) < limit 213 | 214 | alpha_exp_stats.decay = 0.1 215 | 216 | for val in range(10): 217 | alpha_exp_stats.push(val) 218 | 219 | assert (error(current_mean, alpha_exp_stats.mean())) > limit 220 | assert (error(current_variance, alpha_exp_stats.variance())) > limit 221 | 222 | 223 | @pytest.mark.parametrize( 224 | 'ExponentialStatistics', 225 | [CoreExponentialStatistics, FastExponentialStatistics], 226 | ) 227 | def test_bad_decay(ExponentialStatistics): 228 | with pytest.raises(ValueError): 
229 | ExponentialStatistics(decay=2.0) 230 | with pytest.raises(ValueError): 231 | ExponentialStatistics(decay=-1.0) 232 | 233 | 234 | @pytest.mark.parametrize( 235 | 'Statistics,Regression', 236 | [ 237 | (CoreStatistics, CoreRegression), 238 | (FastStatistics, FastRegression), 239 | ], 240 | ) 241 | def test_add_statistics(Statistics, Regression): 242 | stats0 = Statistics() 243 | stats10 = Statistics(range(10)) 244 | assert (stats0 + stats10) == stats10 245 | assert (stats10 + stats0) == stats10 246 | stats0 += stats10 247 | 248 | 249 | @pytest.mark.parametrize( 250 | 'ExponentialStatistics', 251 | [CoreExponentialStatistics, FastExponentialStatistics], 252 | ) 253 | def test_add_exponential_statistics(ExponentialStatistics): 254 | exp_stats0 = ExponentialStatistics(0.9) 255 | exp_stats10 = ExponentialStatistics(0.9, iterable=range(10)) 256 | assert (exp_stats0 + exp_stats10) == exp_stats10 257 | assert (exp_stats10 + exp_stats0) == exp_stats10 258 | exp_stats0 += exp_stats10 259 | exp_stats0 *= 2 260 | with pytest.raises(TypeError): 261 | exp_stats0 * object() 262 | with pytest.raises(TypeError): 263 | object() * exp_stats0 264 | 265 | 266 | def correlation(values): 267 | sigma_x = sum(xxx for xxx, yyy in values) / len(values) 268 | sigma_y = sum(yyy for xxx, yyy in values) / len(values) 269 | sigma_xy = sum(xxx * yyy for xxx, yyy in values) / len(values) 270 | sigma_x2 = sum(xxx ** 2 for xxx, yyy in values) / len(values) 271 | sigma_y2 = sum(yyy ** 2 for xxx, yyy in values) / len(values) 272 | return (sigma_xy - sigma_x * sigma_y) / ( 273 | ((sigma_x2 - sigma_x ** 2) * (sigma_y2 - sigma_y ** 2)) ** 0.5 274 | ) 275 | 276 | 277 | @pytest.mark.parametrize( 278 | 'Statistics,Regression', 279 | [ 280 | (CoreStatistics, CoreRegression), 281 | (FastStatistics, FastRegression), 282 | ], 283 | ) 284 | def test_regression(Statistics, Regression): 285 | random.seed(0) 286 | alpha, beta, rand = 5.0, 10.0, 1.0 287 | 288 | points = [ 289 | (xxx, alpha * xxx + beta + rand * 
(0.5 - random.random())) 290 | for xxx in range(count) 291 | ] 292 | 293 | regr = Regression() 294 | 295 | for xxx, yyy in points: 296 | regr.push(xxx, yyy) 297 | 298 | assert error(alpha, regr.slope()) < limit 299 | assert error(beta, regr.intercept()) < limit 300 | assert error(correlation(points), regr.correlation()) < limit 301 | 302 | regr_copy = regr.copy() 303 | 304 | more_points = [ 305 | (xxx, alpha * xxx + beta + rand * (0.5 - random.random())) 306 | for xxx in range(count, 2 * count) 307 | ] 308 | 309 | for xxx, yyy in more_points: 310 | regr_copy.push(xxx, yyy) 311 | 312 | regr_more = Regression(more_points) 313 | 314 | regr_sum = regr + regr_more 315 | 316 | assert len(regr_copy) == len(regr_sum) == (2 * count) 317 | assert error(regr_copy.slope(), regr_sum.slope()) < limit 318 | assert error(regr_copy.intercept(), regr_sum.intercept()) < limit 319 | assert error(regr_copy.correlation(), regr_sum.correlation()) < limit 320 | 321 | regr += regr_more 322 | 323 | assert len(regr) == len(regr_copy) == (2 * count) 324 | assert error(regr.slope(), regr_copy.slope()) < limit 325 | assert error(regr.intercept(), regr_copy.intercept()) < limit 326 | assert error(regr.correlation(), regr_copy.correlation()) < limit 327 | 328 | regr.clear() 329 | 330 | assert len(regr) == 0 331 | 332 | 333 | @pytest.mark.parametrize( 334 | 'Statistics,Regression', 335 | [ 336 | (CoreStatistics, CoreRegression), 337 | (FastStatistics, FastRegression), 338 | ], 339 | ) 340 | def test_get_set_state_statistics(Statistics, Regression): 341 | random.seed(0) 342 | tail = -10 343 | vals = [random.random() for _ in range(count)] 344 | 345 | stats = Statistics(vals[:tail]) 346 | state = stats.get_state() 347 | 348 | for num in vals[tail:]: 349 | stats.push(num) 350 | 351 | new_stats = Statistics() 352 | new_stats.set_state(state) 353 | 354 | for num in vals[tail:]: 355 | new_stats.push(num) 356 | 357 | assert stats.mean() == new_stats.mean() 358 | assert stats.variance() == 
new_stats.variance() 359 | assert stats.minimum() == new_stats.minimum() 360 | assert stats.maximum() == new_stats.maximum() 361 | assert stats.kurtosis() == new_stats.kurtosis() 362 | assert stats.skewness() == new_stats.skewness() 363 | 364 | assert stats == Statistics.fromstate(stats.get_state()) 365 | 366 | 367 | @pytest.mark.parametrize( 368 | 'ExponentialStatistics', 369 | [CoreExponentialStatistics, FastExponentialStatistics], 370 | ) 371 | def test_get_set_state_exponential_statistics(ExponentialStatistics): 372 | random.seed(0) 373 | vals = [random.random() for _ in range(count)] 374 | exp_stats = ExponentialStatistics(iterable=vals) 375 | exp_state = exp_stats.get_state() 376 | 377 | new_exp_stats = ExponentialStatistics(0.8) 378 | assert exp_stats != new_exp_stats 379 | assert new_exp_stats.decay == 0.8 380 | new_exp_stats.set_state(exp_state) 381 | assert new_exp_stats.decay == 0.9 382 | assert exp_stats == new_exp_stats 383 | new_exp_stats.decay = 0.1 384 | assert exp_stats != new_exp_stats 385 | assert exp_stats.mean() == new_exp_stats.mean() 386 | assert exp_stats.variance() == new_exp_stats.variance() 387 | assert new_exp_stats.decay == 0.1 388 | 389 | assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state()) 390 | 391 | 392 | @pytest.mark.parametrize( 393 | 'Statistics,Regression', 394 | [ 395 | (CoreStatistics, CoreRegression), 396 | (FastStatistics, FastRegression), 397 | ], 398 | ) 399 | def test_get_set_state_regression(Statistics, Regression): 400 | random.seed(0) 401 | tail = -10 402 | alpha, beta, rand = 5.0, 10.0, 20.0 403 | points = [ 404 | (xxx, alpha * xxx + beta + rand * (0.5 - random.random())) 405 | for xxx in range(count) 406 | ] 407 | 408 | regr = Regression(points[:tail]) 409 | state = regr.get_state() 410 | 411 | for xxx, yyy in points[tail:]: 412 | regr.push(xxx, yyy) 413 | 414 | new_regr = Regression() 415 | new_regr.set_state(state) 416 | 417 | for xxx, yyy in points[tail:]: 418 | new_regr.push(xxx, yyy) 419 | 
420 | assert regr.slope() == new_regr.slope() 421 | assert regr.intercept() == new_regr.intercept() 422 | assert regr.correlation() == new_regr.correlation() 423 | 424 | assert regr == Regression.fromstate(regr.get_state()) 425 | 426 | 427 | @pytest.mark.parametrize( 428 | 'Statistics,Regression', 429 | [ 430 | (CoreStatistics, CoreRegression), 431 | (FastStatistics, FastRegression), 432 | ], 433 | ) 434 | def test_pickle_statistics(Statistics, Regression): 435 | stats = Statistics(range(10)) 436 | for num in range(pickle.HIGHEST_PROTOCOL): 437 | pickled_stats = pickle.dumps(stats, protocol=num) 438 | unpickled_stats = pickle.loads(pickled_stats) 439 | assert stats == unpickled_stats, 'protocol: %s' % num 440 | 441 | 442 | @pytest.mark.parametrize( 443 | 'ExponentialStatistics', 444 | [CoreExponentialStatistics, FastExponentialStatistics], 445 | ) 446 | def test_pickle_exponential_statistics(ExponentialStatistics): 447 | exp_stats = ExponentialStatistics(0.9, iterable=range(10)) 448 | for num in range(pickle.HIGHEST_PROTOCOL): 449 | pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) 450 | unpickled_exp_stats = pickle.loads(pickled_exp_stats) 451 | assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num 452 | 453 | 454 | @pytest.mark.parametrize( 455 | 'Statistics,Regression', 456 | [ 457 | (CoreStatistics, CoreRegression), 458 | (FastStatistics, FastRegression), 459 | ], 460 | ) 461 | def test_pickle_regression(Statistics, Regression): 462 | regr = Regression(enumerate(range(10))) 463 | for num in range(pickle.HIGHEST_PROTOCOL): 464 | pickled_regr = pickle.dumps(regr, protocol=num) 465 | unpickled_regr = pickle.loads(pickled_regr) 466 | assert regr == unpickled_regr, 'protocol: %s' % num 467 | 468 | 469 | @pytest.mark.parametrize( 470 | 'Statistics,Regression', 471 | [ 472 | (CoreStatistics, CoreRegression), 473 | (FastStatistics, FastRegression), 474 | ], 475 | ) 476 | def test_copy_statistics(Statistics, Regression): 477 | stats = 
Statistics(range(10)) 478 | copy_stats = copy.copy(stats) 479 | assert stats == copy_stats 480 | deepcopy_stats = copy.deepcopy(stats) 481 | assert stats == deepcopy_stats 482 | 483 | 484 | @pytest.mark.parametrize( 485 | 'ExponentialStatistics', 486 | [CoreExponentialStatistics, FastExponentialStatistics], 487 | ) 488 | def test_copy_exponential_statistics(ExponentialStatistics): 489 | exp_stats = ExponentialStatistics(0.9, iterable=range(10)) 490 | copy_exp_stats = copy.copy(exp_stats) 491 | assert exp_stats == copy_exp_stats 492 | deepcopy_exp_stats = copy.deepcopy(exp_stats) 493 | assert exp_stats == deepcopy_exp_stats 494 | 495 | 496 | @pytest.mark.parametrize( 497 | 'Statistics,Regression', 498 | [ 499 | (CoreStatistics, CoreRegression), 500 | (FastStatistics, FastRegression), 501 | ], 502 | ) 503 | def test_copy_regression(Statistics, Regression): 504 | regr = Regression(enumerate(range(10))) 505 | copy_regr = copy.copy(regr) 506 | assert regr == copy_regr 507 | deepcopy_regr = copy.deepcopy(regr) 508 | assert regr == deepcopy_regr 509 | 510 | 511 | @pytest.mark.parametrize( 512 | 'Statistics,Regression', 513 | [ 514 | (CoreStatistics, CoreRegression), 515 | (FastStatistics, FastRegression), 516 | ], 517 | ) 518 | def test_equality_statistics(Statistics, Regression): 519 | stats1 = Statistics(range(10)) 520 | stats2 = Statistics(range(10)) 521 | assert stats1 == stats2 522 | stats2.push(42) 523 | assert stats1 != stats2 524 | 525 | 526 | @pytest.mark.parametrize( 527 | 'ExponentialStatistics', 528 | [CoreExponentialStatistics, FastExponentialStatistics], 529 | ) 530 | def test_equality_exponential_statistics(ExponentialStatistics): 531 | exp_stats1 = ExponentialStatistics(0.9, iterable=range(10)) 532 | exp_stats2 = ExponentialStatistics(0.9, iterable=range(10)) 533 | assert exp_stats1 == exp_stats2 534 | exp_stats2.push(42) 535 | assert exp_stats1 != exp_stats2 536 | 537 | 538 | @pytest.mark.parametrize( 539 | 'Statistics,Regression', 540 | [ 541 | 
(CoreStatistics, CoreRegression), 542 | (FastStatistics, FastRegression), 543 | ], 544 | ) 545 | def test_equality_regression(Statistics, Regression): 546 | regr1 = Regression(enumerate(range(10))) 547 | regr2 = Regression(enumerate(range(10))) 548 | assert regr1 == regr2 549 | regr2.push(42, 42) 550 | assert regr1 != regr2 551 | 552 | 553 | @pytest.mark.parametrize( 554 | 'Statistics,Regression', 555 | [ 556 | (CoreStatistics, CoreRegression), 557 | (FastStatistics, FastRegression), 558 | ], 559 | ) 560 | def test_sum_stats_count0(Statistics, Regression): 561 | stats1 = Statistics() 562 | stats2 = Statistics() 563 | sumstats = stats1 + stats2 564 | assert len(sumstats) == 0 565 | 566 | 567 | @pytest.mark.parametrize( 568 | 'Statistics,Regression', 569 | [ 570 | (CoreStatistics, CoreRegression), 571 | (FastStatistics, FastRegression), 572 | ], 573 | ) 574 | def test_sum_regr_count0(Statistics, Regression): 575 | regr1 = Regression() 576 | regr2 = Regression() 577 | sumregr = regr1 + regr2 578 | assert len(sumregr) == 0 579 | 580 | 581 | @pytest.mark.parametrize( 582 | 'Statistics,Regression', 583 | [ 584 | (CoreStatistics, CoreRegression), 585 | (FastStatistics, FastRegression), 586 | ], 587 | ) 588 | def test_multiply(Statistics, Regression): 589 | stats1 = Statistics(range(10)) 590 | stats2 = Statistics(range(10)) * 2 591 | stats4 = 2 * stats2 592 | assert len(stats2) == 2 * len(stats1) 593 | assert len(stats4) == 2 * len(stats2) 594 | assert stats1.mean() == stats2.mean() 595 | assert stats1.mean() == stats4.mean() 596 | assert stats1.minimum() == stats2.minimum() 597 | assert stats1.maximum() == stats2.maximum() 598 | assert stats1.minimum() == stats4.minimum() 599 | assert stats1.maximum() == stats4.maximum() 600 | assert (stats1 + stats1).variance() == stats2.variance() 601 | assert (stats1 + stats1).kurtosis() == stats2.kurtosis() 602 | assert (stats1 + stats1).skewness() == stats2.skewness() 603 | assert (stats2 + stats2).variance() == stats4.variance() 604 
| assert (stats2 + stats2).kurtosis() == stats4.kurtosis() 605 | assert (stats2 + stats2).skewness() == stats4.skewness() 606 | assert (2 * stats2) == stats4 607 | stats1 *= 4 608 | assert stats1 == stats4 609 | stats5 = math.e * stats1 610 | assert stats1.mean() == stats5.mean() 611 | with pytest.raises(TypeError): 612 | stats1 * object() 613 | with pytest.raises(TypeError): 614 | object() * stats1 615 | 616 | 617 | @pytest.mark.parametrize( 618 | 'ExponentialStatistics', 619 | [CoreExponentialStatistics, FastExponentialStatistics], 620 | ) 621 | def test_expoential_batch(ExponentialStatistics): 622 | random.seed(0) 623 | 624 | alpha = [random.random() for _ in range(count)] 625 | beta = [random.random() * 2 for _ in range(count)] 626 | 627 | alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) 628 | beta_exp_stats = ExponentialStatistics(0.9, iterable=beta) 629 | 630 | gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 631 | 632 | weighted_mean = alpha_exp_stats.mean() * 0.3 + beta_exp_stats.mean() * 0.7 633 | assert weighted_mean == gamma_exp_stats.mean() 634 | 635 | weighted_var = ( 636 | alpha_exp_stats.variance() * 0.3 + beta_exp_stats.variance() * 0.7 637 | ) 638 | assert weighted_var == gamma_exp_stats.variance() 639 | assert alpha_exp_stats._decay == gamma_exp_stats._decay 640 | assert beta_exp_stats._decay != gamma_exp_stats._decay 641 | --------------------------------------------------------------------------------