├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .travis.yml ├── .whitesource ├── CODE_OF_CONDUCT.md ├── DEPLOYMENT.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Algorithms.ipynb ├── Makefile ├── Quickstart.ipynb ├── Releases.md ├── _static │ └── css │ │ └── custom.css ├── api.rst ├── citations.rst ├── code_of_conduct.rst ├── conf.py ├── contributing.rst ├── deploy_docs.sh ├── examples.rst ├── examples │ ├── ECG_Annotation_Vectors.ipynb │ ├── Hierarchical_Clustering_Accelerometer_Walk_Stand_etc.ipynb │ ├── NYC_Taxis.ipynb │ └── Plot_Discords_MP.ipynb ├── help.rst ├── images │ ├── datamp.png │ ├── datampanom.png │ └── mpf-logo.png ├── index.rst ├── install.rst ├── installation │ ├── Linux_Installation.ipynb │ ├── Mac_OS_Installation.ipynb │ ├── Windows_Installation.ipynb │ ├── install_microsoft_anaconda.jpeg │ ├── install_microsoft_cpp.jpeg │ └── install_microsoft_download_build_tools.jpeg └── make.bat ├── matrixprofile ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── cympx.pyx │ ├── hierarchical_clustering.py │ ├── mass2.py │ ├── mpdist.py │ ├── mpx.py │ ├── mstomp.py │ ├── pairwise_dist.py │ ├── regimes.py │ ├── scrimp.py │ ├── skimp.py │ ├── snippets.py │ ├── statistics.py │ ├── stomp.py │ ├── top_k_discords.py │ └── top_k_motifs.py ├── analyze.py ├── compute.py ├── core.py ├── cycore.pyx ├── datasets │ ├── __init__.py │ └── datasets.py ├── discover.py ├── exceptions.py ├── io │ ├── __init__.py │ ├── __io.py │ └── protobuf │ │ ├── README.md │ │ ├── __init__.py │ │ ├── proto_messages.proto │ │ ├── proto_messages_pb2.py │ │ └── protobuf_utils.py ├── preprocess.py ├── transform.py ├── utils.py └── visualize.py ├── python2-requirements.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── mpx_ab_mpa.txt ├── mpx_ab_mpb.txt ├── mpx_ab_mpia.txt ├── mpx_ab_mpib.txt ├── mpx_mp.txt ├── mpx_mpi.txt ├── muinvn_mua.txt ├── muinvn_stda.txt ├── sampledata.txt ├── scrimp.mp.txt ├── 
scrimp.mpi.txt ├── test_analyze.py ├── test_annotation_vector.py ├── test_compute.py ├── test_core.py ├── test_cycore.py ├── test_datasets.py ├── test_hierarchical_clustering.py ├── test_io.py ├── test_mass2.py ├── test_mpdist.py ├── test_mpx.py ├── test_mstomp.py ├── test_pairwise_dist.py ├── test_preprocess.py ├── test_regimes.py ├── test_scrimp.py ├── test_skimp.py ├── test_snippets.py ├── test_statistics.py ├── test_stomp.py ├── test_top_k_discords.py ├── test_top_k_motifs.py ├── test_utils.py └── test_visualize.py └── version.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Version [e.g. 22] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | __pycache__ 4 | *.so 5 | *.o 6 | *.c 7 | 8 | *.egg 9 | *.egg-info 10 | .*cache*/ 11 | *venv*/ 12 | 13 | Cython/Compiler/*.c 14 | Cython/Plex/*.c 15 | Cython/Runtime/refnanny.c 16 | Cython/Tempita/*.c 17 | Cython/*.c 18 | Cython/*.html 19 | Cython/*/*.html 20 | 21 | Tools/*.elc 22 | Demos/*.html 23 | Demos/*/*.html 24 | 25 | /TEST_TMP/ 26 | /build/ 27 | /wheelhouse*/ 28 | !tests/build/ 29 | /dist/ 30 | .gitrev 31 | .coverage 32 | *.patch 33 | *.diff 34 | *.orig 35 | *.prof 36 | *.rej 37 | *.log 38 | *.dep 39 | *.swp 40 | *~ 41 | callgrind.out.* 42 | 43 | .ipynb_checkpoints 44 | docs/build 45 | 46 | tags 47 | TAGS 48 | MANIFEST 49 | 50 | .tox 51 | 52 | # Jetbrains IDE project files 53 | /.idea 54 | /*.iml 55 | .DS_Store 56 | 57 | # ignore generated version file 58 | matrixprofile/version.py 59 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # In this configuration, the package is tested against multiple versions of python 2 | # on Linux and on one version of Python on Windows. If and only if all tests pass 3 | # will the wheels be built and deployed. Further, deployment is only initiated if 4 | # the current branch is "master", the current commit is tagged, and the current 5 | # repo is yours (e.g. it won't run on a Pull Request). 
For convenience, a source 6 | # distribution is also created. 7 | 8 | language: python 9 | python: 10 | # - 2.7 11 | - 3.5 12 | - 3.6 13 | - 3.7 14 | - 3.8 15 | - 3.9 16 | 17 | before_install: 18 | - | 19 | if [[ "$TRAVIS_OS_NAME" = windows ]]; then 20 | choco install python --version 3.8.6 21 | export PATH="/c/Python38:/c/Python38/Scripts:$PATH" 22 | fi 23 | - | 24 | if [[ "$TRAVIS_OS_NAME" = osx ]]; then 25 | export PATH="/usr/local/bin:$PATH" 26 | if [ ! -f "/usr/local/bin/gcc-9" ]; then 27 | brew install gcc@9 28 | fi 29 | 30 | brew install libomp 31 | ln -s /usr/local/bin/gcc-9 /usr/local/bin/gcc 32 | export CC="/usr/local/bin/gcc" 33 | fi 34 | 35 | install: 36 | - pip install pytest 37 | 38 | script: 39 | - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then pip install -r python2-requirements.txt; else pip install -r requirements.txt; fi 40 | - pip install -e . 41 | - pytest tests/ 42 | 43 | stages: 44 | - test 45 | - codecov 46 | # Only execute deployment stage on master branch, tagged commits, 47 | # and from your repository (e.g. not PRs). Replace with your repo name. 48 | - name: deploy 49 | if: tag IS present 50 | 51 | jobs: 52 | include: 53 | # # Optional: run a test on Windows 54 | - os: windows 55 | language: shell 56 | name: Test on Windows 57 | 58 | # # Code coverage 59 | - stage: codecov 60 | name: Add code coverage stats 61 | services: docker 62 | python: 3.6.2 63 | install: 64 | - pip install codecov 65 | - pip install pytest-cov 66 | - pip install -e . 
67 | script: 68 | - pytest --cov=matrixprofile tests/ 69 | - codecov 70 | 71 | # # Deploy on linux 72 | - stage: deploy 73 | name: Build and deploy Linux wheels 74 | services: docker 75 | python: 3.8 76 | install: 77 | - pip install twine 78 | - pip install cython 79 | - pip install cibuildwheel==1.7.2 80 | script: 81 | - cibuildwheel --output-dir wheelhouse 82 | - twine upload wheelhouse/*.whl --verbose --skip-existing 83 | - python setup.py sdist --formats=gztar 84 | - twine upload dist/*.tar.gz --verbose --skip-existing 85 | after_success: 86 | - ls wheelhouse/*.whl 87 | # Deploy on mac 88 | - stage: deploy 89 | name: Build and deploy macOS wheels 90 | os: osx 91 | language: shell 92 | install: 93 | - python3 -m pip install twine 94 | - python3 -m pip install cython 95 | - python3 -m pip install cibuildwheel==1.7.2 96 | script: 97 | - python3 -m cibuildwheel --output-dir wheelhouse 98 | - twine upload wheelhouse/*.whl --verbose --skip-existing 99 | after_success: 100 | - ls wheelhouse/*.whl 101 | # Deploy on windows 102 | - stage: deploy 103 | name: Build and deploy Windows wheels 104 | os: windows 105 | language: shell 106 | install: 107 | - pip install twine 108 | - pip install cython 109 | - pip install cibuildwheel==1.7.2 110 | script: 111 | - cibuildwheel --output-dir wheelhouse 112 | - twine upload wheelhouse/*.whl --verbose --skip-existing 113 | after_success: 114 | - ls wheelhouse/*.whl 115 | 116 | env: 117 | global: 118 | - TWINE_USERNAME=matrixprofilefoundation 119 | - MPLBACKEND=agg 120 | # Note: TWINE_PASSWORD is set in Travis settings 121 | 122 | # skip 32 bit binaries and pypy builds 123 | - CIBW_SKIP="*-win32 *-manylinux_i686 cp27-manylinux_* pp* cp27-macosx_*" 124 | - MACOSX_DEPLOYMENT_TARGET="10.12" 125 | -------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "checkRunSettings": { 3 | 
"vulnerableCheckRunConclusionLevel": "failure" 4 | }, 5 | "issueSettings": { 6 | "minSeverityLevel": "LOW" 7 | } 8 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | Code of Conduct 2 | --------------- 3 | Please find our Code of Conduct documentation [here.](http://matrixprofile.docs.matrixprofile.org/code_of_conduct.html) 4 | -------------------------------------------------------------------------------- /DEPLOYMENT.md: -------------------------------------------------------------------------------- 1 | # Deployment Instructions 2 | 3 | ## Source Code 4 | The matrixprofile library consists of a multi-stage TravisCI build. The steps to create a release are as follows: 5 | 6 | 1. Update the version.py 7 | 2. Update the docs/Releases.md 8 | 3. Push the commit with the release to master. 9 | 10 | ``` 11 | git commit version.py docs/Releases.md -m "release vX.X.X" 12 | git push origin master 13 | ``` 14 | 15 | 4. Now we create a git tag that triggers a TravisCI build. 16 | 17 | ``` 18 | git tag -a 'vX.X.X' -m 'release vX.X.X' 19 | git push --tags 20 | ``` 21 | 22 | ## API Documentation 23 | The API documentation is hosted on Github pages. A bash script exists to "deploy" the code. 24 | 25 | ``` 26 | bash docs/deploy_docs.sh 27 | ``` 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include version.py 2 | recursive-include . *.pyx *.proto *.txt *.csv *.json 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ..
image:: https://matrixprofile.org/static/img/mpf-logo.png 2 | :target: https://matrixprofile.org 3 | :height: 300px 4 | :scale: 50% 5 | :alt: MPF Logo 6 | | 7 | | 8 | .. image:: https://img.shields.io/pypi/v/matrixprofile.svg 9 | :target: https://pypi.org/project/matrixprofile/ 10 | :alt: PyPI Version 11 | .. image:: https://pepy.tech/badge/matrixprofile 12 | :target: https://pepy.tech/project/matrixprofile 13 | :alt: PyPI Downloads 14 | .. image:: https://img.shields.io/conda/vn/conda-forge/matrixprofile.svg 15 | :target: https://anaconda.org/conda-forge/matrixprofile 16 | :alt: Conda Version 17 | .. image:: https://img.shields.io/conda/dn/conda-forge/matrixprofile.svg 18 | :target: https://anaconda.org/conda-forge/matrixprofile 19 | :alt: Conda Downloads 20 | .. image:: https://codecov.io/gh/matrix-profile-foundation/matrixprofile/branch/master/graph/badge.svg 21 | :target: https://codecov.io/gh/matrix-profile-foundation/matrixprofile 22 | :alt: Code Coverage 23 | .. image:: https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/matrixprofile-feedstock?branchName=master 24 | :target: https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=11637&branchName=master 25 | :alt: Azure Pipelines 26 | .. image:: https://api.travis-ci.com/matrix-profile-foundation/matrixprofile.svg?branch=master 27 | :target: https://travis-ci.com/matrix-profile-foundation/matrixprofile 28 | :alt: Build Status 29 | .. image:: https://img.shields.io/conda/pn/conda-forge/matrixprofile.svg 30 | :target: https://anaconda.org/conda-forge/matrixprofile 31 | :alt: Platforms 32 | .. image:: https://img.shields.io/badge/License-Apache%202.0-blue.svg 33 | :target: https://opensource.org/licenses/Apache-2.0 34 | :alt: License 35 | .. image:: https://img.shields.io/twitter/follow/matrixprofile.svg?style=social 36 | :target: https://twitter.com/matrixprofile 37 | :alt: Twitter 38 | .. 
image:: https://img.shields.io/discord/589321741277462559?logo=discord 39 | :target: https://discordapp.com/invite/sBhDNXT 40 | :alt: Discord 41 | .. image:: https://joss.theoj.org/papers/10.21105/joss.02179/status.svg 42 | :target: https://doi.org/10.21105/joss.02179 43 | :alt: JOSSDOI 44 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3789780.svg 45 | :target: https://doi.org/10.5281/zenodo.3789780 46 | :alt: ZenodoDOI 47 | 48 | MatrixProfile 49 | ---------------- 50 | NOTE: THIS LIBRARY IS NOT ACTIVELY SUPPORTED. PLEASE CHECK OUT THE TD AMERITRADE STUMPY LIBRARY INSTEAD: https://github.com/TDAmeritrade/stumpy 51 | 52 | MatrixProfile is a Python 3 library, brought to you by the `Matrix Profile Foundation `_, for mining time series data. The Matrix Profile is a novel data structure with corresponding algorithms (stomp, regimes, motifs, etc.) developed by the `Keogh `_ and `Mueen `_ research groups at UC-Riverside and the University of New Mexico. The goal of this library is to make these algorithms accessible to both the novice and expert through standardization of core concepts, a simplistic API, and sensible default parameter values. 53 | 54 | In addition to this Python library, the Matrix Profile Foundation provides implementations in other languages. These languages have a pretty consistent API allowing you to easily switch between them without a huge learning curve. 55 | 56 | * `tsmp `_ - an R implementation 57 | * `go-matrixprofile `_ - a Golang implementation 58 | 59 | Python Support 60 | ---------------- 61 | Currently, we support the following versions of Python: 62 | 63 | * 3.5 64 | * 3.6 65 | * 3.7 66 | * 3.8 67 | * 3.9 68 | 69 | Python 2 is no longer supported. There are earlier versions of this library that support Python 2. 70 | 71 | Installation 72 | ------------ 73 | The easiest way to install this library is using pip or conda.
If you would like to install it from source, please review the `installation documentation `_ for your platform. 74 | 75 | Installation with pip 76 | 77 | .. code-block:: bash 78 | 79 | pip install matrixprofile 80 | 81 | Installation with conda 82 | 83 | .. code-block:: bash 84 | 85 | conda config --add channels conda-forge 86 | conda install matrixprofile 87 | 88 | Getting Started 89 | --------------- 90 | This article provides introductory material on the Matrix Profile: 91 | `Introduction to Matrix Profiles `_ 92 | 93 | 94 | This article provides details about core concepts introduced in this library: 95 | `How To Painlessly Analyze Your Time Series `_ 96 | 97 | Our documentation provides a `quick start guide `_, `examples `_ and `api `_ documentation. It is the source of truth for getting up and running. 98 | 99 | Algorithms 100 | ---------- 101 | For details about the algorithms implemented, including performance characteristics, please refer to the `documentation `_. 102 | 103 | ------------ 104 | Getting Help 105 | ------------ 106 | We provide a dedicated `Discord channel `_ where practitioners can discuss applications and ask questions about the Matrix Profile Foundation libraries. If you rather not join Discord, then please open a `Github issue `_. 107 | 108 | ------------ 109 | Contributing 110 | ------------ 111 | Please review the `contributing guidelines `_ located in our documentation. 112 | 113 | --------------- 114 | Code of Conduct 115 | --------------- 116 | Please review our `Code of Conduct documentation `_. 117 | 118 | --------- 119 | Citations 120 | --------- 121 | All proper acknowledgements for works of others may be found in our `citation documentation `_. 122 | 123 | ------ 124 | Citing 125 | ------ 126 | Please cite this work using the `Journal of Open Source Software article `_. 127 | 128 | Van Benschoten et al., (2020). MPA: a novel cross-language API for time series analysis. 
Journal of Open Source Software, 5(49), 2179, https://doi.org/10.21105/joss.02179 129 | 130 | .. code:: bibtex 131 | 132 | @article{Van Benschoten2020, 133 | doi = {10.21105/joss.02179}, 134 | url = {https://doi.org/10.21105/joss.02179}, 135 | year = {2020}, 136 | publisher = {The Open Journal}, 137 | volume = {5}, 138 | number = {49}, 139 | pages = {2179}, 140 | author = {Andrew Van Benschoten and Austin Ouyang and Francisco Bischoff and Tyler Marrs}, 141 | title = {MPA: a novel cross-language API for time series analysis}, 142 | journal = {Journal of Open Source Software} 143 | } 144 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/Releases.md: -------------------------------------------------------------------------------- 1 | # 1.0.0 (2020-03-01) 2 | - First major release of core concepts - compute, visualize, analyze and discover 3 | 4 | # 1.0.1 (2020-03-03) 5 | - Fix issues with MacOS binary build 6 | 7 | # 1.0.2 (2020-04-14) 8 | - Improved API documentation 9 | - Better cross-platform binary wheel compile options 10 | 11 | # 1.1.0 (2020-04-23) 12 | - Annotation Vectors (Feature) thanks to [@Nikita Smyrnov](https://github.com/nikita-smyrnov) [@StateFarmIns](https://github.com/StateFarmIns) 13 | - Hierarchical Clustering with MPDist (Feature) 14 | 15 | # 1.1.1 (2020-05-05) 16 | - Minor bug and dependency fixes 17 | 18 | # 1.1.2 (2020-05-17) 19 | - Minor bug fixes 20 | - Minor optimizations 21 | 22 | # 1.1.3 (2020-07-22) 23 | - Minor code refactoring 24 | 25 | # 1.1.4 (2020-08-30) 26 | - Enhanced snippets support and visualization thanks to [@Yue Lue](https://github.com/LuYueee) 27 | - Updated exclusion zone calculation thanks to [@Llewellyn Strydom](https://github.com/LlewellynS96) 28 | - Better handling of subsequence lower bounds thanks to [Kaminwar Sai Rahul](https://github.com/KSaiRahul21) 29 | 30 | # 1.1.6 (2020-12-23) 31 | - Added data preprocessing module thanks to [@Yue Lue](https://github.com/LuYueee) 32 | - Snippet visualizations thanks to [@Yue Lue](https://github.com/LuYueee) 33 | - Improved API Docs thanks to [@earthgecko](https://github.com/earthgecko) and [@Ninimama](https://github.com/Ninimama) 34 | - Improved default arguments thanks to [@Bruno-Hanzen](https://github.com/Bruno-Hanzen) 35 | - Python 3.9 support thanks to [@Immentel](https://github.com/Immentel) 36 | - Source distribution thanks to [@Immentel](https://github.com/Immentel) 37 | - Improved algorithm stability thanks to 
[@kavj](https://github.com/kavj) 38 | 39 | # 1.1.7 (2021-01-14) 40 | - Minor updates to deployment process 41 | - Switched the no window found warning to an explicit exception 42 | 43 | # 1.1.8 (2021-01-15) 44 | - Minor updates to deployment process 45 | 46 | # 1.1.9 (2021-01-16) 47 | - Fix the source distribution release to include all necessary files 48 | 49 | # 1.1.10 (2021-01-16) 50 | - Adjust absolute paths to relative - devops building wheels 51 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: 100%; 3 | } -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | === 2 | API 3 | === 4 | 5 | matrixprofile.analyze 6 | ===================== 7 | 8 | .. autofunction:: matrixprofile.analyze 9 | 10 | matrixprofile.compute 11 | ===================== 12 | 13 | .. autofunction:: matrixprofile.compute 14 | 15 | matrixprofile.visualize 16 | ======================= 17 | 18 | .. autofunction:: matrixprofile.visualize 19 | 20 | matrixprofile.preprocess.preprocess 21 | =================================== 22 | 23 | .. autofunction:: matrixprofile.preprocess.preprocess 24 | 25 | matrixprofile.preprocess.impute_missing 26 | ======================================= 27 | 28 | .. autofunction:: matrixprofile.preprocess.impute_missing 29 | 30 | matrixprofile.preprocess.add_noise_to_series 31 | ============================================ 32 | 33 | .. autofunction:: matrixprofile.preprocess.add_noise_to_series 34 | 35 | matrixprofile.discover.motifs 36 | ============================= 37 | 38 | .. autofunction:: matrixprofile.discover.motifs 39 | 40 | matrixprofile.discover.discords 41 | =============================== 42 | 43 | ..
autofunction:: matrixprofile.discover.discords 44 | 45 | matrixprofile.discover.snippets 46 | =============================== 47 | 48 | .. autofunction:: matrixprofile.discover.snippets 49 | 50 | matrixprofile.discover.regimes 51 | ============================== 52 | 53 | .. autofunction:: matrixprofile.discover.regimes 54 | 55 | matrixprofile.discover.statistics 56 | ================================= 57 | 58 | .. autofunction:: matrixprofile.discover.statistics 59 | 60 | matrixprofile.discover.hierarchical_clusters 61 | ============================================ 62 | 63 | .. autofunction:: matrixprofile.discover.hierarchical_clusters 64 | 65 | matrixprofile.algorithms.stomp 66 | ============================== 67 | 68 | .. autofunction:: matrixprofile.algorithms.stomp 69 | 70 | matrixprofile.algorithms.mpx 71 | ============================ 72 | 73 | .. autofunction:: matrixprofile.algorithms.mpx 74 | 75 | matrixprofile.algorithms.skimp 76 | ============================== 77 | 78 | .. autofunction:: matrixprofile.algorithms.skimp 79 | 80 | matrixprofile.algorithms.mass2 81 | ============================== 82 | 83 | .. autofunction:: matrixprofile.algorithms.mass2 84 | 85 | matrixprofile.algorithms.mpdist 86 | =============================== 87 | 88 | .. autofunction:: matrixprofile.algorithms.mpdist 89 | 90 | matrixprofile.algorithms.pairwise_dist 91 | ====================================== 92 | 93 | .. autofunction:: matrixprofile.algorithms.pairwise_dist 94 | 95 | matrixprofile.algorithms.maximum_subsequence 96 | ============================================ 97 | 98 | .. autofunction:: matrixprofile.algorithms.maximum_subsequence 99 | 100 | matrixprofile.algorithms.prescrimp 101 | ================================== 102 | 103 | .. autofunction:: matrixprofile.algorithms.prescrimp 104 | 105 | matrixprofile.algorithms.scrimp_plus_plus 106 | ========================================= 107 | 108 | .. 
autofunction:: matrixprofile.algorithms.scrimp_plus_plus 109 | 110 | matrixprofile.transform.apply_av 111 | ================================ 112 | 113 | .. autofunction:: matrixprofile.transform.apply_av 114 | 115 | matrixprofile.transform.make_default_av 116 | ======================================= 117 | 118 | .. autofunction:: matrixprofile.transform.make_default_av 119 | 120 | matrixprofile.transform.make_complexity_av 121 | ========================================== 122 | 123 | .. autofunction:: matrixprofile.transform.make_complexity_av 124 | 125 | matrixprofile.transform.make_meanstd_av 126 | ======================================= 127 | 128 | .. autofunction:: matrixprofile.transform.make_meanstd_av 129 | 130 | matrixprofile.transform.make_clipping_av 131 | ======================================== 132 | 133 | .. autofunction:: matrixprofile.transform.make_clipping_av 134 | 135 | matrixprofile.utils.empty_mp 136 | ============================ 137 | 138 | .. autofunction:: matrixprofile.utils.empty_mp 139 | 140 | matrixprofile.utils.pick_mp 141 | =========================== 142 | 143 | .. autofunction:: matrixprofile.utils.pick_mp 144 | 145 | matrixprofile.io.to_disk 146 | ======================== 147 | 148 | .. autofunction:: matrixprofile.io.to_disk 149 | 150 | matrixprofile.io.from_disk 151 | ========================== 152 | 153 | .. autofunction:: matrixprofile.io.from_disk 154 | 155 | matrixprofile.io.to_json 156 | ======================== 157 | 158 | .. autofunction:: matrixprofile.io.to_json 159 | 160 | matrixprofile.io.from_json 161 | ========================== 162 | 163 | .. autofunction:: matrixprofile.io.from_json 164 | 165 | matrixprofile.io.to_mpf 166 | ======================== 167 | 168 | .. autofunction:: matrixprofile.io.to_mpf 169 | 170 | matrixprofile.io.from_mpf 171 | ========================== 172 | 173 | .. autofunction:: matrixprofile.io.from_mpf 174 | 175 | matrixprofile.datasets.fetch_available 176 | ====================================== 177 | 178 | ..
autofunction:: matrixprofile.datasets.fetch_available 179 | 180 | matrixprofile.datasets.load 181 | ================================ 182 | 183 | .. autofunction:: matrixprofile.datasets.load 184 | -------------------------------------------------------------------------------- /docs/citations.rst: -------------------------------------------------------------------------------- 1 | --------- 2 | Citations 3 | --------- 4 | Abdullah Mueen, Yan Zhu, Michael Yeh, Kaveh Kamgar, Krishnamurthy Viswanathan, Chetan Kumar Gupta and Eamonn Keogh (2015), The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance, URL: http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html. 5 | 6 | Matrix Profile I: All Pairs Similarity Joins for Time Series: A Unifying View that Includes Motifs, Discords and Shapelets. Chin-Chia Michael Yeh, Yan Zhu, Liudmila Ulanova, Nurjahan Begum, Yifei Ding, Hoang Anh Dau, Diego Furtado Silva, Abdullah Mueen, Eamonn Keogh (2016). IEEE ICDM 2016. 7 | 8 | Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one Hundred Million Barrier for Time Series Motifs and Joins. Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen, Philip Brisk and Eamonn Keogh (2016). IEEE ICDM 2016. 9 | 10 | Matrix Profile V: A Generic Technique to Incorporate Domain Knowledge into Motif Discovery. Hoang Anh Dau and Eamonn Keogh. KDD'17, Halifax, Canada. 11 | 12 | Matrix Profile VIII: Domain Agnostic Online Semantic Segmentation at Superhuman Performance Levels. Shaghayegh Gharghabi, Yifei Ding, Chin-Chia Michael Yeh, Kaveh Kamgar, Liudmila Ulanova, and Eamonn Keogh. ICDM 2017. 13 | 14 | Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speed. Yan Zhu, Chin-Chia Michael Yeh, Zachary Zimmerman, Kaveh Kamgar and Eamonn Keogh, ICDM 2018.
15 | 16 | Matrix Profile XII: MPdist: A Novel Time Series Distance Measure to Allow Data Mining in More Challenging Scenarios. Shaghayegh Gharghabi, Shima Imani, Anthony Bagnall, Amirali Darvishzadeh, Eamonn Keogh. ICDM 2018. 17 | 18 | Matrix Profile XIII: Time Series Snippets: A New Primitive for Time Series Data Mining. Shima Imani, Frank Madrid, Wei Ding, Scott Crouter, Eamonn Keogh. IEEE Big Knowledge 2018. 19 | 20 | Matrix Profile XX: Finding and Visualizing Time Series Motifs of All Lengths using the Matrix Profile. Frank Madrid, Shima Imani, Ryan Mercer, Zachary Zimmerman, Nader Shakibay, Eamonn Keogh. IEEE Big Knowledge 2019. 21 | -------------------------------------------------------------------------------- /docs/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | Code of Conduct 2 | ========================================= 3 | 4 | The Matrix Profile Foundation (MPF) community is made up of members from around the globe with a 5 | diverse set of skills, personalities, and experiences. It is through 6 | these differences that our community experiences success and continued 7 | growth. We expect everyone in our community to follow these guidelines 8 | when interacting with others both inside and outside of our community. 9 | Our goal is to keep ours a positive, inclusive, successful, and growing 10 | community. 11 | 12 | A member of the MPF community is: 13 | 14 | Open 15 | ---- 16 | 17 | Members of the community are open to collaboration, whether on patches, 18 | reporting issues, asking for help or otherwise. We welcome those 19 | interested in joining the community, and realize that including people 20 | with a variety of opinions and backgrounds will only serve to enrich our 21 | community.
22 | 23 | We are accepting of all who wish to take part in our activities, 24 | fostering an environment where anyone can participate and everyone can 25 | make a difference, ensuring that all participants are heard and feel 26 | confident that they can freely express their opinions. 27 | 28 | Considerate 29 | ----------- 30 | 31 | Members of the community are considerate of their peers -- other 32 | developers, users, etc. We are thoughtful when addressing the efforts of 33 | others, keeping in mind that often the labor was completed simply for 34 | the good of the community. We are attentive in our communications, 35 | whether in person or online, and we're tactful when approaching 36 | differing views. 37 | 38 | We recognize the work made by everyone and ensure the proper 39 | acknowledgement/citation of original authors at all times. As authors, 40 | we pledge to be explicit about how we want our own work to be cited or 41 | acknowledged. 42 | 43 | Respectful 44 | ---------- 45 | 46 | Members of the community are respectful. We are respectful of others, 47 | their positions, their skills, their commitments, and their efforts. We 48 | are respectful of the volunteer and professional efforts within the 49 | community. We are respectful of the processes set forth in the 50 | community, and we work within them (paying particular attention to those 51 | new to the community). 
52 | 53 | Behavior Examples 54 | ----------------- 55 | 56 | Examples of encouraged behavior include: 57 | 58 | - Using welcoming and inclusive language 59 | - Being respectful of differing viewpoints and experiences 60 | - Gracefully accepting constructive criticism 61 | - Focusing on what is best for the community 62 | - Showing empathy towards other community members 63 | 64 | Examples of unacceptable behavior include: 65 | 66 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 67 | - Trolling, insulting/derogatory comments, and personal or political attacks 68 | - Public or private harassment 69 | - Publishing others' private information, such as a physical or electronic address, without explicit permission 70 | - Other conduct which could reasonably be considered inappropriate in a professional setting 71 | 72 | MPF Leadership Pledge & Responsibilities 73 | ---------------------------------------- 74 | 75 | We pledge to help the entire community follow the code of conduct, and 76 | to not remain silent when we see violations of the code of conduct. We 77 | will take action when members of our community violate this code and ask 78 | that such violations be reported to mpf.conduct@gmail.com (all emails 79 | sent to this address will be treated with the strictest confidence) or 80 | talking privately. 81 | 82 | We are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 83 | 84 | We have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 
85 | 86 | CoC Scope 87 | --------- 88 | 89 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 90 | 91 | Attribution 92 | ----------- 93 | 94 | Parts of this Code of Conduct have been adapted from the `sunpy Code of Conduct `_. 95 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('..')) 18 | 19 | import version as mod_version 20 | 21 | 22 | # -- Project information ----------------------------------------------------- 23 | 24 | project = 'matrixprofile' 25 | copyright = '2020, Matrix Profile Foundation' 26 | author = 'Matrix Profile Foundation' 27 | 28 | # The short X.Y version 29 | version = mod_version.__version__ 30 | # The full version, including alpha/beta/rc tags 31 | release = mod_version.__version__ 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 43 | extensions = [ 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.mathjax', 46 | 'sphinx.ext.ifconfig', 47 | 'sphinx.ext.viewcode', 48 | 'sphinx.ext.githubpages', 49 | 'sphinx.ext.napoleon', 50 | 'nbsphinx', 51 | ] 52 | 53 | # Napoleon settings (see https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html#configuration) # noqa: E501 54 | napoleon_numpy_docstring = True 55 | 56 | autosummary_generate = True 57 | 58 | # Add any paths that contain templates here, relative to this directory. 59 | templates_path = ['_templates'] 60 | 61 | # The suffix(es) of source filenames. 62 | # You can specify multiple suffix as a list of string: 63 | # 64 | # source_suffix = ['.rst', '.md'] 65 | source_suffix = '.rst' 66 | 67 | # The master toctree document. 68 | master_doc = 'index' 69 | 70 | # The language for content autogenerated by Sphinx. Refer to documentation 71 | # for a list of supported languages. 72 | # 73 | # This is also used if you do content translation via gettext catalogs. 74 | # Usually you set "language" from the command line for these cases. 
75 | language = None 76 | 77 | # List of patterns, relative to source directory, that match files and 78 | # directories to ignore when looking for source files. 79 | # This pattern also affects html_static_path and html_extra_path. 80 | exclude_patterns = [ 81 | "_build", 82 | "Thumbs.db", 83 | ".DS_Store", 84 | "installation/*.ipynb_checkpoints/*", 85 | ".ipynb_checkpoints/*" 86 | ] 87 | 88 | # The name of the Pygments (syntax highlighting) style to use. 89 | pygments_style = None 90 | 91 | 92 | # -- Options for HTML output ------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | # 97 | html_theme = 'sphinx_rtd_theme' 98 | 99 | # Theme options are theme-specific and customize the look and feel of a theme 100 | # further. For a list of options available for each theme, see the 101 | # documentation. 102 | # 103 | html_theme_options = { 104 | 'logo_only': False, 105 | 'display_version': True, 106 | 'style_nav_header_background': 'white', 107 | } 108 | 109 | html_logo = 'images/mpf-logo.png' 110 | html_title = 'MatrixProfile' 111 | 112 | # Add any paths that contain custom static files (such as style sheets) here, 113 | # relative to this directory. They are copied after the builtin static files, 114 | # so a file named "default.css" will overwrite the builtin "default.css". 115 | html_static_path = ['_static'] 116 | 117 | # Custom sidebar templates, must be a dictionary that maps document names 118 | # to template names. 119 | # 120 | # The default sidebars (for documents that don't match any pattern) are 121 | # defined by theme itself. Builtin themes are using these templates by 122 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 123 | # 'searchbox.html']``. 
124 | # 125 | # html_sidebars = {} 126 | html_css_files = ["css/custom.css"] 127 | 128 | # -- Options for HTMLHelp output --------------------------------------------- 129 | 130 | # Output file base name for HTML help builder. 131 | htmlhelp_basename = 'matrixprofiledoc' 132 | 133 | 134 | # -- Options for LaTeX output ------------------------------------------------ 135 | 136 | latex_elements = { 137 | # The paper size ('letterpaper' or 'a4paper'). 138 | # 139 | # 'papersize': 'letterpaper', 140 | 141 | # The font size ('10pt', '11pt' or '12pt'). 142 | # 143 | # 'pointsize': '10pt', 144 | 145 | # Additional stuff for the LaTeX preamble. 146 | # 147 | # 'preamble': '', 148 | 149 | # Latex figure (float) alignment 150 | # 151 | # 'figure_align': 'htbp', 152 | } 153 | 154 | # Grouping the document tree into LaTeX files. List of tuples 155 | # (source start file, target name, title, 156 | # author, documentclass [howto, manual, or own class]). 157 | latex_documents = [ 158 | (master_doc, 'matrixprofile.tex', 'matrixprofile Documentation', 159 | 'Matrix Profile Foundation', 'manual'), 160 | ] 161 | 162 | 163 | # -- Options for manual page output ------------------------------------------ 164 | 165 | # One entry per manual page. List of tuples 166 | # (source start file, name, description, authors, manual section). 167 | man_pages = [ 168 | (master_doc, 'matrixprofile', 'matrixprofile Documentation', 169 | [author], 1) 170 | ] 171 | 172 | 173 | # -- Options for Texinfo output ---------------------------------------------- 174 | 175 | # Grouping the document tree into Texinfo files. 
List of tuples 176 | # (source start file, target name, title, author, 177 | # dir menu entry, description, category) 178 | texinfo_documents = [ 179 | (master_doc, 'matrixprofile', 'matrixprofile Documentation', 180 | author, 'matrixprofile', 'One line description of project.', 181 | 'Miscellaneous'), 182 | ] 183 | 184 | 185 | # -- Options for Epub output ------------------------------------------------- 186 | 187 | # Bibliographic Dublin Core info. 188 | epub_title = project 189 | 190 | # The unique identifier of the text. This can be a ISBN number 191 | # or the project homepage. 192 | # 193 | # epub_identifier = '' 194 | 195 | # A unique identification for the text. 196 | # 197 | # epub_uid = '' 198 | 199 | # A list of files that should not be packed into the epub file. 200 | epub_exclude_files = ['search.html'] 201 | 202 | 203 | # -- Extension configuration ------------------------------------------------- 204 | 205 | # -- Options for intersphinx extension --------------------------------------- 206 | 207 | # Example configuration for intersphinx: refer to the Python standard library. 208 | intersphinx_mapping = {'https://docs.python.org/': None} 209 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | ------------ 2 | Contributing 3 | ------------ 4 | Pull requests are always welcome, and the Matrix Profile Foundation community appreciates any help you give. Note that a code of conduct applies to all spaces managed by this project, including issues and pull requests: 5 | `Review our code of conduct! `_. 6 | 7 | When submitting a pull request, we ask you check the following: 8 | 9 | 1. All pull requests must include, **documentation** updates, **unit tests** and `PEP-8 `_ compliant code standards. We also make use of `Numpy style code documentation `_. 10 | 11 | 2. 
The contributed code will fall under MatrixProfile's `license `_. Please be sure that it complies with those standards. 12 | 13 | 3. All code that is implemented from a research paper must be included within the `citations documentations `_. 14 | 15 | If you have a partial contribution, feel free to submit a pull request for review. We will help guide you in making the contribution successful! In order to get started working with this library's source code, please follow the `development installation guide `_ for your platform. -------------------------------------------------------------------------------- /docs/deploy_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" 5 | HTMLDIR="${SCRIPTPATH}/build/html" 6 | MSG="deployed docs" 7 | 8 | while getopts m: option 9 | do 10 | case "${option}" 11 | in 12 | m) MSG=${OPTARG};; 13 | esac 14 | done 15 | 16 | make html 17 | cd /tmp/ 18 | 19 | if [ -d "matrixprofile-docs-website" ]; then 20 | rm -rf matrixprofile-docs-website 21 | fi 22 | 23 | 24 | git clone git@github.com:matrix-profile-foundation/matrixprofile-docs-website.git 25 | cd matrixprofile-docs-website 26 | git checkout master 27 | git rm -rf . 28 | git clean -fxd 29 | echo "matrixprofile.docs.matrixprofile.org" > CNAME 30 | touch .nojekyll 31 | cp -R "${HTMLDIR}"/* . 32 | git add . 33 | git commit -am "${MSG}" 34 | git push origin master 35 | cd / 36 | rm -rf /tmp/matrixprofile-docs-website 37 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Examples 3 | ======== 4 | 5 | .. 
toctree:: 6 | :maxdepth: 1 7 | :glob: 8 | 9 | examples/NYC_Taxis.ipynb 10 | examples/Plot_Discords_MP.ipynb 11 | examples/ECG_Annotation_Vectors.ipynb 12 | examples/Hierarchical_Clustering_Accelerometer_Walk_Stand_etc.ipynb -------------------------------------------------------------------------------- /docs/help.rst: -------------------------------------------------------------------------------- 1 | ------------ 2 | Getting Help 3 | ------------ 4 | We provide a dedicated `Discord channel `_ where practitioners can discuss applications and ask questions about the Matrix Profile Foundation libraries. If you rather not join Discord, then please open a `Github issue `_. -------------------------------------------------------------------------------- /docs/images/datamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/matrixprofile/6fbd5fe2fd0e93162ef77c4da1b30188072dd404/docs/images/datamp.png -------------------------------------------------------------------------------- /docs/images/datampanom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/matrixprofile/6fbd5fe2fd0e93162ef77c4da1b30188072dd404/docs/images/datampanom.png -------------------------------------------------------------------------------- /docs/images/mpf-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/matrixprofile/6fbd5fe2fd0e93162ef77c4da1b30188072dd404/docs/images/mpf-logo.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
toctree:: 2 | :hidden: 3 | :maxdepth: 4 4 | :caption: Contents: 5 | 6 | install 7 | Quickstart.ipynb 8 | api 9 | examples 10 | Algorithms.ipynb 11 | help 12 | contributing 13 | code_of_conduct 14 | citations 15 | Code Repository (Github) 16 | Website 17 | 18 | .. include:: ../README.rst 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | :glob: 8 | 9 | installation/Linux_Installation.ipynb 10 | installation/Windows_Installation.ipynb 11 | installation/Mac_OS_Installation.ipynb 12 | -------------------------------------------------------------------------------- /docs/installation/Linux_Installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Linux Installation\n", 8 | "\n", 9 | "Linux installation is broken up into two categories. The first is for end user installation and the second is for development. The installation guide uses Ubuntu Server 18.04 LTS. You may need to adapt some commands for your variant of Linux." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## End User Installation\n", 17 | "\n", 18 | "The majority of users will want to simply install the binary distribution of the library. This can be accomplished using pip. 
If we do not have a binary wheel for your operating system, you must follow the development installation guide.\n", 19 | "\n", 20 | "```bash\n", 21 | "pip install matrixprofile\n", 22 | "```" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Development Installation\n", 30 | "\n", 31 | "Due to our usage of Cython and OpenMP for some modules, you are required to install some build essential dependencies. This portion of the installation guide will walk you through that." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### Install Anaconda\n", 39 | "\n", 40 | "Anaconda has become the defacto Python distribution for scientific computing and is highly recommended. You may use a base installation of Python without it, but this document does not cover that use case.\n", 41 | "\n", 42 | "Download the command line installer of Anaconda from the [Anaconda website](https://www.anaconda.com/distribution/).\n", 43 | "\n", 44 | "Once the installation completes, you can run the following to keep your terminal session open.\n", 45 | "Note that all commands assume you installed Anaconda in your home directory.\n", 46 | "\n", 47 | "```\n", 48 | "source ~/.bashrc\n", 49 | "```\n", 50 | "\n", 51 | "Conda will now load the (base) environment when you launch a terminal." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Install Build Essential\n", 59 | "\n", 60 | "Build essential is required to compile some modules using Cython.\n", 61 | "\n", 62 | "```\n", 63 | "sudo apt install build-essential\n", 64 | "```" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Install matrixprofile for Development\n", 72 | "\n", 73 | "Finally, we can clone the matrixprofile source code and install it in development mode. If you wish, you may create a new Anaconda environment for this. 
Please refer to the [Anaconda documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) to create a conda environment.\n", 74 | "\n", 75 | "When I clone source code, I tend to favor putting all source files in a \"src\" directory within my home directory. Please substitute to suit your development preferences.\n", 76 | "\n", 77 | "```\n", 78 | "mkdir ~/src\n", 79 | "cd ~/src\n", 80 | "git clone https://github.com/matrix-profile-foundation/matrixprofile.git\n", 81 | "cd matrixprofile\n", 82 | "\n", 83 | "# for python 3.x\n", 84 | "pip install -r requirements.txt\n", 85 | "\n", 86 | "# for python 2.x\n", 87 | "pip install -r python2-requirements.txt\n", 88 | "\n", 89 | "pip install -e .\n", 90 | "```\n", 91 | "\n", 92 | "Once the installation completes, you can try running the tests to verify everything is OK.\n", 93 | "\n", 94 | "```\n", 95 | "pytest tests/\n", 96 | "```\n", 97 | "\n", 98 | "Please refer to our contribution guidelines for details on what we expect. Happy coding!"
99 | ] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.7.4" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 4 123 | } 124 | -------------------------------------------------------------------------------- /docs/installation/Mac_OS_Installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mac OS Installation\n", 8 | "\n", 9 | "Mac OS installation is broken up into two categories. The first is for end user installation and the second is for development." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## End User Installation\n", 17 | "\n", 18 | "The majority of users will want to simply install the binary distribution of the library. This can be accomplished using pip. If we do not have a binary wheel for your operating system, you must follow the development installation guide.\n", 19 | "\n", 20 | "```bash\n", 21 | "pip install matrixprofile\n", 22 | "```" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Development Installation\n", 30 | "\n", 31 | "Due to our usage of Cython and OpenMP for some modules, you are required to use homebrew's C compiler gcc. Mac OS decided to drop support for OpenMP requiring manual installation of it. This development installation will walk you through the preferred way to get set up for development." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### Install Homebrew\n", 39 | "\n", 40 | "Homebrew is required to install gcc, llvm and openmp. Go to the [homebrew website](https://brew.sh) and install homebrew. In most cases it will automatically prompt you to install XCode Command Line Tools. If it does not, please install it." 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Install Anaconda\n", 48 | "\n", 49 | "Anaconda has become the defacto Python distribution for scientific computing and is highly recommended. You may use a base installation of Python without it, but this document does not cover that use case.\n", 50 | "\n", 51 | "Download the command line installer of Anaconda from the [Anaconda website](https://www.anaconda.com/distribution/).\n", 52 | "\n", 53 | "Open a terminal and execute the installer. If you are using the latest Mac OS (as of December 2019) Catalina, Anaconda does not automatically create a ZSH entry (the default shell). However, it does create an entry in your .bash_profile. To always load Anaconda environment when you launch a shell, run the following in a terminal:\n", 54 | "\n", 55 | "Note that all commands assume you installed Anaconda in your home directory.\n", 56 | "\n", 57 | "```\n", 58 | "source ~/.bash_profile\n", 59 | "conda init zsh\n", 60 | "```\n", 61 | "\n", 62 | "Conda will now load the (base) environment when you launch a terminal." 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### Install C Libraries\n", 70 | "\n", 71 | "In this section you will install llvm, libomp and gcc via homebrew.\n", 72 | "\n", 73 | "```\n", 74 | "brew install llvm libomp gcc\n", 75 | "```\n", 76 | "\n", 77 | "Once this is finished we need to create a symlink to the gcc version installed. This way it will be found when Cython tries to compile the Cython modules. 
Note that you should change {version} to the version of gcc that you installed.\n", 78 | "\n", 79 | "```\n", 80 | "ln -s /usr/local/bin/gcc-{version} /usr/local/bin/gcc\n", 81 | "```\n", 82 | "\n", 83 | "We also need to add some entries to our .zshrc file so that openmp can be found. Copy and paste the following lines into your ~/.zshrc file.\n", 84 | "\n", 85 | "```\n", 86 | "export PATH=\"/usr/local/opt/llvm/bin:$PATH\"\n", 87 | "export LDFLAGS=\"-L/usr/local/opt/llvm/lib\"\n", 88 | "export CPPFLAGS=\"-I/usr/local/opt/llvm/include\"\n", 89 | "```\n", 90 | "\n", 91 | "Once this is complete, we can keep using the same terminal by simply sourcing the .zshrc file.\n", 92 | "\n", 93 | "```\n", 94 | "source ~/.zshrc\n", 95 | "```\n", 96 | "\n", 97 | "To verify that openmp is working properly with gcc, we can create a simple C application. Create a file named \"omptest.c\" with the following code:\n", 98 | "\n", 99 | "```c\n", 100 | "#include \n", 101 | "#include \n", 102 | "#include \n", 103 | "\n", 104 | "int main() {\n", 105 | " #pragma omp parallel num_threads(4)\n", 106 | " {\n", 107 | " printf(\"Hello from thread %d, nthreads %d\\n\", omp_get_thread_num(), omp_get_num_threads());\n", 108 | " }\n", 109 | " return EXIT_SUCCESS;\n", 110 | "}\n", 111 | "```\n", 112 | "\n", 113 | "Now we can compile and run the application.\n", 114 | "\n", 115 | "```\n", 116 | "gcc -fopenmp omptest.c -o omptest\n", 117 | "./omptest\n", 118 | "```" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Install matrixprofile for Development\n", 126 | "\n", 127 | "Finally, we can clone the matrixprofile source code and install it in development mode. If you wish, you may create a new Anaconda environment for this. 
Please refer to the [Anaconda documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) to create a conda environment.\n", 128 | "\n", 129 | "When I clone source code, I tend to favor putting all source files in a \"src\" directory within my home directory. Please substitute to suit your development preferences.\n", 130 | "\n", 131 | "```\n", 132 | "mkdir ~/src\n", 133 | "cd ~/src\n", 134 | "git clone https://github.com/matrix-profile-foundation/matrixprofile.git\n", 135 | "cd matrixprofile\n", 136 | "\n", 137 | "# for python 3.x\n", 138 | "pip install -r requirements.txt\n", 139 | "\n", 140 | "# for python 2.x\n", 141 | "pip install -r python2-requirements.txt\n", 142 | "\n", 143 | "pip install -e .\n", 144 | "```\n", 145 | "\n", 146 | "Once the installation completes, you can try running the tests to verify everything is OK.\n", 147 | "\n", 148 | "```\n", 149 | "pytest tests/\n", 150 | "```\n", 151 | "\n", 152 | "Please refer to our contribution guidelines for details on what we expect. Happy coding!"
153 | ] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": "Python 3", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.7.4" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 4 177 | } 178 | -------------------------------------------------------------------------------- /docs/installation/Windows_Installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Windows Installation\n", 8 | "\n", 9 | "Windows installation is broken up into two categories. The first is for end user installation and the second is for development. The installation guide is based on Windows 10." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## End User Installation\n", 17 | "\n", 18 | "The majority of users will want to simply install the binary distribution of the library. This can be accomplished using pip. If we do not have a binary wheel for your operating system, you must follow the development installation guide.\n", 19 | "\n", 20 | "```bash\n", 21 | "pip install matrixprofile\n", 22 | "```" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Development Installation\n", 30 | "\n", 31 | "Due to our usage of Cython and OpenMP for some modules, you are required to install Microsoft Visual C++ Build Tools. This portion of the installation guide will walk you through that." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### Install Anaconda\n", 39 | "\n", 40 | "Anaconda has become the defacto Python distribution for scientific computing and is highly recommended. You may use a base installation of Python without it, but this document does not cover that use case.\n", 41 | "\n", 42 | "Download the Anaconda installer for Windows from the [Anaconda website](https://www.anaconda.com/distribution/).\n", 43 | "\n", 44 | "Be sure to make this the default Python during the installation." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### Install Visual C++ Build Tools\n", 52 | "\n", 53 | "Go to [https://visualstudio.microsoft.com/downloads/](https://visualstudio.microsoft.com/downloads/) and download the \"Build Tools for Visual Studio\". Scroll down and find it in the section labeled \"Tools for Visual Studio\".\n", 54 | "\n", 55 | "![Download Build Tools](install_microsoft_download_build_tools.jpeg)\n", 56 | "\n", 57 | "During the installation, be sure to check the box next to \"C++ Build Tools\".\n", 58 | "\n", 59 | "![Install C++ Build Tools](install_microsoft_cpp.jpeg)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### Install matrixprofile for Development\n", 67 | "\n", 68 | "Finally, we can clone the matrixprofile source code and install it in development mode. If you wish, you may create a new Anaconda environment for this. 
Please refer to the [Anaconda documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) to create a conda environment.\n", 69 | "\n", 70 | "Clone the repository using git.\n", 71 | "\n", 72 | "Next, launch an Anaconda Powershell.\n", 73 | "\n", 74 | "![Anaconda Powershell](install_microsoft_anaconda.jpeg)\n", 75 | "\n", 76 | "```\n", 77 | "cd matrixprofile\n", 78 | "\n", 79 | "# for python 3.x\n", 80 | "pip install -r requirements.txt\n", 81 | "\n", 82 | "# for python 2.x\n", 83 | "pip install -r python2-requirements.txt\n", 84 | "\n", 85 | "pip install -e .\n", 86 | "```\n", 87 | "\n", 88 | "Once the installation completes, you can try running the tests to verify everything is OK.\n", 89 | "\n", 90 | "```\n", 91 | "pytest tests/\n", 92 | "```\n", 93 | "\n", 94 | "Please refer to our contribution guidelines for details on what we expect. Happy coding!" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.7.4" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 4 119 | } 120 | -------------------------------------------------------------------------------- /docs/installation/install_microsoft_anaconda.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/matrixprofile/6fbd5fe2fd0e93162ef77c4da1b30188072dd404/docs/installation/install_microsoft_anaconda.jpeg -------------------------------------------------------------------------------- /docs/installation/install_microsoft_cpp.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/matrixprofile/6fbd5fe2fd0e93162ef77c4da1b30188072dd404/docs/installation/install_microsoft_cpp.jpeg -------------------------------------------------------------------------------- /docs/installation/install_microsoft_download_build_tools.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/matrixprofile/6fbd5fe2fd0e93162ef77c4da1b30188072dd404/docs/installation/install_microsoft_download_build_tools.jpeg -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /matrixprofile/__init__.py: -------------------------------------------------------------------------------- 1 | name = "matrixprofile" 2 | 3 | import sys 4 | sys.path.append('../') 5 | 6 | from matrixprofile.version import __version__, __version_info__ 7 | 8 | from matrixprofile.compute import compute 9 | from matrixprofile.visualize import visualize 10 | from matrixprofile.analyze import analyze 11 | from matrixprofile import discover 12 | from matrixprofile import transform 13 | from matrixprofile import utils 14 | from matrixprofile import io 15 | from matrixprofile import algorithms 16 | from matrixprofile import datasets -------------------------------------------------------------------------------- /matrixprofile/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | from matrixprofile.algorithms.stomp import stomp 2 | from matrixprofile.algorithms.mass2 import mass2 3 | from matrixprofile.algorithms.mpdist import mpdist 4 | from matrixprofile.algorithms.mpdist import mpdist_vector 5 | from matrixprofile.algorithms.mpx import mpx 6 | from matrixprofile.algorithms.scrimp import scrimp_plus_plus 7 | from matrixprofile.algorithms.scrimp import prescrimp 8 | from matrixprofile.algorithms.skimp import skimp 9 | from matrixprofile.algorithms.skimp import maximum_subsequence 10 | from matrixprofile.algorithms.pairwise_dist import pairwise_dist -------------------------------------------------------------------------------- /matrixprofile/algorithms/hierarchical_clustering.py: 
def hierarchical_clusters(X, window_size, t, threshold=0.05, method='single',
                          depth=2, criterion='distance', n_jobs=1):
    """
    Group M time series into hierarchical clusters using an agglomerative
    approach. This is a convenience wrapper around SciPy's
    scipy.cluster.hierarchy functions that uses the MPDist algorithm as the
    distance measure between each pair of time series.

    Note
    ----
    Memory usage can be high depending on the length of the time series
    and how many pairwise distances must be computed!

    Parameters
    ----------
    X : array_like
        An M x N matrix where M is the time series and N is the observations
        at a given time.
    window_size : int
        The window size used to compute the MPDist.
    t : scalar
        For criteria 'inconsistent', 'distance' or 'monocrit', the threshold
        applied when forming flat clusters. For 'maxclust', the maximum
        number of clusters requested.
    threshold : float, Default 0.05
        The percentile at which the MPDist is taken. Based on empirical
        research results from the paper; generally should not be changed.
        Must be a float greater than 0 and less than 1.
    method : str, Default single
        The linkage algorithm: one of {single, complete, average, weighted}.
    depth : int, Default 2
        A positive number of levels below a non-singleton cluster to allow.
    criterion : str, Default distance
        The flat-cluster criterion: one of
        {inconsistent, distance, maxclust, monocrit}. See
        scipy.cluster.hierarchy.fcluster for the exact semantics of each.
    n_jobs : int, Default 1
        The number of cpu cores used to compute the MPDist.

    Returns
    -------
    clusters : dict
        Clustering statistics, distances and labels:
        pairwise_distances, linkage_matrix, inconsistency_statistics,
        assignments, cophenet, cophenet_distances and class ('hclusters').
    """
    # SciPy clustering options that are valid with a custom distance metric
    allowed_methods = set(['single', 'complete', 'average', 'weighted'])
    allowed_criteria = set([
        'inconsistent', 'distance', 'monocrit', 'maxclust'
    ])

    method = method.lower()
    criterion = criterion.lower()

    # input validation as guard clauses
    if not core.is_array_like(X):
        raise ValueError('X must be array like!')

    if not isinstance(t, (float, int)):
        raise ValueError('t must be a scalar (int or float)')

    threshold_ok = isinstance(threshold, float) and 0 < threshold < 1
    if not threshold_ok:
        raise ValueError('threshold must be a float greater than 0 and less'\
            ' than 1')

    if not isinstance(depth, int) or depth < 1:
        raise ValueError('depth must be an integer greater than 0')

    if method not in allowed_methods:
        raise ValueError('method may only be one of: ' + ', '.join(allowed_methods))

    if criterion not in allowed_criteria:
        raise ValueError('criterion may only be one of: ' + ', '.join(allowed_criteria))

    # MPDist condensed distance matrix feeds directly into SciPy
    pairwise = pairwise_dist(X, window_size, threshold=threshold, n_jobs=n_jobs)
    linkage_matrix = linkage(pairwise, method=method)
    inconsistency = inconsistent(linkage_matrix, d=depth)
    coph_stat, coph_dists = cophenet(linkage_matrix, pairwise)
    assignments = fcluster(linkage_matrix, criterion=criterion, depth=depth,
                           R=inconsistency, t=t)

    return {
        'pairwise_distances': pairwise,
        'linkage_matrix': linkage_matrix,
        'inconsistency_statistics': inconsistency,
        'assignments': assignments,
        'cophenet': coph_stat,
        'cophenet_distances': coph_dists,
        'class': 'hclusters'
    }
def mass2(ts, query, extras=False, threshold=1e-10):
    """
    Compute the distance profile for the given query over the given time
    series using the MASS2 FFT-based algorithm.

    Parameters
    ----------
    ts : array_like
        The time series to search.
    query : array_like
        The query.
    extras : boolean, default False
        Optionally return additional data used to compute the matrix profile.
    threshold : float, default 1e-10
        Std. deviation below which a subsequence is treated as constant.

    Returns
    -------
    np.array, dict : distance_profile
        An array of distances np.array() or dict with extras.

        With extras:

        >>> {
        >>>     'distance_profile': The distance profile,
        >>>     'product': The FFT product between ts and query,
        >>>     'data_mean': The moving average of the ts over len(query),
        >>>     'query_mean': The mean of the query,
        >>>     'data_std': The moving std. of the ts over len(query),
        >>>     'query_std': The std. of the query
        >>> }

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If query is not a list or np.array.
        If ts or query is not one dimensional.

    """
    ts, query = core.precheck_series_and_query_1d(ts, query)

    n = len(ts)
    m = len(query)

    query_mean = np.mean(query)
    query_std = np.std(query)

    # moving statistics over the series, left-padded so indices align with ts
    data_mean, data_std = core.moving_avg_std(ts, m)
    data_mean = np.append(np.ones([1, n - len(data_mean)]), data_mean)
    data_std = np.append(np.zeros([1, n - len(data_std)]), data_std)

    # reverse the query and zero pad to length n so the FFT product is a
    # sliding dot product
    reversed_query = np.append(np.flip(query), np.zeros([1, n - m]))

    ts_freq = np.fft.fft(ts)
    query_freq = np.fft.fft(reversed_query)
    query_freq.resize(ts_freq.shape)
    product = np.fft.ifft(ts_freq * query_freq)

    # do not allow divide by zero
    std_product = (data_std[m - 1:n] * query_std)
    std_product[std_product == 0] = 1e-12

    dist = 2 * (m - (product[m - 1:n] - m * data_mean[m - 1:n] * query_mean) / std_product)

    # fix to handle constant values: constant data gives distance m, and
    # zero when the query is also constant
    constant_data = data_std[m - 1:n] < threshold
    dist[constant_data] = m
    dist[constant_data & (query_std < threshold)] = 0
    dist = np.sqrt(dist)

    if extras:
        return {
            'distance_profile': dist,
            'product': product,
            'data_mean': data_mean,
            'query_mean': query_mean,
            'data_std': data_std,
            'query_std': query_std
        }

    return dist
def mpdist(ts, ts_b, w, threshold=0.05, n_jobs=1):
    """
    Compute the MPDist between the two series ts and ts_b.

    For details refer to: Matrix Profile XII: MPdist: A Novel Time Series
    Distance Measure to Allow Data Mining in More Challenging Scenarios.
    Gharghabi, Imani, Bagnall, Darvishzadeh, Keogh. ICDM 2018.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    threshold : float, Default 0.05
        The percentile at which the distance is taken. Based on empirical
        research results from the paper; generally should not be changed.
        Must be a float greater than 0 and less than 1.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    float : mpdist
        The MPDist.

    """
    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    if not core.is_one_dimensional(ts):
        raise ValueError('ts must be one dimensional!')

    if not core.is_one_dimensional(ts_b):
        raise ValueError('ts_b must be one dimensional!')

    if not isinstance(threshold, float) or threshold <= 0 or threshold >= 1:
        raise ValueError('threshold must be a float greater than 0 and less'\
            ' than 1')

    # similarity join in both directions, then pool both profiles
    mp_a, _, mp_b, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)
    pooled = np.append(mp_a, mp_b)

    # sort the finite distances ascending and take the threshold percentile
    finite_sorted = np.sort(pooled[~core.nan_inf_indices(pooled)])

    if len(finite_sorted) == 0:
        return np.inf

    total_len = len(ts) + len(ts_b)
    upper_idx = int(np.ceil(threshold * total_len)) - 1
    idx = np.min([len(finite_sorted) - 1, upper_idx])

    return finite_sorted[idx]
def mass_distance_matrix(ts, query, w):
    """
    Computes a distance matrix using MASS that is used in the mpdist_vector
    algorithm. Row i holds the MASS distance profile of query[i:i + w]
    against ts.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix for.
    query : array_like
        The time series to compare against.
    w : int
        The window size.

    Returns
    -------
    array_like : dist_matrix
        The MASS distance matrix.

    """
    subseq_num = len(query) - w + 1

    # mass2 returns complex values from the FFT; only the real part is the
    # distance
    return np.array([
        np.real(mass2(ts, query[i:i + w])) for i in range(subseq_num)
    ])


def calculate_mpdist(profile, threshold, data_length):
    """
    Computes the MPDist given a profile, threshold and data length. This is
    primarily used for the MPDist Vector algorithm.

    Parameters
    ----------
    profile : array_like
        The profile to calculate the mpdist for.
    threshold : float
        The threshold to use in computing the distance.
    data_length : int
        The length of the original data.

    Returns
    -------
    float : mpdist
        The MPDist. np.inf when the profile has no finite values.

    """
    dist_loc = int(np.ceil(threshold * data_length))

    # keep only the finite values, sorted ascending
    # (np.isfinite is equivalent to core.not_nan_inf_indices)
    profile_sorted = np.sort(profile)
    profile_clean = profile_sorted[np.isfinite(profile_sorted)]

    if len(profile_clean) < 1:
        return np.inf

    # bug fix: the original indexed profile_clean[dist_loc] whenever
    # len(profile_clean) >= dist_loc, which raises IndexError when the
    # lengths are equal; a strictly greater length is required to index.
    if len(profile_clean) > dist_loc:
        return profile_clean[dist_loc]

    return np.max(profile_clean)


def mpdist_vector(ts, ts_b, w):
    """
    Computes a vector of MPDist measures: the MPDist between ts_b and every
    len(ts_b)-long sliding window of ts.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix for.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.

    Returns
    -------
    array_like : mpdist_vector
        The MPDist vector.

    """
    matrix = mass_distance_matrix(ts, ts_b, w)
    rows, cols = matrix.shape

    # column minimums (one side of the join) and sliding row minimums
    # (the other side)
    all_right_hist = matrix.min(axis=0)
    mass_minimums = np.apply_along_axis(core.moving_min, 1, matrix, window=rows)

    # recreate the matrix profile per sliding position and compute MPDist
    mpdist_length = len(ts) - len(ts_b) + 1
    right_hist_length = len(ts_b) - w + 1

    mpdist_array = []
    for i in range(mpdist_length):
        right_hist = all_right_hist[i:right_hist_length + i]
        left_hist = mass_minimums[:, i]
        profile = np.append(left_hist, right_hist)
        mpdist_array.append(calculate_mpdist(profile, 0.05, 2 * len(ts_b)))

    return np.array(mpdist_array)
def mpx(ts, w, query=None, cross_correlation=False, n_jobs=1):
    """
    Compute the matrix profile with the MPX algorithm, which does not rely
    on the FFT.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    w : int
        The window size.
    query : array_like
        Optionally a query series.
    cross_correlation : bool, Default=False
        Determine if cross_correlation distance should be returned. It
        defaults to Euclidean Distance.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure with keys: mp, pi, rmp, rpi, lmp,
        lpi, metric, w, ez, join, sample_pct, data (ts and query),
        class ('MatrixProfile') and algorithm ('mpx').

    """
    ts = core.to_np_array(ts).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    # an AB similarity join is performed when a query series is supplied
    is_join = False
    if core.is_array_like(query):
        is_join = True
        query = core.to_np_array(query).astype('d')
        mp, mpi, _, _ = cympx_ab_parallel(ts, query, w,
                                          int(cross_correlation), n_jobs)
    else:
        mp, mpi = cympx_parallel(ts, w, int(cross_correlation), n_jobs)

    if cross_correlation:
        distance_metric = 'cross_correlation'
    else:
        distance_metric = 'euclidean'

    return {
        'mp': np.asarray(mp),
        'pi': np.asarray(mpi),
        'rmp': None,
        'rpi': None,
        'lmp': None,
        'lpi': None,
        'metric': distance_metric,
        'w': w,
        'ez': int(np.ceil(w / 4.0)) if is_join else 0,
        'join': is_join,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': 'MatrixProfile',
        'algorithm': 'mpx'
    }
def compute_dist(args):
    """
    Helper function to parallelize the pairwise distance calculation.

    Parameters
    ----------
    args : tuple
        The arguments for a single mpdist calculation, in order:
        (index, ts_a, ts_b, window_size, threshold).

    Returns
    -------
    values : tuple
        The kth index and distance.
    """
    # args is packed positionally by pairwise_dist
    index = args[0]
    distance = mpdist(args[1], args[2], args[3], threshold=args[4])
    return (index, distance)
def extract_regimes(profile, num_regimes=3):
    """
    Given a MatrixProfile, compute the corrected arc curve and extract the
    desired number of regimes. Regimes are computed with an exclusion zone
    of 5 * window size per the authors.

    The author states:
        This exclusion zone is based on an assumption that regimes will have
        multiple repetitions; FLUSS is not able to segment single gesture
        patterns.

    Parameters
    ----------
    profile : dict
        Data structure from a MatrixProfile algorithm.
    num_regimes : int
        The desired number of regimes to find.

    Returns
    -------
    dict : profile
        The original MatrixProfile object with additional keys:

        >>> {
        >>>     'cac': The corrected arc curve
        >>>     'cac_ez': The exclusion zone used
        >>>     'regimes': Array of starting indices indicating a regime.
        >>> }

    """
    if not core.is_mp_obj(profile):
        raise ValueError('profile must be a MatrixProfile structure')

    cac = profile.get('cac')
    # exclusion zone of 5 windows per the paper's recommendation
    ez = profile.get('w') * 5

    # compute and cache the CAC if it is not already present
    if cac is None:
        cac = fluss(profile)
        profile['cac'] = cac

    working = np.copy(cac)
    n = len(working)
    found = []

    for _ in range(num_regimes):
        regime_idx = np.argmin(working)
        found.append(regime_idx)

        # nullify the exclusion zone around the find so the next minimum
        # is a distinct regime
        ez_start = np.max([0, regime_idx - ez])
        ez_end = np.min([n, regime_idx + ez])
        working[ez_start:ez_end] = np.inf

    profile['regimes'] = np.array(found, dtype=int)
    profile['cac_ez'] = ez

    return profile
def snippets(ts, snippet_size, num_snippets=2, window_size=None):
    """
    The snippets algorithm is used to summarize your time series by
    identifying N number of representative subsequences. If you want to
    identify typical patterns in your time series, then this is the algorithm
    to use.

    Parameters
    ----------
    ts : array_like
        The time series.
    snippet_size : int
        The size of snippet desired.
    num_snippets : int, Default 2
        The number of snippets you would like to find.
    window_size : int, Default (snippet_size / 2)
        The window size.

    Returns
    -------
    list : snippets
        A list of snippets as dictionary objects with the following structure.

        >>> {
        >>>     index: the index of the snippet,
        >>>     snippet: the snippet values,
        >>>     neighbors: the starting indices of all subsequences similar to the current snippet
        >>>     fraction: fraction of the snippet
        >>> }

    """
    ts = core.to_np_array(ts).astype('d')
    time_series_len = len(ts)
    n = len(ts)

    if not isinstance(snippet_size, int) or snippet_size < 4:
        raise ValueError('snippet_size must be an integer >= 4')

    if n < (2 * snippet_size):
        raise ValueError('Time series is too short relative to snippet length')

    # default the MPDist window to half of the snippet size
    if not window_size:
        window_size = int(np.floor(snippet_size / 2))

    if window_size >= snippet_size:
        raise ValueError('window_size must be smaller than snippet_size')

    # pad end of time series with zeros so the length is a whole multiple
    # of snippet_size
    num_zeros = int(snippet_size * np.ceil(n / snippet_size) - n)
    ts = np.append(ts, np.zeros(num_zeros))

    # compute all profiles: one MPDist vector per candidate snippet start
    indices = np.arange(0, len(ts) - snippet_size, snippet_size)
    distances = []

    # NOTE(review): the slice length is snippet_size - 1, not snippet_size;
    # this appears to follow the original reference implementation — confirm
    # intended before changing
    for j, i in enumerate(indices):
        distance = mpdist_vector(ts, ts[i:(i + snippet_size - 1)], int(window_size))
        distances.append(distance)

    distances = np.array(distances)

    # find N snippets: greedily pick the candidate whose profile most
    # reduces the running element-wise minimum (best coverage gain)
    snippets = []
    minis = np.inf
    total_min = None
    for n in range(num_snippets):
        minims = np.inf

        for i in range(len(indices)):
            s = np.sum(np.minimum(distances[i, :], minis))

            if minims > s:
                minims = s
                index = i

        minis = np.minimum(distances[index, :], minis)
        actual_index = indices[index]
        snippet = ts[actual_index:actual_index + snippet_size]
        snippet_distance = distances[index]
        snippets.append({
            'index': actual_index,
            'snippet': snippet,
            'distance': snippet_distance
        })

        # total_min tracks the best distance per position over all chosen
        # snippets so far
        if isinstance(total_min, type(None)):
            total_min = snippet_distance
        else:
            total_min = np.minimum(total_min, snippet_distance)

    # compute the fraction of each snippet
    for snippet in snippets:
        # positions where this snippet is at least as close as any other
        mask = (snippet['distance'] <= total_min)
        # create a key "neighbors" for the snippet dict,
        # and store all the time series indices for the data represented by a snippet (arr[mask])
        arr = np.arange(len(mask))
        # max_index indicates the length of a profile, which is (n-m) in the Snippets paper)
        max_index = time_series_len - snippet_size
        # since 'ts' is padded with 0 before calculate the MPdist profile
        # all parts of the profile that are out of range [0, n-m] cannot be used as neighboring snippet indices
        snippet['neighbors'] = list(filter(lambda x : x <= max_index, arr[mask]))
        # Add the last m time series indices into the neighboring snippet indices
        if max_index in snippet['neighbors']:
            last_m_indices = list(range(max_index+1, time_series_len))
            snippet['neighbors'].extend(last_m_indices)
        snippet['fraction'] = mask.sum() / (len(ts) - snippet_size)
        # NOTE(review): subtracting the boolean mask perturbs total_min so a
        # position is not claimed by multiple snippets — confirm intended
        total_min = total_min - mask
        del snippet['distance']

    return snippets
def statistics(ts, window_size):
    """
    Compute global and moving statistics for the provided 1D time series.
    The statistics computed include the min, max, mean, std. and median over
    the window specified and globally.

    Parameters
    ----------
    ts : array_like
        The time series.
    window_size: int
        The size of the window to compute moving statistics over.

    Returns
    -------
    dict : statistics
        The global and rolling window statistics: ts, min, max, mean, std,
        median, moving_min, moving_max, moving_mean, moving_std,
        moving_median, window_size and class ('Statistics').

    Raises
    ------
    ValueError
        If window_size is not an int.
        If window_size > len(ts).
        If window_size < 3.
        If ts is not a list or np.array.
        If ts is not 1D.

    """
    # validation order matters: shape checks assume an array-like input
    if not core.is_array_like(ts):
        raise ValueError('ts must be array like')

    if not core.is_one_dimensional(ts):
        raise ValueError('The time series must be 1D')

    if not isinstance(window_size, int):
        raise ValueError('Expecting int for window_size')

    if window_size > len(ts):
        raise ValueError('Window size cannot be greater than len(ts)')

    if window_size < 3:
        raise ValueError('Window size cannot be less than 3')

    moving_mean, moving_std = core.moving_avg_std(ts, window_size)
    windows = core.rolling_window(ts, window_size)

    return {
        'ts': ts,
        'min': np.min(ts),
        'max': np.max(ts),
        'mean': np.mean(ts),
        'std': np.std(ts),
        'median': np.median(ts),
        'moving_min': np.min(windows, axis=1),
        'moving_max': np.max(windows, axis=1),
        'moving_mean': moving_mean,
        'moving_std': moving_std,
        'moving_median': np.median(windows, axis=1),
        'window_size': window_size,
        'class': 'Statistics'
    }
def pmp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Computes the top K discords for the given Pan-MatrixProfile. The return
    value is a list of row by col indices.

    Notes
    -----
    This algorithm is written to work with Euclidean distance. If you submit
    a PMP of Pearson metrics, then it is first converted to Euclidean.

    Parameters
    ----------
    profile : dict
        Data structure from a PMP algorithm.
    exclusion_zone : int, Default window / 2
        The zone to exclude around the found discords to reduce trivial
        findings. By default we use the row-wise window / 2.
    k : int
        Maximum number of discords to find.

    Returns
    -------
    dict : profile
        A 2D array of indices. The first column corresponds to the row index
        and the second column corresponds to the column index of the
        submitted PMP. It is placed back on the original object passed in as
        'discords' key.

    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    # this function requires euclidean distance
    # convert if the metric is pearson
    metric = profile.get('metric', None)
    pmp = profile.get('pmp', None)
    windows = profile.get('windows', None)

    if metric == 'pearson':
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with -infinity
    # for whatever reason numpy argmax finds infinity as max so
    # this is a way to get around it by converting to -infinity
    tmp[core.nan_inf_indices(tmp)] = -np.inf

    # iterate finding the max value k times or until negative
    # infinity is obtained
    found = []

    for _ in range(k):
        max_idx = np.unravel_index(np.argmax(tmp), tmp.shape)
        window = windows[max_idx[0]]

        if tmp[max_idx] == -np.inf:
            break

        found.append(max_idx)

        # apply exclusion zone
        # bug fix: the exclusion zone is row-wise — half of the window used
        # to compute that specific matrix profile. The original assigned the
        # first computed value back onto the exclusion_zone parameter, so
        # every later discord reused the first row's window / 2 regardless
        # of its own window. Compute a per-iteration value instead.
        n = tmp[max_idx[0]].shape[0]
        if exclusion_zone is None:
            ez = int(np.floor(window / 2))
        else:
            ez = exclusion_zone

        ez_start = np.max([0, max_idx[1] - ez])
        ez_stop = np.min([n, max_idx[1] + ez])
        tmp[max_idx[0]][ez_start:ez_stop] = -np.inf

    profile['discords'] = np.array(found)

    return profile
def mp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Find the top K number of discords (anomalies) given a matrix profile,
    exclusion zone and the desired number of discords. The exclusion zone
    nullifies entries on the left and right side of the first and subsequent
    discords to remove non-trivial matches. More specifically, a discord
    found at location X will more than likely have additional discords to
    the left or right of it.

    Parameters
    ----------
    profile : dict
        The MatrixProfile data structure.
    exclusion_zone : int, Default mp algorithm ez
        Desired number of values to exclude on both sides of the anomaly.
    k : int
        Desired number of discords to find.

    Returns
    -------
    dict : profile
        The original input profile with an additional "discords" key
        containing a np.ndarray of discord indices.

    """
    if not core.is_mp_obj(profile):
        raise ValueError('Expecting MP data structure!')

    distances = np.copy(profile.get('mp', None)).astype('d')
    n = len(distances)

    if exclusion_zone is None:
        exclusion_zone = profile.get('ez', None)

    # visit indices from largest distance to smallest
    order = np.argsort(distances)[::-1]

    found = []
    for idx in order:
        # entries nullified by a previous exclusion zone are skipped
        if np.isinf(distances[idx]):
            continue

        found.append(idx)

        # nullify neighbors so trivial matches are not reported
        if exclusion_zone > 0:
            ez_start = np.max([0, idx - exclusion_zone])
            ez_end = np.min([n, idx + exclusion_zone])
            distances[ez_start:ez_end] = np.inf

        if len(found) >= k:
            break

    profile['discords'] = np.array(found, dtype='int')

    return profile
def compute(ts, windows=None, query=None, sample_pct=1, threshold=0.98,
            n_jobs=1, preprocessing_kwargs=None):
    """
    Computes the exact or approximate MatrixProfile based on the sample percent
    specified. Currently, MPX and SCRIMP++ is used for the exact and
    approximate algorithms respectively. When multiple windows are passed, the
    Pan-MatrixProfile is computed and returned.

    By default, only passing in a time series (ts), the Pan-MatrixProfile is
    computed based on the maximum upper window algorithm with a correlation
    threshold of 0.98.

    Notes
    -----
    When multiple windows are passed and the Pan-MatrixProfile is computed, the
    query is ignored!

    Parameters
    ----------
    ts : array_like
        The time series to analyze.
    windows : int, array_like
        The window(s) to compute the MatrixProfile. Note that it may be an int
        for a single matrix profile computation or an array of ints for
        computing the pan matrix profile.
    query : array_like, optional
        The query to analyze. Note that when computing the PMP the query is
        ignored!
    sample_pct : float, default 1
        A float between 0 and 1 representing how many samples to compute for
        the MP or PMP. When it is 1, the exact algorithm is used.
    threshold : float, default 0.98
        The correlation coefficient used as the threshold. It should be between
        0 and 1. This is used to compute the upper window size when no
        window(s) is given.
    n_jobs : int, default = 1
        Number of cpu cores to use.
    preprocessing_kwargs : dict, default = None
        A dictionary object to set parameters for the preprocess function.
        A valid preprocessing_kwargs should have the following structure:

        >>> {
        >>>     'window': The window size to compute the mean/median/minimum/maximum value,
        >>>     'method': A string indicating the data imputation method, which should be
        >>>         'mean', 'median', 'min' or 'max',
        >>>     'direction': A string indicating the data imputation direction, which should be
        >>>         'forward', 'fwd', 'f', 'backward', 'bwd', 'b'. If the direction is
        >>>         forward, we use previous data for imputation; if the direction is
        >>>         backward, we use subsequent data for imputation.,
        >>>     'add_noise': A boolean value indicating whether noise needs to be added into
        >>>         the time series
        >>> }

        To disable the preprocessing procedure, set preprocessing_kwargs to
        None/False/""/{}.

    Returns
    -------
    dict : profile
        The profile computed.

    Raises
    ------
    ValueError
        When neither a window nor a threshold is provided, or when any window
        size is smaller than 4.

    """
    result = None
    multiple_windows = core.is_array_like(windows) and len(windows) > 1
    no_windows = isinstance(windows, type(None))
    has_threshold = isinstance(threshold, float)

    if no_windows and not has_threshold:
        raise ValueError('compute requires a threshold or window(s) to be set!')

    # Check to make sure all window sizes are greater than 3, raise a ValueError if not.
    if (isinstance(windows, int) and windows < 4) or (multiple_windows and np.any(np.unique(windows) < 4)):
        raise ValueError('Compute requires all window sizes to be greater than 3!')

    # a single-element window array is treated as a plain int window
    if core.is_array_like(windows) and len(windows) == 1:
        windows = windows[0]

    # preprocess the time series
    preprocessing_kwargs = validate_preprocess_kwargs(preprocessing_kwargs)
    if preprocessing_kwargs:
        ts = preprocess(ts,
                        window=preprocessing_kwargs['window'],
                        impute_method=preprocessing_kwargs['impute_method'],
                        impute_direction=preprocessing_kwargs['impute_direction'],
                        add_noise=preprocessing_kwargs['add_noise'])

    # compute the upper window and pmp
    if no_windows and has_threshold:
        profile = maximum_subsequence(ts, threshold, include_pmp=True)

        # determine windows to be computed: every size from 8 up to the
        # discovered upper window (inclusive)
        start = 8
        windows = range(start, profile['upper_window'] + 1)

        # compute the pmp
        result = skimp(ts, windows=windows, sample_pct=sample_pct,
                       pmp_obj=profile)

    # compute the pmp
    elif multiple_windows:
        if core.is_array_like(query):
            # logger.warn is a deprecated alias of logger.warning
            logger.warning('Computing PMP - query is ignored!')

        # NOTE(review): sample_pct is forced to 1 here while the
        # threshold-based branch above honors the caller's sample_pct —
        # confirm whether this is intentional.
        result = skimp(ts, windows=windows, sample_pct=1,
                       n_jobs=n_jobs)

    # compute exact mp
    elif sample_pct >= 1:
        result = mpx(ts, windows, query=query, n_jobs=n_jobs)

    # compute approximate mp
    else:
        result = scrimp_plus_plus(ts, windows, query=query, n_jobs=n_jobs,
                                  sample_pct=sample_pct)

    return result
@cython.boundscheck(False)
@cython.cdivision(True)
def moving_avg_std(double[:] a, unsigned int w):
    """
    Computes the moving average and standard deviation over the provided
    array.

    The implementation builds prefix (cumulative) sums of the values and of
    their squares so every window statistic becomes an O(1) difference of two
    prefix entries; the whole computation runs in O(n).

    Parameters
    ----------
    a : array_like
        The array to compute statistics on.
    w : int
        The window size.

    Returns
    -------
    (array_like, array_like) :
        The (mu, sigma) arrays respectively.

    """
    cdef Py_ssize_t i
    cdef Py_ssize_t n = a.shape[0]
    # NOTE(review): ws is assigned but never referenced below
    cdef Py_ssize_t ws = w
    # number of windows of size w that fit in a
    cdef Py_ssize_t profile_len = n - w + 1
    cdef double[:] cumsum = np.empty(n, dtype='d')
    cdef double[:] sq_cumsum = np.empty(n, dtype='d')
    cdef double[:] sums = np.empty(profile_len, dtype='d')
    cdef double[:] sq_sums = np.empty(profile_len, dtype='d')
    cdef double[:] mu = np.empty(profile_len, dtype='d')
    cdef double[:] sig_sq = np.empty(profile_len, dtype='d')
    cdef double[:] sig = np.empty(profile_len, dtype='d')

    # prefix sums of the values and of their squares
    cumsum[0] = a[0]
    sq_cumsum[0] = a[0] * a[0]
    for i in range(1, n):
        cumsum[i] = a[i] + cumsum[i - 1]
        sq_cumsum[i] = a[i] * a[i] + sq_cumsum[i - 1]

    # per-window sums obtained as differences of prefix sums
    sums[0] = cumsum[w - 1]
    sq_sums[0] = sq_cumsum[w - 1]
    for i in range(n - w):
        sums[i + 1] = cumsum[w + i] - cumsum[i]
        sq_sums[i + 1] = sq_cumsum[w + i] - sq_cumsum[i]

    for i in range(profile_len):
        mu[i] = sums[i] / w
        # variance via E[x^2] - E[x]^2; floating point cancellation can push
        # this slightly negative, hence the clamp to zero below
        sig_sq[i] = sq_sums[i] / w - mu[i] * mu[i]

        if sig_sq[i] < 0:
            sig[i] = 0
        else:
            sig[i] = sqrt(sig_sq[i])

    return (mu, sig)
def create_dirs(path):
    """
    Python 2 and 3 compatible function to make directories. Python 3 has the
    exist_ok option in makedirs, but Python 2 does not.

    Parameters
    ----------
    path : str
        The path to create directories for.

    Raises
    ------
    OSError :
        When the path does not exist after attempting to create it.

    """
    try:
        os.makedirs(path)
    except OSError:
        # the path may already exist or a component may be uncreatable;
        # the existence check below decides whether this is a real failure
        pass

    if not os.path.exists(path):
        raise OSError('Unable to create path: {}'.format(path))
def get_csv_indices(fp, is_gzip=False):
    """
    Utility function to provide indices of the datetime dimension and the
    real valued dimensions.

    Parameters
    ----------
    fp : str
        The filepath to load.
    is_gzip : boolean, Default False
        Flag to tell if the csv is gzipped.

    Returns
    -------
    (dt_index, real_indices) :
        The datetime index and real valued indices.

    """
    # only the header row is needed to classify the columns
    if is_gzip:
        handle = gzip.open(fp, 'rt')
    else:
        handle = open(fp)

    with handle as f:
        header = f.readline()

    dt_index = None
    real_indices = []

    # a column whose label mentions date/time is the datetime dimension;
    # everything else is treated as real valued
    for position, label in enumerate(header.split(',')):
        lowered = label.lower()
        if 'date' in lowered or 'time' in lowered:
            dt_index = position
        else:
            real_indices.append(position)

    return dt_index, real_indices
145 | 146 | >>> { 147 | >>> 'name': The file name loaded, 148 | >>> 'category': The category the file came from, 149 | >>> 'description': A description, 150 | >>> 'data': The real valued data as an np.ndarray, 151 | >>> 'datetime': The datetime as an np.ndarray 152 | >>> } 153 | 154 | """ 155 | datasets = fetch_available() 156 | 157 | # find the filename in datasets matching either on filename provided or 158 | # the base name 159 | filename = None 160 | category = None 161 | description = None 162 | for dataset in datasets: 163 | base_name = dataset['name'].split('.')[0] 164 | 165 | if name.lower() == base_name or name.lower() == dataset['name']: 166 | filename = dataset['name'] 167 | category = dataset['category'] 168 | description = dataset['description'] 169 | 170 | if not filename: 171 | raise ValueError('Could not find dataset {}'.format(name)) 172 | 173 | # download the file 174 | output_dir = os.path.join(DATA_DIR, category) 175 | create_dirs(output_dir) 176 | output_path = os.path.join(output_dir, filename) 177 | 178 | if not os.path.exists(output_path): 179 | url = DATA_URL.format(category, filename) 180 | urlretrieve(url, output_path) 181 | 182 | # load the file based on type 183 | is_txt = filename.endswith('.txt') 184 | is_txt_gunzip = filename.endswith('.txt.gz') 185 | is_csv = filename.endswith('.csv') 186 | is_csv_gunzip = filename.endswith('.csv.gz') 187 | 188 | data = None 189 | dt_data = None 190 | if is_txt or is_txt_gunzip: 191 | data = np.loadtxt(output_path) 192 | elif is_csv or is_csv_gunzip: 193 | dt_index, real_indices = get_csv_indices( 194 | output_path, is_gzip=is_csv_gunzip) 195 | 196 | if isinstance(dt_index, int): 197 | dt_data = np.genfromtxt( 198 | output_path, 199 | dtype='datetime64', 200 | delimiter=',', 201 | skip_header=True, 202 | usecols=[dt_index,] 203 | ) 204 | 205 | data = np.genfromtxt( 206 | output_path, 207 | delimiter=',', 208 | dtype='float64', 209 | skip_header=True, 210 | usecols=real_indices 211 | ) 212 | 213 | 
class NoSolutionPossible(Exception):
    """Raised to explicitly tell a user that no solution can be produced for
    the inputs they provided.
    """
def from_json(profile):
    """
    Converts a JSON formatted string, or an open file-like object containing
    JSON, into a profile data structure.

    Parameters
    ----------
    profile : str or file-like
        The profile as a JSON formatted string, or a readable file-like
        object containing the JSON document.

    Returns
    -------
    profile : dict_like
        A MatrixProfile or Pan-MatrixProfile data structure.

    Raises
    ------
    ValueError
        If the decoded document is neither a MatrixProfile nor a PMP.

    """
    # the docstring has always advertised string input, but file handles are
    # what from_disk passes in; support both
    if hasattr(profile, 'read'):
        dct = json.load(profile)
    else:
        dct = json.loads(profile)

    # handle pmp and convert to appropriate types
    if core.is_pmp_obj(dct):
        dct['pmp'] = np.array(dct['pmp'], dtype='float64')
        dct['pmpi'] = np.array(dct['pmpi'], dtype=int)
        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')
        dct['windows'] = np.array(dct['windows'], dtype=int)

    # handle mp
    elif core.is_mp_obj(dct):
        dct['mp'] = np.array(dct['mp'], dtype='float64')
        dct['pi'] = np.array(dct['pi'], dtype=int)

        # left/right profiles are optional; only convert when present as lists
        has_l = isinstance(dct['lmp'], list)
        has_l = has_l and isinstance(dct['lpi'], list)

        if has_l:
            dct['lmp'] = np.array(dct['lmp'], dtype='float64')
            dct['lpi'] = np.array(dct['lpi'], dtype=int)

        has_r = isinstance(dct['rmp'], list)
        has_r = has_r and isinstance(dct['rpi'], list)

        if has_r:
            dct['rmp'] = np.array(dct['rmp'], dtype='float64')
            dct['rpi'] = np.array(dct['rpi'], dtype=int)

        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')

        if isinstance(dct['data']['query'], list):
            dct['data']['query'] = np.array(dct['data']['query'], dtype='float64')
    else:
        raise ValueError('File is not of type profile!')

    return dct
def add_extension_to_path(file_path, extension):
    """
    Utility function to add the file extension when it is not provided by the
    user in the file path.

    Parameters
    ----------
    file_path : str
        The file path.
    extension : str
        The file format extension (without the leading dot).

    Returns
    -------
    str :
        The file path with the extension appended when it was missing.
    """
    # docstring previously claimed a second return value (the extension);
    # the function has only ever returned the path
    suffix = '.{}'.format(extension)
    if file_path.endswith(suffix):
        return file_path

    return '{}{}'.format(file_path, suffix)
def from_disk(file_path, format='infer'):
    """
    Reads a profile object of type MatrixProfile or PMP from disk into the
    respective object type. By default the type is inferred by the file
    extension.

    Parameters
    ----------
    file_path : str
        The path to read the file from.
    format : str, default infer
        The file format type to read from disk. Options include:
        infer, json, mpf

    Returns
    -------
    profile : dict_like, None
        A MatrixProfile or Pan-MatrixProfile data structure.
    """
    # resolve the format up front: infer from the extension, otherwise
    # reject anything outside the supported set
    if format == 'infer':
        format = infer_file_format(file_path)
    elif format not in SUPPORTED_FORMATS:
        raise ValueError('format supplied {} is not supported'.format(format))

    profile = None

    if format == 'json':
        # json is text; the parser consumes the open handle directly
        with open(file_path) as handle:
            profile = from_json(handle)
    elif format == 'mpf':
        # mpf is a binary protobuf payload; read the raw bytes
        with open(file_path, 'rb') as handle:
            profile = from_mpf(handle.read())

    return profile
repeated int64 data = 3 [packed=true]; 19 | } 20 | 21 | // Data structure to specify a location in a matrix (row, col) 22 | message Location { 23 | uint64 row = 1; 24 | uint64 col = 2; 25 | } 26 | 27 | // Store Motif for any dimensions 28 | message Motif { 29 | repeated Location motifs = 1; 30 | repeated Location neighbors = 2; 31 | } 32 | 33 | // Store Matrix Profile specific attributes 34 | message MatrixProfile { 35 | // matrix profile distances 36 | DoubleMatrix mp = 1; 37 | 38 | // matrix profile indices 39 | IntMatrix pi = 2; 40 | 41 | // left matrix profile distances 42 | DoubleMatrix lmp = 3; 43 | 44 | // left matrix profile indices 45 | IntMatrix lpi = 4; 46 | 47 | // right matrix profile distances 48 | DoubleMatrix rmp = 5; 49 | 50 | // right matrix profile indices 51 | IntMatrix rpi = 6; 52 | 53 | // exclusion zone used to calculate the matrix profile 54 | uint64 ez = 9; 55 | 56 | // boolean to determine if a similarity join compuation happened 57 | bool join = 10; 58 | } 59 | 60 | // Store Pan-MatrixProfile specific attributes 61 | message PanMatrixProfile { 62 | // Pan-MatrixProfile distances 63 | DoubleMatrix pmp = 1; 64 | 65 | // Pan-MatrixProfile indices 66 | IntMatrix pmpi = 2; 67 | } 68 | 69 | message MPFOutput { 70 | // the class of the data structure written (MatrixProfile or PMP) 71 | string klass = 1; 72 | 73 | // the algorithm used to compute the MP/PMP 74 | string algorithm = 2; 75 | 76 | // the distance metric stored in the MP/PMP 77 | string metric = 3; 78 | 79 | // the percetage of samples computed for the MP/PMP 80 | double sample_pct = 4; 81 | 82 | // the window(s) used in the computation 83 | repeated uint64 windows = 5 [packed=true]; 84 | 85 | // the time series - original data 86 | DoubleMatrix ts = 6; 87 | 88 | // optionally the query when provided - original data 89 | DoubleMatrix query = 7; 90 | 91 | // the matrix profile envelope 92 | MatrixProfile mp = 8; 93 | 94 | // the pan-matrixprofile envelope 95 | PanMatrixProfile pmp = 9; 
def empty_mp():
    """
    Utility function that provides an empty MatrixProfile data structure.

    Returns
    -------
    dict : profile
        An empty MatrixProfile data structure.

    """
    return {
        'mp': None,
        'pi': None,
        'rmp': None,
        'rpi': None,
        'lmp': None,
        'lpi': None,
        'metric': None,
        'w': None,
        'ez': None,
        'join': None,
        'data': {
            'ts': None,
            'query': None
        },
        'class': 'MatrixProfile',
        'algorithm': None
    }


def pick_mp(profile, window):
    """
    Utility function that extracts a MatrixProfile from a Pan-MatrixProfile
    placing it into the MatrixProfile data structure.

    Parameters
    ----------
    profile : dict
        A Pan-MatrixProfile data structure.
    window : int
        The specific window size used to compute the desired MatrixProfile.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure.

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.
        If window is not an integer.
    RuntimeError
        If the desired MatrixProfile is not found based on window.

    """

    if not core.is_pmp_obj(profile):
        raise ValueError('pick_mp expects profile as a PMP data structure!')

    # accept plain ints as well as numpy integer scalars (e.g. values taken
    # directly from profile['windows'])
    if not isinstance(window, (int, np.integer)):
        raise ValueError('pick_mp expects window to be an int!')

    mp_profile = empty_mp()

    # find the window index
    windows = profile.get('windows')
    window_index = np.argwhere(windows == window)

    if len(window_index) < 1:
        raise RuntimeError('Unable to find window {} in the provided PMP!'.format(window))

    window_index = window_index.flatten()[0]

    window = windows[window_index]
    mp = profile['pmp'][window_index]
    n = len(mp)

    # a PMP row is padded out to the time series length; the last window - 1
    # entries do not correspond to a full subsequence, so trim them off
    mp_profile['mp'] = mp[0:n-window+1]
    mp_profile['pi'] = profile['pmpi'][window_index][0:n-window+1]
    mp_profile['metric'] = profile['metric']
    mp_profile['data']['ts'] = profile['data']['ts']
    mp_profile['join'] = False
    mp_profile['w'] = int(window)
    mp_profile['ez'] = int(np.floor(windows[window_index] / 4))
    mp_profile['algorithm'] = 'mpx'

    return mp_profile
# Build and distribution script for the matrixprofile package.
# Compiles the Cython extension modules and registers package metadata.
import setuptools

from setuptools import dist
# Fetch build-time dependencies up front so the Cython/numpy imports
# below succeed even in a bare environment.
dist.Distribution().fetch_build_eggs(['cython>=0.x', 'numpy>=1.16.2', 'wheel'])

from distutils.extension import Extension

from Cython.Build import cythonize
import numpy

import os, sys
from glob import glob

DIR_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(DIR_PATH)

import version

SOURCE_URL = 'https://github.com/matrix-profile-foundation/matrixprofile'
README = os.path.join(DIR_PATH, 'README.rst')

# manual list of files to be compiled
# cympx is built with OpenMP for parallel computation; cycore is serial.
extensions = []
extensions.append(Extension(
    'matrixprofile.algorithms.cympx',
    ['matrixprofile/algorithms/cympx.pyx'],
    extra_compile_args = ["-O2", "-fopenmp" ],
    extra_link_args = ['-fopenmp'],
    include_dirs=[numpy.get_include()],
))

extensions.append(Extension(
    'matrixprofile.cycore',
    ['matrixprofile/cycore.pyx'],
    extra_compile_args = ["-O2",],
    include_dirs=[numpy.get_include()],
))

# Python 2 needs unpinned matplotlib and an older scipy range, and its
# open() does not accept an encoding argument.
matplot = 'matplotlib>=3.0.3'
scipy = 'scipy>=1.3.2,<2.0.0'
if sys.version_info.major == 3:
    with open(README, 'r', encoding='utf-8') as fh:
        long_description = fh.read()
elif sys.version_info.major == 2:
    matplot = 'matplotlib'
    scipy = 'scipy<2.0.0'
    with open(README, 'r') as fh:
        long_description = fh.read()

# copy version file over so the installed package carries its own version
with open(os.path.join(DIR_PATH, 'version.py')) as fh:
    with open(os.path.join(DIR_PATH, 'matrixprofile', 'version.py'), 'w') as out:
        out.write(fh.read())

setuptools.setup(
    name="matrixprofile",
    version=version.__version__,
    author="Matrix Profile Foundation",
    author_email="tylerwmarrs@gmail.com",
    description="An open source time series data mining library based on Matrix Profile algorithms.",
    long_description=long_description,
    long_description_content_type="text/x-rst",
    url=SOURCE_URL,
    project_urls={
        'Matrix Profile Foundation': 'https://matrixprofile.org',
        'Source Code': SOURCE_URL,
    },
    include_package_data=True,
    packages = setuptools.find_packages(),
    setup_requires=['cython>=0.x', 'wheel'],
    install_requires=['numpy>=1.16.2', matplot, 'protobuf>=3.11.2,<4.0.0', scipy],
    ext_modules=cythonize(extensions),
    include_dirs=[numpy.get_include()],
    classifiers=[
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Intended Audience :: Science/Research",
        "Intended Audience :: Information Technology",
        "Intended Audience :: Developers",
        "Operating System :: MacOS",
        "Operating System :: Microsoft :: Windows",
        "Operating System :: POSIX",
        "Topic :: Software Development",
        "Topic :: Scientific/Engineering",
    ],
    keywords="matrix profile time series discord motif analysis data science anomaly pattern",
)
4 | 0 5 | 0 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 1.1921e-07 18 | 1.1921e-07 19 | 0 20 | 1.46e-07 21 | 1.1921e-07 22 | 1.1921e-07 23 | 1.46e-07 24 | 1.1921e-07 25 | 2.0648e-07 26 | 1.8849e-07 27 | 2.2302e-07 28 | 2.3842e-07 29 | 2.0648e-07 30 | 1.6859e-07 31 | 2.3842e-07 32 | 1.1921e-07 33 | 2.0648e-07 34 | 1.8849e-07 35 | 2.3842e-07 36 | 2.5288e-07 37 | 2.7957e-07 38 | 2.3842e-07 39 | 2.5288e-07 40 | 2.6656e-07 41 | 2.3842e-07 42 | 2.2302e-07 43 | 2.3842e-07 44 | 2.5288e-07 45 | 2.5288e-07 46 | 2.6656e-07 47 | 2.7957e-07 48 | 2.6656e-07 49 | 2.2302e-07 50 | 2.3842e-07 51 | 1.46e-07 52 | 1.6859e-07 53 | 2.0648e-07 54 | 2.0648e-07 55 | 2.2302e-07 56 | 2.3842e-07 57 | 2.3842e-07 58 | 2.6656e-07 59 | 2.0648e-07 60 | 1.6859e-07 61 | 2.0648e-07 62 | 2.0648e-07 63 | 2.0648e-07 64 | 2.2302e-07 65 | 2.2302e-07 66 | 2.0648e-07 67 | 2.5288e-07 68 | 2.2302e-07 69 | 2.5288e-07 70 | 2.0648e-07 71 | -------------------------------------------------------------------------------- /tests/mpx_ab_mpia.txt: -------------------------------------------------------------------------------- 1 | 47 2 | 48 3 | 49 4 | 50 5 | 51 6 | 52 7 | 53 8 | 54 9 | 55 10 | 56 11 | 57 12 | 58 13 | 59 14 | 60 15 | 61 16 | 62 17 | 63 18 | 64 19 | 65 20 | 66 21 | 67 22 | 68 23 | 69 24 | 70 25 | 31 26 | 32 27 | 33 28 | 34 29 | 35 30 | 36 31 | 37 32 | 38 33 | 39 34 | 40 35 | 61 36 | 62 37 | 63 38 | 64 39 | 65 40 | 66 41 | 27 42 | 28 43 | 29 44 | 30 45 | 31 46 | 32 47 | 33 48 | 34 49 | 35 50 | 36 51 | 37 52 | 38 53 | 59 54 | 60 55 | 61 56 | 62 57 | 43 58 | 44 59 | 45 60 | 46 61 | 47 62 | 48 63 | 49 64 | 50 65 | 51 66 | 52 67 | 53 68 | 54 69 | 55 70 | 56 71 | 57 72 | 58 73 | 59 74 | 60 75 | 61 76 | 62 77 | 63 78 | 64 79 | 65 80 | 66 81 | 67 82 | 68 83 | 69 84 | 70 85 | 31 86 | 32 87 | 33 88 | 34 89 | 35 90 | 36 91 | 37 92 | 38 93 | 39 94 | 40 95 | 41 96 | 42 97 | 43 98 | 44 99 | 45 100 | 46 101 | 27 102 | 28 103 | 29 104 | 30 105 | 31 106 | 32 107 | 33 108 | 34 109 | 35 
110 | 36 111 | 37 112 | 38 113 | 39 114 | 40 115 | 41 116 | 42 117 | 43 118 | 44 119 | 45 120 | 46 121 | 47 122 | 48 123 | 49 124 | 50 125 | 51 126 | 32 127 | 53 128 | 34 129 | 35 130 | 36 131 | 37 132 | 38 133 | 39 134 | 40 135 | 41 136 | 42 137 | 43 138 | 44 139 | 45 140 | 46 141 | 47 142 | 48 143 | 49 144 | 50 145 | 51 146 | 52 147 | 53 148 | 54 149 | 55 150 | 56 151 | 57 152 | 58 153 | 59 154 | 60 155 | 61 156 | 62 157 | 63 158 | 64 159 | 65 160 | 66 161 | 67 162 | 68 163 | 69 164 | 70 165 | 31 166 | 32 167 | 33 168 | 34 169 | 55 170 | 36 171 | 57 172 | 58 173 | 59 174 | 60 175 | 61 176 | 62 177 | 63 178 | 64 179 | 65 180 | 66 181 | 67 182 | 68 183 | 69 184 | 70 185 | 70 186 | 52 187 | 53 188 | 54 189 | 55 190 | 56 191 | 57 192 | 58 193 | 59 194 | 60 195 | 61 196 | 61 197 | 62 198 | 1 199 | 1 200 | 1 201 | 2 202 | 3 203 | 4 204 | 5 205 | 6 206 | 7 207 | 8 208 | 9 209 | 10 210 | 11 211 | 12 212 | 13 213 | 14 214 | 15 215 | 16 216 | 17 217 | 18 218 | 19 219 | 20 220 | 21 221 | 22 222 | 23 223 | 24 224 | 25 225 | 26 226 | 27 227 | 28 228 | 29 229 | 30 230 | 31 231 | 32 232 | 33 233 | 34 234 | 35 235 | 36 236 | 37 237 | 38 238 | 39 239 | 40 240 | 41 241 | 42 242 | 43 243 | 44 244 | 45 245 | 46 246 | 47 247 | 48 248 | 49 249 | 50 250 | 51 251 | 52 252 | 53 253 | 54 254 | 55 255 | 56 256 | 57 257 | 58 258 | 59 259 | 60 260 | 61 261 | 62 262 | 63 263 | 64 264 | 65 265 | 66 266 | 67 267 | 68 268 | 69 269 | 70 270 | 31 271 | 32 272 | 33 273 | 34 274 | 35 275 | 36 276 | 37 277 | 38 278 | 59 279 | 40 280 | 61 281 | 62 282 | 63 283 | 64 284 | 65 285 | 66 286 | 67 287 | 68 288 | 69 289 | 70 290 | 31 291 | 32 292 | 33 293 | 34 294 | 55 295 | 36 296 | 57 297 | 58 298 | 59 299 | 60 300 | 61 301 | 62 302 | 63 303 | 64 304 | 65 305 | 66 306 | 67 307 | 68 308 | 69 309 | 70 310 | 31 311 | 32 312 | 33 313 | 34 314 | 55 315 | 56 316 | 57 317 | 38 318 | 39 319 | 40 320 | 61 321 | 62 322 | 63 323 | 64 324 | 65 325 | 66 326 | 67 327 | 68 328 | 69 329 | 70 330 | 31 331 | 32 332 | 33 333 
| 34 334 | 35 335 | 36 336 | 37 337 | 38 338 | 39 339 | 40 340 | 61 341 | 62 342 | 63 343 | 64 344 | 65 345 | 66 346 | 67 347 | 68 348 | 69 349 | 70 350 | 31 351 | 32 352 | 33 353 | 34 354 | 55 355 | 56 356 | 57 357 | 58 358 | 59 359 | 60 360 | 61 361 | 62 362 | 63 363 | 64 364 | 65 365 | 66 366 | 27 367 | 28 368 | 29 369 | 30 370 | 31 371 | 32 372 | 33 373 | 34 374 | 35 375 | 36 376 | 37 377 | 38 378 | 39 379 | 40 380 | 41 381 | 42 382 | 43 383 | 44 384 | 45 385 | 46 386 | 27 387 | 28 388 | 29 389 | 30 390 | 31 391 | 32 392 | 33 393 | 34 394 | 35 395 | 36 396 | 37 397 | 38 398 | 59 399 | 60 400 | 61 401 | 62 402 | 63 403 | 64 404 | 65 405 | 66 406 | 27 407 | 28 408 | 29 409 | 69 410 | 70 411 | 31 412 | 52 413 | 53 414 | 55 415 | 56 416 | 57 417 | 58 418 | 59 419 | 60 420 | 61 421 | 61 422 | 62 423 | 1 424 | 1 425 | 1 426 | 6 427 | 55 428 | 8 429 | 9 430 | 1 431 | 11 432 | 12 433 | 59 434 | 60 435 | 41 436 | 42 437 | 43 438 | 25 439 | 25 440 | 46 441 | 47 442 | 48 443 | 49 444 | 50 445 | 51 446 | 2 447 | 3 448 | 4 449 | 5 450 | 6 451 | 40 452 | 40 453 | 1 454 | 2 455 | 2 456 | 3 457 | 4 458 | 5 459 | 6 460 | 7 461 | 50 462 | 32 463 | 33 464 | 34 465 | 2 466 | 3 467 | 4 468 | 5 469 | 39 470 | 7 471 | 1 472 | 1 473 | 10 474 | 11 475 | 1 476 | 6 477 | 7 478 | 8 479 | 9 480 | 10 481 | 12 482 | 13 483 | 13 484 | 14 485 | 15 486 | 16 487 | 17 488 | 18 489 | 19 490 | 20 491 | 21 492 | 22 493 | 24 494 | 25 495 | 43 496 | 42 497 | 62 498 | 42 499 | 43 500 | 43 501 | 44 502 | 45 503 | 46 504 | 46 505 | 47 506 | 47 507 | 68 508 | 49 509 | 53 510 | 54 511 | 55 512 | 56 513 | 56 514 | 57 515 | 57 516 | 58 517 | 58 518 | 59 519 | 60 520 | 5 521 | 6 522 | 7 523 | 8 524 | 9 525 | 10 526 | 11 527 | 12 528 | 13 529 | 14 530 | 15 531 | 16 532 | 17 533 | 17 534 | 18 535 | 19 536 | 20 537 | 21 538 | 22 539 | 23 540 | 24 541 | 25 542 | 26 543 | 28 544 | 29 545 | 69 546 | 70 547 | 31 548 | 32 549 | 52 550 | 53 551 | 54 552 | 36 553 | 2 554 | 38 555 | 58 556 | 5 557 | 6 558 | 61 559 | 62 
560 | 63 561 | 1 562 | 23 563 | 25 564 | 25 565 | 46 566 | 47 567 | 26 568 | 49 569 | 50 570 | 1 571 | 2 572 | 3 573 | 4 574 | 5 575 | 6 576 | 7 577 | 8 578 | 1 579 | 1 580 | 2 581 | 3 582 | 4 583 | 5 584 | 6 585 | 7 586 | 1 587 | 9 588 | 1 589 | 5 590 | 1 591 | 35 592 | 8 593 | 56 594 | 1 595 | 58 596 | 1 597 | 1 598 | 7 599 | 8 600 | 1 601 | 13 602 | 9 603 | 10 604 | 11 605 | 10 606 | 14 607 | 14 608 | 15 609 | 16 610 | 17 611 | 19 612 | 19 613 | 21 614 | 21 615 | 22 616 | 24 617 | 25 618 | 65 619 | 66 620 | 24 621 | 22 622 | 2 623 | 42 624 | 43 625 | 43 626 | 44 627 | 45 628 | 46 629 | 46 630 | 47 631 | 47 632 | 68 633 | 3 634 | 4 635 | 54 636 | 55 637 | 56 638 | 56 639 | 57 640 | 57 641 | 58 642 | 58 643 | 59 644 | 60 645 | 5 646 | 6 647 | 7 648 | 8 649 | 9 650 | 10 651 | 11 652 | 12 653 | 13 654 | 14 655 | 15 656 | 16 657 | 17 658 | 17 659 | 18 660 | 19 661 | 20 662 | 21 663 | 22 664 | 23 665 | 24 666 | 25 667 | 26 668 | 28 669 | 29 670 | 69 671 | 70 672 | 31 673 | 32 674 | 52 675 | 53 676 | 54 677 | 55 678 | 8 679 | 9 680 | 56 681 | 57 682 | 58 683 | 59 684 | 40 685 | 41 686 | 42 687 | 21 688 | 25 689 | 23 690 | 46 691 | 47 692 | 26 693 | 47 694 | 50 695 | 51 696 | 2 697 | 3 698 | 4 699 | 5 700 | 6 701 | 39 702 | 58 703 | 41 704 | 60 705 | 2 706 | 3 707 | 4 708 | 5 709 | 6 710 | 7 711 | 8 712 | 9 713 | 33 714 | 34 715 | 1 716 | 35 717 | 8 718 | 37 719 | 39 720 | 58 721 | 41 722 | 1 723 | 5 724 | 6 725 | 7 726 | 8 727 | 9 728 | 10 729 | 11 730 | 12 731 | 13 732 | 14 733 | 15 734 | 15 735 | 17 736 | 18 737 | 1 738 | 19 739 | 20 740 | 21 741 | 5 742 | 6 743 | 7 744 | 8 745 | 9 746 | 10 747 | 11 748 | 12 749 | 13 750 | 14 751 | 15 752 | 16 753 | 17 754 | 18 755 | 19 756 | 20 757 | 21 758 | 22 759 | 23 760 | 44 761 | 46 762 | 27 763 | 28 764 | 29 765 | 30 766 | 31 767 | 32 768 | 53 769 | 55 770 | 56 771 | 57 772 | 58 773 | 59 774 | 60 775 | 62 776 | 63 777 | 64 778 | 46 779 | 13 780 | 14 781 | 16 782 | 17 783 | 18 784 | 19 785 | 20 786 | 21 787 | 22 788 | 23 789 | 
24 790 | 25 791 | 45 792 | 46 793 | 47 794 | 48 795 | 49 796 | 17 797 | 18 798 | 19 799 | 20 800 | 21 801 | 22 802 | 23 803 | 24 804 | 43 805 | 64 806 | 46 807 | 47 808 | 48 809 | 30 810 | 31 811 | 33 812 | 34 813 | 35 814 | 36 815 | 37 816 | 59 817 | 60 818 | 62 819 | 63 820 | 45 821 | 46 822 | 47 823 | 15 824 | 16 825 | 17 826 | 18 827 | 19 828 | 20 829 | 21 830 | 22 831 | 23 832 | 24 833 | 25 834 | 45 835 | 46 836 | 47 837 | 49 838 | 30 839 | 52 840 | 17 841 | 18 842 | 20 843 | 21 844 | 21 845 | -------------------------------------------------------------------------------- /tests/mpx_ab_mpib.txt: -------------------------------------------------------------------------------- 1 | 200 2 | 201 3 | 202 4 | 203 5 | 204 6 | 205 7 | 206 8 | 207 9 | 208 10 | 209 11 | 210 12 | 211 13 | 212 14 | 213 15 | 214 16 | 215 17 | 216 18 | 217 19 | 218 20 | 219 21 | 220 22 | 221 23 | 222 24 | 223 25 | 224 26 | 225 27 | 226 28 | 227 29 | 228 30 | 229 31 | 230 32 | 231 33 | 232 34 | 233 35 | 234 36 | 235 37 | 236 38 | 237 39 | 238 40 | 239 41 | 240 42 | 241 43 | 242 44 | 243 45 | 244 46 | 245 47 | 246 48 | 247 49 | 248 50 | 249 51 | 250 52 | 251 53 | 252 54 | 253 55 | 254 56 | 255 57 | 256 58 | 257 59 | 258 60 | 259 61 | 260 62 | 261 63 | 262 64 | 263 65 | 264 66 | 265 67 | 266 68 | 267 69 | 268 70 | 269 71 | -------------------------------------------------------------------------------- /tests/test_analyze.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile import analyze 18 | 19 | import matrixprofile 20 | MODULE_PATH = 
matrixprofile.__path__[0] 21 | 22 | 23 | def test_analyze_mp_exact_no_query(): 24 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 25 | m = 32 26 | 27 | profile, figures = analyze(ts, windows=m) 28 | assert(profile['algorithm'] == 'mpx') 29 | assert(profile['w'] == 32) 30 | assert(profile['data']['query'] == None) 31 | assert(profile['join'] == False) 32 | assert(profile['sample_pct'] == 1) 33 | assert(profile['class'] == 'MatrixProfile') 34 | assert('motifs' in profile) 35 | assert('discords' in profile) 36 | assert(len(figures) == 4) 37 | 38 | 39 | def test_analyze_mp_exact_with_query(): 40 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 41 | query = ts[100:200] 42 | m = 32 43 | 44 | profile, figures = analyze(ts, windows=m, query=query) 45 | assert(profile['algorithm'] == 'mpx') 46 | assert(profile['w'] == 32) 47 | np.testing.assert_equal(profile['data']['query'], query) 48 | assert(profile['join'] == True) 49 | assert(profile['sample_pct'] == 1) 50 | assert(profile['class'] == 'MatrixProfile') 51 | assert('motifs' in profile) 52 | assert('discords' in profile) 53 | assert(len(figures) == 4) 54 | 55 | 56 | def test_analyze_mp_approximate(): 57 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 58 | m = 32 59 | 60 | profile, figures = analyze(ts, windows=m, sample_pct=0.5) 61 | assert(profile['algorithm'] == 'scrimp++') 62 | assert(profile['w'] == 32) 63 | assert(profile['data']['query'] == None) 64 | assert(profile['join'] == False) 65 | assert(profile['sample_pct'] == 0.5) 66 | assert(profile['class'] == 'MatrixProfile') 67 | assert(len(figures) == 4) 68 | 69 | 70 | def test_analyze_pmp_no_sample_pct(): 71 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 72 | 73 | profile, figures = analyze(ts) 74 | assert(profile['algorithm'] == 'skimp') 75 | assert(profile['class'] == 'PMP') 76 | assert(profile['sample_pct'] == 1) 77 | assert(len(figures) == 6) 
78 | 79 | 80 | def test_analyze_pmp_sample_pct(): 81 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 82 | 83 | profile, figures = analyze(ts, sample_pct=0.1) 84 | assert(profile['algorithm'] == 'skimp') 85 | assert(profile['class'] == 'PMP') 86 | assert(profile['sample_pct'] == 0.1) 87 | assert(len(figures) == 6) 88 | 89 | 90 | def test_analyze_pmp_windows(): 91 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 92 | windows = np.arange(8, 32) 93 | 94 | profile, figures = analyze(ts, windows=windows, sample_pct=1) 95 | assert(profile['algorithm'] == 'skimp') 96 | assert(profile['class'] == 'PMP') 97 | assert(profile['sample_pct'] == 1) 98 | np.testing.assert_equal(profile['windows'], windows) 99 | assert(len(figures) == 6) 100 | 101 | 102 | def test_preprocess(): 103 | ts = np.array([2, 3, 2, 3, 1, 2, 3, 4, 2, np.nan, np.inf, 4, 2, 3, 4, 5, 104 | 6, 7, 8, 3, 4, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, np.nan, np.nan, 105 | np.inf, np.nan, np.inf, np.nan, np.inf, np.nan, np.inf]) 106 | m = 6 107 | preprocessing_kwargs = { 108 | 'window': 5, 109 | 'impute_method': 'median', 110 | 'impute_direction': 'backward', 111 | 'add_noise': False 112 | } 113 | 114 | result = analyze(ts, windows=m, preprocessing_kwargs=preprocessing_kwargs) 115 | preprocessed_ts = result[0]['data']['ts'] 116 | assert (np.any(np.isnan(preprocessed_ts)) == False) 117 | assert (np.any(np.isinf(preprocessed_ts)) == False) 118 | 119 | # if preprocessing_kwargs=None, we disable the preprocessing procedure. 120 | result = analyze(ts, windows=m, preprocessing_kwargs=None) 121 | unprocessed_ts = result[0]['data']['ts'] 122 | assert (np.any(np.isnan(unprocessed_ts)) == True) 123 | assert (np.any(np.isinf(unprocessed_ts)) == True) 124 | 125 | # check if preprocessing_kwargs is None by default. 
126 | result = analyze(ts, windows=m) 127 | unprocessed_ts = result[0]['data']['ts'] 128 | assert(np.any(np.isnan(unprocessed_ts)) == True) 129 | assert(np.any(np.isinf(unprocessed_ts)) == True) 130 | 131 | with pytest.raises(ValueError) as excinfo: 132 | analyze(ts, windows=m, preprocessing_kwargs=1) 133 | assert "The parameter 'preprocessing_kwargs' is not dict like!" \ 134 | in str(excinfo.value) 135 | 136 | with pytest.raises(ValueError) as excinfo: 137 | preprocessing_kwargs = { 138 | 'win': 5, 139 | 'impute_dir': 'backward', 140 | } 141 | analyze(ts, windows=m, preprocessing_kwargs=preprocessing_kwargs) 142 | assert "invalid key(s) for preprocessing_kwargs! valid key(s) should include " \ 143 | "{'impute_direction', 'add_noise', 'impute_method', 'window'}" \ 144 | in str(excinfo.value) -------------------------------------------------------------------------------- /tests/test_compute.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile import compute 18 | 19 | import matrixprofile 20 | MODULE_PATH = matrixprofile.__path__[0] 21 | 22 | 23 | def test_compute_mp_exact_no_query(): 24 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 25 | m = 32 26 | 27 | profile = compute(ts, windows=m) 28 | assert(profile['algorithm'] == 'mpx') 29 | assert(profile['w'] == 32) 30 | assert(profile['data']['query'] == None) 31 | assert(profile['join'] == False) 32 | assert(profile['sample_pct'] == 1) 33 | assert(profile['class'] == 'MatrixProfile') 34 | 35 | 36 | def test_compute_mp_exact_with_query(): 37 | ts 
= np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 38 | query = ts[100:200] 39 | m = 32 40 | 41 | profile = compute(ts, windows=m, query=query) 42 | assert(profile['algorithm'] == 'mpx') 43 | assert(profile['w'] == 32) 44 | np.testing.assert_equal(profile['data']['query'], query) 45 | assert(profile['join'] == True) 46 | assert(profile['sample_pct'] == 1) 47 | assert(profile['class'] == 'MatrixProfile') 48 | 49 | 50 | def test_compute_mp_approximate(): 51 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 52 | m = 32 53 | 54 | profile = compute(ts, windows=m, sample_pct=0.5) 55 | assert(profile['algorithm'] == 'scrimp++') 56 | assert(profile['w'] == 32) 57 | assert(profile['data']['query'] == None) 58 | assert(profile['join'] == False) 59 | assert(profile['sample_pct'] == 0.5) 60 | assert(profile['class'] == 'MatrixProfile') 61 | 62 | 63 | def test_compute_pmp_no_sample_pct_windows(): 64 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 65 | windows = np.arange(8, 32) 66 | 67 | profile = compute(ts, windows=windows) 68 | assert(profile['algorithm'] == 'skimp') 69 | assert(profile['class'] == 'PMP') 70 | assert(profile['sample_pct'] == 1) 71 | np.testing.assert_equal(profile['windows'], windows) 72 | 73 | 74 | def test_compute_pmp_sample_pct_windows(): 75 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 76 | windows = np.arange(8, 32) 77 | 78 | profile = compute(ts, windows=windows, sample_pct=1) 79 | assert(profile['algorithm'] == 'skimp') 80 | assert(profile['class'] == 'PMP') 81 | assert(profile['sample_pct'] == 1) 82 | np.testing.assert_equal(profile['windows'], windows) 83 | 84 | 85 | def test_compute_pmp_no_windows(): 86 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 87 | 88 | profile = compute(ts) 89 | assert(profile['algorithm'] == 'skimp') 90 | assert(profile['class'] == 'PMP') 91 | 92 | # sample pct is ignored when 
windows are provided and defaults to 1 93 | assert(profile['sample_pct'] == 1) 94 | 95 | 96 | def test_compute_pmp_no_windows_sample_pct(): 97 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 98 | 99 | profile = compute(ts, sample_pct=0.1) 100 | assert(profile['algorithm'] == 'skimp') 101 | assert(profile['class'] == 'PMP') 102 | 103 | # sample pct is ignored when windows are provided and defaults to 1 104 | assert(profile['sample_pct'] == 0.1) 105 | 106 | 107 | def test_compute_mp_invalid_windows(): 108 | ts = [3., 3., 3., 3., 3., 3., 3., 3.] 109 | 110 | with pytest.raises(ValueError) as excinfo: 111 | w = 0 112 | compute(ts, windows=w) 113 | assert 'Compute requires all window sizes to be greater than 3!' \ 114 | in str(excinfo.value) 115 | 116 | with pytest.raises(ValueError) as excinfo: 117 | w = 3 118 | compute(ts, windows=w) 119 | assert 'Compute requires all window sizes to be greater than 3!' \ 120 | in str(excinfo.value) 121 | 122 | with pytest.raises(ValueError) as excinfo: 123 | w = [4, 0] 124 | compute(ts, windows=w) 125 | assert 'Compute requires all window sizes to be greater than 3!' \ 126 | in str(excinfo.value) 127 | 128 | with pytest.raises(ValueError) as excinfo: 129 | w = [4, 3] 130 | compute(ts, windows=w) 131 | assert 'Compute requires all window sizes to be greater than 3!' 
\ 132 | in str(excinfo.value) 133 | 134 | 135 | def test_preprocess(): 136 | ts = np.array([np.nan, np.inf, np.inf, np.nan, np.inf, 2, 3, 2, 3, 1, 2, 3, 4, 2, 137 | np.nan, np.inf, 4, 2, 3, 4, 5, 6, 7, 8, 3, 4, 2, 3, 4, 5, 6, 7, 6, 138 | 5, 4, 3, np.nan, np.nan, np.inf, np.nan, np.inf, np.nan]) 139 | m = 6 140 | preprocessing_kwargs = { 141 | 'window': 5, 142 | 'impute_method': 'median', 143 | 'impute_direction': 'backward', 144 | 'add_noise': False 145 | } 146 | 147 | profile = compute(ts, windows=m, preprocessing_kwargs=preprocessing_kwargs) 148 | preprocessed_ts = profile['data']['ts'] 149 | assert(np.any(np.isnan(preprocessed_ts)) == False) 150 | assert(np.any(np.isinf(preprocessed_ts)) == False) 151 | 152 | # if preprocessing_kwargs=None, we disable the preprocessing procedure. 153 | profile = compute(ts, windows=m, preprocessing_kwargs=None) 154 | unprocessed_ts = profile['data']['ts'] 155 | assert(np.any(np.isnan(unprocessed_ts)) == True) 156 | assert(np.any(np.isinf(unprocessed_ts)) == True) 157 | 158 | # check if preprocessing_kwargs is None by default. 159 | profile = compute(ts, windows=m) 160 | unprocessed_ts = profile['data']['ts'] 161 | assert(np.any(np.isnan(unprocessed_ts)) == True) 162 | assert(np.any(np.isinf(unprocessed_ts)) == True) 163 | 164 | with pytest.raises(ValueError) as excinfo: 165 | compute(ts, windows=m, preprocessing_kwargs=1) 166 | assert "The parameter 'preprocessing_kwargs' is not dict like!" \ 167 | in str(excinfo.value) 168 | 169 | with pytest.raises(ValueError) as excinfo: 170 | preprocessing_kwargs = { 171 | 'win': 5, 172 | 'impute_dir': 'backward', 173 | } 174 | compute(ts, windows=m, preprocessing_kwargs=preprocessing_kwargs) 175 | assert "invalid key(s) for preprocessing_kwargs! 
valid key(s) should include " \ 176 | "{'impute_direction', 'add_noise', 'impute_method', 'window'}" \ 177 | in str(excinfo.value) -------------------------------------------------------------------------------- /tests/test_cycore.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | """Tests for `mass_ts` package.""" 12 | 13 | import os 14 | 15 | import pytest 16 | 17 | import numpy as np 18 | 19 | from matrixprofile import cycore 20 | import matrixprofile 21 | 22 | MODULE_PATH = matrixprofile.__path__[0] 23 | 24 | 25 | def test_moving_avg_std(): 26 | a = np.array([1, 2, 3, 4, 5, 6], dtype='d') 27 | mu, std = cycore.moving_avg_std(a, 3) 28 | mu_desired = np.array([2., 3., 4., 5.]) 29 | std_desired = np.array([0.81649658, 0.81649658, 0.81649658, 0.81649658]) 30 | 31 | np.testing.assert_almost_equal(mu, mu_desired) 32 | np.testing.assert_almost_equal(std, std_desired) 33 | 34 | 35 | def test_it_should_not_produce_nan_values_when_std_is_almost_zero(): 36 | a = np.array([10.1, 10.1, 10.1, 10.1, 10.1, 10.1, 10.1], dtype='d') 37 | mu, std = cycore.moving_avg_std(a, 3) 38 | mu_muinvn, std_muinvn = cycore.muinvn(a, 3) 39 | 40 | mu_desired = np.array([10.1, 10.1, 10.1, 10.1, 10.1]) 41 | std_desired = np.array([0, 0, 0, 0, 0]) 42 | 43 | np.testing.assert_almost_equal(mu, mu_desired) 44 | np.testing.assert_almost_equal(std, std_desired) 45 | 46 | np.testing.assert_almost_equal(mu_muinvn, mu_desired) 47 | np.testing.assert_almost_equal(std_muinvn, std_desired) 48 | 49 | 50 | def test_moving_muinvn(): 51 | a = np.array([1, 2, 3, 4, 5, 6], dtype='d') 52 | mu, std = cycore.muinvn(a, 3) 53 | mu_desired = np.array([2., 3., 4., 
5.]) 54 | std_desired = np.array([0.7071068, 0.7071068, 0.7071068, 0.7071068]) 55 | 56 | np.testing.assert_almost_equal(mu, mu_desired) 57 | np.testing.assert_almost_equal(std, std_desired) 58 | 59 | 60 | def test_muinvn_vs_matlab(): 61 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 62 | w = 32 63 | 64 | ml_mu = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'muinvn_mua.txt')) 65 | ml_std = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'muinvn_stda.txt')) 66 | 67 | mu, std = cycore.muinvn(ts, w) 68 | 69 | np.testing.assert_almost_equal(ml_mu, mu, decimal=4) 70 | np.testing.assert_almost_equal(ml_std, std, decimal=4) -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import pytest 12 | 13 | import numpy as np 14 | 15 | from matrixprofile.datasets.datasets import load 16 | from matrixprofile.datasets.datasets import fetch_available 17 | 18 | 19 | def test_load_valid(): 20 | dataset = load('motifs-discords-small') 21 | assert(isinstance(dataset['data'], np.ndarray) == True) 22 | assert('description' in dataset) 23 | assert('name' in dataset) 24 | assert('category' in dataset) 25 | 26 | 27 | def test_load_not_found(): 28 | with pytest.raises(ValueError) as excinfo: 29 | data = load('alksdfasdf') 30 | assert('Could not find dataset alksdfasdf' in str(excinfo.value)) 31 | 32 | 33 | def test_fetch_available_all(): 34 | datasets = fetch_available() 35 | assert(isinstance(datasets, list) == True) 36 | assert(len(datasets) > 0) 37 | 38 | 39 | def 
test_fetch_available_category_valid(): 40 | datasets = fetch_available(category='real') 41 | assert(isinstance(datasets, list) == True) 42 | assert(len(datasets) > 0) 43 | 44 | 45 | def test_fetch_available_category_invalid(): 46 | with pytest.raises(ValueError) as excinfo: 47 | fetch_available('alksdsfldfsd') 48 | assert('category alksdsfldfsd is not a valid option.' in str(excinfo.value)) -------------------------------------------------------------------------------- /tests/test_hierarchical_clustering.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile.algorithms.hierarchical_clustering import ( 18 | hierarchical_clusters 19 | ) 20 | 21 | def test_hierarchical_clusters_valid_simple(): 22 | np.random.seed(9999) 23 | ts = np.random.uniform(size=2**10) 24 | ts2 = np.random.uniform(size=2**10) 25 | ts3 = np.random.uniform(size=2**10) 26 | X = [ 27 | ts, 28 | ts, 29 | ts2, 30 | ts2, 31 | ts3 32 | ] 33 | w = 2**6 34 | t = 2 35 | 36 | clusters = hierarchical_clusters(X, w, t) 37 | 38 | # evaluate keys 39 | expected_keys = set([ 40 | 'pairwise_distances', 41 | 'linkage_matrix', 42 | 'inconsistency_statistics', 43 | 'assignments', 44 | 'cophenet', 45 | 'cophenet_distances', 46 | 'class' 47 | ]) 48 | actual_keys = set(clusters.keys()) 49 | assert(expected_keys == actual_keys) 50 | assert(clusters['class'] == 'hclusters') 51 | 52 | # evaluate cluster assignments 53 | expected_assignments = np.array([1, 1, 2, 2, 3]) 54 | np.testing.assert_equal(clusters['assignments'], expected_assignments) 55 | 56 | # evaluate cophenet score 57 
def test_hierarchical_clusters_valid_simple_parallel():
    """Parallel clustering (n_jobs=2) should produce the expected result layout."""
    np.random.seed(9999)
    series_a = np.random.uniform(size=2**10)
    series_b = np.random.uniform(size=2**10)
    series_c = np.random.uniform(size=2**10)
    X = [series_a, series_a, series_b, series_b, series_c]
    w = 2**6
    t = 2

    clusters = hierarchical_clusters(X, w, t, n_jobs=2)

    # the result dict must expose exactly these keys
    expected_keys = {
        'pairwise_distances',
        'linkage_matrix',
        'inconsistency_statistics',
        'assignments',
        'cophenet',
        'cophenet_distances',
        'class',
    }
    assert(expected_keys == set(clusters.keys()))
    assert(clusters['class'] == 'hclusters')

    # identical series pair up; the lone series gets its own cluster
    np.testing.assert_equal(clusters['assignments'], np.array([1, 1, 2, 2, 3]))

    # cophenetic correlation should be near-perfect for this construction
    np.testing.assert_almost_equal(clusters['cophenet'], 0.9999870997174531)

    # condensed pairwise distance vector
    expected_distances = np.array([0, 8.2299501, 8.2299501, 8.29915377,
                                   8.2299501, 8.2299501, 8.29915377, 0,
                                   8.2558308, 8.2558308])
    np.testing.assert_almost_equal(
        clusters['pairwise_distances'], expected_distances)
ts3 124 | ] 125 | w = 2**6 126 | t = 2 127 | 128 | # invalid X 129 | with pytest.raises(ValueError) as excinfo: 130 | clusters = hierarchical_clusters('', w, t) 131 | assert('X must be array_like!' == str(excinfo.value)) 132 | 133 | # invalid t 134 | with pytest.raises(ValueError) as excinfo: 135 | clusters = hierarchical_clusters(X, w, '') 136 | assert('t must be a scalar (int or float)' == str(excinfo.value)) 137 | 138 | # invalid threshold 0 139 | error = 'threshold must be a float greater than 0 and less than 1' 140 | with pytest.raises(ValueError) as excinfo: 141 | clusters = hierarchical_clusters(X, w, t, threshold=0) 142 | assert(error == str(excinfo.value)) 143 | 144 | # invalid threshold 1 145 | with pytest.raises(ValueError) as excinfo: 146 | clusters = hierarchical_clusters(X, w, t, threshold=1) 147 | assert(error == str(excinfo.value)) 148 | 149 | # invalid threshold not numeric 150 | with pytest.raises(ValueError) as excinfo: 151 | clusters = hierarchical_clusters(X, w, t, threshold='') 152 | assert(error == str(excinfo.value)) 153 | 154 | # invalid depth < 1 155 | error = 'depth must be an integer greater than 0' 156 | with pytest.raises(ValueError) as excinfo: 157 | clusters = hierarchical_clusters(X, w, t, depth=0) 158 | assert(error == str(excinfo.value)) 159 | 160 | # invalid depth not int 161 | with pytest.raises(ValueError) as excinfo: 162 | clusters = hierarchical_clusters(X, w, t, depth='') 163 | assert(error == str(excinfo.value)) 164 | 165 | # invalid method 166 | with pytest.raises(ValueError) as excinfo: 167 | clusters = hierarchical_clusters(X, w, t, method='') 168 | assert('method may be only one of' in str(excinfo.value)) 169 | 170 | # invalid criterion 171 | with pytest.raises(ValueError) as excinfo: 172 | clusters = hierarchical_clusters(X, w, t, criterion='') 173 | assert('criterion may be only one of' in str(excinfo.value)) 174 | -------------------------------------------------------------------------------- /tests/test_io.py: 
def _assert_profile_roundtrip(profile, dprofile, check_query):
    """Compare an in-memory profile against its deserialized copy."""
    assert(set(profile.keys()) == set(dprofile.keys()))

    # raw 'data' entries are nested dicts; compare their arrays explicitly below
    for key, value in profile.items():
        if key == 'data':
            continue
        if isinstance(value, np.ndarray):
            np.testing.assert_equal(value, dprofile[key])
        else:
            assert(value == dprofile[key])

    np.testing.assert_equal(profile['data']['ts'], dprofile['data']['ts'])
    if check_query:
        np.testing.assert_equal(profile['data']['query'], dprofile['data']['query'])


def test_disk_to_json_and_from_json_mp():
    """A matrix profile serialized to JSON should round-trip intact."""
    ts = np.random.uniform(size=1024)
    w = 32

    profile = mp.algorithms.mpx(ts, w)
    out = os.path.join(tempfile.gettempdir(), 'mp.json')
    mp.io.to_disk(profile, out)
    dprofile = mp.io.from_disk(out)

    _assert_profile_roundtrip(profile, dprofile, check_query=True)


def test_disk_to_json_and_from_json_pmp():
    """A pan matrix profile serialized to JSON should round-trip intact."""
    ts = np.random.uniform(size=1024)

    profile = mp.algorithms.skimp(ts)
    out = os.path.join(tempfile.gettempdir(), 'pmp.json')
    mp.io.to_disk(profile, out)
    dprofile = mp.io.from_disk(out)

    # pan matrix profiles carry no query series
    _assert_profile_roundtrip(profile, dprofile, check_query=False)
def test_disk_to_mpf_and_from_mpf_pmp():
    """A pan matrix profile written in mpf format should round-trip intact."""
    ts = np.random.uniform(size=1024)

    profile = mp.algorithms.skimp(ts)
    out = os.path.join(tempfile.gettempdir(), 'pmp.mpf')
    mp.io.to_disk(profile, out, format='mpf')
    dprofile = mp.io.from_disk(out)

    assert(set(profile.keys()) == set(dprofile.keys()))

    # every value survives serialization; the raw data dict is compared separately
    for key, value in profile.items():
        if key == 'data':
            continue
        if isinstance(value, np.ndarray):
            np.testing.assert_equal(value, dprofile[key])
        else:
            assert(value == dprofile[key])

    np.testing.assert_equal(profile['data']['ts'], dprofile['data']['ts'])
def test_mass2():
    """mass2 should reproduce precomputed (complex) distance profile values."""
    ts = np.array([1, 1, 1, 2, 1, 1, 4, 5])
    query = np.array([2, 1, 1, 4])

    # reference values computed offline; imaginary parts are FFT round-off
    desired = np.array([
        0.67640791-1.37044402e-16j,
        3.43092352+0.00000000e+00j,
        3.43092352+1.02889035e-17j,
        0.+0.00000000e+00j,
        1.85113597+1.21452707e-17j
    ])

    np.testing.assert_almost_equal(mass2(ts, query), desired)
def test_against_matlab():
    """Single-threaded mpdist should reproduce the MATLAB reference value."""
    ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt'))
    tsb = ts[199:300]
    w = 32

    actual = mpdist(ts, tsb, w, n_jobs=1)
    np.testing.assert_almost_equal(actual, 1.460009659995543e-07)


def test_against_matlab_parallel():
    """Parallel mpdist should reproduce the MATLAB reference value."""
    ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt'))
    tsb = ts[199:300]
    w = 32

    actual = mpdist(ts, tsb, w, n_jobs=-1)
    np.testing.assert_almost_equal(actual, 1.460009659995543e-07)


def test_ts_not_one_dimensional():
    """A two-dimensional ts argument must be rejected."""
    ts = np.array([[1, 1], [2, 2]])
    tsb = np.arange(10)
    w = 32

    with pytest.raises(ValueError) as excinfo:
        mpdist(ts, tsb, w)

    assert('ts must be one dimensional!' == str(excinfo.value))
def test_invalid_threshold():
    """Thresholds outside (0, 1) and non-numeric thresholds are rejected."""
    ts = np.arange(100)
    tsb = np.arange(100)
    w = 32
    error = 'threshold must be a float greater than 0 and less than 1'

    # below range, wrong type, and at the upper boundary
    for bad_threshold in (-1, 'str', 1):
        with pytest.raises(ValueError) as excinfo:
            mpdist(ts, tsb, w, threshold=bad_threshold)

        assert(error == str(excinfo.value))
def test_mpx_small_series_self_join_pearson_single_threaded():
    """Self-join with cross_correlation=True yields Pearson values (1 thread)."""
    ts = np.array([0, 1, 1, 1, 0, 0, 2, 1, 0, 0, 2, 1])
    w = 4
    expected_mp = np.array([0.522232967867094, 0.522232967867094,
                            0.904534033733291, 1, 1, 0.522232967867094,
                            0.904534033733291, 1, 1])
    expected_pi = np.array([4, 5, 6, 7, 8, 1, 2, 3, 4])

    result = mpx(ts, w, cross_correlation=True, n_jobs=1)

    np.testing.assert_almost_equal(result['mp'], expected_mp, decimal=4)
    np.testing.assert_almost_equal(result['pi'], expected_pi)


def test_mpx_small_series_self_join_pearson_multi_threaded():
    """Self-join with cross_correlation=True yields Pearson values (all threads)."""
    ts = np.array([0, 1, 1, 1, 0, 0, 2, 1, 0, 0, 2, 1])
    w = 4
    expected_mp = np.array([0.522232967867094, 0.522232967867094,
                            0.904534033733291, 1, 1, 0.522232967867094,
                            0.904534033733291, 1, 1])
    expected_pi = np.array([4, 5, 6, 7, 8, 1, 2, 3, 4])

    result = mpx(ts, w, cross_correlation=True, n_jobs=-1)

    np.testing.assert_almost_equal(result['mp'], expected_mp, decimal=4)
    np.testing.assert_almost_equal(result['pi'], expected_pi)
def test_mpx_small_series_similarity_join_multi_threaded():
    """AB-join against a query series with all available threads."""
    ts = np.array([1, 2, 3, 1, 2, 3, 4, 5, 6, 0, 0, 1, 1, 2, 2, 4, 5, 1, 1, 9]).astype('d')
    query = np.array([0, 0, 1, 1, 2, 2, 4, 5]).astype('d')
    w = 4

    expected_mp = np.array([
        2.36387589e+00, 2.82842712e+00, 2.17957574e+00, 6.40728972e-01,
        6.40728972e-01, 6.40728972e-01, 3.26103392e+00, 3.61947699e+00,
        3.39984131e+00, 0.00000000e+00, 4.21468485e-08, 0.00000000e+00,
        4.21468485e-08, 0.00000000e+00, 2.82842712e+00, 3.57109342e+00,
        1.73771570e+00
    ])
    expected_pi = np.array([0, 1, 4, 1, 1, 1, 2, 1, 4, 2, 1, 2, 3, 4, 2, 1, 3])

    result = mpx(ts, w, cross_correlation=False, query=query, n_jobs=-1)

    np.testing.assert_almost_equal(result['mp'], expected_mp, decimal=4)
    np.testing.assert_almost_equal(result['pi'], expected_pi)


def test_mpx_similarity_join_matlab():
    """AB-join output should agree with the MATLAB reference implementation."""
    data_dir = os.path.join(MODULE_PATH, '..', 'tests')
    ts = np.loadtxt(os.path.join(data_dir, 'sampledata.txt'))
    tsb = ts[199:300]
    w = 32

    ml_mpa = np.loadtxt(os.path.join(data_dir, 'mpx_ab_mpa.txt'))
    ml_mpb = np.loadtxt(os.path.join(data_dir, 'mpx_ab_mpb.txt'))

    mpa, mpia, mpb, mpib = mpx_ab_parallel(ts, tsb, w, 0, 1)

    np.testing.assert_almost_equal(ml_mpa, mpa, decimal=4)
    np.testing.assert_almost_equal(ml_mpb, mpb, decimal=4)
= ts[199:300] 125 | w = 32 126 | 127 | ml_mpa = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'mpx_ab_mpa.txt')) 128 | ml_mpb = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'mpx_ab_mpb.txt')) 129 | 130 | mpa, mpia, mpb, mpib = mpx_ab_parallel(ts, tsb, w, 0, 2) 131 | 132 | np.testing.assert_almost_equal(ml_mpa, mpa, decimal=4) 133 | np.testing.assert_almost_equal(ml_mpb, mpb, decimal=4) -------------------------------------------------------------------------------- /tests/test_mstomp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile.algorithms.mstomp import mstomp 18 | 19 | 20 | def test_mstomp_window_size_less_than_4(): 21 | ts = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [8, 7, 6, 5, 4, 3, 2, 1]]) 22 | w = 2 23 | 24 | with pytest.raises(ValueError) as excinfo: 25 | mstomp(ts, w) 26 | assert 'window size must be at least 4.' 
def test_mstomp_time_series_too_small():
    """A series too short for the requested window must be rejected."""
    ts = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [8, 7, 6, 5, 4, 3, 2, 1]])
    w = 8

    with pytest.raises(ValueError) as excinfo:
        mstomp(ts, w)

    assert 'Time series is too short' in str(excinfo.value)


def test_mstomp_single_dimension():
    """1D input yields single-row full, left, and right matrix profiles."""
    ts = np.array([0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0])
    w = 4

    profile = mstomp(ts, w, n_jobs=1)

    # full matrix profile and indices
    np.testing.assert_almost_equal(
        profile['mp'], np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0]]))
    np.testing.assert_almost_equal(
        profile['pi'], np.array([[4, 5, 6, 7, 0, 1, 2, 3, 0]]))

    # left profile: leading entries have no left neighbor (inf)
    np.testing.assert_almost_equal(
        profile['lmp'],
        np.array([[np.inf, np.inf, np.inf, 2.82842712, 0, 0, 0, 0, 0]]))
    np.testing.assert_almost_equal(
        profile['lpi'], np.array([[0, 0, 0, 0, 0, 1, 2, 3, 0]]))

    # right profile: trailing entries have no right neighbor (inf)
    np.testing.assert_almost_equal(
        profile['rmp'],
        np.array([[0, 0, 0, 0, 0, 2.82842712, np.inf, np.inf, np.inf]]))
    np.testing.assert_almost_equal(
        profile['rpi'], np.array([[4, 5, 6, 7, 8, 8, 0, 0, 0]]))
def test_mstomp_single_dimension_multi_threaded():
    """Multi-threaded mstomp on 1D input matches the single-threaded result."""
    ts = np.array([0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0])
    w = 4

    profile = mstomp(ts, w, n_jobs=-1)

    # full matrix profile and indices
    np.testing.assert_almost_equal(
        profile['mp'], np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0]]))
    np.testing.assert_almost_equal(
        profile['pi'], np.array([[4, 5, 6, 7, 0, 1, 2, 3, 0]]))

    # left profile: leading entries have no left neighbor (inf)
    np.testing.assert_almost_equal(
        profile['lmp'],
        np.array([[np.inf, np.inf, np.inf, 2.82842712, 0, 0, 0, 0, 0]]))
    np.testing.assert_almost_equal(
        profile['lpi'], np.array([[0, 0, 0, 0, 0, 1, 2, 3, 0]]))

    # right profile: trailing entries have no right neighbor (inf)
    np.testing.assert_almost_equal(
        profile['rmp'],
        np.array([[0, 0, 0, 0, 0, 2.82842712, np.inf, np.inf, np.inf]]))
    np.testing.assert_almost_equal(
        profile['rpi'], np.array([[4, 5, 6, 7, 8, 8, 0, 0, 0]]))
def test_pairwise_dist_valid_simple():
    """Pairwise distances work for both a list of arrays and a 2D ndarray."""
    series = [
        np.arange(100),
        np.arange(100),
        np.ones(100),
        np.zeros(100)
    ]
    w = 8
    expected = np.array([0, 4, 4, 4, 4, 4])

    np.testing.assert_equal(pairwise_dist(series, w), expected)

    # an MxN np.ndarray should behave exactly like a list of 1D arrays
    np.testing.assert_equal(pairwise_dist(np.array(series), w), expected)
def test_compute_dist_valid():
    """compute_dist echoes the index k it was given as the first tuple element."""
    ts = np.arange(100)
    w = 8
    k = 0
    threshold = 0.05

    result = compute_dist((k, ts, ts, w, threshold))

    assert(result[0] == k)
    assert(result[0] == 0)
41 | assert(valid_kwargs['window'] == 5) 42 | assert(valid_kwargs['impute_method'] == 'mean') 43 | assert(valid_kwargs['impute_direction'] == 'forward') 44 | assert(valid_kwargs['add_noise'] == False) 45 | 46 | valid_kwargs = validate_preprocess_kwargs(None) 47 | assert(valid_kwargs == None) 48 | 49 | 50 | def test_invalid_preprocess_kwargs(): 51 | with pytest.raises(ValueError) as excinfo: 52 | validate_preprocess_kwargs(preprocessing_kwargs = 1) 53 | assert "The parameter 'preprocessing_kwargs' is not dict like!" \ 54 | in str(excinfo.value) 55 | 56 | with pytest.raises(ValueError) as excinfo: 57 | preprocessing_kwargs = { 58 | 'win': 5, 59 | 'impute_dir': 'backward', 60 | } 61 | validate_preprocess_kwargs(preprocessing_kwargs) 62 | assert "invalid key(s) for preprocessing_kwargs! valid key(s) should include " \ 63 | "{'impute_direction', 'add_noise', 'impute_method', 'window'}" \ 64 | in str(excinfo.value) 65 | 66 | with pytest.raises(ValueError) as excinfo: 67 | preprocessing_kwargs = {'window': 'str'} 68 | valid_kwargs = validate_preprocess_kwargs(preprocessing_kwargs) 69 | assert "The value for preprocessing_kwargs['window'] is not an integer!" \ 70 | in str(excinfo.value) 71 | 72 | with pytest.raises(ValueError) as excinfo: 73 | preprocessing_kwargs = {'impute_method': False} 74 | valid_kwargs = validate_preprocess_kwargs(preprocessing_kwargs) 75 | assert "invalid imputation method! valid include options: mean, median, min, max" \ 76 | in str(excinfo.value) 77 | 78 | with pytest.raises(ValueError) as excinfo: 79 | preprocessing_kwargs = {'impute_direction': 5} 80 | valid_kwargs = validate_preprocess_kwargs(preprocessing_kwargs) 81 | assert "invalid imputation direction! 
def test_is_subsequence_constant():
    """Non-array input raises; constant vs. varying subsequences are detected."""
    with pytest.raises(ValueError) as excinfo:
        is_subsequence_constant(1)

    assert "subsequence is not array like!" \
        in str(excinfo.value)

    assert(is_subsequence_constant(np.array([1, 1, 1, 1, 1, 1])) == True)
    assert(is_subsequence_constant(np.array([1, 2, 1, 1, 1, 1])) == False)


def test_add_noise_to_series():
    """Non-array input raises; the added noise is tiny and non-negative."""
    with pytest.raises(ValueError) as excinfo:
        add_noise_to_series(1)

    assert "series is not array like!" \
        in str(excinfo.value)

    ts = np.array([1, 1, 1, 1, 1, 1, 1, 1])

    noisy = add_noise_to_series(ts)
    delta = noisy - ts
    assert(np.all(delta >= 0) and np.all(delta < 0.0000009))
def test_preprocess():
    """Non-array input raises; preprocessing removes every NaN/inf value."""
    with pytest.raises(ValueError) as excinfo:
        preprocess(1, window=4)

    assert "ts is not array like!" \
        in str(excinfo.value)

    dirty = np.array([np.nan, np.inf, np.inf, np.nan, np.inf, 2, 3, 2, 3, 1, 2, 3, 4, 2,
                      np.nan, np.inf, 4, 2, 3, 4, 5, 6, 7, 8, 3, 4, 2, 3, 4, 5, 6, 7, 6,
                      5, 4, 3, np.nan, np.nan, np.inf, np.nan, np.inf, np.nan])

    cleaned = preprocess(dirty, window=4)
    assert(not np.any(np.isnan(cleaned)))
    assert(not np.any(np.isinf(cleaned)))
def test_time_series_too_short_exception():
    """scrimp++ rejects a series too short for the requested window."""
    with pytest.raises(ValueError) as excinfo:
        scrimp.scrimp_plus_plus([1, 2, 3, 4, 5], 4, 0.25)

    assert 'Time series is too short' in str(excinfo.value)


def test_window_size_minimum_exception():
    """scrimp++ rejects window sizes below the minimum of 4."""
    with pytest.raises(ValueError) as excinfo:
        scrimp.scrimp_plus_plus([1, 2, 3, 4, 5], 2, 0.25)

    assert 'Window size must be at least 4' in str(excinfo.value)
39 | with pytest.raises(ValueError) as excinfo: 40 | scrimp.scrimp_plus_plus([1, 2, 3, 4, 5], 2, -1) 41 | assert exc in str(excinfo.value) 42 | 43 | 44 | def test_invalid_step_size_str(): 45 | exc = 'step_size should be a float between 0 and 1.' 46 | with pytest.raises(ValueError) as excinfo: 47 | scrimp.scrimp_plus_plus([1, 2, 3, 4, 5], 2, 'a') 48 | assert exc in str(excinfo.value) 49 | 50 | 51 | def test_invalid_step_size_greater(): 52 | exc = 'step_size should be a float between 0 and 1.' 53 | with pytest.raises(ValueError) as excinfo: 54 | scrimp.scrimp_plus_plus([1, 2, 3, 4, 5], 2, 2) 55 | assert exc in str(excinfo.value) 56 | 57 | 58 | def test_invalid_random_state_exception(): 59 | exc = 'Invalid random_state value given.' 60 | with pytest.raises(ValueError) as excinfo: 61 | scrimp.scrimp_plus_plus([1, 2, 3, 4, 5], 2, random_state='adsf') 62 | assert exc in str(excinfo.value) 63 | 64 | 65 | def test_scrimp_plus_plus(): 66 | ts = np.array([0, 0, 1, 0, 0, 0, 1, 0]) 67 | m = 4 68 | step_size = 0.25 69 | profile = scrimp.scrimp_plus_plus(ts, m, step_size=step_size, sample_pct=1.0) 70 | 71 | expected_mp = np.array([ 72 | 0, 73 | 3.2660, 74 | 3.2660, 75 | 3.2660, 76 | 0 77 | ]) 78 | expected_mpidx = np.array([ 79 | 4, 80 | 3, 81 | 0, 82 | 0, 83 | 0, 84 | ]) 85 | 86 | np.testing.assert_almost_equal(profile['mp'], expected_mp, decimal=4) 87 | np.testing.assert_equal(profile['pi'], expected_mpidx) 88 | 89 | ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt')) 90 | m = 32 91 | step_size = 0.25 92 | profile = scrimp.scrimp_plus_plus(ts, m, step_size=step_size, sample_pct=1.0) 93 | expected_mp = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'scrimp.mp.txt')) 94 | expected_mpi = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'scrimp.mpi.txt')).astype('int') - 1 95 | 96 | np.testing.assert_almost_equal(profile['mp'], expected_mp) 97 | np.testing.assert_equal(profile['pi'], expected_mpi) 98 | 
# ===== tests/test_skimp.py =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

range = getattr(__builtins__, 'xrange', range)
# end of py2 compatability boilerplate

import os

import pytest

import numpy as np

from matrixprofile.algorithms import skimp
from matrixprofile.algorithms.skimp import binary_split
from matrixprofile.algorithms.skimp import maximum_subsequence
from matrixprofile.exceptions import NoSolutionPossible


def test_binary_split_1():
    """Splitting a single index yields just [0]."""
    desired = [0]
    actual = binary_split(1)

    np.testing.assert_equal(actual, desired)


def test_binary_split_many():
    """Indices are visited in binary-split order."""
    desired = [0, 5, 2, 7, 1, 3, 6, 8, 4, 9]
    actual = binary_split(10)

    np.testing.assert_equal(actual, desired)


def test_maximum_subsequence_36():
    """A planted repeat of length 2**5 gives an upper window of 36."""
    np.random.seed(9999)
    ts = np.random.uniform(size=2**10)
    w = 2**5
    subq = ts[0:w]
    ts[0:w] = subq
    # plant a second copy of the subsequence 100 points later
    ts[w+100:w+100+w] = subq

    upper = maximum_subsequence(ts, 0.98)

    assert(upper == 36)


def test_maximum_subsequence_68():
    """A planted repeat of length 2**6 gives an upper window of 68."""
    np.random.seed(9999)
    ts = np.random.uniform(size=2**10)
    w = 2**6
    subq = ts[0:w]
    ts[0:w] = subq
    ts[w+100:w+100+w] = subq

    upper = maximum_subsequence(ts, 0.98)

    assert(upper == 68)

def test_maximum_subsequence_no_windows():
    """A correlation threshold of 1.0 leaves no feasible window."""
    np.random.seed(9999)
    ts = np.random.uniform(size=2**10)
    w = 2**6
    subq = ts[0:w]
    ts[0:w] = subq
    ts[w+100:w+100+w] = subq

    with pytest.raises(NoSolutionPossible) as excinfo:
        upper = maximum_subsequence(ts, 1.0)
        assert 'no windows' in str(excinfo.value)


# ===== tests/test_snippets.py =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

range = getattr(__builtins__, 'xrange', range)
# end of py2 compatability boilerplate

import os

import pytest

import numpy as np

from matrixprofile.algorithms.snippets import snippets

import matrixprofile

MODULE_PATH = matrixprofile.__path__[0]

def test_snippets():
    """Snippet indices and neighbor sums match precomputed values."""
    ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt'))
    w = 32
    snippet_size = 64

    result = snippets(ts, snippet_size, window_size=w)
    assert(result[0]['index'] == 384)
    assert(result[1]['index'] == 704)
    assert(sum(result[0]['neighbors']) == 191408)
    assert(sum(result[1]['neighbors']) == 190967)

    # test inferred window size of snippet size / 2
    result = snippets(ts, snippet_size)
    assert(result[0]['index'] == 384)
    assert(result[1]['index'] == 704)
    assert(sum(result[0]['neighbors']) == 191408)
    assert(sum(result[1]['neighbors']) == 190967)

    snippet_size = 128
    result = snippets(ts, snippet_size, window_size=w)
    assert(result[0]['index'] == 384)
    assert(result[1]['index'] == 640)
    assert(sum(result[0]['neighbors']) == 227661)
    assert(sum(result[1]['neighbors']) == 154714)

    snippet_size = 8
    result = snippets(ts, snippet_size, window_size=snippet_size / 2)
    assert(result[0]['index'] == 72)
    assert(result[1]['index'] == 784)
    assert(sum(result[0]['neighbors']) == 149499)
    assert(sum(result[1]['neighbors']) == 232876)

def test_invalid_snippet_size():
    """snippet_size must be an integer of at least 4."""
    ts = np.arange(100)
    ss = 2

    error = 'snippet_size must be an integer >= 4'
    with pytest.raises(ValueError) as excinfo:
        snippets(ts, ss)
        assert(error == str(excinfo.value))

    with pytest.raises(ValueError) as excinfo:
        snippets(ts, '232')
        assert(error == str(excinfo.value))


def test_invalid_snippet_size_and_ts():
    """A snippet length close to len(ts) is rejected."""
    ts = np.arange(100)
    ss = 75

    error = 'Time series is too short relative to snippet length'
    with pytest.raises(ValueError) as excinfo:
        snippets(ts, ss)
        assert(error == str(excinfo.value))


def test_window_size_greater_snippet_size():
    """window_size must be strictly smaller than snippet_size."""
    ts = np.arange(100)
    ss = 25
    w = 30

    error = 'window_size must be smaller than snippet_size'
    with pytest.raises(ValueError) as excinfo:
        snippets(ts, ss, window_size=w)
        assert(error == str(excinfo.value))


# ===== tests/test_statistics.py =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

range = getattr(__builtins__, 'xrange', range)
# end of py2 compatability boilerplate

import os

import pytest

import numpy as np

from matrixprofile.algorithms.statistics import statistics


def test_invalid_ts_not_1d():
    """A 2D time series is rejected."""
    ts = np.array([[1, 1], [1, 1]])
    w = 2

    with pytest.raises(ValueError) as excinfo:
        statistics(ts, w)
        assert 'The time series must be 1D' in str(excinfo.value)


def test_invalid_ts_not_array():
    """None is not array-like and is rejected."""
    ts = None
    w = 2

    with pytest.raises(ValueError) as excinfo:
        statistics(ts, w)
        assert 'ts must be array like' in str(excinfo.value)


def test_invalid_window_size_not_int():
    """A non-integer window_size is rejected."""
    ts = np.arange(10)
    w = 's'

    with pytest.raises(ValueError) as excinfo:
        statistics(ts, w)
        assert 'Expecting int for window_size' in str(excinfo.value)


def test_invalid_window_size_too_large():
    """window_size may not exceed the series length."""
    ts = np.arange(10)
    w = 11

    with pytest.raises(ValueError) as excinfo:
        statistics(ts, w)
        assert 'Window size cannot be greater than len(ts)' in str(excinfo.value)


def test_invalid_window_size_too_small():
    """window_size must be at least 3."""
    ts = np.arange(10)
    w = 2

    with pytest.raises(ValueError) as excinfo:
        statistics(ts, w)
        assert 'Window size cannot be less than 3' in str(excinfo.value)


def test_valid():
    """Global and moving statistics match hand-computed values."""
    ts = np.array([1, 3, 2, 4, 5, 1, 1, 1, 2, 4, 9, 7])
    w = 4
    ts_stats = statistics(ts, w)

    assert(ts_stats['min'] == 1)
    assert(ts_stats['max'] == 9)
    np.testing.assert_almost_equal(ts_stats['mean'], 3.3333333)
    np.testing.assert_almost_equal(ts_stats['std'], 2.494438257)
    assert(ts_stats['median'] == 2.5)
    np.testing.assert_almost_equal(ts_stats['moving_min'], np.array([1, 2, 1, 1, 1, 1, 1, 1, 2]))
    np.testing.assert_almost_equal(ts_stats['moving_max'], np.array([4, 5, 5, 5, 5, 2, 4, 9, 9]))
    np.testing.assert_almost_equal(ts_stats['moving_mean'], np.array([2.5, 3.5, 3.0, 2.75, 2.0, 1.25, 2.0, 4.0, 5.5]))
    np.testing.assert_almost_equal(ts_stats['moving_std'], np.array([1.11803399, 1.11803399, 1.58113883, 1.78535711, 1.73205081, 0.4330127, 1.22474487, 3.082207, 2.6925824]))
    np.testing.assert_almost_equal(ts_stats['moving_median'], np.array([2.5, 3.5, 3.0, 2.5, 1.0, 1.0, 1.5, 3.0, 5.5]))
    np.testing.assert_equal(ts_stats['ts'], ts)
    assert(ts_stats['window_size'] == w)
    assert(ts_stats['class'] == 'Statistics')
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile.algorithms.stomp import stomp 18 | 19 | 20 | def test_stomp_window_size_less_than_4(): 21 | ts = np.array([1, 2, 3, 4, 5, 6, 7, 8]) 22 | w = 2 23 | 24 | with pytest.raises(ValueError) as excinfo: 25 | stomp(ts, w) 26 | assert 'window size must be at least 4.' in str(excinfo.value) 27 | 28 | 29 | def test_stomp_window_size_too_small(): 30 | ts = np.array([1, 2, 3, 4, 5, 6, 7, 8]) 31 | w = 8 32 | 33 | with pytest.raises(ValueError) as excinfo: 34 | stomp(ts, w) 35 | assert 'Time series is too short' in str(excinfo.value) 36 | 37 | 38 | def test_stomp_small_series_self_join_single_threaded(): 39 | ts = np.array([0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0]) 40 | w = 4 41 | desired = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]) 42 | desired_pi = np.array([4, 5, 6, 7, 0, 1, 2, 3, 0]) 43 | 44 | desired_lmp = np.array([np.inf, np.inf, np.inf, 2.82842712, 0, 0, 0, 0, 0]) 45 | desired_lpi = np.array([0, 0, 0, 0, 0, 1, 2, 3, 0]) 46 | 47 | desired_rmp = np.array([0, 0, 0, 0, 0, 2.82842712, np.inf, np.inf, np.inf]) 48 | desired_rpi = np.array([4, 5, 6, 7, 8, 8, 0, 0, 0]) 49 | 50 | profile = stomp(ts, w, n_jobs=1) 51 | np.testing.assert_almost_equal(profile['mp'], desired) 52 | np.testing.assert_almost_equal(profile['pi'], desired_pi) 53 | 54 | np.testing.assert_almost_equal(profile['lmp'], desired_lmp) 55 | np.testing.assert_almost_equal(profile['lpi'], desired_lpi) 56 | 57 | np.testing.assert_almost_equal(profile['rmp'], desired_rmp) 58 | np.testing.assert_almost_equal(profile['rpi'], desired_rpi) 59 
| 60 | 61 | def test_stomp_small_series_self_join_multi_threaded(): 62 | ts = np.array([0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0]) 63 | w = 4 64 | desired = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]) 65 | desired_pi = np.array([4, 5, 6, 7, 0, 1, 2, 3, 0]) 66 | 67 | desired_lmp = np.array([np.inf, np.inf, np.inf, 2.82842712, 0, 0, 0, 0, 0]) 68 | desired_lpi = np.array([0, 0, 0, 0, 0, 1, 2, 3, 0]) 69 | 70 | desired_rmp = np.array([0, 0, 0, 0, 0, 2.82842712, np.inf, np.inf, np.inf]) 71 | desired_rpi = np.array([4, 5, 6, 7, 8, 8, 0, 0, 0]) 72 | 73 | profile = stomp(ts, w, n_jobs=-1) 74 | np.testing.assert_almost_equal(profile['mp'], desired) 75 | np.testing.assert_almost_equal(profile['pi'], desired_pi) 76 | 77 | np.testing.assert_almost_equal(profile['lmp'], desired_lmp) 78 | np.testing.assert_almost_equal(profile['lpi'], desired_lpi) 79 | 80 | np.testing.assert_almost_equal(profile['rmp'], desired_rmp) 81 | np.testing.assert_almost_equal(profile['rpi'], desired_rpi) 82 | 83 | -------------------------------------------------------------------------------- /tests/test_top_k_discords.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile.algorithms.top_k_discords import top_k_discords 18 | 19 | 20 | def test_mp_all_same(): 21 | profile = { 22 | 'mp': np.ones(10), 23 | 'ez': 2, 24 | 'w': 4, 25 | 'class': 'MatrixProfile' 26 | } 27 | 28 | discords = top_k_discords(profile)['discords'] 29 | desired = np.array([9, 6, 3]) 30 | np.testing.assert_almost_equal(discords, desired) 31 | 32 | 33 | def test_discords_no_exclusion(): 34 | profile = { 35 | 
'mp': np.array([1, 2, 3, 4]), 36 | 'w': 4, 37 | 'class': 'MatrixProfile' 38 | } 39 | desired = np.array([3, 2, 1]) 40 | discords = top_k_discords(profile, k=3, exclusion_zone=0)['discords'] 41 | np.testing.assert_almost_equal(discords, desired) 42 | 43 | 44 | def test_discords_no_exclusion_all(): 45 | profile = { 46 | 'mp': np.array([1, 2, 3, 4]), 47 | 'w': 4, 48 | 'class': 'MatrixProfile' 49 | } 50 | desired = np.array([3, 2, 1, 0]) 51 | discords = top_k_discords(profile, k=4, exclusion_zone=0)['discords'] 52 | np.testing.assert_almost_equal(discords, desired) 53 | 54 | 55 | def test_discords_exclude_one(): 56 | profile = { 57 | 'mp': np.array([1, 2, 3, 4]), 58 | 'w': 4, 59 | 'class': 'MatrixProfile' 60 | } 61 | desired = np.array([3, 1]) 62 | discords = top_k_discords(profile, k=4, exclusion_zone=1)['discords'] 63 | np.testing.assert_almost_equal(discords, desired) -------------------------------------------------------------------------------- /tests/test_top_k_motifs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | range = getattr(__builtins__, 'xrange', range) 9 | # end of py2 compatability boilerplate 10 | 11 | import os 12 | 13 | import pytest 14 | 15 | import numpy as np 16 | 17 | from matrixprofile.algorithms.top_k_motifs import top_k_motifs 18 | 19 | def test_all_inf(): 20 | obj = { 21 | 'mp': np.array([np.inf, np.inf, np.inf, np.inf, np.inf]), 22 | 'pi': np.array([0, 0, 0, 0, 0]), 23 | 'w': 4, 24 | 'data': { 25 | 'ts': np.array([1, 1, 1, 1, 1, 1, 1, 1]) 26 | }, 27 | 'class': 'MatrixProfile' 28 | } 29 | 30 | motifs = top_k_motifs(obj) 31 | desired = np.array([]) 32 | 33 | np.testing.assert_equal(motifs, desired) -------------------------------------------------------------------------------- 
# ===== tests/test_utils.py =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

range = getattr(__builtins__, 'xrange', range)
# end of py2 compatability boilerplate

import os

import pytest

import numpy as np

from matrixprofile import utils
from matrixprofile import compute

import matrixprofile
MODULE_PATH = matrixprofile.__path__[0]

def test_empty_mp():
    """empty_mp() provides every key of the MatrixProfile data structure."""
    keys = [
        'mp',
        'pi',
        'rmp',
        'rpi',
        'lmp',
        'lpi',
        'metric',
        'w',
        'ez',
        'join',
        'data',
        'class',
        'algorithm',
    ]

    empty = utils.empty_mp()

    for key in keys:
        assert(key in empty)

    # the nested data dict carries the series and query placeholders
    assert('ts' in empty['data'])
    assert('query' in empty['data'])


def test_pick_mp():
    """pick_mp() extracts the w=32 profile from a pan matrix profile."""
    ts = np.loadtxt(os.path.join(MODULE_PATH, '..', 'tests', 'sampledata.txt'))
    n = len(ts)
    pmp = compute(ts)
    mp = utils.pick_mp(pmp, 32)

    assert(mp['w'] == 32)
    assert(mp['algorithm'] == 'mpx')
    # a profile for window w has n - w + 1 entries
    assert(len(mp['mp']) == n - mp['w'] + 1)
    np.testing.assert_equal(mp['data']['ts'], ts)


# ===== tests/test_visualize.py =====
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

range = getattr(__builtins__, 'xrange', range)
# end of py2 compatability boilerplate

import os

import pytest

import numpy as np

from matrixprofile.algorithms.stomp import stomp
from matrixprofile.algorithms.skimp import skimp
from matrixprofile.visualize import visualize
from matrixprofile.visualize import plot_snippets
from matrixprofile.algorithms.snippets import snippets

def test_catch_all_visualize_invalid_structure():
    """An unrecognized dict raises with a descriptive message."""
    data = {}
    with pytest.raises(Exception) as e:
        visualize(data)
        assert('MatrixProfile, Pan-MatrixProfile or Statistics data structure expected!' == str(e.value))


def test_catch_all_visualize_mp_only():
    """A bare matrix profile produces exactly one figure."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)

    # expect only the matrix profile plot
    figures = visualize(profile)
    assert(len(figures) == 1)


def test_catch_all_visualize_mp_cmp():
    """Adding a corrected matrix profile adds one figure."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)
    profile['cmp'] = np.arange(len(ts) - w + 1)

    figures = visualize(profile)
    assert(len(figures) == 2)


def test_catch_all_visualize_mp_av():
    """Adding an annotation vector adds one figure."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)
    profile['av'] = np.arange(len(ts) - w + 1)

    figures = visualize(profile)
    assert(len(figures) == 2)


def test_catch_all_visualize_mp_cmp_av():
    """cmp and av together add two figures."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)
    profile['cmp'] = np.arange(len(ts) - w + 1)
    profile['av'] = np.arange(len(ts) - w + 1)

    figures = visualize(profile)
    assert(len(figures) == 3)


def test_catch_all_visualize_mp_discords():
    """Discord results add one figure."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)
    profile['discords'] = [0, 1]

    figures = visualize(profile)
    assert(len(figures) == 2)


def test_catch_all_visualize_mp_motifs():
    """Motif results add two figures."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)
    profile['motifs'] = [{'motifs': [1, 1], 'neighbors': []}]

    figures = visualize(profile)
    assert(len(figures) == 3)


def test_catch_all_visualize_mp_motifs_discords():
    """Discords and motifs together add three figures."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = 4

    profile = stomp(ts, w, n_jobs=1)
    profile['discords'] = [0, 1]
    profile['motifs'] = [{'motifs': [1, 1], 'neighbors': []}]

    figures = visualize(profile)
    assert(len(figures) == 4)


def test_catch_all_visualize_pmp_only():
    """A bare pan matrix profile produces exactly one figure."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = [4, 5, 6]

    profile = skimp(ts, w, n_jobs=1)

    # expect only the matrix profile plot
    figures = visualize(profile)
    assert(len(figures) == 1)


def test_catch_all_visualize_pmp_discords():
    """PMP discord results add two figures."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = [4, 5, 6]

    profile = skimp(ts, w, n_jobs=1)
    profile['discords'] = [(0, 1), (0, 2)]

    figures = visualize(profile)
    assert(len(figures) == 3)


def test_catch_all_visualize_pmp_motifs():
    """PMP motif results add two figures."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = [4, 5, 6]

    profile = skimp(ts, w, n_jobs=1)
    profile['motifs'] = [{'motifs': [(1, 1)], 'neighbors': []}]

    figures = visualize(profile)
    assert(len(figures) == 3)

def test_catch_all_visualize_pmp_motifs_discords():
    """PMP discords and motifs together add four figures."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    w = [4, 5, 6]

    profile = skimp(ts, w, n_jobs=1)
    profile['discords'] = [(0, 1), (0, 2)]
    profile['motifs'] = [{'motifs': [(1, 1)], 'neighbors': []}]

    figures = visualize(profile)
    assert(len(figures) == 5)


def test_catch_all_stats():
    """A Statistics structure produces exactly one figure."""
    profile = {
        'class': 'Statistics',
        'ts': np.array([]),
        'window_size': 100
    }

    figures = visualize(profile)
    assert(len(figures) == 1)


def test_catch_all_visualize_snippets():
    """plot_snippets() yields one figure per requested snippet."""
    ts = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    snippet_size = 4
    snippet_num = 1

    snippet_list = snippets(ts, snippet_size, snippet_num)

    figures = plot_snippets(snippet_list, ts)
    assert (len(figures) == snippet_num)


# ===== version.py =====
# Single source of truth for the package version.
__version_info__ = (1, 1, 10)
__version__ = '.'.join(map(str, __version_info__))
MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION = __version_info__