├── .appveyor.yml ├── .gitignore ├── .travis.yml ├── .whitesource ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── analyze.go ├── av ├── annotation_vector.go └── annotation_vector_test.go ├── curr_bench.txt ├── discover.go ├── discover_test.go ├── example_caseStudy_test.go ├── example_matrixprofile_test.go ├── example_test.go ├── go.mod ├── go.sum ├── kmp.go ├── kmp_bench_test.go ├── kmp_test.go ├── matrixprofile.go ├── matrixprofile_bench_test.go ├── matrixprofile_test.go ├── mp_kdim.png ├── mp_sine.png ├── pmp.go ├── pmp_test.go ├── siggen ├── siggen.go └── siggen_test.go ├── util ├── util.go └── util_test.go └── visualize.go /.appveyor.yml: -------------------------------------------------------------------------------- 1 | # appveyor.yml 2 | build: off 3 | 4 | clone_folder: c:\gopath\src\github.com\matrix-profile-foundation\go-matrixprofile 5 | 6 | environment: 7 | GOPATH: c:\gopath 8 | 9 | stack: go 1.13 10 | 11 | test_script: 12 | - go test -v ./... 
-race -coverprofile=coverage.txt -covermode=atomic 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Generated dot file from cpu profile 15 | *.pdf 16 | 17 | # Automatically generated benchmark file to compare againt curr_bench.txt 18 | new_bench.txt 19 | 20 | # Generated by travis-ci make command 21 | coverage.txt 22 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | script: make travis-ci 4 | 5 | go: 6 | - "1.13" 7 | 8 | os: 9 | - linux 10 | - osx 11 | 12 | after_success: 13 | - bash <(curl -s https://codecov.io/bash) 14 | -------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "generalSettings": { 3 | "shouldScanRepo": true 4 | }, 5 | "checkRunSettings": { 6 | "vulnerableCheckRunConclusionLevel": "failure" 7 | } 8 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of 
experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. 
Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at aouyang1@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | * Fork the repository 2 | * Create a new feature branch for the new feature or bug fix 3 | * Run tests 4 | * Commit your changes 5 | * Push code and open a new pull request 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2020 Matrix Profile Foundation and contributors. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 
24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. 
For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. 
If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. 
You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. 
(Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright [yyyy] [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | usage: 2 | @echo "make all : Runs all tests, examples, and benchmarks" 3 | @echo "make test : Runs test suite" 4 | @echo "make bench : Runs benchmarks" 5 | @echo "make example : Runs examples" 6 | @echo "make travis-ci : Travis CI specific testing" 7 | 8 | all: test bench example 9 | 10 | test: 11 | go test -race -cover -run=Test ./... 12 | 13 | bench: 14 | go test ./... -run=NONE -bench=. -test.benchmem > new_bench.txt 15 | if ! type benchcmp > /dev/null; then go get golang.org/x/tools/cmd/benchcmp; fi 16 | benchcmp curr_bench.txt new_bench.txt 17 | 18 | example: 19 | go test ./... -run=Example 20 | 21 | travis-ci: 22 | go test -v ./... 
-race -coverprofile=coverage.txt -covermode=atomic 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/matrix-profile-foundation/go-matrixprofile.svg?branch=master)](https://travis-ci.com/matrix-profile-foundation/go-matrixprofile) 2 | [![Windows Build Status](https://ci.appveyor.com/api/projects/status/tp7cqme05eytqw94?svg=true)](https://ci.appveyor.com/api/projects/status/tp7cqme05eytqw94?svg=true) 3 | [![codecov](https://codecov.io/gh/matrix-profile-foundation/go-matrixprofile/branch/master/graph/badge.svg)](https://codecov.io/gh/matrix-profile-foundation/go-matrixprofile) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/matrix-profile-foundation/go-matrixprofile)](https://goreportcard.com/report/github.com/matrix-profile-foundation/go-matrixprofile) 5 | [![GoDoc](https://godoc.org/github.com/matrix-profile-foundation/go-matrixprofile?status.svg)](https://godoc.org/github.com/matrix-profile-foundation/go-matrixprofile) 6 | [![License](https://img.shields.io/badge/License-Apache2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 7 | 8 | # go-matrixprofile 9 | 10 | Golang library for computing matrix profiles and matrix profile indexes. Features also include time series discords, time series segmentation, and motif discovery after computing the matrix profile. Visit [The UCR Matrix Profile Page](https://www.cs.ucr.edu/~eamonn/MatrixProfile.html) for more details on matrix profiles. 
11 | 12 | ## Contents 13 | - [Installation](#installation) 14 | - [Quick start](#quick-start) 15 | - [Case Studies](#case-studies) 16 | * [Matrix Profile](#matrix-profile) 17 | * [Multi-Dimensional Matrix Profile](#multi-dimensional-matrix-profile) 18 | - [Benchmarks](#benchmarks) 19 | - [Contributing](#contributing) 20 | - [Testing](#testing) 21 | - [Other Libraries](#other-libraries) 22 | - [Contact](#contact) 23 | - [License](#license) 24 | - [Citations](#citations) 25 | 26 | ## Installation 27 | ```sh 28 | $ go get github.com/matrix-profile-foundation/go-matrixprofile 29 | ``` 30 | 31 | ## Quick start 32 | ```go 33 | // example_mp.go 34 | package main 35 | 36 | import ( 37 | "fmt" 38 | 39 | mp "github.com/matrix-profile-foundation/go-matrixprofile" 40 | ) 41 | 42 | func main() { 43 | sig := []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0} 44 | 45 | p, err := mp.New(sig, nil, 4) 46 | if err != nil { 47 | panic(err) 48 | } 49 | 50 | if err = p.Compute(nil); err != nil { 51 | panic(err) 52 | } 53 | 54 | fmt.Printf("Signal: %.3f\n", sig) 55 | fmt.Printf("Matrix Profile: %.3f\n", p.MP) 56 | fmt.Printf("Profile Index: %5d\n", p.Idx) 57 | } 58 | ``` 59 | ```sh 60 | $ go run example_mp.go 61 | Signal: [0.000 0.990 1.000 0.000 0.000 0.980 1.000 0.000 0.000 0.960 1.000 0.000] 62 | Matrix Profile: [0.014 0.014 0.029 0.029 0.014 0.014 0.029 0.029 0.029] 63 | Profile Index: [ 4 5 6 7 0 1 2 3 4] 64 | ``` 65 | 66 | ## Case studies 67 | ### Matrix Profile 68 | Going through a completely synthetic scenario, we'll cover what features to look for in a matrix profile, and what the additional Discords, TopKMotifs, and Segment tell us. We'll first be generating a fake signal that is composed of sine waves, noise, and sawtooth waves. We then run STOMP on the signal to calculate the matrix profile and matrix profile indexes. 
69 | 70 | ![mpsin](https://github.com/matrix-profile-foundation/go-matrixprofile/blob/master/mp_sine.png) 71 | subsequence length: 32 72 | 73 | * signal: This shows our raw data. There are several oddities and patterns that can be seen here. 74 | * matrix profile: generated by running STOMP on this signal which generates both the matrix profile and the matrix profile index. In the matrix profile we see several spikes which indicate that these may be time series discords or anomalies in the time series. 75 | * corrected arc curve: This shows the segmentation of the time series. The two lowest dips around index 420 and 760 indicate potential state changes in the time series. At 420 we see the sinusoidal wave move into a more pulsed pattern. At 760 we see the pulsed pattern move into a sawtooth pattern. 76 | * discords: The discords graph shows the top 3 potential discords of the defined subsequence length, m, based on the 3 highest peaks in the matrix profile. This is mostly composed of noise. 77 | * motifs: These represent the top 6 motifs found from the time series. The first being the initial sine wave pattern. The second is during the pulsed sequence on a fall of the pulse to the noise. The third is during the pulsed sequence on the rise from the noise to the pulse. The fourth and fifth are the sawtooth patterns. 78 | 79 | The code to generate the graph can be found in [this example](https://github.com/matrix-profile-foundation/go-matrixprofile/blob/master/example_caseStudy_test.go#L104). 80 | 81 | ### Multi-Dimensional Matrix Profile 82 | Based on [4] we can extend the matrix profile algorithm to a multi-dimensional scenario. 83 | 84 | ![mpkdim](https://github.com/matrix-profile-foundation/go-matrixprofile/blob/master/mp_kdim.png) 85 | subsequence length: 25 86 | 87 | * signal 0-2: the 3 time series dimensions 88 | * matrix profile 0-2: the k-dimensional matrix profile representing choose k from d time series. 
matrix profile 1 minima represent motifs that span at that time across 2 time series of the 3 available. matrix profile 2 minima represents the motifs that span at that time across 3 time series. 89 | 90 | The plots can be generated by running 91 | ```sh 92 | $ make example 93 | go test ./... -run=Example 94 | ok github.com/matrix-profile-foundation/go-matrixprofile 0.256s 95 | ok github.com/matrix-profile-foundation/go-matrixprofile/av (cached) [no tests to run] 96 | ok github.com/matrix-profile-foundation/go-matrixprofile/siggen (cached) [no tests to run] 97 | ok github.com/matrix-profile-foundation/go-matrixprofile/util (cached) [no tests to run] 98 | ``` 99 | A png file will be saved in the top level directory of the repository as `mp_sine.png` and `mp_kdim.png` 100 | 101 | ## Benchmarks 102 | ```sh 103 | BenchmarkMStomp-4 39 29853485 ns/op 7336245 B/op 227071 allocs/op 104 | BenchmarkZNormalize-4 7112282 185 ns/op 256 B/op 1 allocs/op 105 | BenchmarkMovmeanstd-4 89810 13628 ns/op 32768 B/op 4 allocs/op 106 | BenchmarkCrossCorrelate-4 15190 75262 ns/op 24584 B/op 3 allocs/op 107 | BenchmarkMass-4 15421 78660 ns/op 24842 B/op 4 allocs/op 108 | BenchmarkDistanceProfile-4 15190 79092 ns/op 24842 B/op 4 allocs/op 109 | BenchmarkCalculateDistanceProfile-4 220363 4625 ns/op 0 B/op 0 allocs/op 110 | BenchmarkStmp/m32_pts1k-4 15 77806814 ns/op 24209736 B/op 3892 allocs/op 111 | BenchmarkStmp/m128_pts1k-4 16 70673766 ns/op 22496294 B/op 3508 allocs/op 112 | BenchmarkStamp/m32_p2_pts1k-4 25 46207243 ns/op 24284148 B/op 3909 allocs/op 113 | BenchmarkStomp/m128_p1_pts__1024-4 152 7740858 ns/op 196805 B/op 28 allocs/op 114 | BenchmarkStomp/m128_p2_pts__4096-4 13 81826774 ns/op 1116937 B/op 39 allocs/op 115 | BenchmarkStomp/m128_p2_pts_16384-4 1 1342203283 ns/op 4776832 B/op 45 allocs/op 116 | BenchmarkStomp/m128_p4_pts_16384-4 1 1269550826 ns/op 7153728 B/op 67 allocs/op 117 | BenchmarkStomp/m1024_p2_pts_16384-4 1 1235325258 ns/op 4776832 B/op 45 allocs/op 118 | 
BenchmarkMpx/m128_p1_pts__1024-4 564 2310017 ns/op 84591 B/op 26 allocs/op 119 | BenchmarkMpx/m128_p2_pts__4096-4 63 19927988 ns/op 400206 B/op 32 allocs/op 120 | BenchmarkMpx/m128_p2_pts_16384-4 4 294076163 ns/op 1708912 B/op 33 allocs/op 121 | BenchmarkMpx/m128_p4_pts_16384-4 4 327366290 ns/op 2237776 B/op 45 allocs/op 122 | BenchmarkMpx/m1024_p2_pts_16384-4 4 330582811 ns/op 1737584 B/op 33 allocs/op 123 | BenchmarkUpdate-4 80 13849082 ns/op 795065 B/op 18 allocs/op 124 | ``` 125 | 126 | Ran on a 2018 MacBookAir on Jan 6, 2020 127 | ```sh 128 | Processor: 1.6 GHz Intel Core i5 129 | Memory: 8GB 2133 MHz LPDDR3 130 | OS: macOS Mojave v10.14.6 131 | Logical CPUs: 4 132 | Physical CPUs: 2 133 | ``` 134 | ```sh 135 | $ make bench 136 | ``` 137 | 138 | ## Contributing 139 | * Fork the repository 140 | * Create a new branch (feature_\* or bug_\*)for the new feature or bug fix 141 | * Run tests 142 | * Commit your changes 143 | * Push code and open a new pull request 144 | 145 | ## Testing 146 | Run all tests including benchmarks 147 | ```sh 148 | $ make all 149 | ``` 150 | Just run benchmarks 151 | ```sh 152 | $ make bench 153 | ``` 154 | Just run tests 155 | ```sh 156 | $ make test 157 | ``` 158 | 159 | ## Other libraries 160 | * R: [github.com/matrix-profile-foundation/tsmp](https://github.com/matrix-profile-foundation/tsmp) 161 | * Python: [github.com/matrix-profile-foundation/matrixprofile](https://github.com/matrix-profile-foundation/matrixprofile) 162 | 163 | ## Contact 164 | * Austin Ouyang (aouyang1@gmail.com) 165 | 166 | ## License 167 | The Apache 2.0 License. See [LICENSE](https://github.com/matrix-profile-foundation/go-matrixprofile/blob/master/LICENSE) for more details. 168 | 169 | Copyright (c) 2020 Matrix Profile Foundation and contributors. 170 | 171 | ## Citing this work 172 | 173 | Please cite this work using our Journal of Open Source Software article 174 | 175 | Van Benschoten et al., (2020). 
MPA: a novel cross-language API for time series analysis. Journal of Open Source Software, 5(49), 2179, https://doi.org/10.21105/joss.02179 176 | 177 | ## Citations 178 | [1] Chin-Chia Michael Yeh, Yan Zhu, Liudmila Ulanova, Nurjahan Begum, Yifei Ding, Hoang Anh Dau, Diego Furtado Silva, Abdullah Mueen, Eamonn Keogh (2016). [Matrix Profile I: All Pairs Similarity Joins for Time Series: A Unifying View that Includes Motifs, Discords and Shapelets](https://www.cs.ucr.edu/~eamonn/PID4481997_extend_Matrix%20Profile_I.pdf). IEEE ICDM 2016. 179 | 180 | [2] Yan Zhu, Zachary Zimmerman, Nader Shakibay Senobari, Chin-Chia Michael Yeh, Gareth Funning, Abdullah Mueen, Philip Brisk and Eamonn Keogh (2016). [Matrix Profile II: Exploiting a Novel Algorithm and GPUs to break the one Hundred Million Barrier for Time Series Motifs and Joins](https://www.cs.ucr.edu/~eamonn/STOMP_GPU_final_submission_camera_ready.pdf). IEEE ICDM 2016. 181 | 182 | [3] Hoang Anh Dau and Eamonn Keogh (2017). [Matrix Profile V: A Generic Technique to Incorporate Domain Knowledge into Motif Discovery](https://www.cs.ucr.edu/~eamonn/guided-motif-KDD17-new-format-10-pages-v005.pdf). KDD 2017. 183 | 184 | [4] Chin-Chia Michael Yeh, Nickolas Kavantzas, Eamonn Keogh (2017). [Matrix Profile VI: Meaningful Multidimensional Motif Discovery](https://www.cs.ucr.edu/%7Eeamonn/Motif_Discovery_ICDM.pdf). ICDM 2017. 185 | 186 | [5] Shaghayegh Gharghabi, Yifei Ding, Chin-Chia Michael Yeh, Kaveh Kamgar, Liudmila Ulanova, Eamonn Keogh (2017). [Matrix Profile VIII: Domain Agnostic Online Semantic Segmentation at Superhuman Performance Levels](https://www.cs.ucr.edu/%7Eeamonn/Segmentation_ICDM.pdf). ICDM 2017. 
// AnalyzeOpts bundles the settings used when discovering basic features from
// a matrix profile. Only motif, discord and segmentation discovery are
// covered at the moment.
type AnalyzeOpts struct {
	kMotifs        int     // the top k motifs to find
	rMotifs        float64 // the max radius to find motifs
	kDiscords      int     // the top k discords to find
	OutputFilename string  // relative or absolute filepath for the visualization output
}

// NewAnalyzeOpts returns a freshly allocated AnalyzeOpts populated with the
// default analysis settings: 3 motifs within a radius of 2, 3 discords, and
// "mp.png" as the visualization output file.
func NewAnalyzeOpts() *AnalyzeOpts {
	opts := new(AnalyzeOpts)
	opts.kMotifs = 3
	opts.rMotifs = 2
	opts.kDiscords = 3
	opts.OutputFilename = "mp.png"
	return opts
}
2 | package av 3 | 4 | import ( 5 | "fmt" 6 | "math" 7 | 8 | "gonum.org/v1/gonum/floats" 9 | "gonum.org/v1/gonum/stat" 10 | 11 | "github.com/matrix-profile-foundation/go-matrixprofile/util" 12 | ) 13 | 14 | type AV string 15 | 16 | const ( 17 | Default AV = "default" // Default is the default annotation vector of all ones 18 | Complexity AV = "complexity" // Complexity is the annotation vector that focuses on areas of high "complexity" 19 | MeanStd AV = "mean_std" // MeanStd is the annotation vector focusing on areas where the signal is within a standard deviation of the mean 20 | Clipping AV = "clipping" // Clipping is the annotation vector reducing the importance of areas showing clipping effects on the positive and negative regime 21 | ) 22 | 23 | // Create returns the annotation vector given an input time series and a window size m 24 | func Create(av AV, ts []float64, m int) ([]float64, error) { 25 | var avec []float64 26 | switch av { 27 | case Default: 28 | avec = makeDefault(ts, m) 29 | case Complexity: 30 | avec = makeCompexity(ts, m) 31 | case MeanStd: 32 | avec = makeMeanStd(ts, m) 33 | case Clipping: 34 | avec = makeClipping(ts, m) 35 | default: 36 | return nil, fmt.Errorf("invalid annotation vector specified with matrix profile, %s", av) 37 | } 38 | return avec, nil 39 | } 40 | 41 | // makeDefault creates a default annotation vector of all ones resulting in 42 | // no change to the matrix profile when applied 43 | func makeDefault(d []float64, m int) []float64 { 44 | av := make([]float64, len(d)-m+1) 45 | for i := 0; i < len(av); i++ { 46 | av[i] = 1.0 47 | } 48 | return av 49 | } 50 | 51 | // makeCompexity creates an annotation vector that is based on the complexity 52 | // estimation of the signal. 
// makeCompexity builds an annotation vector from a complexity estimate of the
// signal. For every window of length m the estimate is the square root of the
// summed squared differences between consecutive samples. The estimates are
// then rescaled as (value - min) / max; when the largest estimate is 0 the
// whole vector is set to 0.
func makeCompexity(d []float64, m int) []float64 {
	av := make([]float64, len(d)-m+1)
	lowest, highest := math.Inf(1), math.Inf(-1)

	for start := range av {
		sumSq := 0.0
		for j := 1; j < m; j++ {
			step := d[start+j] - d[start+j-1]
			sumSq += step * step
		}
		estimate := math.Sqrt(sumSq)
		av[start] = estimate
		if estimate < lowest {
			lowest = estimate
		}
		if estimate > highest {
			highest = estimate
		}
	}

	// a zero maximum means every window is flat; avoid dividing by zero
	if highest == 0 {
		for i := range av {
			av[i] = 0
		}
		return av
	}

	for i := range av {
		av[i] = (av[i] - lowest) / highest
	}
	return av
}
98 | func makeClipping(d []float64, m int) []float64 { 99 | av := make([]float64, len(d)-m+1) 100 | maxVal, minVal := floats.Max(d), floats.Min(d) 101 | var numClip int 102 | for i := 0; i < len(d)-m+1; i++ { 103 | numClip = 0 104 | for j := 0; j < m; j++ { 105 | if d[i+j] == maxVal || d[i+j] == minVal { 106 | numClip++ 107 | } 108 | } 109 | av[i] = float64(numClip) 110 | } 111 | 112 | minVal = floats.Min(av) 113 | for i := 0; i < len(av); i++ { 114 | av[i] -= minVal 115 | } 116 | 117 | maxVal = floats.Max(av) 118 | for i := 0; i < len(av); i++ { 119 | av[i] = 1 - av[i]/maxVal 120 | } 121 | 122 | return av 123 | } 124 | -------------------------------------------------------------------------------- /av/annotation_vector_test.go: -------------------------------------------------------------------------------- 1 | package av 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | ) 7 | 8 | func TestMakeDefault(t *testing.T) { 9 | testdata := []struct { 10 | d []float64 11 | m int 12 | expected []float64 13 | }{ 14 | {[]float64{0, 1, 2, 3, 4, 5}, 3, []float64{1, 1, 1, 1}}, 15 | } 16 | for _, d := range testdata { 17 | out := makeDefault(d.d, d.m) 18 | 19 | if len(out) != len(d.expected) { 20 | t.Errorf("Expected length %d, but got %d for %v", len(d.expected), len(out), d) 21 | break 22 | } 23 | 24 | for i, val := range out { 25 | if math.Abs(val-d.expected[i]) > 1e-7 { 26 | t.Errorf("Expected value of %.3f, but got %.3f for %v", d.expected[i], val, d) 27 | } 28 | } 29 | } 30 | } 31 | 32 | func TestMakeCompexity(t *testing.T) { 33 | testdata := []struct { 34 | d []float64 35 | m int 36 | expected []float64 37 | }{ 38 | {[]float64{3, 3, 3, 3, 3, 3}, 3, []float64{0, 0, 0, 0}}, 39 | {[]float64{0, 1, 2, 3, 4, 5}, 3, []float64{0, 0, 0, 0}}, 40 | {[]float64{0, 3, 0, 2, 0, 1}, 3, []float64{0.47295372330527, 0.32279030890406757, 0.13962038997193682, 0}}, 41 | } 42 | for _, d := range testdata { 43 | out := makeCompexity(d.d, d.m) 44 | 45 | if len(out) != len(d.expected) { 46 | 
t.Errorf("Expected length %d, but got %d for %v", len(d.expected), len(out), d) 47 | break 48 | } 49 | 50 | for i, val := range out { 51 | if math.Abs(val-d.expected[i]) > 1e-7 { 52 | t.Errorf("Expected value of %.3f, but got %.3f for %v", d.expected[i], val, d) 53 | } 54 | } 55 | } 56 | } 57 | 58 | func TestMakeMeanStd(t *testing.T) { 59 | testdata := []struct { 60 | d []float64 61 | m int 62 | expected []float64 63 | }{ 64 | {[]float64{3, 3, 3, 3, 3, 3}, 3, []float64{0, 0, 0, 0}}, 65 | {[]float64{-10, 10, -10, 1, -1, 1}, 3, []float64{0, 0, 1, 1}}, 66 | {[]float64{0, 3, 0, 2, 0, 1}, 3, []float64{0, 0, 1, 1}}, 67 | } 68 | for _, d := range testdata { 69 | out := makeMeanStd(d.d, d.m) 70 | 71 | if len(out) != len(d.expected) { 72 | t.Errorf("Expected length %d, but got %d for %v", len(d.expected), len(out), d) 73 | break 74 | } 75 | 76 | for i, val := range out { 77 | if math.Abs(val-d.expected[i]) > 1e-7 { 78 | t.Errorf("Expected value of %.3f, but got %.3f for %v", d.expected[i], val, d) 79 | } 80 | } 81 | } 82 | } 83 | 84 | func TestMakeClipping(t *testing.T) { 85 | testdata := []struct { 86 | d []float64 87 | m int 88 | expected []float64 89 | }{ 90 | {[]float64{3, 3, 3, 3, 3, 3}, 3, []float64{0, 0, 0, 0}}, 91 | {[]float64{0, 1, 2, 3, 4, 5}, 3, []float64{0, 1, 1, 0}}, 92 | {[]float64{0, 3, 0, 2, 0, 1}, 3, []float64{0, 0.5, 0.5, 1}}, 93 | } 94 | for _, d := range testdata { 95 | out := makeClipping(d.d, d.m) 96 | 97 | if len(out) != len(d.expected) { 98 | t.Errorf("Expected length %d, but got %d for %v", len(d.expected), len(out), d) 99 | break 100 | } 101 | 102 | for i, val := range out { 103 | if math.Abs(val-d.expected[i]) > 1e-7 { 104 | t.Errorf("Expected value of %.3f, but got %.3f for %v", d.expected[i], val, d) 105 | } 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /curr_bench.txt: -------------------------------------------------------------------------------- 1 | goos: darwin 2 | goarch: 
amd64 3 | pkg: github.com/matrix-profile-foundation/go-matrixprofile 4 | BenchmarkMStomp-4 39 29853485 ns/op 7336245 B/op 227071 allocs/op 5 | BenchmarkZNormalize-4 7112282 185 ns/op 256 B/op 1 allocs/op 6 | BenchmarkMovmeanstd-4 89810 13628 ns/op 32768 B/op 4 allocs/op 7 | BenchmarkCrossCorrelate-4 15190 75262 ns/op 24584 B/op 3 allocs/op 8 | BenchmarkMass-4 15421 78660 ns/op 24842 B/op 4 allocs/op 9 | BenchmarkDistanceProfile-4 15190 79092 ns/op 24842 B/op 4 allocs/op 10 | BenchmarkCalculateDistanceProfile-4 220363 4625 ns/op 0 B/op 0 allocs/op 11 | BenchmarkStmp/m32_pts1k-4 15 77806814 ns/op 24209736 B/op 3892 allocs/op 12 | BenchmarkStmp/m128_pts1k-4 16 70673766 ns/op 22496294 B/op 3508 allocs/op 13 | BenchmarkStamp/m32_p2_pts1k-4 25 46207243 ns/op 24284148 B/op 3909 allocs/op 14 | BenchmarkStomp/m128_p1_pts__1024-4 152 7740858 ns/op 196805 B/op 28 allocs/op 15 | BenchmarkStomp/m128_p2_pts__4096-4 13 81826774 ns/op 1116937 B/op 39 allocs/op 16 | BenchmarkStomp/m128_p2_pts_16384-4 1 1342203283 ns/op 4776832 B/op 45 allocs/op 17 | BenchmarkStomp/m128_p4_pts_16384-4 1 1269550826 ns/op 7153728 B/op 67 allocs/op 18 | BenchmarkStomp/m1024_p2_pts_16384-4 1 1235325258 ns/op 4776832 B/op 45 allocs/op 19 | BenchmarkMpx/m128_p1_pts__1024-4 564 2310017 ns/op 84591 B/op 26 allocs/op 20 | BenchmarkMpx/m128_p2_pts__4096-4 63 19927988 ns/op 400206 B/op 32 allocs/op 21 | BenchmarkMpx/m128_p2_pts_16384-4 4 294076163 ns/op 1708912 B/op 33 allocs/op 22 | BenchmarkMpx/m128_p4_pts_16384-4 4 327366290 ns/op 2237776 B/op 45 allocs/op 23 | BenchmarkMpx/m1024_p2_pts_16384-4 4 330582811 ns/op 1737584 B/op 33 allocs/op 24 | BenchmarkUpdate-4 80 13849082 ns/op 795065 B/op 18 allocs/op 25 | PASS 26 | ok github.com/matrix-profile-foundation/go-matrixprofile 36.122s 27 | PASS 28 | ok github.com/matrix-profile-foundation/go-matrixprofile/av 0.006s 29 | PASS 30 | ok github.com/matrix-profile-foundation/go-matrixprofile/siggen 0.006s 31 | PASS 32 | ok 
// arcCurve computes the uncorrected arc curve (a histogram of arc crossings).
// Every position i of the matrix profile index defines an arc from i to
// mpIdx[i]; the result counts, for each position, how many arcs pass strictly
// between their two end points over that position. Entries of mpIdx that are
// negative or not smaller than len(mpIdx) are ignored.
//
// The counts are accumulated with a difference array: each arc contributes a
// +1 where it starts covering positions and a -1 where it stops, and a single
// running sum turns those marks into the histogram. This keeps the work linear
// in len(mpIdx) instead of walking every arc position by position.
func arcCurve(mpIdx []int) []float64 {
	n := len(mpIdx)
	histo := make([]float64, n)

	// delta[j] is the change in the number of covering arcs between j-1 and j
	delta := make([]int, n)
	for i, idx := range mpIdx {
		if idx < 0 || idx >= n {
			continue
		}

		lo, hi := i, idx
		if lo > hi {
			lo, hi = hi, lo
		}
		// the arc covers lo+1 .. hi-1; adjacent or identical end points
		// cover nothing
		if hi-lo < 2 {
			continue
		}
		delta[lo+1]++
		delta[hi]--
	}

	covering := 0
	for j, change := range delta {
		covering += change
		histo[j] = float64(covering)
	}
	return histo
}
// iac evaluates the ideal arc curve for n points at position x. The curve is
// the inverted parabola
//   y = -(sqrt(2/n)*(x-n/2))^2 + n/2
// which is 0 at x = 0 and x = n and reaches its maximum of n/2 at x = n/2.
func iac(x float64, n int) float64 {
	size := float64(n)
	half := size / 2.0
	scaled := math.Sqrt(2/size) * (x - half)
	return -math.Pow(scaled, 2.0) + half
}
-------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/matrix-profile-foundation/go-matrixprofile/siggen" 7 | ) 8 | 9 | func Example_caseStudy() { 10 | sin := siggen.Sin(1, 5, 0, 0, 100, 2) 11 | sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 0.25) 12 | saw := siggen.Sawtooth(0.5, 7, 0, 0, 100, 1) 13 | line := siggen.Line(0, 0, len(sin2)*4) 14 | sig := siggen.Append(sin, sin2, sin, line, sin2, line, sin2, line, saw) 15 | 16 | noise := siggen.Noise(0.1, len(sig)) 17 | sig = siggen.Add(sig, noise) 18 | 19 | var m int 20 | m = 32 21 | mp, err := New(sig, nil, m) 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | ao := NewAnalyzeOpts() 27 | ao.OutputFilename = "mp_sine.png" 28 | 29 | if err = mp.Analyze(nil, ao); err != nil { 30 | panic(err) 31 | } 32 | 33 | fmt.Printf("Saved png file result to %s\n", ao.OutputFilename) 34 | // Output: Saved png file result to mp_sine.png 35 | } 36 | 37 | func Example_kDimensionalCaseStudy() { 38 | sin := siggen.Sin(1, 4, 0, 0, 100, 0.25) 39 | saw := siggen.Sawtooth(1, 4, 0, 0, 100, 0.25) 40 | square := siggen.Square(1, 4, 0, 0, 100, 0.25) 41 | line := siggen.Line(0, 0, len(sin)*4) 42 | line2 := siggen.Line(0, 0, len(sin)*3) 43 | sig := make([][]float64, 3) 44 | sig[0] = siggen.Append(line, line, line, saw, line2, saw, line2) 45 | sig[1] = siggen.Append(line, sin, line2, sin, line2, sin, line2, sin, line2) 46 | sig[2] = siggen.Append(line, square, line2, square, line2, square, line2, square, line2) 47 | 48 | sig[0] = siggen.Add(sig[0], siggen.Noise(0.1, len(sig[0]))) 49 | sig[1] = siggen.Add(sig[1], siggen.Noise(0.1, len(sig[0]))) 50 | sig[2] = siggen.Add(sig[2], siggen.Noise(0.1, len(sig[0]))) 51 | 52 | m := 25 53 | mp, err := NewKMP(sig, m) 54 | if err != nil { 55 | panic(err) 56 | } 57 | 58 | if err = mp.Compute(); err != nil { 59 | panic(err) 60 | } 61 | 62 | if err = mp.Visualize("mp_kdim.png"); err != nil { 63 | panic(err) 64 | } 65 | 
// ExampleMatrixProfile_DiscoverSegments builds a signal whose character
// changes part way through, computes its self-join matrix profile with STMP
// and reports the segmentation point found by DiscoverSegments.
func ExampleMatrixProfile_DiscoverSegments() {
	// generate a signal mainly composed of sine waves and switches
	// frequencies, amplitude, and offset midway through

	// amplitude of 1, frequency of 5Hz, sampling frequency of 100 Hz,
	// time of 2 seconds
	sin := siggen.Sin(1, 5, 0, 0, 100, 2)

	// amplitude of 0.25, frequency of 10Hz, offset of 0.75, sampling
	// frequency of 100 Hz, time of 1 second
	sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 1)
	sig := siggen.Append(sin, sin2)

	// add noise to the signal; siggen.Noise is called with 0.01 as its first
	// argument (presumably the noise amplitude)
	noise := siggen.Noise(0.01, len(sig))
	sig = siggen.Add(sig, noise)

	// create a new MatrixProfile struct using the signal and a
	// subsequence length of 32. The second timeseries is set to nil
	// so we perform a self join.
	mp, err := New(sig, nil, 32)
	if err != nil {
		panic(err)
	}

	// run the STMP algorithm with self join. The matrix profile
	// will be stored in mp.MP and the matrix profile index will
	// be stored in mp.Idx
	o := NewMPOpts()
	o.Algorithm = AlgoSTMP

	if err = mp.Compute(o); err != nil {
		panic(err)
	}

	// segment the timeseries using the number of arc crossings over
	// each index in the matrix profile index
	idx, cac, _ := mp.DiscoverSegments()
	// NOTE: the printed text (including the "foud" spelling) is compared
	// against the Output block below, so the two must be kept in sync
	fmt.Printf("Signal change foud at index: %d\n", idx)
	fmt.Printf("Corrected Arc Curve (CAC) value: %.3f\n", cac)

	// Output:
	// Signal change foud at index: 194
	// Corrected Arc Curve (CAC) value: 0.000
}
Motif groups include 87 | // all subsequences that are within 2 times the distance of the 88 | // original motif pair 89 | motifs, err := mp.DiscoverMotifs(2, 2, 10, mp.W/2) 90 | if err != nil { 91 | panic(err) 92 | } 93 | 94 | for i, mg := range motifs { 95 | fmt.Printf("Motif Group %d\n", i) 96 | fmt.Printf(" %d motifs\n", len(mg.Idx)) 97 | } 98 | 99 | // Output: 100 | // Motif Group 0 101 | // 2 motifs 102 | // Motif Group 1 103 | // 2 motifs 104 | } 105 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func Example() { 8 | sig := []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0} 9 | 10 | mp, err := New(sig, nil, 4) 11 | if err != nil { 12 | panic(err) 13 | } 14 | 15 | if err = mp.Compute(nil); err != nil { 16 | panic(err) 17 | } 18 | 19 | fmt.Printf("Signal: %.3f\n", sig) 20 | fmt.Printf("Matrix Profile: %.3f\n", mp.MP) 21 | fmt.Printf("Profile Index: %5d\n", mp.Idx) 22 | 23 | // Output: 24 | // Signal: [0.000 0.990 1.000 0.000 0.000 0.980 1.000 0.000 0.000 0.960 1.000 0.000] 25 | // Matrix Profile: [0.014 0.014 0.029 0.029 0.014 0.014 0.029 0.029 0.029] 26 | // Profile Index: [ 4 5 6 7 0 1 2 3 4] 27 | } 28 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/matrix-profile-foundation/go-matrixprofile 2 | 3 | go 1.12 4 | 5 | require ( 6 | gonum.org/v1/gonum v0.7.0 7 | gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af h1:wVe6/Ea46ZMeNkQjjBW6xcqyQA/j5e0D6GytH95g0gQ= 2 | 
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= 3 | github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90 h1:WXb3TSNmHp2vHoCroCIB1foO/yQ36swABL8aOVeDpgg= 4 | github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= 5 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= 6 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= 7 | github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5 h1:PJr+ZMXIecYc1Ey2zucXdR73SMBtgjPgwa31099IMv0= 8 | github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 9 | golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 10 | golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 11 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2 h1:y102fOLFqhV41b+4GPiJoa0k/x+pJcEi2/HB1Y5T6fU= 12 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 13 | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81 h1:00VmoueYNlNz/aHIilyyQz/MHSqGoWJzpFv/HW8xpzI= 14 | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= 15 | golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 16 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e h1:Io7mpb+aUAGF0MKxbyQ7HQl1VgB+cL6ZJZUFaFNqVV4= 17 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 18 | gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= 19 | gonum.org/v1/gonum v0.6.2 
h1:4r+yNT0+8SWcOkXP+63H2zQbN+USnC73cjGUxnDF94Q= 20 | gonum.org/v1/gonum v0.6.2/go.mod h1:9mxDZsDKxgMAuccQkewq682L+0eCu4dCN2yonUJTCLU= 21 | gonum.org/v1/gonum v0.7.0 h1:Hdks0L0hgznZLG9nzXb8vZ0rRvqNvAcgAp84y7Mwkgw= 22 | gonum.org/v1/gonum v0.7.0/go.mod h1:L02bwd0sqlsvRv41G7wGWFCsVNZFv/k1xzGIxeANHGM= 23 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc= 24 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= 25 | gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b h1:Qh4dB5D/WpoUUp3lSod7qgoyEHbDGPUWjIbnqdqqe1k= 26 | gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= 27 | rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= 28 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 29 | -------------------------------------------------------------------------------- /kmp.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io/ioutil" 8 | "math" 9 | "os" 10 | "sort" 11 | 12 | "github.com/matrix-profile-foundation/go-matrixprofile/util" 13 | "gonum.org/v1/gonum/dsp/fourier" 14 | "gonum.org/v1/plot/plotter" 15 | ) 16 | 17 | // KMP is a struct that tracks the current k-dimensional matrix profile 18 | // computation for a given slice of timeseries of length N and subsequence length of M. 19 | // The profile and the profile index are stored here. 
// KMP is a struct that tracks the current k-dimensional matrix profile
// computation for a given slice of timeseries of length N and subsequence length of M.
// The profile and the profile index are stored here.
//
// Save and Load persist the struct with encoding/json, which only handles
// exported fields: T, W, MP and Idx are written and read back, while the
// unexported caches (tMean, tStd, tF) and n are not part of the stored data.
type KMP struct {
	T     [][]float64    // a set of timeseries where the number of row represents the number of dimensions and each row is a separate time series
	tMean [][]float64    // sliding mean of each timeseries with a window of m each
	tStd  [][]float64    // sliding standard deviation of each timeseries with a window of m each
	tF    [][]complex128 // holds an existing calculation of the FFT for each timeseries
	n     int            // length of the timeseries
	W     int            // length of a subsequence
	MP    [][]float64    // matrix profile
	Idx   [][]int        // matrix profile index
}
k.n-k.W+1) 70 | k.Idx[d] = make([]int, k.n-k.W+1) 71 | } 72 | 73 | for d := 0; d < len(t); d++ { 74 | for i := 0; i < k.n-k.W+1; i++ { 75 | k.MP[d][i] = math.Inf(1) 76 | k.Idx[d][i] = math.MaxInt64 77 | } 78 | } 79 | 80 | if err := k.initCaches(); err != nil { 81 | return nil, err 82 | } 83 | 84 | return &k, nil 85 | } 86 | 87 | // Save will save the current matrix profile struct to disk 88 | func (k KMP) Save(filepath, format string) error { 89 | var err error 90 | switch format { 91 | case "json": 92 | f, err := os.Open(filepath) 93 | if err != nil { 94 | f, err = os.Create(filepath) 95 | if err != nil { 96 | return err 97 | } 98 | } 99 | defer f.Close() 100 | out, err := json.Marshal(k) 101 | if err != nil { 102 | return err 103 | } 104 | _, err = f.Write(out) 105 | default: 106 | return fmt.Errorf("invalid save format, %s", format) 107 | } 108 | return err 109 | } 110 | 111 | // Load will attempt to load a matrix profile from a file for iterative use 112 | func (k *KMP) Load(filepath, format string) error { 113 | var err error 114 | switch format { 115 | case "json": 116 | f, err := os.Open(filepath) 117 | if err != nil { 118 | return err 119 | } 120 | defer f.Close() 121 | b, err := ioutil.ReadAll(f) 122 | if err != nil { 123 | return err 124 | } 125 | err = json.Unmarshal(b, k) 126 | default: 127 | return fmt.Errorf("invalid load format, %s", format) 128 | } 129 | return err 130 | } 131 | 132 | // initCaches initializes cached data including the timeseries a and b rolling mean 133 | // and standard deviation and full fourier transform of timeseries b 134 | func (k *KMP) initCaches() error { 135 | var err error 136 | // precompute the mean and standard deviation for each window of size m for all 137 | // sliding windows across the b timeseries 138 | for d := 0; d < len(k.T); d++ { 139 | k.tMean[d], k.tStd[d], err = util.MovMeanStd(k.T[d], k.W) 140 | if err != nil { 141 | return err 142 | } 143 | } 144 | 145 | // precompute the fourier transform of the b 
// mStomp computes the k dimensional matrix profile and stores the result in
// k.MP and k.Idx. Row d of k.MP holds, for every subsequence position, the
// smallest average over the d+1 smallest per-dimension distances seen so far,
// and k.Idx records the query position that produced it. The returned error
// is never assigned in this function, so it is always nil.
func (k *KMP) mStomp() error {
	var err error

	// compute the sliding dot products of the first subsequence (idx 0) for
	// every dimension; cachedDots[d][idx] is reused below to seed dots[d][0]
	// on every later iteration
	cachedDots := make([][]float64, len(k.T))
	fft := fourier.NewFFT(k.n)
	k.crossCorrelate(0, fft, cachedDots)

	// D[d] holds the distance profile of dimension d for the current idx
	var D [][]float64
	D = make([][]float64, len(k.T))
	for d := 0; d < len(D); d++ {
		D[d] = make([]float64, k.n-k.W+1)
	}

	// dots[d] is the running sliding dot product for dimension d, starting
	// out as a copy of the dot products for idx 0
	dots := make([][]float64, len(k.T))
	for d := 0; d < len(dots); d++ {
		dots[d] = make([]float64, k.n-k.W+1)
		copy(dots[d], cachedDots[d])
	}

	for idx := 0; idx < k.n-k.W+1; idx++ {
		for d := 0; d < len(dots); d++ {
			if idx > 0 {
				// update the dot products from the previous idx in place:
				// drop the product of the samples leaving both windows and
				// add the product of the samples entering them. Iterating
				// from the end keeps dots[d][j-1] at its previous value.
				for j := k.n - k.W; j > 0; j-- {
					dots[d][j] = dots[d][j-1] - k.T[d][j-1]*k.T[d][idx-1] + k.T[d][j+k.W-1]*k.T[d][idx+k.W-1]
				}
				dots[d][0] = cachedDots[d][idx]
			}

			// convert the dot products into distances using the cached
			// sliding means and standard deviations of both subsequences
			for i := 0; i < k.n-k.W+1; i++ {
				D[d][i] = math.Sqrt(2 * float64(k.W) * math.Abs(1-(dots[d][i]-float64(k.W)*k.tMean[d][i]*k.tMean[d][idx])/(float64(k.W)*k.tStd[d][i]*k.tStd[d][idx])))
			}
			// sets the distance in the exclusion zone to +Inf
			util.ApplyExclusionZone(D[d], idx, k.W/2)
		}

		// after sorting each column in ascending order and accumulating
		// down the rows, D[d][i] is the sum of the d+1 smallest
		// per-dimension distances at position i
		k.columnWiseSort(D)
		k.columnWiseCumSum(D)

		// keep the smallest average distance (sum divided by d+1) seen so
		// far for every row and position, remembering which idx produced it
		for d := 0; d < len(D); d++ {
			for i := 0; i < k.n-k.W+1; i++ {
				if D[d][i]/(float64(d)+1) < k.MP[d][i] {
					k.MP[d][i] = D[d][i] / (float64(d) + 1)
					k.Idx[d][i] = idx
				}
			}
		}
	}

	return err
}
DiscoverMotifs has not been implemented yet 275 | func (k KMP) DiscoverMotifs(kMotifs int, r float64) ([]MotifGroup, error) { 276 | return nil, errors.New("Motifs for KMP has not been implemented yet.") 277 | } 278 | 279 | // DiscoverDiscords has not been implemented yet 280 | func (k KMP) DiscoverDiscords(kDiscords int, exclusionZone int) ([]int, error) { 281 | return nil, errors.New("Discords for KMP has not been implemented yet.") 282 | } 283 | 284 | // DiscoverSegments has not been implemented yet 285 | func (k KMP) DiscoverSegments() (int, float64, []float64) { 286 | return 0, 0, nil 287 | } 288 | 289 | // Visualize creates a png of the k-dimensional matrix profile. 290 | func (k KMP) Visualize(fn string) error { 291 | sigPts := make([]plotter.XYs, len(k.T)) 292 | for i := 0; i < len(k.T); i++ { 293 | sigPts[i] = points(k.T[i], len(k.T[0])) 294 | } 295 | 296 | mpPts := make([]plotter.XYs, len(k.MP)) 297 | for i := 0; i < len(k.MP); i++ { 298 | mpPts[i] = points(k.MP[i], len(k.T[0])) 299 | } 300 | 301 | return plotKMP(sigPts, mpPts, fn) 302 | } 303 | -------------------------------------------------------------------------------- /kmp_bench_test.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/matrix-profile-foundation/go-matrixprofile/siggen" 7 | ) 8 | 9 | func setupKData() [][]float64 { 10 | sin := siggen.Sin(1, 4, 0, 0, 100, 0.25) 11 | saw := siggen.Sawtooth(1, 4, 0, 0, 100, 0.25) 12 | square := siggen.Square(1, 4, 0, 0, 100, 0.25) 13 | line := siggen.Line(0, 0, len(sin)*4) 14 | line2 := siggen.Line(0, 0, len(sin)*3) 15 | sig := make([][]float64, 3) 16 | sig[0] = siggen.Append(line, line, line, saw, line2, saw, line2) 17 | sig[1] = siggen.Append(line, sin, line2, sin, line2, sin, line2, sin, line2) 18 | sig[2] = siggen.Append(line, square, line2, square, line2, square, line2, square, line2) 19 | 20 | noise := siggen.Noise(0.1, len(sig[0])) 21 | 
sig[0] = siggen.Add(sig[0], noise) 22 | 23 | noise = siggen.Noise(0.1, len(sig[0])) 24 | sig[1] = siggen.Add(sig[1], noise) 25 | 26 | noise = siggen.Noise(0.1, len(sig[0])) 27 | sig[2] = siggen.Add(sig[2], noise) 28 | 29 | return sig 30 | } 31 | 32 | func BenchmarkMStomp(b *testing.B) { 33 | sig := setupKData() 34 | mp, err := NewKMP(sig, 25) 35 | if err != nil { 36 | b.Error(err) 37 | } 38 | 39 | for i := 0; i < b.N; i++ { 40 | err = mp.Compute() 41 | if err != nil { 42 | b.Error(err) 43 | } 44 | if len(mp.MP) < 1 || len(mp.Idx) < 1 { 45 | b.Error("expected at least one dimension from matrix profile and matrix profile index") 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /kmp_test.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "math" 5 | "os" 6 | "testing" 7 | 8 | "gonum.org/v1/gonum/dsp/fourier" 9 | ) 10 | 11 | func TestNewKMP(t *testing.T) { 12 | testdata := []struct { 13 | t [][]float64 14 | w int 15 | expectedErr bool 16 | }{ 17 | {[][]float64{}, 2, true}, 18 | {[][]float64{{1, 1, 1, 1, 1}}, 2, false}, 19 | {[][]float64{{1, 1, 1, 1, 1}}, 1, true}, 20 | {[][]float64{{1, 1, 1, 1, 1}}, 6, true}, 21 | {[][]float64{{1, 1, 1, 1, 1}, {1, 1, 1}}, 2, true}, 22 | } 23 | 24 | for _, d := range testdata { 25 | _, err := NewKMP(d.t, d.w) 26 | if d.expectedErr && err == nil { 27 | t.Errorf("Expected an error, but got none for %v", d) 28 | } 29 | if !d.expectedErr && err != nil { 30 | t.Errorf("Expected no error, but got %v for %v", err, d) 31 | } 32 | } 33 | } 34 | 35 | func TestKCrossCorrelate(t *testing.T) { 36 | var err error 37 | var mp *KMP 38 | 39 | testdata := []struct { 40 | t [][]float64 41 | w int 42 | expected [][]float64 43 | }{ 44 | {[][]float64{{1, 1, 1, 1, 1}}, 2, [][]float64{{2, 2, 2, 2}}}, 45 | {[][]float64{{1, 2, 3, 3, 2, 1}}, 2, [][]float64{{5, 8, 9, 7, 4}}}, 46 | {[][]float64{{1, 2, 3, 3, 2, 1, 1}}, 2, 
[][]float64{{5, 8, 9, 7, 4, 3}}}, 47 | {[][]float64{ 48 | {1, 2, 3, 3, 2, 1, 1}, 49 | {2, 4, 3, 3, 2, 1, 1}, 50 | }, 2, 51 | [][]float64{ 52 | {5, 8, 9, 7, 4, 3}, 53 | {20, 20, 18, 14, 8, 6}, 54 | }}, 55 | } 56 | 57 | for _, d := range testdata { 58 | mp, err = NewKMP(d.t, d.w) 59 | if err != nil { 60 | if d.expected == nil { 61 | // Got an error while creating a new matrix profile 62 | continue 63 | } else { 64 | t.Errorf("did not expect to get an error , %v, for %v", err, d) 65 | } 66 | } 67 | 68 | fft := fourier.NewFFT(mp.n) 69 | D := make([][]float64, len(mp.T)) 70 | mp.crossCorrelate(0, fft, D) 71 | if err != nil && d.expected == nil { 72 | // Got an error while z normalizing and expected an error 73 | continue 74 | } 75 | if d.expected == nil { 76 | t.Errorf("Expected an invalid cross correlation calculation, %v", d) 77 | } 78 | if err != nil { 79 | t.Errorf("Did not expect error, %v", err) 80 | } 81 | if len(D) != len(d.expected) { 82 | t.Errorf("Expected %d dimensions, but got %d, %v", len(d.expected), len(D), d) 83 | } 84 | for i := 0; i < len(D); i++ { 85 | for j := 0; j < len(D[0]); j++ { 86 | if math.Abs(D[i][j]-d.expected[i][j]) > 1e-7 { 87 | t.Errorf("Expected %v, but got %v for %v", d.expected, D, d) 88 | break 89 | } 90 | } 91 | } 92 | 93 | } 94 | } 95 | 96 | func TestColumnWiseSort(t *testing.T) { 97 | testdata := []struct { 98 | d [][]float64 99 | expectedD [][]float64 100 | }{ 101 | { 102 | [][]float64{ 103 | {1, 4, 9}, 104 | {2, 6, 4}, 105 | {3, 2, 3}, 106 | {4, 1, 2}}, 107 | [][]float64{ 108 | {1, 1, 2}, 109 | {2, 2, 3}, 110 | {3, 4, 4}, 111 | {4, 6, 9}}, 112 | }, 113 | } 114 | 115 | for _, d := range testdata { 116 | mp := &KMP{W: 5, n: 7} 117 | mp.columnWiseSort(d.d) 118 | 119 | if len(d.d) != len(d.expectedD) { 120 | t.Errorf("Expected %d dimensions, but got %d, %+v", len(d.expectedD), len(d.d), d) 121 | break 122 | } 123 | for dim := 0; dim < len(d.d); dim++ { 124 | for i := 0; i < mp.n-mp.W-1; i++ { 125 | if 
math.Abs(d.d[dim][i]-d.expectedD[dim][i]) > 1e-7 { 126 | t.Errorf("Expected\n%.4f, but got\n%.4f for\n%+v", d.expectedD[dim], d.d[dim], d) 127 | break 128 | } 129 | } 130 | } 131 | } 132 | } 133 | 134 | func TestMStomp(t *testing.T) { 135 | var err error 136 | var mp *KMP 137 | 138 | testdata := []struct { 139 | t [][]float64 140 | m int 141 | expectedMP [][]float64 142 | }{ 143 | { 144 | [][]float64{ 145 | {0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0}, 146 | {0, 0, -1, -1, 0, 0, 0, -1, -1, 0, 0}, 147 | {0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0}}, 148 | 4, 149 | [][]float64{ 150 | {0, 0, 0, 1.838803373524, 1.838803373524, 0, 0, 0}, 151 | {0, 0, 0, 1.838803373524, 1.838803373524, 0, 0, 0}, 152 | {1.184098445303, 1.184098445303, 1.184098445303, 1.995669274602, 2.409967360985, 1.184098445303, 1.184098445303, 1.184098445303}}, 153 | }, 154 | } 155 | 156 | for _, d := range testdata { 157 | mp, err = NewKMP(d.t, d.m) 158 | if err != nil { 159 | if d.expectedMP == nil { 160 | // Got an error while creating a new matrix profile 161 | continue 162 | } else { 163 | t.Errorf("Did not expect an error, %v, while creating new mp for %v", err, d) 164 | } 165 | } 166 | 167 | err = mp.Compute() 168 | if err != nil { 169 | if d.expectedMP == nil { 170 | // Got an error while z normalizing and expected an error 171 | continue 172 | } else { 173 | t.Errorf("Did not expect an error, %v, while calculating stomp for %v", err, d) 174 | break 175 | } 176 | } 177 | if d.expectedMP == nil { 178 | t.Errorf("Expected an invalid STOMP calculation, %+v", d) 179 | break 180 | } 181 | 182 | if len(mp.MP) != len(d.expectedMP) { 183 | t.Errorf("Expected %d dimensions, but got %d, %+v", len(d.expectedMP), len(mp.MP), d) 184 | } 185 | for dim := 0; dim < len(d.t); dim++ { 186 | for i := 0; i < mp.n-mp.W-1; i++ { 187 | if math.Abs(mp.MP[dim][i]-d.expectedMP[dim][i]) > 1e-7 { 188 | for dd := 0; dd < len(d.t); dd++ { 189 | t.Errorf("Expected\n%.12f, but got\n%.12f for\n%+v", d.expectedMP[dd], mp.MP[dd], d) 190 | } 191 | 
break 192 | } 193 | } 194 | } 195 | } 196 | } 197 | 198 | func TestKMPSave(t *testing.T) { 199 | ts := [][]float64{{1, 2, 3, 4, 5, 6, 7, 8, 9}} 200 | m := 3 201 | p, err := NewKMP(ts, m) 202 | p.Compute() 203 | filepath := "./kmp.json" 204 | err = p.Save(filepath, "json") 205 | if err != nil { 206 | t.Errorf("Received error while saving matrix profile, %v", err) 207 | } 208 | if err = os.Remove(filepath); err != nil { 209 | t.Errorf("Could not remove file, %s, %v", filepath, err) 210 | } 211 | } 212 | 213 | func TestKMPLoad(t *testing.T) { 214 | ts := [][]float64{{1, 2, 3, 4, 5, 6, 7, 8, 9}} 215 | w := 3 216 | p, err := NewKMP(ts, w) 217 | p.Compute() 218 | filepath := "./kmp.json" 219 | if err = p.Save(filepath, "json"); err != nil { 220 | t.Errorf("Received error while saving matrix profile, %v", err) 221 | } 222 | 223 | newP := &KMP{} 224 | if err = newP.Load(filepath, "json"); err != nil { 225 | t.Errorf("Failed to load %s, %v", filepath, err) 226 | } 227 | 228 | if err = os.Remove(filepath); err != nil { 229 | t.Errorf("Could not remove file, %s, %v", filepath, err) 230 | } 231 | 232 | if newP.W != w { 233 | t.Errorf("Expected window of %d, but got %d", w, newP.W) 234 | } 235 | if len(newP.T) != len(ts) { 236 | t.Errorf("Expected timeseries length of %d, but got %d", len(ts), len(newP.T)) 237 | } 238 | 239 | } 240 | -------------------------------------------------------------------------------- /matrixprofile.go: -------------------------------------------------------------------------------- 1 | // Package matrixprofile computes the matrix profile and matrix profile index of a time series 2 | package matrixprofile 3 | 4 | import ( 5 | "container/heap" 6 | "encoding/json" 7 | "errors" 8 | "fmt" 9 | "io/ioutil" 10 | "math" 11 | "math/rand" 12 | "os" 13 | "runtime" 14 | "sort" 15 | "strconv" 16 | "sync" 17 | 18 | "github.com/matrix-profile-foundation/go-matrixprofile/av" 19 | "github.com/matrix-profile-foundation/go-matrixprofile/util" 20 | 
// MatrixProfile is a struct that tracks the current matrix profile computation
// for a given timeseries of length N and subsequence length of W. The profile
// and the profile index are stored here. The json tags define the on-disk
// layout used by Save and Load, so fields must not be renamed or retagged
// without a migration.
type MatrixProfile struct {
	A        []float64    `json:"a"`                 // query time series
	B        []float64    `json:"b"`                 // timeseries to perform full join with; same slice as A for a self join
	AMean    []float64    `json:"a_mean"`            // sliding mean of a with a window of W each
	AStd     []float64    `json:"a_std"`             // sliding standard deviation of a with a window of W each
	BMean    []float64    `json:"b_mean"`            // sliding mean of b with a window of W each
	BStd     []float64    `json:"b_std"`             // sliding standard deviation of b with a window of W each
	BF       []complex128 `json:"b_fft"`             // holds an existing calculation of the FFT of b timeseries
	N        int          `json:"n"`                 // length of the b timeseries (length of a for a self join)
	W        int          `json:"w"`                 // length of a subsequence
	SelfJoin bool         `json:"self_join"`         // indicates whether a self join is performed with an exclusion zone
	MP       []float64    `json:"mp"`                // matrix profile
	Idx      []int        `json:"pi"`                // matrix profile index
	MPB      []float64    `json:"mp_ba"`             // matrix profile for the BA join
	IdxB     []int        `json:"pi_ba"`             // matrix profile index for the BA join
	AV       av.AV        `json:"annotation_vector"` // type of annotation vector; New sets it to av.Default
	Opts     *MPOpts      `json:"options"`           // options used for the computation; set by Compute
	Motifs   []MotifGroup // NOTE(review): presumably filled in by motif discovery; not written anywhere in this part of the file
	Discords []int        // NOTE(review): presumably filled in by discord discovery; not written anywhere in this part of the file
}
53 | func New(a, b []float64, w int) (*MatrixProfile, error) { 54 | if a == nil || len(a) == 0 { 55 | return nil, fmt.Errorf("first slice is nil or has a length of 0") 56 | } 57 | 58 | if b != nil && len(b) == 0 { 59 | return nil, fmt.Errorf("second slice must be nil for self-join operation or have a length greater than 0") 60 | } 61 | 62 | mp := MatrixProfile{ 63 | A: a, 64 | W: w, 65 | N: len(b), 66 | } 67 | if b == nil { 68 | mp.N = len(a) 69 | mp.B = a 70 | mp.SelfJoin = true 71 | } else { 72 | mp.B = b 73 | } 74 | 75 | if mp.W > len(mp.A) || mp.W > len(mp.B) { 76 | return nil, fmt.Errorf("subsequence length must be less than the timeseries") 77 | } 78 | 79 | if mp.W < 2 { 80 | return nil, fmt.Errorf("subsequence length must be at least 2") 81 | } 82 | 83 | mp.AV = av.Default 84 | 85 | return &mp, nil 86 | } 87 | 88 | func applySingleAV(mp, ts []float64, w int, a av.AV) ([]float64, error) { 89 | avec, err := av.Create(a, ts, w) 90 | if err != nil { 91 | return nil, err 92 | } 93 | 94 | if len(avec) != len(mp) { 95 | return nil, fmt.Errorf("annotation vector length, %d, does not match matrix profile length, %d", len(avec), len(mp)) 96 | } 97 | 98 | // find the maximum matrix profile value 99 | maxMP := 0.0 100 | for _, val := range mp { 101 | if val > maxMP { 102 | maxMP = val 103 | } 104 | } 105 | 106 | // check that all annotation vector values are between 0 and 1 107 | for idx, val := range avec { 108 | if val < 0.0 || val > 1.0 { 109 | return nil, fmt.Errorf("got an annotation vector value of %.3f at index %d. must be between 0 and 1", val, idx) 110 | } 111 | } 112 | 113 | // applies the matrix profile correction. 
1 results in no change to the matrix profile and 114 | // 0 results in lifting the current matrix profile value by the maximum matrix profile value 115 | out := make([]float64, len(mp)) 116 | for idx, val := range avec { 117 | out[idx] = mp[idx] + (1-val)*maxMP 118 | } 119 | 120 | return out, nil 121 | } 122 | 123 | // ApplyAV applies an annotation vector to the current matrix profile. Annotation vector 124 | // values must be between 0 and 1. 125 | func (mp MatrixProfile) ApplyAV() ([]float64, []float64, error) { 126 | var err error 127 | abmp := make([]float64, len(mp.MP)) 128 | bamp := make([]float64, len(mp.MPB)) 129 | 130 | copy(abmp, mp.MP) 131 | copy(bamp, mp.MPB) 132 | if !mp.Opts.Euclidean { 133 | util.P2E(abmp, mp.W) 134 | util.P2E(bamp, mp.W) 135 | } 136 | 137 | abmp, err = applySingleAV(abmp, mp.A, mp.W, mp.AV) 138 | if err != nil { 139 | return nil, nil, err 140 | } 141 | 142 | if mp.MPB != nil { 143 | bamp, err = applySingleAV(bamp, mp.B, mp.W, mp.AV) 144 | } 145 | 146 | if err != nil { 147 | return nil, nil, err 148 | } 149 | 150 | if !mp.Opts.Euclidean { 151 | util.E2P(abmp, mp.W) 152 | util.E2P(bamp, mp.W) 153 | } 154 | 155 | return abmp, bamp, nil 156 | } 157 | 158 | // Save will save the current matrix profile struct to disk 159 | func (mp MatrixProfile) Save(filepath, format string) error { 160 | var err error 161 | switch format { 162 | case "json": 163 | f, err := os.Open(filepath) 164 | if err != nil { 165 | f, err = os.Create(filepath) 166 | if err != nil { 167 | return err 168 | } 169 | } 170 | defer f.Close() 171 | out, err := json.Marshal(mp) 172 | if err != nil { 173 | return err 174 | } 175 | _, err = f.Write(out) 176 | default: 177 | return fmt.Errorf("invalid save format, %s", format) 178 | } 179 | return err 180 | } 181 | 182 | // Load will attempt to load a matrix profile from a file for iterative use 183 | func (mp *MatrixProfile) Load(filepath, format string) error { 184 | var err error 185 | switch format { 186 | case "json": 187 
// mpVals is a slice of matrix profile values that satisfies heap.Interface.
// Less orders by ascending value, so the smallest value sits at the root when
// used with container/heap.
type mpVals []float64

// Len returns the number of values held.
func (m mpVals) Len() int { return len(m) }

// Less reports whether the value at i is strictly smaller than the one at j.
func (m mpVals) Less(i, j int) bool { return m[j] > m[i] }

// Swap exchanges the values at i and j.
func (m mpVals) Swap(i, j int) {
	tmp := m[i]
	m[i] = m[j]
	m[j] = tmp
}

// Push implements the function in the heap interface by appending x, which
// must be a float64.
func (m *mpVals) Push(x interface{}) {
	v := x.(float64)
	*m = append(*m, v)
}

// Pop implements the function in the heap interface by removing and returning
// the last value of the slice.
func (m *mpVals) Pop() interface{} {
	old := *m
	last := len(old) - 1
	v := old[last]
	*m = old[:last]
	return v
}
243 | func MPDist(a, b []float64, w int, o *MPDistOpts) (float64, error) { 244 | if o == nil { 245 | o = NewMPDistOpts() 246 | } 247 | 248 | mp, err := New(a, b, w) 249 | if err != nil { 250 | return 0, err 251 | } 252 | 253 | if err = mp.Compute(o.Opts); err != nil { 254 | return 0, nil 255 | } 256 | 257 | mpab, mpba, err := mp.ApplyAV() 258 | if err != nil { 259 | return 0, nil 260 | } 261 | 262 | thresh := 0.05 263 | k := int(thresh * float64(len(a)+len(b))) 264 | mpABBASize := len(mpab) + len(mpba) 265 | 266 | if k < mpABBASize { 267 | var lowestMPs mpVals 268 | heap.Init(&lowestMPs) 269 | for _, d := range mpab { 270 | // since this is a max heap and correlations go from 0-1 we need high correlations 271 | // to stay in the heap with the poorest correlation at the root. 272 | if !mp.Opts.Euclidean { 273 | d = -d 274 | } 275 | if len(lowestMPs) == k+1 { 276 | if d < lowestMPs[0] { 277 | heap.Pop(&lowestMPs) 278 | heap.Push(&lowestMPs, d) 279 | } 280 | } else { 281 | heap.Push(&lowestMPs, d) 282 | } 283 | } 284 | 285 | for _, d := range mpba { 286 | // since this is a max heap and correlations go from 0-1 we need high correlations 287 | // to stay in the heap with the poorest correlation at the root. 
288 | if !mp.Opts.Euclidean { 289 | d = -d 290 | } 291 | 292 | if len(lowestMPs) == k+1 { 293 | if d < lowestMPs[0] { 294 | heap.Pop(&lowestMPs) 295 | heap.Push(&lowestMPs, d) 296 | } 297 | } else { 298 | heap.Push(&lowestMPs, d) 299 | } 300 | } 301 | 302 | if !mp.Opts.Euclidean { 303 | return -lowestMPs[0], nil 304 | } 305 | return lowestMPs[0], nil 306 | } 307 | 308 | var trackVal float64 309 | if !mp.Opts.Euclidean { 310 | trackVal = 1 311 | } 312 | 313 | for _, d := range mp.MP { 314 | if mp.Opts.Euclidean { 315 | if d > trackVal { 316 | trackVal = d 317 | } 318 | } else { 319 | if d < trackVal { 320 | trackVal = d 321 | } 322 | } 323 | } 324 | 325 | for _, d := range mp.MPB { 326 | if mp.Opts.Euclidean { 327 | if d > trackVal { 328 | trackVal = d 329 | } 330 | } else { 331 | if d < trackVal { 332 | trackVal = d 333 | } 334 | } 335 | } 336 | 337 | return trackVal, nil 338 | } 339 | 340 | type Algo string 341 | 342 | const ( 343 | AlgoSTOMP Algo = "stomp" 344 | AlgoSTAMP Algo = "stamp" 345 | AlgoSTMP Algo = "stmp" 346 | AlgoMPX Algo = "mpx" 347 | ) 348 | 349 | // MPOpts are parameters to vary the algorithm to compute the matrix profile. 350 | type MPOpts struct { 351 | Algorithm Algo `json:"algorithm"` // choose which algorithm to compute the matrix profile 352 | SamplePct float64 `json:"sample_pct"` // only applicable to algorithm STAMP 353 | NJobs int `json:"n_jobs"` 354 | Euclidean bool `json:"euclidean"` // defaults to using euclidean distance instead of pearson correlation for matrix profile 355 | RemapNegCorr bool `json:"remap_negative_correlation"` // defaults to no remapping. This is used so that highly negatively correlated sequences will show a low distance as well. 
356 | } 357 | 358 | // NewMPOpts returns a default MPOpts 359 | func NewMPOpts() *MPOpts { 360 | p := runtime.NumCPU() * 2 361 | if p < 1 { 362 | p = 1 363 | } 364 | return &MPOpts{ 365 | Algorithm: AlgoMPX, 366 | SamplePct: 1.0, 367 | NJobs: p, 368 | Euclidean: true, 369 | } 370 | } 371 | 372 | // Compute calculate the matrixprofile given a set of input options. 373 | func (mp *MatrixProfile) Compute(o *MPOpts) error { 374 | if o == nil { 375 | o = NewMPOpts() 376 | } 377 | mp.Opts = o 378 | 379 | if o.SamplePct < 1 { 380 | return mp.stamp() 381 | } 382 | 383 | switch o.Algorithm { 384 | case AlgoSTOMP: 385 | return mp.stomp() 386 | case AlgoSTAMP: 387 | return mp.stamp() 388 | case AlgoSTMP: 389 | return mp.stmp() 390 | case AlgoMPX: 391 | return mp.mpx() 392 | default: 393 | return fmt.Errorf("Unsupported algorithm for matrix profile, %s", o.Algorithm) 394 | } 395 | return nil 396 | } 397 | 398 | // initCaches initializes cached data including the timeseries a and b rolling mean 399 | // and standard deviation and full fourier transform of timeseries b 400 | func (mp *MatrixProfile) initCaches() error { 401 | var err error 402 | // precompute the mean and standard deviation for each window of size m for all 403 | // sliding windows across the b timeseries 404 | mp.BMean, mp.BStd, err = util.MovMeanStd(mp.B, mp.W) 405 | if err != nil { 406 | return err 407 | } 408 | 409 | mp.AMean, mp.AStd, err = util.MovMeanStd(mp.A, mp.W) 410 | if err != nil { 411 | return err 412 | } 413 | 414 | // precompute the fourier transform of the b timeseries since it will 415 | // be used multiple times while computing the matrix profile 416 | fft := fourier.NewFFT(mp.N) 417 | mp.BF = fft.Coefficients(nil, mp.B) 418 | 419 | return nil 420 | } 421 | 422 | // crossCorrelate computes the sliding dot product between two slices 423 | // given a query and time series. Uses fast fourier transforms to compute 424 | // the necessary values. 
Returns the a slice of floats for the cross-correlation 425 | // of the signal q and the mp.B signal. This makes an optimization where the query 426 | // length must be less than half the length of the timeseries, b. 427 | func (mp MatrixProfile) crossCorrelate(q []float64, fft *fourier.FFT) []float64 { 428 | qpad := make([]float64, mp.N) 429 | for i := 0; i < len(q); i++ { 430 | qpad[i] = q[mp.W-i-1] 431 | } 432 | qf := fft.Coefficients(nil, qpad) 433 | 434 | // in place multiply the fourier transform of the b time series with 435 | // the subsequence fourier transform and store in the subsequence fft slice 436 | for i := 0; i < len(qf); i++ { 437 | qf[i] = mp.BF[i] * qf[i] 438 | } 439 | 440 | dot := fft.Sequence(nil, qf) 441 | 442 | for i := 0; i < mp.N-mp.W+1; i++ { 443 | dot[mp.W-1+i] = dot[mp.W-1+i] / float64(mp.N) 444 | } 445 | return dot[mp.W-1:] 446 | } 447 | 448 | // mass calculates the Mueen's algorithm for similarity search (MASS) 449 | // between a specified query and timeseries. Writes the euclidean distance 450 | // of the query to every subsequence in mp.B to profile. 451 | func (mp MatrixProfile) mass(q []float64, profile []float64, fft *fourier.FFT) error { 452 | qnorm, err := util.ZNormalize(q) 453 | if err != nil { 454 | return err 455 | } 456 | 457 | dot := mp.crossCorrelate(qnorm, fft) 458 | 459 | // converting cross correlation value to euclidian distance 460 | for i := 0; i < len(dot); i++ { 461 | profile[i] = math.Sqrt(math.Abs(2 * (float64(mp.W) - (dot[i] / mp.BStd[i])))) 462 | } 463 | return nil 464 | } 465 | 466 | // distanceProfile computes the distance profile between a and b time series. 467 | // If b is set to nil then it assumes a self join and will create an exclusion 468 | // area for trivial nearest neighbors. 
Writes the euclidean distance between 469 | // the specified subsequence in mp.A with each subsequence in mp.B to profile 470 | func (mp MatrixProfile) distanceProfile(idx int, profile []float64, fft *fourier.FFT) error { 471 | if idx > len(mp.A)-mp.W { 472 | return fmt.Errorf("provided index %d is beyond the length of timeseries %d minus the subsequence length %d", idx, len(mp.A), mp.W) 473 | } 474 | 475 | if err := mp.mass(mp.A[idx:idx+mp.W], profile, fft); err != nil { 476 | return err 477 | } 478 | 479 | // sets the distance in the exclusion zone to +Inf 480 | if mp.SelfJoin { 481 | util.ApplyExclusionZone(profile, idx, mp.W/2) 482 | } 483 | return nil 484 | } 485 | 486 | // calculateDistanceProfile converts a sliding dot product slice of floats into 487 | // distances and normalizes the output. Writes results back into the profile slice 488 | // of floats representing the distance profile. 489 | func (mp MatrixProfile) calculateDistanceProfile(dot []float64, idx int, profile []float64) error { 490 | if idx > len(mp.A)-mp.W { 491 | return fmt.Errorf("provided index %d is beyond the length of timeseries a %d minus the subsequence length %d", idx, len(mp.A), mp.W) 492 | } 493 | 494 | if len(profile) != len(dot) { 495 | return fmt.Errorf("profile length, %d, is not the same as the dot product length, %d", len(profile), len(dot)) 496 | } 497 | 498 | // converting cross correlation value to euclidian distance 499 | for i := 0; i < len(dot); i++ { 500 | profile[i] = math.Sqrt(2 * float64(mp.W) * math.Abs(1-(dot[i]-float64(mp.W)*mp.BMean[i]*mp.AMean[idx])/(float64(mp.W)*mp.BStd[i]*mp.AStd[idx]))) 501 | } 502 | 503 | if mp.SelfJoin { 504 | // sets the distance in the exclusion zone to +Inf 505 | util.ApplyExclusionZone(profile, idx, mp.W/2) 506 | } 507 | return nil 508 | } 509 | 510 | // stmp computes the full matrix profile given two time series as inputs. 511 | // If the second time series is set to nil then a self join on the first 512 | // will be performed. 
Stores the matrix profile and matrix profile index 513 | // in the struct. 514 | func (mp *MatrixProfile) stmp() error { 515 | if err := mp.initCaches(); err != nil { 516 | return err 517 | } 518 | 519 | mp.MP = make([]float64, mp.N-mp.W+1) 520 | mp.Idx = make([]int, mp.N-mp.W+1) 521 | for i := 0; i < len(mp.MP); i++ { 522 | mp.MP[i] = math.Inf(1) 523 | mp.Idx[i] = math.MaxInt64 524 | } 525 | 526 | var err error 527 | profile := make([]float64, mp.N-mp.W+1) 528 | 529 | fft := fourier.NewFFT(mp.N) 530 | for i := 0; i < mp.N-mp.W+1; i++ { 531 | if err = mp.distanceProfile(i, profile, fft); err != nil { 532 | return err 533 | } 534 | 535 | for j := 0; j < len(profile); j++ { 536 | if profile[j] <= mp.MP[j] { 537 | mp.MP[j] = profile[j] 538 | mp.Idx[j] = i 539 | } 540 | } 541 | } 542 | 543 | return nil 544 | } 545 | 546 | // Update updates a matrix profile and matrix profile index in place providing streaming 547 | // like behavior. 548 | func (mp *MatrixProfile) Update(newValues []float64) error { 549 | var err error 550 | 551 | var profile []float64 552 | for _, val := range newValues { 553 | // add to the a and b time series and increment the time series length 554 | if mp.SelfJoin { 555 | mp.A = append(mp.A, val) 556 | mp.B = mp.A 557 | } else { 558 | mp.B = append(mp.B, val) 559 | } 560 | mp.N++ 561 | 562 | // increase the size of the Matrix Profile and Index 563 | mp.MP = append(mp.MP, math.Inf(1)) 564 | mp.Idx = append(mp.Idx, math.MaxInt64) 565 | 566 | if err = mp.initCaches(); err != nil { 567 | return err 568 | } 569 | 570 | // only compute the last distance profile 571 | profile = make([]float64, len(mp.MP)) 572 | fft := fourier.NewFFT(mp.N) 573 | if err = mp.distanceProfile(len(mp.A)-mp.W, profile, fft); err != nil { 574 | return err 575 | } 576 | 577 | minVal := math.Inf(1) 578 | minIdx := math.MaxInt64 579 | for j := 0; j < len(profile)-1; j++ { 580 | if profile[j] <= mp.MP[j] { 581 | mp.MP[j] = profile[j] 582 | mp.Idx[j] = mp.N - mp.W 583 | } 584 | if 
profile[j] < minVal { 585 | minVal = profile[j] 586 | minIdx = j 587 | } 588 | } 589 | mp.MP[mp.N-mp.W] = minVal 590 | mp.Idx[mp.N-mp.W] = minIdx 591 | } 592 | return nil 593 | } 594 | 595 | // mpResult is the output struct from a batch processing for STAMP, STOMP, and MPX. This struct 596 | // can later be merged together in linear time or with a divide and conquer approach 597 | type mpResult struct { 598 | MP []float64 599 | Idx []int 600 | MPB []float64 601 | IdxB []int 602 | Err error 603 | } 604 | 605 | // mergeMPResults reads from a slice of channels for Matrix Profile results and 606 | // updates the matrix profile in the struct 607 | func (mp *MatrixProfile) mergeMPResults(results []chan *mpResult, euclidean bool) error { 608 | var err error 609 | 610 | resultSlice := make([]*mpResult, len(results)) 611 | for i := 0; i < len(results); i++ { 612 | resultSlice[i] = <-results[i] 613 | 614 | // if an error is encountered set the variable so that it can be checked 615 | // for at the end of processing. 
Tracks the last error emitted by any 616 | // batch 617 | if resultSlice[i].Err != nil { 618 | err = resultSlice[i].Err 619 | continue 620 | } 621 | 622 | // continues to the next loop if the result returned is empty but 623 | // had no errors 624 | if resultSlice[i].MP == nil || resultSlice[i].Idx == nil { 625 | continue 626 | } 627 | for j := 0; j < len(resultSlice[i].MP); j++ { 628 | if euclidean { 629 | if resultSlice[i].MP[j] <= mp.MP[j] { 630 | mp.MP[j] = resultSlice[i].MP[j] 631 | mp.Idx[j] = resultSlice[i].Idx[j] 632 | } 633 | } else { 634 | if math.Abs(resultSlice[i].MP[j]) < math.Abs(mp.MP[j]) { 635 | mp.MP[j] = resultSlice[i].MP[j] 636 | mp.Idx[j] = resultSlice[i].Idx[j] 637 | } 638 | } 639 | } 640 | 641 | // check if the BA join has results and merge if so 642 | if resultSlice[i].MPB == nil || resultSlice[i].IdxB == nil { 643 | continue 644 | } 645 | for j := 0; j < len(resultSlice[i].MPB); j++ { 646 | if euclidean { 647 | if resultSlice[i].MPB[j] <= mp.MPB[j] { 648 | mp.MPB[j] = resultSlice[i].MPB[j] 649 | mp.IdxB[j] = resultSlice[i].IdxB[j] 650 | } 651 | } else { 652 | if math.Abs(resultSlice[i].MPB[j]) < math.Abs(mp.MPB[j]) { 653 | mp.MPB[j] = resultSlice[i].MPB[j] 654 | mp.IdxB[j] = resultSlice[i].IdxB[j] 655 | } 656 | } 657 | } 658 | 659 | } 660 | return err 661 | } 662 | 663 | // stamp uses random ordering to compute the matrix profile. User can specify the 664 | // sample to be anything between 0 and 1 so that the computation early terminates 665 | // and provides the current computed matrix profile. 1 represents the exact matrix 666 | // profile. This should compute far faster at the cost of an approximation of the 667 | // matrix profile. Stores the matrix profile and matrix profile index in the struct. 
668 | func (mp *MatrixProfile) stamp() error { 669 | if mp.Opts.SamplePct <= 0.0 { 670 | return fmt.Errorf("must provide a sampling greater than 0 and at most 1, sample: %.3f", mp.Opts.SamplePct) 671 | } 672 | 673 | if err := mp.initCaches(); err != nil { 674 | return err 675 | } 676 | 677 | mp.MP = make([]float64, mp.N-mp.W+1) 678 | mp.Idx = make([]int, mp.N-mp.W+1) 679 | for i := 0; i < len(mp.MP); i++ { 680 | mp.MP[i] = math.Inf(1) 681 | mp.Idx[i] = math.MaxInt64 682 | } 683 | 684 | randIdx := rand.Perm(len(mp.A) - mp.W + 1) 685 | 686 | batchSize := (len(mp.A)-mp.W+1)/mp.Opts.NJobs + 1 687 | results := make([]chan *mpResult, mp.Opts.NJobs) 688 | for i := 0; i < mp.Opts.NJobs; i++ { 689 | results[i] = make(chan *mpResult) 690 | } 691 | 692 | // go routine to continually check for results on the slice of channels 693 | // for each batch kicked off. This merges the results of the batched go 694 | // routines by picking the lowest value in each batch's matrix profile and 695 | // updating the matrix profile index. 
696 | var err error 697 | done := make(chan bool) 698 | go func() { 699 | err = mp.mergeMPResults(results, true) 700 | done <- true 701 | }() 702 | 703 | // kick off multiple go routines to process a batch of rows returning back 704 | // the matrix profile for that batch and any error encountered 705 | var wg sync.WaitGroup 706 | wg.Add(mp.Opts.NJobs) 707 | for batch := 0; batch < mp.Opts.NJobs; batch++ { 708 | go func(idx int) { 709 | results[idx] <- mp.stampBatch(idx, batchSize, mp.Opts.SamplePct, randIdx, &wg) 710 | }(batch) 711 | } 712 | wg.Wait() 713 | 714 | // waits for all results to be read and merged before returning success 715 | <-done 716 | 717 | return err 718 | } 719 | 720 | // stampBatch processes a batch set of rows in a matrix profile calculation 721 | func (mp MatrixProfile) stampBatch(idx, batchSize int, sample float64, randIdx []int, wg *sync.WaitGroup) *mpResult { 722 | defer wg.Done() 723 | if idx*batchSize+mp.W > len(mp.A) { 724 | // got an index larger than mp.A so ignore 725 | return &mpResult{} 726 | } 727 | 728 | // initialize this batch's matrix profile results 729 | result := &mpResult{ 730 | MP: make([]float64, mp.N-mp.W+1), 731 | Idx: make([]int, mp.N-mp.W+1), 732 | } 733 | for i := 0; i < len(mp.MP); i++ { 734 | result.MP[i] = math.Inf(1) 735 | result.Idx[i] = math.MaxInt64 736 | } 737 | 738 | var err error 739 | profile := make([]float64, len(result.MP)) 740 | fft := fourier.NewFFT(mp.N) 741 | for i := 0; i < int(float64(batchSize)*sample); i++ { 742 | if idx*batchSize+i >= len(randIdx) { 743 | break 744 | } 745 | if err = mp.distanceProfile(randIdx[idx*batchSize+i], profile, fft); err != nil { 746 | return &mpResult{nil, nil, nil, nil, err} 747 | } 748 | for j := 0; j < len(profile); j++ { 749 | if profile[j] <= result.MP[j] { 750 | result.MP[j] = profile[j] 751 | result.Idx[j] = randIdx[idx*batchSize+i] 752 | } 753 | } 754 | } 755 | return result 756 | } 757 | 758 | // stomp is an optimization on the STAMP approach reducing the 
runtime from O(n^2logn) 759 | // down to O(n^2). This is an ordered approach, since the sliding dot product or cross 760 | // correlation can be easily updated for the next sliding window, if the previous window 761 | // dot product is available. This should also greatly reduce the number of memory 762 | // allocations needed to compute an arbitrary timeseries length. 763 | func (mp *MatrixProfile) stomp() error { 764 | if err := mp.initCaches(); err != nil { 765 | return err 766 | } 767 | 768 | mp.MP = make([]float64, mp.N-mp.W+1) 769 | mp.Idx = make([]int, mp.N-mp.W+1) 770 | for i := 0; i < len(mp.MP); i++ { 771 | mp.MP[i] = math.Inf(1) 772 | mp.Idx[i] = math.MaxInt64 773 | } 774 | 775 | batchSize := (len(mp.A)-mp.W+1)/mp.Opts.NJobs + 1 776 | results := make([]chan *mpResult, mp.Opts.NJobs) 777 | for i := 0; i < mp.Opts.NJobs; i++ { 778 | results[i] = make(chan *mpResult) 779 | } 780 | 781 | // go routine to continually check for results on the slice of channels 782 | // for each batch kicked off. This merges the results of the batched go 783 | // routines by picking the lowest value in each batch's matrix profile and 784 | // updating the matrix profile index. 785 | var err error 786 | done := make(chan bool) 787 | go func() { 788 | err = mp.mergeMPResults(results, true) 789 | done <- true 790 | }() 791 | 792 | // kick off multiple go routines to process a batch of rows returning back 793 | // the matrix profile for that batch and any error encountered 794 | var wg sync.WaitGroup 795 | wg.Add(mp.Opts.NJobs) 796 | for batch := 0; batch < mp.Opts.NJobs; batch++ { 797 | go func(idx int) { 798 | results[idx] <- mp.stompBatch(idx, batchSize, &wg) 799 | }(batch) 800 | } 801 | wg.Wait() 802 | 803 | // waits for all results to be read and merged before returning success 804 | <-done 805 | 806 | return err 807 | } 808 | 809 | // stompBatch processes a batch set of rows in matrix profile calculation. 
// stompBatch processes a batch set of rows in matrix profile calculation. Each batch
// will compute its first row's dot product with an FFT based cross correlation and
// build the subsequent matrix profile and matrix profile index using the stomp
// iterative algorithm. The first entry of every following row's dot product is
// recomputed directly from mp.A and mp.B.
//
// idx is the batch number and batchSize the number of rows per batch, so the batch
// covers rows starting at idx*batchSize. wg is marked done when the function returns.
// A batch that starts beyond the last subsequence of mp.A returns an empty result;
// a failure while converting dot products to distances returns a result holding only
// the error.
func (mp MatrixProfile) stompBatch(idx, batchSize int, wg *sync.WaitGroup) *mpResult {
	defer wg.Done()
	if idx*batchSize+mp.W > len(mp.A) {
		// got an index larger than mp.A so ignore
		return &mpResult{}
	}

	// compute for this batch the first row's sliding dot product
	fft := fourier.NewFFT(mp.N)
	dot := mp.crossCorrelate(mp.A[idx*batchSize:idx*batchSize+mp.W], fft)

	profile := make([]float64, len(dot))
	var err error
	if err = mp.calculateDistanceProfile(dot, idx*batchSize, profile); err != nil {
		return &mpResult{nil, nil, nil, nil, err}
	}

	// initialize this batch's matrix profile results
	result := &mpResult{
		MP:  make([]float64, mp.N-mp.W+1),
		Idx: make([]int, mp.N-mp.W+1),
	}

	// the first row is the best match seen so far for every position
	copy(result.MP, profile)
	for i := 0; i < len(profile); i++ {
		result.Idx[i] = idx * batchSize
	}

	// iteratively update for this batch each row's matrix profile and matrix
	// profile index
	var nextDotZero float64
	for i := 1; i < batchSize; i++ {
		if idx*batchSize+i-1 >= len(mp.A) || idx*batchSize+i+mp.W-1 >= len(mp.A) {
			// looking for an index beyond the length of mp.A so ignore and move on
			// with the current processed matrix profile
			break
		}
		// walk backwards so that dot[j-1] still holds the previous row's value
		// when dot[j] is updated in place: drop the product of the elements that
		// left the window and add the product of the ones that entered it
		for j := mp.N - mp.W; j > 0; j-- {
			dot[j] = dot[j-1] - mp.B[j-1]*mp.A[idx*batchSize+i-1] + mp.B[j+mp.W-1]*mp.A[idx*batchSize+i+mp.W-1]
		}

		// recompute the first cross correlation since the algorithm is only valid for
		// points after it. Previous optimization of using a precomputed cache ONLY applies
		// if we're doing a self-join and is invalidated with AB-joins of different time series
		nextDotZero = 0
		for k := 0; k < mp.W; k++ {
			nextDotZero += mp.A[idx*batchSize+i+k] * mp.B[k]
		}
		dot[0] = nextDotZero
		if err = mp.calculateDistanceProfile(dot, idx*batchSize+i, profile); err != nil {
			return &mpResult{nil, nil, nil, nil, err}
		}

		// element wise min update of the matrix profile and matrix profile index
		for j := 0; j < len(profile); j++ {
			if profile[j] <= result.MP[j] {
				result.MP[j] = profile[j]
				result.Idx[j] = idx*batchSize + i
			}
		}
	}
	return result
}
util.DiagBatchingScheme(lenA, mp.Opts.NJobs) 923 | results := make([]chan *mpResult, mp.Opts.NJobs) 924 | for i := 0; i < mp.Opts.NJobs; i++ { 925 | results[i] = make(chan *mpResult) 926 | } 927 | 928 | // go routine to continually check for results on the slice of channels 929 | // for each batch kicked off. This merges the results of the batched go 930 | // routines by picking the lowest value in each batch's matrix profile and 931 | // updating the matrix profile index. 932 | var err error 933 | done := make(chan bool) 934 | go func() { 935 | err = mp.mergeMPResults(results, mp.Opts.Euclidean) 936 | done <- true 937 | }() 938 | 939 | // kick off multiple go routines to process a batch of rows returning back 940 | // the matrix profile for that batch and any error encountered 941 | var wg sync.WaitGroup 942 | wg.Add(mp.Opts.NJobs) 943 | for batch := 0; batch < mp.Opts.NJobs; batch++ { 944 | go func(batchNum int) { 945 | b := batchScheme[batchNum] 946 | if mp.SelfJoin { 947 | results[batchNum] <- mp.mpxBatch(b.Idx, mua, siga, dfa, dga, b.Size, &wg) 948 | } else { 949 | results[batchNum] <- mp.mpxabBatch(b.Idx, mua, siga, dfa, dga, mub, sigb, dfb, dgb, b.Size, &wg) 950 | } 951 | }(batch) 952 | } 953 | wg.Wait() 954 | 955 | // waits for all results to be read and merged before returning success 956 | <-done 957 | 958 | if mp.SelfJoin || err != nil { 959 | return err 960 | } 961 | 962 | // setup for BA join 963 | batchScheme = util.DiagBatchingScheme(lenB, mp.Opts.NJobs) 964 | results = make([]chan *mpResult, mp.Opts.NJobs) 965 | for i := 0; i < mp.Opts.NJobs; i++ { 966 | results[i] = make(chan *mpResult) 967 | } 968 | 969 | // go routine to continually check for results on the slice of channels 970 | // for each batch kicked off. This merges the results of the batched go 971 | // routines by picking the lowest value in each batch's matrix profile and 972 | // updating the matrix profile index. 
973 | go func() { 974 | err = mp.mergeMPResults(results, mp.Opts.Euclidean) 975 | done <- true 976 | }() 977 | 978 | // kick off multiple go routines to process a batch of rows returning back 979 | // the matrix profile for that batch and any error encountered 980 | wg.Add(mp.Opts.NJobs) 981 | for batch := 0; batch < mp.Opts.NJobs; batch++ { 982 | go func(batchNum int) { 983 | b := batchScheme[batchNum] 984 | results[batchNum] <- mp.mpxbaBatch(b.Idx, mua, siga, dfa, dga, mub, sigb, dfb, dgb, b.Size, &wg) 985 | }(batch) 986 | } 987 | wg.Wait() 988 | 989 | // waits for all results to be read and merged before returning success 990 | <-done 991 | 992 | return err 993 | } 994 | 995 | // mpxBatch processes a batch set of rows in matrix profile calculation. 996 | func (mp MatrixProfile) mpxBatch(idx int, mu, sig, df, dg []float64, batchSize int, wg *sync.WaitGroup) *mpResult { 997 | defer wg.Done() 998 | exclZone := 1 // for seljoin we should at least get rid of neighboring points 999 | if mp.W/4 > exclZone { 1000 | exclZone = mp.W / 4 1001 | } 1002 | if idx+exclZone > len(mp.A)-mp.W+1 { 1003 | // got an index larger than max lag so ignore 1004 | return &mpResult{} 1005 | } 1006 | 1007 | mpr := &mpResult{ 1008 | MP: make([]float64, len(mp.A)-mp.W+1), 1009 | Idx: make([]int, len(mp.A)-mp.W+1), 1010 | } 1011 | for i := 0; i < len(mpr.MP); i++ { 1012 | mpr.MP[i] = -1 1013 | } 1014 | 1015 | var c, c_cmp float64 1016 | s1 := make([]float64, mp.W) 1017 | s2 := make([]float64, mp.W) 1018 | for diag := idx + exclZone; diag < idx+batchSize+exclZone; diag++ { 1019 | if diag >= len(mp.A)-mp.W+1 { 1020 | break 1021 | } 1022 | 1023 | //for i := 0; i < mp.W; i++ { 1024 | // c += (mp.A[diag+i] - mu[diag]) * (mp.A[i] - mu[0]) 1025 | //} 1026 | copy(s1, mp.A[diag:diag+mp.W]) 1027 | copy(s2, mp.A[:mp.W]) 1028 | floats.AddConst(-mu[diag], s1) 1029 | floats.AddConst(mu[0], s2) 1030 | c = floats.Dot(s1, s2) 1031 | 1032 | for offset := 0; offset < len(mp.A)-mp.W-diag+1; offset++ { 1033 | c += 
df[offset]*dg[offset+diag] + df[offset+diag]*dg[offset] 1034 | c_cmp = c * (sig[offset] * sig[offset+diag]) 1035 | if mp.Opts.RemapNegCorr && c_cmp < 0 { 1036 | c_cmp = -c_cmp 1037 | } 1038 | if c_cmp > mpr.MP[offset] { 1039 | mpr.MP[offset] = c_cmp 1040 | mpr.Idx[offset] = offset + diag 1041 | } 1042 | if c_cmp > mpr.MP[offset+diag] { 1043 | mpr.MP[offset+diag] = c_cmp 1044 | mpr.Idx[offset+diag] = offset 1045 | } 1046 | } 1047 | } 1048 | 1049 | if mp.Opts.Euclidean { 1050 | util.P2E(mpr.MP, mp.W) 1051 | } 1052 | 1053 | return mpr 1054 | } 1055 | 1056 | // mpxabBatch processes a batch set of rows in matrix profile AB join calculation. 1057 | func (mp MatrixProfile) mpxabBatch(idx int, mua, siga, dfa, dga, mub, sigb, dfb, dgb []float64, batchSize int, wg *sync.WaitGroup) *mpResult { 1058 | defer wg.Done() 1059 | lenA := len(mp.A) - mp.W + 1 1060 | lenB := len(mp.B) - mp.W + 1 1061 | 1062 | if idx > lenA { 1063 | // got an index larger than max lag so ignore 1064 | return &mpResult{} 1065 | } 1066 | 1067 | mpr := &mpResult{ 1068 | MP: make([]float64, lenA), 1069 | Idx: make([]int, lenA), 1070 | MPB: make([]float64, lenB), 1071 | IdxB: make([]int, lenB), 1072 | } 1073 | for i := 0; i < len(mpr.MP); i++ { 1074 | mpr.MP[i] = -1 1075 | } 1076 | for i := 0; i < len(mpr.MPB); i++ { 1077 | mpr.MPB[i] = -1 1078 | } 1079 | 1080 | var c, c_cmp float64 1081 | var offsetMax int 1082 | s1 := make([]float64, mp.W) 1083 | s2 := make([]float64, mp.W) 1084 | for diag := idx; diag < idx+batchSize; diag++ { 1085 | if diag >= lenA { 1086 | break 1087 | } 1088 | 1089 | //for i := 0; i < mp.W; i++ { 1090 | // c += (mp.A[diag+i] - mua[diag]) * (mp.B[i] - mub[0]) 1091 | //} 1092 | copy(s1, mp.A[diag:diag+mp.W]) 1093 | copy(s2, mp.B[:mp.W]) 1094 | floats.AddConst(-mua[diag], s1) 1095 | floats.AddConst(mub[0], s2) 1096 | c = floats.Dot(s1, s2) 1097 | 1098 | offsetMax = lenA - diag 1099 | if offsetMax > lenB { 1100 | offsetMax = lenB 1101 | } 1102 | 1103 | for offset := 0; offset < offsetMax; 
offset++ { 1104 | c += dfb[offset]*dga[offset+diag] + dfa[offset+diag]*dgb[offset] 1105 | c_cmp = c * (sigb[offset] * siga[offset+diag]) 1106 | if mp.Opts.RemapNegCorr && c_cmp < 0 { 1107 | c_cmp = -c_cmp 1108 | } 1109 | if c_cmp > mpr.MP[offset+diag] { 1110 | mpr.MP[offset+diag] = c_cmp 1111 | mpr.Idx[offset+diag] = offset 1112 | } 1113 | if c_cmp > mpr.MPB[offset] { 1114 | mpr.MPB[offset] = c_cmp 1115 | mpr.IdxB[offset] = offset + diag 1116 | } 1117 | } 1118 | } 1119 | 1120 | if mp.Opts.Euclidean { 1121 | util.P2E(mpr.MP, mp.W) 1122 | util.P2E(mpr.MPB, mp.W) 1123 | } 1124 | 1125 | return mpr 1126 | } 1127 | 1128 | // mpxbaBatch processes a batch set of rows in matrix profile calculation. 1129 | func (mp MatrixProfile) mpxbaBatch(idx int, mua, siga, dfa, dga, mub, sigb, dfb, dgb []float64, batchSize int, wg *sync.WaitGroup) *mpResult { 1130 | defer wg.Done() 1131 | lenA := len(mp.A) - mp.W + 1 1132 | lenB := len(mp.B) - mp.W + 1 1133 | 1134 | if idx > lenA { 1135 | // got an index larger than max lag so ignore 1136 | return &mpResult{} 1137 | } 1138 | 1139 | mpr := &mpResult{ 1140 | MP: make([]float64, lenA), 1141 | Idx: make([]int, lenA), 1142 | MPB: make([]float64, lenB), 1143 | IdxB: make([]int, lenB), 1144 | } 1145 | for i := 0; i < len(mpr.MP); i++ { 1146 | mpr.MP[i] = -1 1147 | } 1148 | for i := 0; i < len(mpr.MPB); i++ { 1149 | mpr.MPB[i] = -1 1150 | } 1151 | 1152 | var c, c_cmp float64 1153 | var offsetMax int 1154 | s1 := make([]float64, mp.W) 1155 | s2 := make([]float64, mp.W) 1156 | for diag := idx; diag < idx+batchSize; diag++ { 1157 | if diag >= lenB { 1158 | break 1159 | } 1160 | 1161 | //for i := 0; i < mp.W; i++ { 1162 | // c += (mp.B[diag+i] - mub[diag]) * (mp.A[i] - mua[0]) 1163 | //} 1164 | copy(s1, mp.B[diag:diag+mp.W]) 1165 | copy(s2, mp.A[:mp.W]) 1166 | floats.AddConst(-mub[diag], s1) 1167 | floats.AddConst(mua[0], s2) 1168 | c = floats.Dot(s1, s2) 1169 | 1170 | offsetMax = lenB - diag 1171 | if offsetMax > lenA { 1172 | offsetMax = lenA 1173 
| } 1174 | 1175 | for offset := 0; offset < offsetMax; offset++ { 1176 | c += dfa[offset]*dgb[offset+diag] + dfb[offset+diag]*dga[offset] 1177 | c_cmp = c * (siga[offset] * sigb[offset+diag]) 1178 | if mp.Opts.RemapNegCorr && c_cmp < 0 { 1179 | c_cmp = -c_cmp 1180 | } 1181 | if c_cmp > mpr.MP[offset] { 1182 | mpr.MP[offset] = c_cmp 1183 | mpr.Idx[offset] = offset + diag 1184 | } 1185 | if c_cmp > mpr.MPB[offset+diag] { 1186 | mpr.MPB[offset+diag] = c_cmp 1187 | mpr.IdxB[offset+diag] = offset 1188 | } 1189 | } 1190 | } 1191 | 1192 | if mp.Opts.Euclidean { 1193 | util.P2E(mpr.MP, mp.W) 1194 | util.P2E(mpr.MPB, mp.W) 1195 | } 1196 | 1197 | return mpr 1198 | } 1199 | 1200 | // Analyze performs the matrix profile computation and discovers various features 1201 | // from the profile such as motifs, discords, and segmentation. The results are 1202 | // visualized and saved into an output file. 1203 | func (mp MatrixProfile) Analyze(mo *MPOpts, ao *AnalyzeOpts) error { 1204 | var err error 1205 | 1206 | if err = mp.Compute(mo); err != nil { 1207 | return err 1208 | } 1209 | 1210 | if ao == nil { 1211 | ao = NewAnalyzeOpts() 1212 | } 1213 | 1214 | _, err = mp.DiscoverMotifs(ao.kMotifs, ao.rMotifs, 10, mp.W/2) 1215 | if err != nil { 1216 | return err 1217 | } 1218 | 1219 | _, err = mp.DiscoverDiscords(ao.kDiscords, mp.W/2) 1220 | if err != nil { 1221 | return err 1222 | } 1223 | 1224 | return mp.Visualize(ao.OutputFilename) 1225 | } 1226 | 1227 | // DiscoverMotifs will iteratively go through the matrix profile to find the 1228 | // top k motifs with a given radius. Only applies to self joins. 
1229 | func (mp *MatrixProfile) DiscoverMotifs(k int, radius float64, neighborCount, exclusionZone int) ([]MotifGroup, error) { 1230 | if !mp.SelfJoin { 1231 | return nil, errors.New("can only find top motifs if a self join is performed") 1232 | } 1233 | 1234 | if neighborCount == 0 { 1235 | neighborCount = 10 1236 | } 1237 | 1238 | var err error 1239 | var minDistIdx int 1240 | 1241 | motifs := make([]MotifGroup, k) 1242 | 1243 | mpCurrent, _, err := mp.ApplyAV() 1244 | if err != nil { 1245 | return nil, err 1246 | } 1247 | 1248 | if mp.BF == nil { 1249 | if err = mp.initCaches(); err != nil { 1250 | return nil, err 1251 | } 1252 | } 1253 | 1254 | prof := make([]float64, len(mpCurrent)) // stores minimum matrix profile distance between motif pairs 1255 | fft := fourier.NewFFT(mp.N) 1256 | var j int 1257 | 1258 | for j = 0; j < k; j++ { 1259 | // find minimum distance and index location 1260 | motifDistance := math.Inf(1) 1261 | minIdx := math.MaxInt64 1262 | for i, d := range mpCurrent { 1263 | if d < motifDistance { 1264 | motifDistance = d 1265 | minIdx = i 1266 | } 1267 | } 1268 | 1269 | if minIdx == math.MaxInt64 { 1270 | // can't find any more motifs so returning what we currently found 1271 | return motifs, nil 1272 | } 1273 | 1274 | // filter out all indexes that have a distance within r*motifDistance 1275 | motifSet := make(map[int]struct{}) 1276 | initialMotif := []int{minIdx, mp.Idx[minIdx]} 1277 | motifSet[minIdx] = struct{}{} 1278 | motifSet[mp.Idx[minIdx]] = struct{}{} 1279 | 1280 | if err = mp.distanceProfile(initialMotif[0], prof, fft); err != nil { 1281 | return nil, err 1282 | } 1283 | 1284 | // kill off any indices around the initial motif pair since they are 1285 | // trivial solutions 1286 | util.ApplyExclusionZone(prof, initialMotif[0], exclusionZone) 1287 | util.ApplyExclusionZone(prof, initialMotif[1], exclusionZone) 1288 | if j > 0 { 1289 | for k := j; k >= 0; k-- { 1290 | for _, idx := range motifs[k].Idx { 1291 | 
util.ApplyExclusionZone(prof, idx, exclusionZone) 1292 | } 1293 | } 1294 | } 1295 | // keep looking for the closest index to the current motif. Each 1296 | // index found will have an exclusion zone applied as to remove 1297 | // trivial solutions. This eventually exits when there's nothing 1298 | // found within the radius distance. 1299 | for { 1300 | minDistIdx = floats.MinIdx(prof) 1301 | 1302 | if prof[minDistIdx] < motifDistance*radius { 1303 | motifSet[minDistIdx] = struct{}{} 1304 | util.ApplyExclusionZone(prof, minDistIdx, exclusionZone) 1305 | } else { 1306 | // the closest distance in the profile is greater than the desired 1307 | // distance so break 1308 | break 1309 | } 1310 | // we hit our limit of neighborCount so stop searching 1311 | if len(motifSet) == neighborCount { 1312 | break 1313 | } 1314 | } 1315 | 1316 | // store the found motif indexes and create an exclusion zone around 1317 | // each index in the current matrix profile 1318 | motifs[j] = MotifGroup{ 1319 | Idx: make([]int, 0, len(motifSet)), 1320 | MinDist: motifDistance, 1321 | } 1322 | for idx := range motifSet { 1323 | motifs[j].Idx = append(motifs[j].Idx, idx) 1324 | util.ApplyExclusionZone(mpCurrent, idx, exclusionZone) 1325 | } 1326 | 1327 | // sorts the indices in ascending order 1328 | sort.IntSlice(motifs[j].Idx).Sort() 1329 | } 1330 | mp.Motifs = motifs[:j] 1331 | 1332 | return motifs[:j], nil 1333 | } 1334 | 1335 | // DiscoverDiscords finds the top k time series discords starting indexes from a computed 1336 | // matrix profile. Each discovery of a discord will apply an exclusion zone around 1337 | // the found index so that new discords can be discovered. 
1338 | func (mp *MatrixProfile) DiscoverDiscords(k int, exclusionZone int) ([]int, error) { 1339 | mpCurrent, _, err := mp.ApplyAV() 1340 | if err != nil { 1341 | return nil, err 1342 | } 1343 | 1344 | // if requested k is larger than length of the matrix profile, cap it 1345 | if k > len(mpCurrent) { 1346 | k = len(mpCurrent) 1347 | } 1348 | 1349 | discords := make([]int, k) 1350 | var maxVal float64 1351 | var maxIdx int 1352 | var i int 1353 | 1354 | for i = 0; i < k; i++ { 1355 | maxVal = 0 1356 | maxIdx = math.MaxInt64 1357 | for j, val := range mpCurrent { 1358 | if !math.IsInf(val, 1) && val > maxVal { 1359 | maxVal = val 1360 | maxIdx = j 1361 | } 1362 | } 1363 | 1364 | if maxIdx == math.MaxInt64 { 1365 | break 1366 | } 1367 | 1368 | discords[i] = maxIdx 1369 | util.ApplyExclusionZone(mpCurrent, maxIdx, exclusionZone) 1370 | } 1371 | mp.Discords = discords[:i] 1372 | 1373 | return discords[:i], nil 1374 | } 1375 | 1376 | // DiscoverSegments finds the the index where there may be a potential timeseries 1377 | // change. Returns the index of the potential change, value of the corrected 1378 | // arc curve score and the histogram of all the crossings for each index in 1379 | // the matrix profile index. 
This approach is based on the UCR paper on 1380 | // segmentation of timeseries using matrix profiles which can be found 1381 | // https://www.cs.ucr.edu/%7Eeamonn/Segmentation_ICDM.pdf 1382 | func (mp MatrixProfile) DiscoverSegments() (int, float64, []float64) { 1383 | histo := arcCurve(mp.Idx) 1384 | 1385 | for i := 0; i < len(histo); i++ { 1386 | if i == 0 || i == len(histo)-1 { 1387 | histo[i] = math.Min(1.0, float64(len(histo))) 1388 | } else { 1389 | histo[i] = math.Min(1.0, histo[i]/iac(float64(i), len(histo))) 1390 | } 1391 | } 1392 | 1393 | minIdx := math.MaxInt64 1394 | minVal := math.Inf(1) 1395 | for i := 0; i < len(histo); i++ { 1396 | if histo[i] < minVal { 1397 | minIdx = i 1398 | minVal = histo[i] 1399 | } 1400 | } 1401 | 1402 | return minIdx, float64(minVal), histo 1403 | } 1404 | 1405 | // Visualize creates a png of the matrix profile given a matrix profile. 1406 | func (mp MatrixProfile) Visualize(fn string) error { 1407 | sigPts := points(mp.A, len(mp.A)) 1408 | mpPts := points(mp.MP, len(mp.A)) 1409 | motifPts := make([][]plotter.XYs, len(mp.Motifs)) 1410 | discordPts := make([]plotter.XYs, len(mp.Discords)) 1411 | discordLabels := make([]string, len(mp.Discords)) 1412 | 1413 | for i := 0; i < len(mp.Motifs); i++ { 1414 | motifPts[i] = make([]plotter.XYs, len(mp.Motifs[i].Idx)) 1415 | } 1416 | 1417 | for i := 0; i < len(mp.Motifs); i++ { 1418 | for j, idx := range mp.Motifs[i].Idx { 1419 | motifPts[i][j] = points(mp.A[idx:idx+mp.W], mp.W) 1420 | } 1421 | } 1422 | 1423 | for i, idx := range mp.Discords { 1424 | discordPts[i] = points(mp.A[idx:idx+mp.W], mp.W) 1425 | discordLabels[i] = strconv.Itoa(idx) 1426 | } 1427 | 1428 | return plotMP(sigPts, mpPts, motifPts, discordPts, discordLabels, fn) 1429 | } 1430 | -------------------------------------------------------------------------------- /matrixprofile_bench_test.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | 
"math/rand" 5 | "testing" 6 | 7 | "github.com/matrix-profile-foundation/go-matrixprofile/siggen" 8 | "github.com/matrix-profile-foundation/go-matrixprofile/util" 9 | "gonum.org/v1/gonum/dsp/fourier" 10 | ) 11 | 12 | func setupData(numPoints int) []float64 { 13 | line := siggen.Line(0, 0, numPoints/2) 14 | ext := siggen.Line(0, 100, len(line)/2) 15 | ext2 := siggen.Line(0, 600, len(line)/2) 16 | sig := siggen.Append(line, ext, ext2) 17 | 18 | noise := siggen.Noise(10, len(sig)) 19 | sig = siggen.Add(sig, noise) 20 | 21 | return sig 22 | } 23 | 24 | func BenchmarkZNormalize(b *testing.B) { 25 | sig := setupData(1000) 26 | q := sig[:32] 27 | var err error 28 | var qnorm []float64 29 | for i := 0; i < b.N; i++ { 30 | qnorm, err = util.ZNormalize(q) 31 | if err != nil { 32 | b.Error(err) 33 | } 34 | if len(qnorm) < 1 { 35 | b.Error("expected at least one value from z-normalizing a timeseries") 36 | } 37 | } 38 | } 39 | 40 | func BenchmarkMovmeanstd(b *testing.B) { 41 | sig := setupData(1000) 42 | var err error 43 | var mean, std []float64 44 | for i := 0; i < b.N; i++ { 45 | mean, std, err = util.MovMeanStd(sig, 32) 46 | if err != nil { 47 | b.Error(err) 48 | } 49 | if len(std) < 1 { 50 | b.Error("expected at least one value from moving standard deviation of a timeseries") 51 | } 52 | if len(mean) < 1 { 53 | b.Error("expected at least one value from moving mean of a timeseries") 54 | } 55 | 56 | } 57 | } 58 | 59 | func BenchmarkCrossCorrelate(b *testing.B) { 60 | sig := setupData(1000) 61 | q := sig[:32] 62 | var err error 63 | var cc []float64 64 | 65 | mp, err := New(q, sig, 32) 66 | if err != nil { 67 | b.Error(err) 68 | } 69 | 70 | if err = mp.initCaches(); err != nil { 71 | b.Error(err) 72 | } 73 | 74 | fft := fourier.NewFFT(mp.N) 75 | for i := 0; i < b.N; i++ { 76 | cc = mp.crossCorrelate(q, fft) 77 | if len(cc) < 1 { 78 | b.Error("expected at least one value from cross correlation of a timeseries") 79 | } 80 | } 81 | } 82 | 83 | func BenchmarkMass(b *testing.B) { 
84 | sig := setupData(1000) 85 | var err error 86 | var q []float64 87 | 88 | mp, err := New(sig, sig, 32) 89 | if err != nil { 90 | b.Error(err) 91 | } 92 | 93 | if err = mp.initCaches(); err != nil { 94 | b.Error(err) 95 | } 96 | 97 | mprof := make([]float64, mp.N-mp.W+1) 98 | fft := fourier.NewFFT(mp.N) 99 | for i := 0; i < b.N; i++ { 100 | q = sig[:32] 101 | err = mp.mass(q, mprof, fft) 102 | if err != nil { 103 | b.Error(err) 104 | } 105 | if len(mprof) < 1 { 106 | b.Error("expected at least one value from matrix profile") 107 | } 108 | } 109 | } 110 | 111 | func BenchmarkDistanceProfile(b *testing.B) { 112 | sig := setupData(1000) 113 | var err error 114 | 115 | mp, err := New(sig, nil, 32) 116 | if err != nil { 117 | b.Error(err) 118 | } 119 | 120 | if err = mp.initCaches(); err != nil { 121 | b.Error(err) 122 | } 123 | 124 | mprof := make([]float64, mp.N-mp.W+1) 125 | fft := fourier.NewFFT(mp.N) 126 | for i := 0; i < b.N; i++ { 127 | err = mp.distanceProfile(0, mprof, fft) 128 | if err != nil { 129 | b.Error(err) 130 | } 131 | if len(mprof) < 1 { 132 | b.Error("expected at least one value from matrix profile") 133 | } 134 | } 135 | } 136 | 137 | func BenchmarkCalculateDistanceProfile(b *testing.B) { 138 | sig := setupData(1000) 139 | var err error 140 | 141 | mp, err := New(sig, nil, 32) 142 | if err != nil { 143 | b.Error(err) 144 | } 145 | 146 | if err = mp.initCaches(); err != nil { 147 | b.Error(err) 148 | } 149 | 150 | fft := fourier.NewFFT(mp.N) 151 | dot := mp.crossCorrelate(mp.A[:mp.W], fft) 152 | 153 | mprof := make([]float64, len(dot)) 154 | 155 | for i := 0; i < b.N; i++ { 156 | err = mp.calculateDistanceProfile(dot, 0, mprof) 157 | if err != nil { 158 | b.Error(err) 159 | } 160 | if len(mprof) < 1 { 161 | b.Error("expected at least one value from matrix profile") 162 | } 163 | } 164 | } 165 | 166 | func BenchmarkStmp(b *testing.B) { 167 | sig := setupData(1000) 168 | 169 | benchmarks := []struct { 170 | name string 171 | m int 172 | }{ 173 | 
{"m32_pts1k", 32}, 174 | {"m128_pts1k", 128}, 175 | } 176 | 177 | o := NewMPOpts() 178 | o.Algorithm = AlgoSTMP 179 | 180 | for _, bm := range benchmarks { 181 | b.Run(bm.name, func(b *testing.B) { 182 | mp, err := New(sig, nil, bm.m) 183 | if err != nil { 184 | b.Error(err) 185 | } 186 | 187 | for i := 0; i < b.N; i++ { 188 | err = mp.Compute(o) 189 | if err != nil { 190 | b.Error(err) 191 | } 192 | if len(mp.MP) < 1 || len(mp.Idx) < 1 { 193 | b.Error("expected at least one value from matrix profile and matrix profile index") 194 | } 195 | } 196 | }) 197 | } 198 | } 199 | 200 | func BenchmarkStamp(b *testing.B) { 201 | sig := setupData(1000) 202 | 203 | mp, err := New(sig, nil, 32) 204 | if err != nil { 205 | b.Error(err) 206 | } 207 | 208 | o := NewMPOpts() 209 | o.Algorithm = AlgoSTAMP 210 | o.SamplePct = 1.0 211 | o.NJobs = 2 212 | 213 | b.Run("m32_p2_pts1k", func(b *testing.B) { 214 | for i := 0; i < b.N; i++ { 215 | err = mp.Compute(o) 216 | if err != nil { 217 | b.Error(err) 218 | } 219 | if len(mp.MP) < 1 || len(mp.Idx) < 1 { 220 | b.Error("expected at least one value from matrix profile and matrix profile index") 221 | } 222 | } 223 | }) 224 | } 225 | 226 | func BenchmarkStomp(b *testing.B) { 227 | benchmarks := []struct { 228 | name string 229 | m int 230 | parallelism int 231 | numPoints int 232 | }{ 233 | {"m128_p1_pts__1024", 128, 1, 1024}, 234 | {"m128_p2_pts__4096", 128, 2, 4096}, 235 | {"m128_p2_pts_16384", 128, 2, 16384}, 236 | {"m128_p4_pts_16384", 128, 4, 16384}, 237 | {"m1024_p2_pts_16384", 1024, 2, 16384}, 238 | } 239 | 240 | o := NewMPOpts() 241 | o.Algorithm = AlgoSTOMP 242 | 243 | for _, bm := range benchmarks { 244 | b.Run(bm.name, func(b *testing.B) { 245 | sig := setupData(bm.numPoints) 246 | mp, err := New(sig, nil, bm.m) 247 | if err != nil { 248 | b.Error(err) 249 | } 250 | 251 | o.NJobs = bm.parallelism 252 | for i := 0; i < b.N; i++ { 253 | err = mp.Compute(o) 254 | if err != nil { 255 | b.Error(err) 256 | } 257 | if len(mp.MP) < 1 
|| len(mp.Idx) < 1 { 258 | b.Error("expected at least one value from matrix profile and matrix profile index") 259 | } 260 | } 261 | }) 262 | } 263 | } 264 | 265 | func BenchmarkMpx(b *testing.B) { 266 | benchmarks := []struct { 267 | name string 268 | m int 269 | parallelism int 270 | numPoints int 271 | }{ 272 | {"m128_p1_pts__1024", 128, 1, 1024}, 273 | {"m128_p2_pts__4096", 128, 2, 4096}, 274 | {"m128_p2_pts_16384", 128, 2, 16384}, 275 | {"m128_p4_pts_16384", 128, 4, 16384}, 276 | {"m1024_p2_pts_16384", 1024, 2, 16384}, 277 | } 278 | 279 | o := NewMPOpts() 280 | o.Algorithm = AlgoMPX 281 | 282 | for _, bm := range benchmarks { 283 | b.Run(bm.name, func(b *testing.B) { 284 | sig := setupData(bm.numPoints) 285 | mp, err := New(sig, nil, bm.m) 286 | if err != nil { 287 | b.Error(err) 288 | } 289 | 290 | o.NJobs = bm.parallelism 291 | for i := 0; i < b.N; i++ { 292 | err = mp.Compute(o) 293 | if err != nil { 294 | b.Error(err) 295 | } 296 | if len(mp.MP) < 1 || len(mp.Idx) < 1 { 297 | b.Error("expected at least one value from matrix profile and matrix profile index") 298 | } 299 | } 300 | }) 301 | } 302 | } 303 | 304 | func BenchmarkUpdate(b *testing.B) { 305 | sig := setupData(5000) 306 | mp, err := New(sig, nil, 32) 307 | if err != nil { 308 | b.Error(err) 309 | } 310 | 311 | err = mp.Compute(NewMPOpts()) 312 | if err != nil { 313 | b.Error(err) 314 | } 315 | 316 | if len(mp.MP) < 1 || len(mp.Idx) < 1 { 317 | b.Error("expected at least one value from matrix profile and matrix profile index") 318 | } 319 | 320 | for i := 0; i < b.N; i++ { 321 | err = mp.Update([]float64{rand.Float64() - 0.5}) 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /matrixprofile_test.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "math" 5 | "os" 6 | "sort" 7 | "testing" 8 | 9 | "github.com/matrix-profile-foundation/go-matrixprofile/av" 10 | 
"gonum.org/v1/gonum/dsp/fourier" 11 | ) 12 | 13 | func TestNew(t *testing.T) { 14 | testdata := []struct { 15 | a []float64 16 | b []float64 17 | m int 18 | expectedErr bool 19 | }{ 20 | {[]float64{}, []float64{}, 2, true}, 21 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, true}, 22 | {[]float64{1, 1, 1, 1, 1}, nil, 2, false}, 23 | {[]float64{1, 1, 1, 1, 1}, nil, 6, true}, 24 | {[]float64{1, 1}, []float64{1, 1, 1, 1, 1, 1, 1, 1}, 3, true}, 25 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, true}, 26 | {[]float64{1, 2, 3, 4, 5}, []float64{1, 1, 1, 1, 1}, 2, false}, 27 | {[]float64{1, 2, 3, 4, 5}, []float64{1, 1, 1, 1, 1}, 1, true}, 28 | {[]float64{1, 2, 3, 4, 5}, []float64{1, 1, 1, 1, 1}, 4, false}, 29 | } 30 | 31 | for _, d := range testdata { 32 | _, err := New(d.a, d.b, d.m) 33 | if d.expectedErr && err == nil { 34 | t.Errorf("Expected an error, but got none for %v", d) 35 | return 36 | } 37 | if !d.expectedErr && err != nil { 38 | t.Errorf("Expected no error, but got %v for %v", err, d) 39 | return 40 | } 41 | } 42 | } 43 | 44 | func TestApplyAVDefault(t *testing.T) { 45 | testdata := []struct { 46 | a []float64 47 | w int 48 | }{ 49 | {[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 4}, 50 | } 51 | 52 | var mp *MatrixProfile 53 | var err error 54 | var outab []float64 55 | for _, d := range testdata { 56 | mp, err = New(d.a, nil, d.w) 57 | if err != nil { 58 | t.Errorf("%v", err) 59 | break 60 | } 61 | if err = mp.Compute(NewMPOpts()); err != nil { 62 | t.Errorf("%v", err) 63 | break 64 | } 65 | 66 | mp.AV = av.Default 67 | outab, _, err = mp.ApplyAV() 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | 72 | if len(outab) != len(mp.MP) { 73 | t.Errorf("Expected %d elements, but got %d, %+v", len(mp.MP), len(outab), d) 74 | break 75 | } 76 | for i := 0; i < len(outab); i++ { 77 | if math.Abs(float64(outab[i]-mp.MP[i])) > 1e-7 { 78 | t.Errorf("Expected %v,\nbut got\n%v for %+v", mp.MP, outab, d) 79 | break 80 | } 81 | } 82 | } 83 | } 84 | 85 | func 
TestSave(t *testing.T) { 86 | ts := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9} 87 | w := 3 88 | p, err := New(ts, nil, w) 89 | p.Compute(NewMPOpts()) 90 | filepath := "./mp.json" 91 | err = p.Save(filepath, "json") 92 | if err != nil { 93 | t.Errorf("Received error while saving matrix profile, %v", err) 94 | } 95 | if err = os.Remove(filepath); err != nil { 96 | t.Errorf("Could not remove file, %s, %v", filepath, err) 97 | } 98 | } 99 | 100 | func TestLoad(t *testing.T) { 101 | ts := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9} 102 | w := 3 103 | p, err := New(ts, nil, w) 104 | p.Compute(NewMPOpts()) 105 | filepath := "./mp.json" 106 | if err = p.Save(filepath, "json"); err != nil { 107 | t.Errorf("Received error while saving matrix profile, %v", err) 108 | } 109 | 110 | newP := &MatrixProfile{} 111 | if err = newP.Load(filepath, "json"); err != nil { 112 | t.Errorf("Failed to load %s, %v", filepath, err) 113 | } 114 | 115 | if err = os.Remove(filepath); err != nil { 116 | t.Errorf("Could not remove file, %s, %v", filepath, err) 117 | } 118 | 119 | if newP.W != w { 120 | t.Errorf("Expected window of %d, but got %d", w, newP.W) 121 | } 122 | if len(newP.A) != len(ts) { 123 | t.Errorf("Expected timeseries length of %d, but got %d", len(ts), len(newP.A)) 124 | } 125 | 126 | } 127 | 128 | func TestMPDist(t *testing.T) { 129 | testData := []struct { 130 | a []float64 131 | b []float64 132 | m int 133 | expected float64 134 | }{ 135 | { 136 | []float64{1, 2, 3, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 137 | []float64{0, 0, 0, 0, 0, 0, 0, 0, -1, -2, -3, -2, -1, 0, 1, 2, 1, 0}, 138 | 5, 139 | 0, 140 | }, 141 | { 142 | []float64{1, 2, 3, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 143 | []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0}, 144 | 5, 145 | 0, 146 | }, 147 | } 148 | for _, d := range testData { 149 | res, err := MPDist(d.a, d.b, d.m, nil) 150 | if err != nil { 151 | t.Errorf("Did not expect to get an error, %v", err) 152 | } 153 | if 
math.Abs(res-d.expected) > 1e-4 { 154 | t.Errorf("Expected %.6f, but got %.6f", d.expected, res) 155 | } 156 | } 157 | } 158 | 159 | func TestCrossCorrelate(t *testing.T) { 160 | var err error 161 | var out []float64 162 | var mp *MatrixProfile 163 | 164 | testdata := []struct { 165 | q []float64 166 | t []float64 167 | expected []float64 168 | }{ 169 | {[]float64{1, 1}, []float64{1, 1, 1, 1, 1}, []float64{2, 2, 2, 2}}, 170 | {[]float64{1, 2}, []float64{1, 2, 3, 3, 2, 1}, []float64{5, 8, 9, 7, 4}}, 171 | {[]float64{1, 2}, []float64{1, 2, 3, 3, 2, 1, 1}, []float64{5, 8, 9, 7, 4, 3}}, 172 | {[]float64{1, 2, 1}, []float64{1, 2, 3, 4, 3, 2, 1}, []float64{8, 12, 14, 12, 8}}, 173 | {[]float64{1, 2, 1}, []float64{1, 2, 3, 4, 3, 2, 1, 1}, []float64{8, 12, 14, 12, 8, 5}}, 174 | } 175 | 176 | for _, d := range testdata { 177 | mp, err = New(d.q, d.t, len(d.q)) 178 | if err != nil { 179 | if d.expected == nil { 180 | // Got an error while creating a new matrix profile 181 | continue 182 | } else { 183 | t.Errorf("did not expect to get an error , %v, for %v", err, d) 184 | return 185 | } 186 | } 187 | if err = mp.initCaches(); err != nil { 188 | t.Errorf("Failed to initialize cache, %v", err) 189 | } 190 | 191 | fft := fourier.NewFFT(mp.N) 192 | out = mp.crossCorrelate(d.q, fft) 193 | if err != nil && d.expected == nil { 194 | // Got an error while z normalizing and expected an error 195 | continue 196 | } 197 | if d.expected == nil { 198 | t.Errorf("Expected an invalid cross correlation calculation, %v", d) 199 | return 200 | } 201 | if err != nil { 202 | t.Errorf("Did not expect error, %v", err) 203 | return 204 | } 205 | if len(out) != len(d.expected) { 206 | t.Errorf("Expected %d elements, but got %d, %v", len(d.expected), len(out), d) 207 | return 208 | } 209 | for i := 0; i < len(out); i++ { 210 | if math.Abs(out[i]-d.expected[i]) > 1e-7 { 211 | t.Errorf("Expected %v, but got %v for %v", d.expected, out, d) 212 | break 213 | } 214 | } 215 | 216 | } 217 | } 218 | 219 | 
func TestMass(t *testing.T) { 220 | var err error 221 | var mp *MatrixProfile 222 | var out []float64 223 | 224 | testdata := []struct { 225 | q []float64 226 | t []float64 227 | expected []float64 228 | }{ 229 | {[]float64{}, []float64{}, nil}, 230 | {[]float64{1, 1, 1, 1, 1}, []float64{}, nil}, 231 | {[]float64{}, []float64{1, 1, 1, 1, 1}, nil}, 232 | {[]float64{1, 1}, []float64{1, 1, 1, 1, 1}, nil}, 233 | {[]float64{0, 1, 1, 0}, []float64{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, []float64{0, 2.8284271247461903, 4, 2.8284271247461903, 0, 2.82842712474619, 4, 2.8284271247461903, 0}}, 234 | {[]float64{0, 1, 1, 0}, []float64{1e-6, 1e-5, 1e-5, 1e-5, 5, 5, 1e-5, 1e-5, 1e-5, 1e-5, 7, 7, 1e-5, 1e-5}, 235 | []float64{1.838803373328544, 3.552295335908461, 2.828427124746192, 6.664001874625056e-08, 2.8284271247461885, 236 | 3.5522953359084606, 2.8284271366321914, 3.5522953359084606, 2.82842712474619, 0, 2.82842712474619070}}, 237 | } 238 | 239 | for _, d := range testdata { 240 | mp, err = New(d.q, d.t, len(d.q)) 241 | if err != nil && d.expected == nil { 242 | // Got an error while creating a new matrix profile 243 | continue 244 | } 245 | if err = mp.initCaches(); err != nil { 246 | t.Errorf("Failed to initialize cache, %v", err) 247 | } 248 | out = make([]float64, mp.N-mp.W+1) 249 | fft := fourier.NewFFT(mp.N) 250 | err = mp.mass(d.q, out, fft) 251 | if err != nil && d.expected == nil { 252 | // Got an error while z normalizing and expected an error 253 | continue 254 | } 255 | if d.expected == nil { 256 | t.Errorf("Expected an invalid mass calculation, %v", d) 257 | return 258 | } 259 | if err != nil { 260 | t.Errorf("Did not expect error, %v", err) 261 | return 262 | } 263 | if len(out) != len(d.expected) { 264 | t.Errorf("Expected %d elements, but got %d, %v", len(d.expected), len(out), d) 265 | return 266 | } 267 | for i := 0; i < len(out); i++ { 268 | if math.IsNaN(out[i]) { 269 | t.Errorf("Got NaN in output, %v", out) 270 | break 271 | } 272 | if 
math.Abs(out[i]-d.expected[i]) > 1e-7 { 273 | t.Errorf("Expected %v\n, but got %v\nfor %v", d.expected, out, d) 274 | break 275 | } 276 | } 277 | } 278 | } 279 | 280 | func TestDistanceProfile(t *testing.T) { 281 | var err error 282 | var mprof []float64 283 | var mp *MatrixProfile 284 | 285 | testdata := []struct { 286 | q []float64 287 | t []float64 288 | m int 289 | idx int 290 | expectedMP []float64 291 | }{ 292 | {[]float64{}, []float64{}, 2, 0, nil}, 293 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, 0, nil}, 294 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, 0, nil}, 295 | {[]float64{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, nil, 4, 0, []float64{math.Inf(1), math.Inf(1), 4, 2.8284271247461903, 0, 2.8284271247461903, 4, 2.8284271247461903, 0}}, 296 | {[]float64{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, nil, 4, 9, nil}, 297 | } 298 | 299 | for _, d := range testdata { 300 | mp, err = New(d.q, d.t, d.m) 301 | if err != nil && d.expectedMP == nil { 302 | // Got an error while creating a new matrix profile 303 | continue 304 | } 305 | 306 | if err = mp.initCaches(); err != nil { 307 | t.Errorf("Failed to initialize cache, %v", err) 308 | } 309 | 310 | mprof = make([]float64, mp.N-mp.W+1) 311 | fft := fourier.NewFFT(mp.N) 312 | err = mp.distanceProfile(d.idx, mprof, fft) 313 | if err != nil && d.expectedMP == nil { 314 | // Got an error while z normalizing and expected an error 315 | continue 316 | } 317 | if d.expectedMP == nil { 318 | t.Errorf("Expected an invalid distance profile calculation, %+v", d) 319 | return 320 | } 321 | if err != nil { 322 | t.Errorf("Did not expect error, %v\n%+v", err, d) 323 | return 324 | } 325 | if len(mprof) != len(d.expectedMP) { 326 | t.Errorf("Expected %d elements, but got %d\n%+v", len(d.expectedMP), len(mprof), d) 327 | return 328 | } 329 | for i := 0; i < len(mprof); i++ { 330 | if math.Abs(mprof[i]-d.expectedMP[i]) > 1e-7 { 331 | t.Errorf("Expected\n%.7f, but got\n%.7f for\n%+v", d.expectedMP, mprof, d) 332 | break 333 | } 334 | } 335 | 
} 336 | } 337 | 338 | func TestCalculateDistanceProfile(t *testing.T) { 339 | var err error 340 | var mprof []float64 341 | var mp *MatrixProfile 342 | 343 | testdata := []struct { 344 | q []float64 345 | t []float64 346 | m int 347 | idx int 348 | expectedMP []float64 349 | }{ 350 | {[]float64{}, []float64{}, 2, 0, nil}, 351 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, 0, nil}, 352 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, 0, nil}, 353 | {[]float64{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, nil, 4, 0, []float64{math.Inf(1), math.Inf(1), 4, 2.8284271247461903, 0, 2.8284271247461903, 4, 2.8284271247461903, 0}}, 354 | {[]float64{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, nil, 4, 9, nil}, 355 | } 356 | 357 | for _, d := range testdata { 358 | mp, err = New(d.q, d.t, d.m) 359 | if err != nil && d.expectedMP == nil { 360 | // Got an error while creating a new matrix profile 361 | continue 362 | } 363 | 364 | if err = mp.initCaches(); err != nil { 365 | t.Errorf("Failed to initialize cache, %v", err) 366 | } 367 | 368 | fft := fourier.NewFFT(mp.N) 369 | dot := mp.crossCorrelate(mp.A[:mp.W], fft) 370 | 371 | mprof = make([]float64, mp.N-mp.W+1) 372 | err = mp.calculateDistanceProfile(dot, d.idx, mprof) 373 | if err != nil { 374 | if d.expectedMP == nil { 375 | // Got an error while z normalizing and expected an error 376 | continue 377 | } else { 378 | t.Errorf("Did not expect to get error, %v, for %v", err, d) 379 | return 380 | } 381 | } 382 | if d.expectedMP == nil { 383 | t.Errorf("Expected an invalid distance profile calculation, %+v", d) 384 | return 385 | } 386 | if err != nil { 387 | t.Errorf("Did not expect error, %v\n%+v", err, d) 388 | return 389 | } 390 | if len(mprof) != len(d.expectedMP) { 391 | t.Errorf("Expected %d elements, but got %d\n%+v", len(d.expectedMP), len(mprof), d) 392 | return 393 | } 394 | for i := 0; i < len(mprof); i++ { 395 | if math.Abs(mprof[i]-d.expectedMP[i]) > 1e-7 { 396 | t.Errorf("Expected\n%.7f, but got\n%.7f for\n%+v", d.expectedMP, mprof, 
d) 397 | break 398 | } 399 | } 400 | } 401 | 402 | } 403 | 404 | func TestComputeStmp(t *testing.T) { 405 | var err error 406 | var mp *MatrixProfile 407 | 408 | testdata := []struct { 409 | q []float64 410 | t []float64 411 | m int 412 | expectedMP []float64 413 | expectedMPIdx []int 414 | }{ 415 | {[]float64{}, []float64{}, 2, nil, nil}, 416 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, nil, nil}, 417 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, nil, nil}, 418 | {[]float64{1, 1}, []float64{1, 1, 1, 1, 1}, 2, nil, nil}, 419 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 420 | []float64{0.014355034678331376, 0.014355034678269504, 0.0291386974835963, 0.029138697483626783, 0.01435503467830044, 0.014355034678393249, 0.029138697483504856, 0.029138697483474377, 0.0291386974835963}, 421 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 422 | } 423 | 424 | for _, d := range testdata { 425 | mp, err = New(d.q, d.t, d.m) 426 | if err != nil && d.expectedMP == nil { 427 | // Got an error while creating a new matrix profile 428 | continue 429 | } 430 | 431 | o := NewMPOpts() 432 | o.Algorithm = AlgoSTMP 433 | 434 | err = mp.Compute(o) 435 | if err != nil && d.expectedMP == nil { 436 | // Got an error while z normalizing and expected an error 437 | continue 438 | } 439 | if d.expectedMP == nil { 440 | t.Errorf("Expected an invalid STMP calculation, %+v", d) 441 | return 442 | } 443 | if err != nil { 444 | t.Errorf("Did not expect error, %v, %+v", err, d) 445 | return 446 | } 447 | if len(mp.MP) != len(d.expectedMP) { 448 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedMP), len(mp.MP), d) 449 | return 450 | } 451 | for i := 0; i < len(mp.MP); i++ { 452 | if math.Abs(mp.MP[i]-d.expectedMP[i]) > 1e-7 { 453 | t.Errorf("Expected\n%v, but got\n%v for\n%+v", d.expectedMP, mp.MP, d) 454 | break 455 | } 456 | } 457 | for i := 0; i < len(mp.Idx); i++ { 458 | if math.Abs(float64(mp.Idx[i]-d.expectedMPIdx[i])) > 1e-7 { 459 | t.Errorf("Expected %v,\nbut got\n%v 
for\n%+v", d.expectedMPIdx, mp.Idx, d) 460 | break 461 | } 462 | } 463 | } 464 | } 465 | 466 | func TestComputeStamp(t *testing.T) { 467 | var err error 468 | var mp *MatrixProfile 469 | 470 | testdata := []struct { 471 | q []float64 472 | t []float64 473 | m int 474 | sample float64 475 | expectedMP []float64 476 | expectedMPIdx []int 477 | }{ 478 | {[]float64{}, []float64{}, 2, 1.0, nil, nil}, 479 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, 1.0, nil, nil}, 480 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, 1.0, nil, nil}, 481 | {[]float64{1, 1}, []float64{1, 1, 1, 1, 1}, 2, 1.0, nil, nil}, 482 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 1.0, 483 | []float64{0.014355034678331376, 0.014355034678269504, 0.0291386974835963, 0.029138697483626783, 0.01435503467830044, 0.014355034678393249, 0.029138697483504856, 0.029138697483474377, 0.0291386974835963}, 484 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 485 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 0.0, nil, nil}, 486 | } 487 | 488 | for _, d := range testdata { 489 | mp, err = New(d.q, d.t, d.m) 490 | if err != nil && d.expectedMP == nil { 491 | // Got an error while creating a new matrix profile 492 | continue 493 | } 494 | 495 | o := NewMPOpts() 496 | o.Algorithm = AlgoSTAMP 497 | o.SamplePct = d.sample 498 | 499 | err = mp.Compute(o) 500 | 501 | if err != nil && d.expectedMP == nil { 502 | // Got an error while z normalizing and expected an error 503 | continue 504 | } 505 | if d.expectedMP == nil { 506 | t.Errorf("Expected an invalid STAMP calculation, %+v", d) 507 | return 508 | } 509 | if err != nil { 510 | t.Errorf("Did not expect error, %v, %+v", err, d) 511 | return 512 | } 513 | if len(mp.MP) != len(d.expectedMP) { 514 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedMP), len(mp.MP), d) 515 | return 516 | } 517 | for i := 0; i < len(mp.MP); i++ { 518 | if math.Abs(mp.MP[i]-d.expectedMP[i]) > 1e-7 { 519 | t.Errorf("Expected\n%v, but got\n%v for\n%+v", 
d.expectedMP, mp.MP, d) 520 | break 521 | } 522 | } 523 | for i := 0; i < len(mp.Idx); i++ { 524 | if math.Abs(float64(mp.Idx[i]-d.expectedMPIdx[i])) > 1e-7 { 525 | t.Errorf("Expected %v,\nbut got\n%v for\n%+v", d.expectedMPIdx, mp.Idx, d) 526 | break 527 | } 528 | } 529 | 530 | } 531 | } 532 | 533 | func TestComputeStomp(t *testing.T) { 534 | var err error 535 | var mp *MatrixProfile 536 | 537 | testdata := []struct { 538 | q []float64 539 | t []float64 540 | m int 541 | p int 542 | expectedMP []float64 543 | expectedMPIdx []int 544 | }{ 545 | {[]float64{}, []float64{}, 2, 1, nil, nil}, 546 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, 1, nil, nil}, 547 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, 1, nil, nil}, 548 | {[]float64{1, 1}, []float64{1, 1, 1, 1, 1}, 2, 1, []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, []int{math.MaxInt64, math.MaxInt64, math.MaxInt64, math.MaxInt64}}, 549 | {[]float64{1, 1, 1, 1, 1, 1, 1, 1}, []float64{1, 1, 1, 1, 1}, 2, 1, []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, []int{math.MaxInt64, math.MaxInt64, math.MaxInt64, math.MaxInt64}}, 550 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, 4, 1, 551 | []float64{0, 0, 0, 0, 0, 0, 0, 0, 0}, 552 | []int{0, 1, 2, 3, 4, 5, 6, 7, 8}}, 553 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 1, 554 | []float64{0.014355034678331376, 0.014355034678269504, 0.0291386974835963, 0.029138697483626783, 0.01435503467830044, 0.014355034678393249, 0.029138697483504856, 0.029138697483474377, 0.0291386974835963}, 555 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 556 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 2, 557 | []float64{0.014355034678331376, 0.014355034678269504, 0.0291386974835963, 0.029138697483626783, 0.01435503467830044, 0.014355034678393249, 0.029138697483504856, 0.029138697483474377, 0.0291386974835963}, 558 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 559 | {[]float64{0, 0.99, 
1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 4, 560 | []float64{0.014355034678331376, 0.014355034678269504, 0.0291386974835963, 0.029138697483626783, 0.01435503467830044, 0.014355034678393249, 0.029138697483504856, 0.029138697483474377, 0.0291386974835963}, 561 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 562 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 100, 563 | []float64{0.014355034678331376, 0.014355034678269504, 0.0291386974835963, 0.029138697483626783, 0.01435503467830044, 0.014355034678393249, 0.029138697483504856, 0.029138697483474377, 0.0291386974835963}, 564 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 565 | } 566 | 567 | for _, d := range testdata { 568 | mp, err = New(d.q, d.t, d.m) 569 | if err != nil { 570 | if d.expectedMP == nil { 571 | // Got an error while creating a new matrix profile 572 | continue 573 | } else { 574 | t.Errorf("Did not expect an error, %v, while creating new mp for %v", err, d) 575 | return 576 | } 577 | } 578 | 579 | o := NewMPOpts() 580 | o.Algorithm = AlgoSTOMP 581 | err = mp.Compute(o) 582 | if err != nil { 583 | if d.expectedMP == nil { 584 | // Got an error while z normalizing and expected an error 585 | continue 586 | } else { 587 | t.Errorf("Did not expect an error, %v, while calculating for %v", err, d) 588 | break 589 | } 590 | } 591 | if d.expectedMP == nil { 592 | t.Errorf("Expected an invalid STOMP calculation, %+v", d) 593 | break 594 | } 595 | 596 | if len(mp.MP) != len(d.expectedMP) { 597 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedMP), len(mp.MP), d) 598 | return 599 | } 600 | for i := 0; i < len(mp.MP); i++ { 601 | if math.Abs(mp.MP[i]-d.expectedMP[i]) > 1e-7 { 602 | t.Errorf("Expected\n%.4f, but got\n%.4f for\n%+v", d.expectedMP, mp.MP, d) 603 | break 604 | } 605 | } 606 | for i := 0; i < len(mp.Idx); i++ { 607 | if math.Abs(float64(mp.Idx[i]-d.expectedMPIdx[i])) > 1e-7 { 608 | t.Errorf("Expected %d,\nbut got\n%v for\n%+v", d.expectedMPIdx, mp.Idx, d) 609 | break 610 | } 611 | } 
612 | } 613 | } 614 | 615 | func TestComputeMpx(t *testing.T) { 616 | var err error 617 | var mp *MatrixProfile 618 | 619 | testdata := []struct { 620 | q []float64 621 | t []float64 622 | m int 623 | p int 624 | remap bool 625 | expectedMP []float64 626 | expectedMPIdx []int 627 | }{ 628 | {[]float64{}, []float64{}, 2, 1, false, nil, nil}, 629 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, 1, false, nil, nil}, 630 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, 1, false, nil, nil}, 631 | {[]float64{1, 2, 1, 3, 1}, []float64{2, 1, 1, 2, 1, 3, 1, -1, -2}, 2, 1, false, []float64{0, 0, 0, 0}, []int{2, 3, 2, 3}}, 632 | {[]float64{1, 1, 1, 1, 1}, []float64{1, 1, 1, 1, 1, 2, 2, 3, 4, 5}, 2, 1, false, []float64{2, 2, 2, 2}, []int{0, 1, 2, 3}}, 633 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, 4, 1, false, 634 | []float64{0, 0, 0, 0, 0, 0, 0, 0, 0}, 635 | []int{0, 1, 2, 3, 4, 5, 6, 7, 8}}, 636 | {[]float64{0, 1, 1, 1, 0, 0, 2, 1, 0, 0, 2, 1}, nil, 4, 1, false, 637 | []float64{1.9550, 1.8388, 0.8739, 0, 0, 1.9550, 0.8739, 0, 0}, 638 | []int{4, 2, 6, 7, 8, 1, 2, 3, 4}}, 639 | {[]float64{0, 1, 1, 1, 0, 0, 2, 1, 0, 0, 2, 1}, nil, 4, 1, true, 640 | []float64{1.0183, 1.0183, 0.8739, 0, 0, 1.2060, 0.8739, 0, 0}, 641 | []int{6, 3, 4, 7, 8, 3, 2, 3, 4}}, 642 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 1, false, 643 | []float64{0.01435, 0.01435, 0.02913, 0.02913, 0.01435, 0.01435, 0.02913, 0.02913, 0.02913}, 644 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 645 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 2, false, 646 | []float64{0.01435, 0.01435, 0.02913, 0.02913, 0.01435, 0.01435, 0.02913, 0.02913, 0.02913}, 647 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 648 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 4, false, 649 | []float64{0.01435, 0.01435, 0.02913, 0.02913, 0.01435, 0.01435, 0.02913, 0.02913, 0.02913}, 650 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 651 | 
{[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 100, false, 652 | []float64{0.01435, 0.01435, 0.02913, 0.02913, 0.01435, 0.01435, 0.02913, 0.02913, 0.02913}, 653 | []int{4, 5, 6, 7, 0, 1, 2, 3, 4}}, 654 | } 655 | 656 | for _, d := range testdata { 657 | mp, err = New(d.q, d.t, d.m) 658 | if err != nil { 659 | if d.expectedMP == nil { 660 | // Got an error while creating a new matrix profile 661 | continue 662 | } else { 663 | t.Errorf("Did not expect an error, %v, while creating new mp for %v", err, d) 664 | return 665 | } 666 | } 667 | 668 | o := NewMPOpts() 669 | o.Algorithm = AlgoMPX 670 | o.NJobs = d.p 671 | o.RemapNegCorr = d.remap 672 | err = mp.Compute(o) 673 | if err != nil { 674 | if d.expectedMP == nil { 675 | // Got an error while z normalizing and expected an error 676 | continue 677 | } else { 678 | t.Errorf("Did not expect an error, %v, while calculating for %v", err, d) 679 | break 680 | } 681 | } 682 | if d.expectedMP == nil { 683 | t.Errorf("Expected an invalid calculation, %+v", d) 684 | break 685 | } 686 | 687 | if len(mp.MP) != len(d.expectedMP) { 688 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedMP), len(mp.MP), d) 689 | return 690 | } 691 | for i := 0; i < len(mp.MP); i++ { 692 | if math.Abs(mp.MP[i]-d.expectedMP[i]) > 1e-4 { 693 | t.Errorf("Expected\n%.4f, but got\n%.4f for\n%+v", d.expectedMP, mp.MP, d) 694 | break 695 | } 696 | } 697 | for i := 0; i < len(mp.Idx); i++ { 698 | if math.Abs(float64(mp.Idx[i]-d.expectedMPIdx[i])) > 1e-4 { 699 | t.Errorf("Expected %d,\nbut got\n%v for\n%+v", d.expectedMPIdx, mp.Idx, d) 700 | break 701 | } 702 | } 703 | } 704 | } 705 | 706 | func TestUpdate(t *testing.T) { 707 | var err error 708 | var outMP []float64 709 | var outIdx []int 710 | var mp *MatrixProfile 711 | 712 | a := []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0} 713 | 714 | testdata := []struct { 715 | vals []float64 716 | }{ 717 | {[]float64{}}, 718 | {[]float64{0.5}}, 719 | {[]float64{0.2, 0.3, 
0.4, 0.9}}, 720 | } 721 | 722 | mp, err = New(a, nil, 4) 723 | if err != nil { 724 | t.Error(err) 725 | return 726 | } 727 | o := NewMPOpts() 728 | o.Algorithm = AlgoSTOMP 729 | if err = mp.Compute(o); err != nil { 730 | t.Error(err) 731 | return 732 | } 733 | 734 | for _, d := range testdata { 735 | if err = mp.Update(d.vals); err != nil { 736 | t.Error(err) 737 | return 738 | } 739 | outMP = make([]float64, len(mp.MP)) 740 | outIdx = make([]int, len(mp.Idx)) 741 | copy(outMP, mp.MP) 742 | copy(outIdx, mp.Idx) 743 | 744 | if err = mp.stomp(); err != nil { 745 | t.Error(err) 746 | return 747 | } 748 | 749 | for i := 0; i < len(mp.MP); i++ { 750 | if math.Abs(mp.MP[i]-outMP[i]) > 1e-7 { 751 | t.Errorf("Expected\n%.4f, but got\n%.4f for\n%+v", mp.MP, outMP, d) 752 | break 753 | } 754 | } 755 | for i := 0; i < len(mp.Idx); i++ { 756 | if math.Abs(float64(mp.Idx[i]-outIdx[i])) > 1e-7 { 757 | t.Errorf("Expected %d,\nbut got\n%v for\n%+v", mp.Idx, outIdx, d) 758 | break 759 | } 760 | } 761 | } 762 | } 763 | 764 | func TestDiscoverDiscords(t *testing.T) { 765 | mprof := []float64{1, 2, 3, 4} 766 | a := []float64{1, 2, 3, 4, 5, 6} 767 | w := 3 768 | 769 | testdata := []struct { 770 | mp []float64 771 | k int 772 | exzone int 773 | expectedDiscords []int 774 | }{ 775 | {mprof, 4, 0, []int{3, 3, 3, 3}}, 776 | {mprof, 4, 1, []int{3, 1}}, 777 | {mprof, 10, 1, []int{3, 1}}, 778 | {mprof, 0, 1, []int{}}, 779 | } 780 | 781 | for _, d := range testdata { 782 | mp := MatrixProfile{A: a, B: a, W: w, MP: d.mp, AV: av.Default, Opts: NewMPOpts()} 783 | discords, err := mp.DiscoverDiscords(d.k, d.exzone) 784 | if err != nil { 785 | t.Errorf("Got error %v on %v", err, d) 786 | return 787 | } 788 | if len(discords) != len(d.expectedDiscords) { 789 | t.Errorf("Got a length of %d discords, but expected %d, for %v", len(discords), len(d.expectedDiscords), d) 790 | return 791 | } 792 | for i, idx := range discords { 793 | if idx != d.expectedDiscords[i] { 794 | t.Errorf("expected index, %d, 
but got %d, for %v", d.expectedDiscords[i], idx, d) 795 | return 796 | } 797 | } 798 | } 799 | } 800 | 801 | func TestDiscoverMotifs(t *testing.T) { 802 | a := []float64{0, 0, 0.56, 0.99, 0.97, 0.75, 0, 0, 0, 0.43, 0.98, 0.99, 0.65, 0, 0, 0, 0.6, 0.97, 0.965, 0.8, 0, 0, 0} 803 | 804 | testdata := []struct { 805 | a []float64 806 | b []float64 807 | m int 808 | k int 809 | expectedMotifs [][]int 810 | expectedMinDist []float64 811 | }{ 812 | { 813 | a, nil, 7, 3, 814 | [][]int{{0, 14}, {0, 7}, {3, 10}}, 815 | []float64{0.1459619228330262, 0.3352336136782056, 0.46369664551715467}, 816 | }, 817 | { 818 | a, a, 7, 3, 819 | nil, 820 | nil, 821 | }, 822 | { 823 | a, nil, 7, 5, 824 | [][]int{{0, 14}, {0, 7}, {3, 10}, {}, {}}, 825 | []float64{0.1459619228330262, 0.3352336136782056, 0.46369664551715467, 0, 0}, 826 | }, 827 | { 828 | []float64{0, 1, 0, 0, 1, 0, 0}, nil, 3, 2, 829 | [][]int{{0, 3}, {1, 4}}, 830 | []float64{5.1619136559035694e-08, 0}, 831 | }, 832 | } 833 | 834 | for _, d := range testdata { 835 | mp, err := New(d.a, d.b, d.m) 836 | if err != nil { 837 | t.Error(err) 838 | return 839 | } 840 | 841 | o := NewMPOpts() 842 | o.Algorithm = AlgoSTOMP 843 | if err = mp.Compute(o); err != nil { 844 | t.Error(err) 845 | return 846 | } 847 | motifs, err := mp.DiscoverMotifs(d.k, 2, 10, mp.W/2) 848 | if err != nil { 849 | if d.expectedMotifs == nil { 850 | continue 851 | } 852 | t.Error(err) 853 | return 854 | } 855 | 856 | for i := range motifs { 857 | sort.Ints(motifs[i].Idx) 858 | } 859 | 860 | for i, mg := range motifs { 861 | if len(mg.Idx) != len(d.expectedMotifs[i]) { 862 | t.Errorf("expected %d motifs for group %d, but got %d, %v, for %v", len(d.expectedMotifs[i]), i, len(mg.Idx), mg.Idx, d) 863 | return 864 | } 865 | 866 | for j, idx := range mg.Idx { 867 | if idx != d.expectedMotifs[i][j] { 868 | t.Errorf("expected index, %d for group %d, but got %d for %v", d.expectedMotifs[i][j], i, idx, d) 869 | return 870 | } 871 | } 872 | if 
math.Abs(mg.MinDist-d.expectedMinDist[i]) > 1e-7 { 873 | t.Errorf("expected minimum distance, %v for group %d, but got %v for %v", d.expectedMinDist[i], i, mg.MinDist, d) 874 | return 875 | } 876 | } 877 | } 878 | } 879 | 880 | func TestDiscoverSegments(t *testing.T) { 881 | testdata := []struct { 882 | mpIdx []int 883 | expectedIdx int 884 | expectedVal float64 885 | expectedHisto []float64 886 | }{ 887 | {[]int{}, 0, 0, nil}, 888 | {[]int{1, 1, 1, 1, 1}, 0, 0, nil}, 889 | {[]int{4, 5, 6, 0, 2, 1, 0}, 5, 0.7, []float64{1, 1, 1, 1, 1, 0.7, 1}}, 890 | {[]int{4, 5, 12, 0, 2, 1, 0}, 5, 0.35, []float64{1, 1, 1, 1, 0.875, 0.35, 1}}, 891 | {[]int{4, 5, -1, 0, 2, 1, 0}, 5, 0.35, []float64{1, 1, 1, 1, 0.875, 0.35, 1}}, 892 | {[]int{4, 5, 6, 2, 2, 1, 0}, 5, 0.7, []float64{1, 1, 1, 1, 1, 0.7, 1}}, 893 | {[]int{2, 3, 0, 0, 6, 3, 4}, 3, 0, []float64{1, 1, 0.7, 0, 0.29166666, 0.7, 1}}, 894 | } 895 | 896 | var minIdx int 897 | var minVal float64 898 | var histo []float64 899 | for _, d := range testdata { 900 | mp := MatrixProfile{Idx: d.mpIdx} 901 | minIdx, minVal, histo = mp.DiscoverSegments() 902 | if histo != nil && d.expectedHisto == nil { 903 | // Failed to compute histogram 904 | continue 905 | } 906 | if minIdx != d.expectedIdx { 907 | t.Errorf("Expected %d min index but got %d, %+v", d.expectedIdx, minIdx, d) 908 | } 909 | if minVal != d.expectedVal { 910 | t.Errorf("Expected %.3f min index value but got %.3f, %+v", d.expectedVal, minVal, d) 911 | } 912 | if len(histo) != len(d.expectedHisto) { 913 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedHisto), len(histo), d) 914 | } 915 | for i := 0; i < len(histo); i++ { 916 | if math.Abs(float64(histo[i]-d.expectedHisto[i])) > 1e-7 { 917 | t.Errorf("Expected %v,\nbut got\n%v for\n%+v", d.expectedHisto, histo, d) 918 | break 919 | } 920 | } 921 | } 922 | } 923 | -------------------------------------------------------------------------------- /mp_kdim.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/go-matrixprofile/5633282d77de93098b5f9bb8f5fb87bf24f6bc76/mp_kdim.png -------------------------------------------------------------------------------- /mp_sine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-profile-foundation/go-matrixprofile/5633282d77de93098b5f9bb8f5fb87bf24f6bc76/mp_sine.png -------------------------------------------------------------------------------- /pmp.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io/ioutil" 8 | "math" 9 | "os" 10 | 11 | "github.com/matrix-profile-foundation/go-matrixprofile/util" 12 | ) 13 | 14 | // PMP represents the pan matrix profile: the input series, the options used, and one matrix profile and profile index per subsequence window. 15 | type PMP struct { 16 | A []float64 `json:"a"` // query time series 17 | B []float64 `json:"b"` // timeseries to perform full join with; NewPMP sets it to A for a self-join 18 | SelfJoin bool `json:"self_join"` // indicates whether a self join is performed with an exclusion zone; NewPMP sets it when b is nil 19 | PMP [][]float64 `json:"pmp"` // pan matrix profile, one row per subsequence window 20 | PIdx [][]int `json:"ppi"` // pan matrix profile index, one row per subsequence window 21 | PWindows []int `json:"windows"` // pan matrix windows used and is aligned with PMP and PIdx 22 | Opts *PMPOpts `json:"options"` // options used for the computation; stored by Compute 23 | } 24 | 25 | // NewPMP creates a new pan matrix profile for series a joined with series b. A nil b selects a self-join: B is set to a and SelfJoin to true. It returns an error when a is empty, or when b is non-nil but has no elements. 26 | func NewPMP(a, b []float64) (*PMP, error) { 27 | if a == nil || len(a) == 0 { 28 | return nil, fmt.Errorf("first slice is nil or has a length of 0") 29 | } 30 | 31 | if b != nil && len(b) == 0 { 32 | return nil, fmt.Errorf("second slice must be nil for self-join operation or have a length greater than 0") 33 | } 34 | 35 | p := PMP{A: a} 36 | if b == nil { 37 | p.B = a 38 | p.SelfJoin = true 39 | } else { 40 | p.B = b 41 | } 42 | 43 | return &p, nil 44 | } 45 | 
46 | // Save will save the current matrix profile struct to disk 47 | func (p PMP) Save(filepath, format string) error { 48 | var err error 49 | switch format { 50 | case "json": 51 | f, err := os.Open(filepath) 52 | if err != nil { 53 | f, err = os.Create(filepath) 54 | if err != nil { 55 | return err 56 | } 57 | } 58 | defer f.Close() 59 | out, err := json.Marshal(p) 60 | if err != nil { 61 | return err 62 | } 63 | _, err = f.Write(out) 64 | default: 65 | return fmt.Errorf("invalid save format, %s", format) 66 | } 67 | return err 68 | } 69 | 70 | // Load will attempt to load a matrix profile from a file for iterative use 71 | func (p *PMP) Load(filepath, format string) error { 72 | var err error 73 | switch format { 74 | case "json": 75 | f, err := os.Open(filepath) 76 | if err != nil { 77 | return err 78 | } 79 | defer f.Close() 80 | b, err := ioutil.ReadAll(f) 81 | if err != nil { 82 | return err 83 | } 84 | err = json.Unmarshal(b, p) 85 | default: 86 | return fmt.Errorf("invalid load format, %s", format) 87 | } 88 | return err 89 | } 90 | 91 | // PMPOpts are parameters to vary the algorithm to compute the pan matrix profile. 92 | type PMPOpts struct { 93 | LowerM int `json:"lower_m"` // used for pan matrix profile 94 | UpperM int `json:"upper_m"` // used for pan matrix profile 95 | MPOpts *MPOpts `json:"mp_options"` 96 | } 97 | 98 | // NewPMPOpts returns a default PMPOpts 99 | func NewPMPOpts(l, u int) *PMPOpts { 100 | if l > u { 101 | u = l 102 | } 103 | return &PMPOpts{ 104 | LowerM: l, 105 | UpperM: u, 106 | MPOpts: NewMPOpts(), 107 | } 108 | } 109 | 110 | // Compute calculate the pan matrixprofile given a set of input options. 
111 | func (p *PMP) Compute(o *PMPOpts) error { 112 | if o == nil { 113 | return errors.New("Must provide PMP compute options") 114 | } 115 | p.Opts = o 116 | return p.pmp() 117 | } 118 | // pmp computes one matrix profile per subsequence window and stores the results in p.PMP, p.PIdx and p.PWindows. It returns an error when no window remains after sampling, or when creating or computing a matrix profile fails. 119 | func (p *PMP) pmp() error { 120 | windows := util.BinarySplit(p.Opts.LowerM, p.Opts.UpperM) 121 | windows = windows[:int(float64(len(windows))*p.Opts.MPOpts.SamplePct)] 122 | if len(windows) < 1 { 123 | return errors.New("Need more than one subsequence window for pmp") 124 | } 125 | p.PWindows = windows 126 | 127 | p.PMP = make([][]float64, len(windows)) 128 | p.PIdx = make([][]int, len(windows)) 129 | for i := 0; i < len(windows); i++ { 130 | lenA := len(p.A) - (i + p.Opts.LowerM) + 1 131 | p.PMP[i] = make([]float64, lenA) 132 | p.PIdx[i] = make([]int, lenA) 133 | for j := 0; j < lenA; j++ { 134 | p.PMP[i][j] = math.Inf(1) 135 | p.PIdx[i][j] = math.MaxInt64 136 | } 137 | } 138 | 139 | // need to create a new mp; it is built once with the first window and reused for every window by overwriting mp.W. NOTE(review): the rows above are sized as if row i holds window LowerM+i, and the copies below index rows by w-LowerM; this presumably relies on util.BinarySplit returning every integer in [LowerM, UpperM]. If SamplePct < 1 shortens windows, w-LowerM may exceed the allocated rows - TODO confirm. 140 | var mp *MatrixProfile 141 | var err error 142 | if p.SelfJoin { 143 | mp, err = New(p.A, nil, windows[0]) 144 | } else { 145 | mp, err = New(p.A, p.B, windows[0]) 146 | } 147 | if err != nil { 148 | return err 149 | } 150 | 151 | for _, w := range windows { 152 | mp.W = w 153 | if err := mp.Compute(p.Opts.MPOpts); err != nil { 154 | return err 155 | } 156 | copy(p.PMP[w-p.Opts.LowerM], mp.MP) 157 | copy(p.PIdx[w-p.Opts.LowerM], mp.Idx) 158 | } 159 | 160 | return nil 161 | } 162 | 163 | // Analyze has not been implemented yet; it always returns an error. 164 | func (p PMP) Analyze(co *MPOpts, ao *AnalyzeOpts) error { 165 | return errors.New("Analyze for PMP has not been implemented yet.") 166 | } 167 | 168 | // DiscoverMotifs has not been implemented yet; it always returns nil motifs and an error. 169 | func (p PMP) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) { 170 | return nil, errors.New("Motifs for PMP has not been implemented yet.") 171 | } 172 | 173 | // DiscoverDiscords has not been implemented yet 174 | func (p PMP) DiscoverDiscords(k int, exclusionZone int) ([]int, error) { 175 | return nil, errors.New("Discords
for PMP has not been implemented yet.") 176 | } 177 | 178 | // DiscoverSegments has not been implemented yet 179 | func (p PMP) DiscoverSegments() (int, float64, []float64) { 180 | return 0, 0, nil 181 | } 182 | 183 | // Visualize has not been implemented yet 184 | func (p PMP) Visualize(fn string, motifs []MotifGroup, discords []int, cac []float64) error { 185 | return errors.New("Visualize for PMP has not been implemented yet.") 186 | } 187 | -------------------------------------------------------------------------------- /pmp_test.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "math" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | func TestPMPSave(t *testing.T) { 10 | ts := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13} 11 | p, err := NewPMP(ts, nil) 12 | p.Compute(NewPMPOpts(3, 5)) 13 | filepath := "./pmp.json" 14 | err = p.Save(filepath, "json") 15 | if err != nil { 16 | t.Errorf("Received error while saving matrix profile, %v", err) 17 | } 18 | if err = os.Remove(filepath); err != nil { 19 | t.Errorf("Could not remove file, %s, %v", filepath, err) 20 | } 21 | } 22 | 23 | func TestPMPLoad(t *testing.T) { 24 | ts := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13} 25 | p, err := NewPMP(ts, nil) 26 | p.Compute(NewPMPOpts(3, 5)) 27 | filepath := "./pmp.json" 28 | if err = p.Save(filepath, "json"); err != nil { 29 | t.Errorf("Received error while saving matrix profile, %v", err) 30 | } 31 | 32 | newP := &PMP{} 33 | if err = newP.Load(filepath, "json"); err != nil { 34 | t.Errorf("Failed to load %s, %v", filepath, err) 35 | } 36 | 37 | if err = os.Remove(filepath); err != nil { 38 | t.Errorf("Could not remove file, %s, %v", filepath, err) 39 | } 40 | 41 | if len(newP.A) != len(ts) { 42 | t.Errorf("Expected timeseries length of %d, but got %d", len(ts), len(newP.A)) 43 | } 44 | 45 | } 46 | 47 | func TestComputePmp(t *testing.T) { 48 | var err error 49 | var p *PMP 50 | 51 | testdata 
:= []struct { 52 | a []float64 53 | b []float64 54 | lb int 55 | ub int 56 | p int 57 | expectedPMP [][]float64 58 | expectedPIdx [][]int 59 | }{ 60 | {[]float64{}, []float64{}, 2, 2, 1, nil, nil}, 61 | {[]float64{1, 1, 1, 1, 1}, []float64{}, 2, 2, 1, nil, nil}, 62 | {[]float64{}, []float64{1, 1, 1, 1, 1}, 2, 2, 1, nil, nil}, 63 | {[]float64{1, 2, 1, 3, 1}, []float64{2, 1, 1, 2, 1, 3, 1, -1, -2}, 2, 2, 1, [][]float64{{0, 0, 0, 0}}, [][]int{{2, 3, 2, 3}}}, 64 | {[]float64{1, 1, 1, 1, 1}, []float64{1, 1, 1, 1, 1, 2, 2, 3, 4, 5}, 2, 2, 1, [][]float64{{2, 2, 2, 2}}, [][]int{{0, 1, 2, 3}}}, 65 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, 4, 4, 1, 66 | [][]float64{{0, 0, 0, 0, 0, 0, 0, 0, 0}}, 67 | [][]int{{0, 1, 2, 3, 4, 5, 6, 7, 8}}}, 68 | {[]float64{0, 1, 1, 1, 0, 0, 2, 1, 0, 0, 2, 1}, nil, 4, 4, 1, 69 | [][]float64{{1.9550, 1.8388, 0.8739, 0, 0, 1.9550, 0.8739, 0, 0}}, 70 | [][]int{{4, 2, 6, 7, 8, 1, 2, 3, 4}}}, 71 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 4, 1, 72 | [][]float64{{0.014355, 0.014355, 0.029138, 0.029138, 0.014355, 0.014355, 0.029138, 0.029138, 0.029138}}, 73 | [][]int{{4, 5, 6, 7, 0, 1, 2, 3, 4}}}, 74 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 4, 2, 75 | [][]float64{{0.014355, 0.014355, 0.029138, 0.029138, 0.014355, 0.014355, 0.029138, 0.029138, 0.029138}}, 76 | [][]int{{4, 5, 6, 7, 0, 1, 2, 3, 4}}}, 77 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 4, 4, 78 | [][]float64{{0.014355, 0.014355, 0.029138, 0.029138, 0.014355, 0.014355, 0.029138, 0.029138, 0.029138}}, 79 | [][]int{{4, 5, 6, 7, 0, 1, 2, 3, 4}}}, 80 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 4, 4, 100, 81 | [][]float64{{0.014355, 0.014355, 0.029138, 0.029138, 0.014355, 0.014355, 0.029138, 0.029138, 0.029138}}, 82 | [][]int{{4, 5, 6, 7, 0, 1, 2, 3, 4}}}, 83 | {[]float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}, nil, 3, 5, 1, 84 | 
[][]float64{ 85 | {0.015225, 0.015225, 0.000000, 0.000000, 0.015225, 0.015225, 0.000000, 0.000000, 0.030899, 0.030899}, 86 | {0.014355, 0.014355, 0.029138, 0.029138, 0.014355, 0.014355, 0.029138, 0.029138, 0.029138}, 87 | {0.014651, 0.029742, 0.033992, 0.029742, 0.014651, 0.029742, 0.033992, 0.029742}, 88 | }, 89 | [][]int{ 90 | {4, 5, 6, 7, 0, 1, 2, 3, 4, 5}, 91 | {4, 5, 6, 7, 0, 1, 2, 3, 4}, 92 | {4, 5, 6, 7, 0, 1, 2, 3}, 93 | }}, 94 | } 95 | 96 | for _, d := range testdata { 97 | p, err = NewPMP(d.a, d.b) 98 | if err != nil { 99 | if d.expectedPMP == nil { 100 | // Got an error while creating a new matrix profile 101 | continue 102 | } else { 103 | t.Errorf("Did not expect an error, %v, while creating new mp for %v", err, d) 104 | return 105 | } 106 | } 107 | 108 | o := NewPMPOpts(d.lb, d.ub) 109 | o.MPOpts.NJobs = d.p 110 | err = p.Compute(o) 111 | if err != nil { 112 | if d.expectedPMP == nil { 113 | // Got an error while z normalizing and expected an error 114 | continue 115 | } else { 116 | t.Errorf("Did not expect an error, %v, while calculating for %v", err, d) 117 | break 118 | } 119 | } 120 | if d.expectedPMP == nil { 121 | t.Errorf("Expected an invalid calculation, %+v", d) 122 | break 123 | } 124 | 125 | if len(p.PMP) != len(d.expectedPMP) { 126 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedPMP), len(p.PMP), d) 127 | return 128 | } 129 | for j := 0; j < len(p.PMP); j++ { 130 | if len(p.PMP[j]) != len(d.expectedPMP[j]) { 131 | t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedPMP[j]), len(p.PMP[j]), d) 132 | return 133 | } 134 | for i := 0; i < len(p.PMP[j]); i++ { 135 | if math.Abs(p.PMP[j][i]-d.expectedPMP[j][i]) > 1e-4 { 136 | t.Errorf("Expected\n%.6f, but got\n%.6f for\n%+v", d.expectedPMP[j], p.PMP[j], d) 137 | break 138 | } 139 | } 140 | for i := 0; i < len(p.PIdx[j]); i++ { 141 | if math.Abs(float64(p.PIdx[j][i]-d.expectedPIdx[j][i])) > 1e-7 { 142 | t.Errorf("Expected %d,\nbut got\n%v for\n%+v", 
// Sawtooth produces a sawtooth wave with a given amplitude, frequency,
// phase, offset, sampleRate and duration in seconds.
//
// The output has int(sampleRate*durationSec) samples. phase is expressed in
// radians using the same convention as Sin: a phase of 2*math.Pi shifts the
// wave by one full period. A phase of exactly 0 reproduces the unshifted wave.
func Sawtooth(amp, freq, phase, offset, sampleRate, durationSec float64) []float64 {
	nsamp := int(sampleRate * durationSec)
	out := make([]float64, nsamp)
	for i := 0; i < nsamp; i++ {
		arg := float64(i) / sampleRate * math.Pi * freq
		if phase != 0 {
			// One sawtooth period spans pi in the cotangent argument, so a
			// phase measured against a 2*pi period contributes phase/2. The
			// guard keeps arg untouched (including its sign at zero) when no
			// phase is requested.
			arg += phase / 2
		}
		out[i] = -2*amp/math.Pi*math.Atan(1.0/math.Tan(arg)) + offset
	}
	return out
}
// Rect creates a rectangular pulse of height amp inside an otherwise zero
// signal of int(sampleRate*durationSec) samples.
//
// offset is the time of the centre of the pulse and width is its duration,
// both in seconds (the same time base as durationSec). The pulse covers the
// half-open sample range [start, end) and is clipped to the signal bounds.
func Rect(amp, offset, width, sampleRate, durationSec float64) []float64 {
	nsamp := int(sampleRate * durationSec)
	out := make([]float64, nsamp)

	// clamp restricts a sample index to [0, nsamp]; nsamp itself is allowed
	// because it is only used as an exclusive upper bound.
	clamp := func(idx int) int {
		if idx < 0 {
			return 0
		}
		if idx > nsamp {
			return nsamp
		}
		return idx
	}

	// A time in seconds maps to a sample index by multiplying by the rate.
	start := clamp(int((offset - width/2.0) * sampleRate))
	end := clamp(int((offset + width/2.0) * sampleRate))

	for i := start; i < end; i++ {
		out[i] = amp
	}

	return out
}
currIdx += len(signal) 128 | } 129 | return out 130 | } 131 | -------------------------------------------------------------------------------- /siggen/siggen_test.go: -------------------------------------------------------------------------------- 1 | package siggen 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestSin(t *testing.T) { 8 | testdata := []struct { 9 | fs float64 10 | duration float64 11 | expectedN int 12 | }{ 13 | {0, 10, 0}, 14 | {100, 1, 100}, 15 | {100, 1.5, 150}, 16 | {100, 0, 0}, 17 | } 18 | 19 | var out []float64 20 | for _, d := range testdata { 21 | out = Sin(1, 5, 0, 0, d.fs, d.duration) 22 | if len(out) != d.expectedN { 23 | t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 24 | } 25 | } 26 | } 27 | 28 | func TestSquare(t *testing.T) { 29 | testdata := []struct { 30 | fs float64 31 | duration float64 32 | expectedN int 33 | }{ 34 | {0, 10, 0}, 35 | {100, 1, 100}, 36 | {100, 1.5, 150}, 37 | {100, 0, 0}, 38 | } 39 | 40 | var out []float64 41 | for _, d := range testdata { 42 | out = Square(1, 5, 0, 0, d.fs, d.duration) 43 | if len(out) != d.expectedN { 44 | t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 45 | } 46 | } 47 | } 48 | 49 | func TestSawtooth(t *testing.T) { 50 | testdata := []struct { 51 | fs float64 52 | duration float64 53 | expectedN int 54 | }{ 55 | {0, 10, 0}, 56 | {100, 1, 100}, 57 | {100, 1.5, 150}, 58 | {100, 0, 0}, 59 | } 60 | 61 | var out []float64 62 | for _, d := range testdata { 63 | out = Sawtooth(1, 5, 0, 0, d.fs, d.duration) 64 | if len(out) != d.expectedN { 65 | t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 66 | } 67 | } 68 | } 69 | 70 | func TestLine(t *testing.T) { 71 | testdata := []struct { 72 | n int 73 | expectedN int 74 | }{ 75 | {0, 0}, 76 | {100, 100}, 77 | } 78 | 79 | var out []float64 80 | for _, d := range testdata { 81 | out = Line(1, 0, d.n) 82 | if len(out) != d.expectedN { 83 | 
t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 84 | } 85 | } 86 | } 87 | 88 | func TestRect(t *testing.T) { 89 | testdata := []struct { 90 | fs float64 91 | duration float64 92 | expectedN int 93 | }{ 94 | {0, 10, 0}, 95 | {100, 1, 100}, 96 | {100, 1.5, 150}, 97 | {100, 0, 0}, 98 | } 99 | 100 | var out []float64 101 | for _, d := range testdata { 102 | out = Rect(0.2, 1, 0.25, d.fs, d.duration) 103 | if len(out) != d.expectedN { 104 | t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 105 | } 106 | } 107 | } 108 | 109 | func TestNoise(t *testing.T) { 110 | testdata := []struct { 111 | n int 112 | expectedN int 113 | }{ 114 | {0, 0}, 115 | {100, 100}, 116 | } 117 | 118 | var out []float64 119 | for _, d := range testdata { 120 | out = Noise(0, d.n) 121 | if len(out) != d.expectedN { 122 | t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 123 | } 124 | } 125 | } 126 | 127 | func TestAdd(t *testing.T) { 128 | testdata := []struct { 129 | sig1 []float64 130 | sig2 []float64 131 | expectedOut []float64 132 | }{ 133 | {Line(0, 2, 100), Line(0, 5, 100), Line(0, 7, 100)}, 134 | {Line(0, 2, 200), Line(0, 5, 100), Append(Line(0, 7, 100), Line(0, 2, 100))}, 135 | {Line(0, 2, 100), Line(0, 5, 200), Append(Line(0, 7, 100), Line(0, 5, 100))}, 136 | } 137 | 138 | var out []float64 139 | for _, d := range testdata { 140 | out = Add(d.sig1, d.sig2) 141 | if len(out) != len(d.expectedOut) { 142 | t.Errorf("expected output length, %d, but got, %d, for %v", len(d.expectedOut), len(out), d) 143 | break 144 | } 145 | 146 | for i, val := range out { 147 | if val != d.expectedOut[i] { 148 | t.Errorf("expected value of %.3f at index %d, but got %.3f for %v", d.expectedOut[i], i, val, d) 149 | break 150 | } 151 | } 152 | } 153 | } 154 | 155 | func TestAppend(t *testing.T) { 156 | testdata := []struct { 157 | sig1 []float64 158 | sig2 []float64 159 | expectedN int 160 | }{ 161 | 
{Line(0, 2, 100), Line(0, 5, 100), 200}, 162 | {Line(0, 2, 200), Line(0, 5, 100), 300}, 163 | } 164 | 165 | var out []float64 166 | for _, d := range testdata { 167 | out = Append(d.sig1, d.sig2) 168 | if len(out) != d.expectedN { 169 | t.Errorf("expected output length, %d, but got, %d, for %v", d.expectedN, len(out), d) 170 | break 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /util/util.go: -------------------------------------------------------------------------------- 1 | // Package util is a set of utility functions that are used throughout the matrixprofile package. 2 | package util 3 | 4 | import ( 5 | "fmt" 6 | "math" 7 | 8 | "gonum.org/v1/gonum/stat" 9 | ) 10 | 11 | // ZNormalize computes a z-normalized version of a slice of floats. 12 | // This is represented by y[i] = (x[i] - mean(x))/std(x) 13 | func ZNormalize(ts []float64) ([]float64, error) { 14 | var i int 15 | 16 | if len(ts) == 0 { 17 | return nil, fmt.Errorf("slice does not have any data") 18 | } 19 | 20 | m := stat.Mean(ts, nil) 21 | 22 | out := make([]float64, len(ts)) 23 | for i = 0; i < len(ts); i++ { 24 | out[i] = ts[i] - m 25 | } 26 | 27 | var std float64 28 | for _, val := range out { 29 | std += val * val 30 | } 31 | std = math.Sqrt(std / float64(len(out))) 32 | 33 | if std == 0 { 34 | return out, fmt.Errorf("standard deviation is zero") 35 | } 36 | 37 | for i = 0; i < len(ts); i++ { 38 | out[i] = out[i] / std 39 | } 40 | 41 | return out, nil 42 | } 43 | 44 | // MovMeanStd computes the mean and standard deviation of each sliding 45 | // window of m over a slice of floats. This is done by one pass through 46 | // the data and keeping track of the cumulative sum and cumulative sum 47 | // squared. s between these at intervals of m provide a total of O(n) 48 | // calculations for the standard deviation of each window of size m for 49 | // the time series ts. 
// MovMeanStd computes the mean and population standard deviation of every
// sliding window of length m over ts.
//
// It makes one pass over the data to build the cumulative sum and cumulative
// sum of squares; differences between entries m apart then give each window's
// statistics, for O(n) work overall. Both returned slices have
// len(ts)-m+1 entries. An error is returned if m is not greater than 1 or if
// m exceeds len(ts).
func MovMeanStd(ts []float64, m int) ([]float64, []float64, error) {
	if m <= 1 {
		return nil, nil, fmt.Errorf("window size m must be greater than 1")
	}

	n := len(ts)
	if m > n {
		return nil, nil, fmt.Errorf("m cannot be greater than length of slice")
	}

	// c[i] and csqr[i] hold the sum and sum of squares of ts[:i].
	c := make([]float64, n+1)
	csqr := make([]float64, n+1)
	for i, v := range ts {
		c[i+1] = c[i] + v
		csqr[i+1] = csqr[i] + v*v
	}

	mean := make([]float64, n-m+1)
	std := make([]float64, n-m+1)
	for i := range mean {
		mean[i] = (c[i+m] - c[i]) / float64(m)
		variance := (csqr[i+m]-csqr[i])/float64(m) - mean[i]*mean[i]
		if variance < 0 {
			// Rounding can push the variance of a (near) constant window
			// slightly below zero, which would otherwise yield NaN.
			variance = 0
		}
		std[i] = math.Sqrt(variance)
	}

	return mean, std, nil
}
// Sum2s returns the mean of every window of w consecutive values in a; the
// result has len(a)-w+1 entries.
//
// p carries the running window sum while s accumulates the rounding error of
// each addition and subtraction, so p+s is used as the corrected total.
// It returns nil when w is not positive or when w exceeds len(a).
func Sum2s(a []float64, w int) []float64 {
	// A non-positive window would index a[0] on empty input or divide by
	// zero, so it is rejected together with windows longer than the data.
	if w <= 0 || len(a) < w {
		return nil
	}

	p := a[0]
	s := 0.0
	var x, z float64
	// Sum the first window.
	for i := 1; i < w; i++ {
		x = p + a[i]
		z = x - p
		s += (p - (x - z)) + (a[i] - z)
		p = x
	}

	res := make([]float64, len(a)-w+1)
	res[0] = (p + s) / float64(w)
	for i := w; i < len(a); i++ {
		// Remove the value that leaves the window.
		x = p - a[i-w]
		z = x - p
		s += (p - (x - z)) - (a[i-w] + z)
		p = x

		// Add the value that enters the window.
		x = p + a[i]
		z = x - p
		s += (p - (x - z)) + (a[i] - z)
		p = x

		res[i-w+1] = (p + s) / float64(w)
	}

	return res
}
// P2E rewrites mp in place, turning each pearson correlation value into a
// euclidean distance for a window of length w. The conversion only holds for
// z-normalized time series.
func P2E(mp []float64, w int) {
	scale := 2 * float64(w)
	for i, corr := range mp {
		// Accumulated floating point error can nudge a correlation above 1;
		// cap it so the square root argument does not go negative.
		if corr > 1 {
			corr = 1
		}
		mp[i] = math.Sqrt(scale * (1 - corr))
	}
}
// E2P rewrites mp in place, turning each euclidean distance into a pearson
// correlation value for a window of length w. The conversion only holds for
// z-normalized time series. Results are limited to the range [0, 1], so
// negative correlations are reported as 0.
func E2P(mp []float64, w int) {
	denom := 2 * float64(w)
	for i, dist := range mp {
		corr := 1 - dist*dist/denom
		switch {
		case corr > 1:
			// guards against accumulated floating point error
			corr = 1
		case corr < 0:
			corr = 0
		}
		mp[i] = corr
	}
}
{[]float64{1, 1, 1, 1}, 2, []float64{1, 1, 1}, []float64{0, 0, 0}}, 58 | {[]float64{-1, -1, -1, -1}, 2, []float64{-1, -1, -1}, []float64{0, 0, 0}}, 59 | {[]float64{1, -1, -1, 1}, 2, []float64{0, -1, 0}, []float64{1, 0, 1}}, 60 | {[]float64{1, 2, 4, 8}, 2, []float64{1.5, 3, 6}, []float64{0.5, 1, 2}}, 61 | } 62 | 63 | for _, d := range testdata { 64 | mean, std, err = MovMeanStd(d.data, d.m) 65 | if err != nil { 66 | if d.expectedStd == nil && d.expectedMean == nil { 67 | // Got an error while calculating and expected an error 68 | continue 69 | } else { 70 | t.Errorf("Did not expect an error, %v for %v", err, d) 71 | break 72 | } 73 | } 74 | if d.expectedStd == nil { 75 | t.Errorf("Expected an invalid moving standard deviation, %v", d) 76 | } 77 | if len(mean) != len(d.expectedMean) { 78 | t.Errorf("Expected %d elements, but got %d, %v", len(d.expectedMean), len(mean), d) 79 | } 80 | for i := 0; i < len(mean); i++ { 81 | if math.Abs(mean[i]-d.expectedMean[i]) > 1e-7 { 82 | t.Errorf("Expected %v, but got %v for %v", d.expectedMean, mean, d) 83 | break 84 | } 85 | } 86 | 87 | if len(std) != len(d.expectedStd) { 88 | t.Errorf("Expected %d elements, but got %d, %v", len(d.expectedStd), len(std), d) 89 | } 90 | for i := 0; i < len(std); i++ { 91 | if math.Abs(std[i]-d.expectedStd[i]) > 1e-7 { 92 | t.Errorf("Expected %v, but got %v for %v", d.expectedStd, std, d) 93 | break 94 | } 95 | } 96 | 97 | } 98 | } 99 | 100 | func TestMuInvN(t *testing.T) { 101 | testdata := []struct { 102 | a []float64 103 | w int 104 | expectedMu []float64 105 | expectedSig []float64 106 | }{ 107 | {[]float64{2, 2, 2, 2, 2, 2}, 3, []float64{2, 2, 2, 2}, []float64{0, 0, 0, 0}}, 108 | {[]float64{2, 4, 3, 5, 4, 6}, 3, []float64{3, 4, 4, 5}, []float64{math.Sqrt(2) / 2, math.Sqrt(2) / 2, math.Sqrt(2) / 2, math.Sqrt(2) / 2}}, 109 | {[]float64{1, 1, 1, 1}, 4, []float64{1}, []float64{0}}, 110 | {[]float64{1, 1, 1, 1}, 2, []float64{1, 1, 1}, []float64{0, 0, 0}}, 111 | {[]float64{-1, -1, -1, -1}, 2, 
[]float64{-1, -1, -1}, []float64{0, 0, 0}}, 112 | } 113 | 114 | for _, d := range testdata { 115 | mu, sig := MuInvN(d.a, d.w) 116 | if len(mu) != len(d.expectedMu) { 117 | t.Errorf("Expected %d elements of mu but got %d", len(d.expectedMu), len(mu)) 118 | continue 119 | } 120 | if len(sig) != len(d.expectedSig) { 121 | t.Errorf("Expected %d elements of sig but got %d", len(d.expectedSig), len(sig)) 122 | continue 123 | } 124 | for i := 0; i < len(mu); i++ { 125 | if mu[i] != d.expectedMu[i] { 126 | t.Errorf("Expected mu: %.3f, but got %.3f", d.expectedMu, mu) 127 | break 128 | } 129 | if math.Abs(sig[i]-d.expectedSig[i]) > 1e-9 { 130 | t.Errorf("Expected sig: %.9f, but got %.9f", d.expectedSig, sig) 131 | break 132 | } 133 | } 134 | } 135 | } 136 | 137 | func TestBinarySplit(t *testing.T) { 138 | testdata := []struct { 139 | lb int 140 | ub int 141 | expected []int 142 | }{ 143 | {4, 0, []int{}}, 144 | {1, 1, []int{1}}, 145 | {0, 1, []int{0, 1}}, 146 | {0, 4, []int{0, 2, 1, 3, 4}}, 147 | {0, 9, []int{0, 5, 2, 7, 1, 3, 6, 8, 4, 9}}, 148 | {0, 16, []int{0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15, 16}}, 149 | {7, 15, []int{7, 11, 9, 13, 8, 10, 12, 14, 15}}, 150 | } 151 | 152 | for _, d := range testdata { 153 | res := BinarySplit(d.lb, d.ub) 154 | if len(res) != len(d.expected) { 155 | t.Errorf("Expected result length of %d, but got %d for %v", len(d.expected), len(res), d) 156 | break 157 | } 158 | for i, v := range res { 159 | if v != d.expected[i] { 160 | t.Errorf("Expected value %d at index, %d, but got %d for %v", d.expected[i], i, v, d) 161 | break 162 | } 163 | } 164 | } 165 | } 166 | 167 | func TestDiagBatchingScheme(t *testing.T) { 168 | testdata := []struct { 169 | l, p int 170 | expected []Batch 171 | }{ 172 | {33, 4, []Batch{{0, 3}, {3, 6}, {9, 7}, {16, 18}}}, 173 | } 174 | 175 | for _, d := range testdata { 176 | res := DiagBatchingScheme(d.l, d.p) 177 | if len(res) != len(d.expected) { 178 | t.Errorf("Expected result length of %d, but got %d 
for %v", len(d.expected), len(res), d) 179 | break 180 | } 181 | for i, v := range res { 182 | if v.Idx != d.expected[i].Idx { 183 | t.Errorf("Expected Idx of %d at index, %d, but got %d for %v", d.expected[i], i, v, d) 184 | break 185 | } 186 | if v.Size != d.expected[i].Size { 187 | t.Errorf("Expected Idx of %d at index, %d, but got %d for %v", d.expected[i], i, v, d) 188 | break 189 | } 190 | } 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /visualize.go: -------------------------------------------------------------------------------- 1 | package matrixprofile 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "gonum.org/v1/plot" 8 | "gonum.org/v1/plot/plotter" 9 | "gonum.org/v1/plot/plotutil" 10 | "gonum.org/v1/plot/vg" 11 | "gonum.org/v1/plot/vg/draw" 12 | "gonum.org/v1/plot/vg/vgimg" 13 | ) 14 | 15 | func points(a []float64, n int) plotter.XYs { 16 | pts := make(plotter.XYs, n) 17 | for i := 0; i < n; i++ { 18 | pts[i].X = float64(i) 19 | if i < len(a) { 20 | pts[i].Y = a[i] 21 | } 22 | } 23 | return pts 24 | } 25 | 26 | func createPlot(pts []plotter.XYs, labels []string, title string) (*plot.Plot, error) { 27 | if labels != nil && len(pts) != len(labels) { 28 | return nil, fmt.Errorf("number of XYs, %d, does not match number of labels, %d", len(pts), len(labels)) 29 | } 30 | 31 | p, err := plot.New() 32 | if err != nil { 33 | return p, err 34 | } 35 | 36 | p.Title.Text = title 37 | for i := 0; i < len(pts); i++ { 38 | line, points, err := plotter.NewLinePoints(pts[i]) 39 | if err != nil { 40 | return p, err 41 | } 42 | line.Color = plotutil.Color(i) 43 | points.Color = plotutil.Color(i) 44 | points.Shape = nil 45 | p.Add(line, points) 46 | if labels != nil { 47 | p.Legend.Add(labels[i], line) 48 | } 49 | } 50 | return p, err 51 | } 52 | 53 | func plotMP(sigPts, mpPts plotter.XYs, motifPts [][]plotter.XYs, discordPts []plotter.XYs, discordLabels []string, filename string) error { 54 | var err error 55 | rows, 
cols := len(motifPts), 2 56 | if rows < 4 { 57 | rows = 4 58 | } 59 | plots := make([][]*plot.Plot, rows) 60 | 61 | for i := 0; i < rows; i++ { 62 | plots[i] = make([]*plot.Plot, cols) 63 | } 64 | 65 | plots[0][0], err = createPlot([]plotter.XYs{sigPts}, nil, "signal") 66 | if err != nil { 67 | return err 68 | } 69 | 70 | plots[1][0], err = createPlot([]plotter.XYs{mpPts}, nil, "matrix profile") 71 | if err != nil { 72 | return err 73 | } 74 | 75 | plots[2][0], err = createPlot(discordPts, discordLabels, "discords") 76 | if err != nil { 77 | return err 78 | } 79 | 80 | for i := 0; i < len(motifPts); i++ { 81 | plots[i][1], err = createPlot(motifPts[i], nil, fmt.Sprintf("motif %d", i)) 82 | if err != nil { 83 | return err 84 | } 85 | } 86 | 87 | img := vgimg.New(vg.Points(1200), vg.Points(600)) 88 | dc := draw.New(img) 89 | 90 | t := draw.Tiles{ 91 | Rows: rows, 92 | Cols: cols, 93 | } 94 | 95 | canvases := plot.Align(plots, t, dc) 96 | for j := 0; j < rows; j++ { 97 | for i := 0; i < cols; i++ { 98 | if plots[j][i] != nil { 99 | plots[j][i].Draw(canvases[j][i]) 100 | } 101 | } 102 | } 103 | 104 | w, err := os.Create(filename) 105 | if err != nil { 106 | return err 107 | } 108 | 109 | png := vgimg.PngCanvas{Canvas: img} 110 | _, err = png.WriteTo(w) 111 | return err 112 | } 113 | 114 | func plotKMP(sigPts, mpPts []plotter.XYs, filename string) error { 115 | var err error 116 | 117 | rows, cols := len(sigPts)*2, 1 118 | 119 | plots := make([][]*plot.Plot, rows) 120 | 121 | for i := 0; i < len(sigPts)*2; i++ { 122 | plots[i] = make([]*plot.Plot, cols) 123 | } 124 | 125 | for i := 0; i < len(sigPts); i++ { 126 | plots[i][0], err = createPlot([]plotter.XYs{sigPts[i]}, nil, fmt.Sprintf("signal%d", i)) 127 | if err != nil { 128 | return err 129 | } 130 | } 131 | 132 | for i := 0; i < len(sigPts); i++ { 133 | plots[len(sigPts)+i][0], err = createPlot([]plotter.XYs{mpPts[i]}, nil, fmt.Sprintf("mp%d", i)) 134 | if err != nil { 135 | return err 136 | } 137 | } 138 | 139 | img 
:= vgimg.New(vg.Points(600), vg.Points(600)) 140 | dc := draw.New(img) 141 | 142 | t := draw.Tiles{ 143 | Rows: rows, 144 | Cols: cols, 145 | } 146 | 147 | canvases := plot.Align(plots, t, dc) 148 | for j := 0; j < rows; j++ { 149 | for i := 0; i < cols; i++ { 150 | if plots[j][i] != nil { 151 | plots[j][i].Draw(canvases[j][i]) 152 | } 153 | } 154 | } 155 | 156 | w, err := os.Create(filename) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | png := vgimg.PngCanvas{Canvas: img} 162 | _, err = png.WriteTo(w) 163 | return err 164 | } 165 | --------------------------------------------------------------------------------