├── .github
    └── workflows
    │   └── publish-documentation.yml
├── .gitignore
├── .gitlab-ci.yml
├── LICENSE.txt
├── README.md
├── distancematrix
    ├── __init__.py
    ├── calculator.py
    ├── consumer
    │   ├── __init__.py
    │   ├── abstract_consumer.py
    │   ├── contextmanager.py
    │   ├── contextual_matrix_profile.py
    │   ├── distance_matrix.py
    │   ├── matrix_profile_lr.py
    │   ├── multidimensional_matrix_profile_lr.py
    │   ├── radius_profile.py
    │   └── threshold_counter.py
    ├── generator
    │   ├── __init__.py
    │   ├── abstract_generator.py
    │   ├── euclidean.py
    │   ├── filter_generator.py
    │   └── znorm_euclidean.py
    ├── insights.py
    ├── interrupt_util.py
    ├── math_tricks.py
    ├── ostinato.py
    ├── ringbuffer.py
    ├── tests
    │   ├── __init__.py
    │   ├── consumer
    │   │   ├── __init__.py
    │   │   ├── test_contextmanager.py
    │   │   ├── test_contextual_matrix_profile.py
    │   │   ├── test_distance_matrix.py
    │   │   ├── test_matrix_profile_lr.py
    │   │   ├── test_multidimensional_matrix_profile_lr.py
    │   │   ├── test_radius_profile.py
    │   │   └── test_threshold_counter.py
    │   ├── generator
    │   │   ├── __init__.py
    │   │   ├── mock_generator.py
    │   │   ├── test_euclidean.py
    │   │   ├── test_filter_generator.py
    │   │   └── test_znorm_euclidean.py
    │   ├── test_calculator.py
    │   ├── test_insights.py
    │   ├── test_math_tricks.py
    │   ├── test_ostinato.py
    │   ├── test_ringbuffer.py
    │   ├── test_util.py
    │   └── test_valmod.py
    ├── util.py
    └── valmod.py
├── docs
    ├── .gitignore
    ├── Example_matrix_profile.ipynb
    ├── Makefile
    ├── conf.py
    ├── doc_environment.yml
    ├── examples.rst
    ├── index.rst
    ├── install.md
    └── make.bat
├── setup.py
└── test_environment.yml


/.github/workflows/publish-documentation.yml:
--------------------------------------------------------------------------------
 1 | name: Publish documentation
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 | 
 8 | jobs:
 9 |   # Inspired by https://github.com/conda-incubator/setup-miniconda#usage-examples
10 |   run-tests:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |     - uses: actions/checkout@v2
14 |     - name: Set up test environment
15 |       uses: conda-incubator/setup-miniconda@v2
16 |       with:
17 |         environment-file: test_environment.yml
18 |         activate-environment: test_env
19 |     - name: Run tests
20 |       shell: bash -l {0}
21 |       run: |
22 |         nosetests -v --nocapture
23 | 
24 |   publish-docs:
25 |     needs: run-tests
26 |     runs-on: ubuntu-latest
27 |     steps:
28 |     - uses: actions/checkout@v2
29 |     - name: Set up doc building environment
30 |       uses: conda-incubator/setup-miniconda@v2
31 |       with:
32 |         environment-file: docs/doc_environment.yml
33 |         activate-environment: doc_env
34 |     - name: Build documentation
35 |       shell: bash -l {0}
36 |       working-directory: ./docs
37 |       run: |
38 |         make html
39 |     - name: Push to gh-pages branch
40 |       uses: JamesIves/github-pages-deploy-action@4.1.4
41 |       with:
42 |         branch: gh-pages # The branch the action should deploy to.
43 |         folder: docs/_build/html # The folder the action should deploy.
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # IDE
 2 | .idea/
 3 | 
 4 | # Byte-compiled / optimized / DLL files
 5 | __pycache__/
 6 | *.py[cod]
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
11 | # Distribution / packaging
12 | bin/
13 | build/
14 | develop-eggs/
15 | dist/
16 | eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # Installer logs
27 | pip-log.txt
28 | pip-delete-this-directory.txt
29 | 
30 | # Unit test / coverage reports
31 | .tox/
32 | .coverage
33 | .cache
34 | nosetests.xml
35 | coverage.xml
36 | 
37 | # Jupyter notebook checkpoints
38 | .ipynb_checkpoints/


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
 1 | # Based on: https://henningtimm.gitlab.io/post/gitlab_ci_and_conda/
 2 | 
 3 | image: continuumio/miniconda3:latest
 4 | 
 5 | unittests:
 6 |   script:
 7 |     - apt-get update -q -y
 8 |     - apt-get install -y build-essential
 9 |     - conda env create -f test_environment.yml
10 |     - source activate test_env
11 |     - nosetests -v --nocapture


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Dieter De Paepe, Sofie Van Hoecke
 4 | Ghent University - imec, Belgium
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Series Distance Matrix 
  2 | 
  3 | This library implements the [Series Distance Matrix framework](https://doi.org/10.1016/j.engappai.2020.103487),
  4 | a flexible component-based framework that bundles various [Matrix Profile](https://www.cs.ucr.edu/~eamonn/MatrixProfile.html)
  5 | related techniques.
  6 | These techniques can be used for (time) series mining and analysis. 
  7 | Some example applications include:
  8 | - motif discovery: finding the best (imperfect) matching subsequence pair in a larger series
  9 | - discord discovery: finding the most dissimilar subsequence in a larger series
 10 | - finding repeating subsequences in one or more series (common and consensus motifs)
 11 | - visualizing series
 12 | - finding changing patterns
 13 | - ...
 14 | 
 15 | The **Series Distance Matrix** framework was designed to integrate the various
 16 | Matrix Profile variants that were established over the years.
 17 | It does this by splitting the generation and consumption of
 18 | the all-pair subsequence distances,
 19 | putting the focus on the distance matrix itself.
 20 | This allows for easier and more flexible experiments by
 21 | freely combining components and eliminates the need
 22 | to re-implement algorithms to combine techniques in an efficient way.
 23 | 
 24 | 
 25 | Following core techniques are implemented:
 26 | - Z-normalized Euclidean distance (including noise elimination)
 27 | - Euclidean distance
 28 | - (Left/Right) Matrix Profile
 29 | - Multidimensional Matrix Profile
 30 | - Contextual Matrix Profile
 31 | - Radius Profile
 32 | - Streaming and batch calculation
 33 | 
 34 | 
 35 | Following Matrix Profile related techniques are implemented:
 36 | - Valmod: find the top-1 motif in a series for each subsequence length in a given range
 37 | - Ostinato: find the top-1 (k of n) consensus motif in a collection of series
 38 | - Anytime Ostinato: find the radius profile for a collection of series
 39 | 
 40 | 
 41 | ## Basic Usage
 42 | 
 43 | Calculate a standard Matrix Profile using z-normalized Euclidean distance over a single series.
 44 | 
 45 | ```python
 46 | import numpy as np
 47 | from distancematrix.generator.znorm_euclidean import ZNormEuclidean
 48 | from distancematrix.consumer.matrix_profile_lr import MatrixProfileLR
 49 | from distancematrix.calculator import AnytimeCalculator
 50 | 
 51 | data = np.random.randn(10000)
 52 | m = 100 # Subsequence length
 53 | 
 54 | calc = AnytimeCalculator(m, data)
 55 | gen_0 = calc.add_generator(0, ZNormEuclidean())
 56 | cons_mp = calc.add_consumer([0], MatrixProfileLR())
 57 | calc.calculate_columns()
 58 | 
 59 | matrix_profile = cons_mp.matrix_profile()
 60 | ```
 61 | 
 62 | Calculate a Matrix Profile and (common-10) Radius Profile over a single series using Euclidean distance.
 63 | A combined calculation is more efficient, as it can reuse the calculated distances.
 64 | 
 65 | ```python
 66 | import numpy as np
 67 | from distancematrix.generator.euclidean import Euclidean
 68 | from distancematrix.consumer.radius_profile import RadiusProfile
 69 | from distancematrix.consumer.matrix_profile_lr import MatrixProfileLR
 70 | from distancematrix.calculator import AnytimeCalculator
 71 | 
 72 | data = np.random.randn(10000)
 73 | m = 100 # Subsequence length
 74 | 
 75 | calc = AnytimeCalculator(m, data)
 76 | gen_0 = calc.add_generator(0, Euclidean()) # Generator 0 works on channel 0
 77 | cons_mp = calc.add_consumer([0], MatrixProfileLR()) # Consumer consumes generator 0
 78 | cons_rp = calc.add_consumer([0], RadiusProfile(10, m//2)) # Consumer consumes generator 0
 79 | calc.calculate_columns()
 80 | 
 81 | matrix_profile = cons_mp.matrix_profile()
 82 | radius_profile = cons_rp.values
 83 | ```
 84 | 
 85 | Calculate a partial multidimensional Matrix Profile over two data channels.
 86 | Partial calculations return approximated results but are significantly faster,
 87 | they are particularly interesting in interactive workflows, as they can be resumed.
 88 | 
 89 | ```python
 90 | import numpy as np
 91 | from distancematrix.generator.znorm_euclidean import ZNormEuclidean
 92 | from distancematrix.consumer.multidimensional_matrix_profile_lr import MultidimensionalMatrixProfileLR
 93 | from distancematrix.consumer.matrix_profile_lr import MatrixProfileLR
 94 | from distancematrix.calculator import AnytimeCalculator
 95 | 
 96 | data = np.random.randn(2, 10000)
 97 | m = 100 # Subsequence length
 98 | 
 99 | calc = AnytimeCalculator(m, data)
100 | gen_0 = calc.add_generator(0, ZNormEuclidean()) # Generator 0 works on channel 0
101 | gen_1 = calc.add_generator(1, ZNormEuclidean()) # Generator 1 works on channel 1
102 | cons_mmp = calc.add_consumer([0, 1], MultidimensionalMatrixProfileLR()) # Consumer consumes generator 0 & 1
103 | 
104 | # Calculate only 1/4 of all distances: faster, but returns approximated results
105 | calc.calculate_diagonals(partial=0.25)
106 | multidimensional_matrix_profile = cons_mmp.md_matrix_profile()
107 | 
108 | # Calculate the next quarter, so in total 1/2 of all distances are processed.
109 | calc.calculate_diagonals(partial=0.5)
110 | multidimensional_matrix_profile = cons_mmp.md_matrix_profile()
111 | ```
112 | 
113 | ## Documentation
114 | 
115 | Documentation for the latest version is available [online](https://predict-idlab.github.io/seriesdistancematrix).
116 | 
117 | Building the documentation locally is done using Sphinx. Navigate to the `docs` folder, activate the conda environment
118 | defined in the environment file, and run:
119 | 
120 | ```bash
121 | make html
122 | ```
123 | 
124 | ## Installing
125 | 
126 | Using pip:
127 | ```bash
128 | pip install seriesdistancematrix
129 | ```
130 | 
131 | Alternatively, clone this repositor and run:
132 | ```bash
133 | python setup.py clean build install
134 | ```
135 | 
136 | For local development (this allows you to edit code without having to reinstall the library):
137 | ```bash
138 | python setup.py develop
139 | ```
140 | 
141 | ## Academic Usage
142 | 
143 | When using this library for academic purposes, please cite:
144 | ```
145 | @article{series_distance_matrix,
146 |   title = "A generalized matrix profile framework with support for contextual series analysis",
147 |   journal = "Engineering Applications of Artificial Intelligence",
148 |   volume = "90",
149 |   pages = "103487",
150 |   year = "2020",
151 |   issn = "0952-1976",
152 |   doi = "https://doi.org/10.1016/j.engappai.2020.103487",
153 |   url = "http://www.sciencedirect.com/science/article/pii/S0952197620300087",
154 |   author = "De Paepe, Dieter and Vanden Hautte, Sander and Steenwinckel, Bram and De Turck, Filip and Ongenae, Femke and Janssens, Olivier and Van Hoecke, Sofie"
155 | }
156 | ```
157 | 


--------------------------------------------------------------------------------
/distancematrix/__init__.py:
--------------------------------------------------------------------------------
1 | import distancematrix.consumer
2 | import distancematrix.generator
3 | from distancematrix.calculator import AnytimeCalculator
4 | from distancematrix.calculator import StreamingCalculator
5 | 
6 | __version__ = "0.3.1" # Also update ../setup.py!


--------------------------------------------------------------------------------
/distancematrix/consumer/__init__.py:
--------------------------------------------------------------------------------
1 | from distancematrix.consumer.contextual_matrix_profile import ContextualMatrixProfile
2 | from distancematrix.consumer.distance_matrix import DistanceMatrix
3 | from distancematrix.consumer.matrix_profile_lr import MatrixProfileLR
4 | from distancematrix.consumer.matrix_profile_lr import ShiftingMatrixProfileLR
5 | from distancematrix.consumer.matrix_profile_lr import MatrixProfileLRReservoir
6 | from distancematrix.consumer.multidimensional_matrix_profile_lr import MultidimensionalMatrixProfileLR
7 | from distancematrix.consumer.threshold_counter import ThresholdCounter
8 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/abstract_consumer.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class AbstractConsumer(ABC):
 5 |     @abstractmethod
 6 |     def initialise(self, dims, query_subseq, series_subseq):
 7 |         """
 8 |         Initialise this consumer.
 9 | 
10 |         :param dims: the number of dimensions (data channels) this consumer will receive
11 |         :param query_subseq: the number of query subsequences (rows in the distance matrix)
12 |         :param series_subseq: the number of series subsequences (column in the distance matrix)
13 |         :return: None
14 |         """
15 |         pass
16 | 
17 |     @abstractmethod
18 |     def process_diagonal(self, diagonal_index, values):
19 |         """
20 |         Method called when a diagonal of the distance matrix is calculated.
21 | 
22 |         The number of values on the diagonal might be less than the diagonal of the full matrix profile,
23 |         this can occur when not enough data is available yet to calculate the entire distance matrix
24 |         (typically for streaming when not enough data is available to fill the entire foreseen space).
25 | 
26 |         :param diagonal_index: index of the diagonal in range ]-num_query_subseq, num_series_subseq[,
27 |             the main diagonal has index 0
28 |         :param values: array of shape (num_dimensions, num_values_on_diagonal) containing the distances
29 |         :return: None
30 |         """
31 |         pass
32 | 
33 |     @abstractmethod
34 |     def process_column(self, column_index, values):
35 |         """
36 |         Method called when a column of the distance matrix is calculated.
37 | 
38 |         The number of values on the column might be less than the column of the full matrix profile,
39 |         this can occur when not enough data is available yet to calculate the entire distance matrix
40 |         (typically for streaming when not enough data is available to fill the entire foreseen space).
41 | 
42 |         :param column_index: index of the column, in range [0, series_subseq[
43 |         :param values: array of shape (num_dimensions, num_values_on_column) containing the distances
44 |         :return: None
45 |         """
46 |         pass
47 | 
48 | 
49 | class AbstractStreamingConsumer(AbstractConsumer):
50 |     @abstractmethod
51 |     def shift_query(self, amount):
52 |         """
53 |         Inform the consumer that the distance matrix has shifted in the query direction.
54 | 
55 |         :param amount: amount of subsequences that were shifted
56 |         :return: None
57 |         """
58 |         pass
59 | 
60 |     @abstractmethod
61 |     def shift_series(self, amount):
62 |         """
63 |         Inform the consumer that the distance matrix has shifted in the series direction.
64 | 
65 |         :param amount: amount of subsequences that were shifted
66 |         :return: None
67 |         """
68 |         pass
69 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/contextmanager.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import Iterable, Tuple
  3 | import collections
  4 | import numpy as np
  5 | 
  6 | 
  7 | class AbstractContextManager(ABC):
  8 |     @abstractmethod
  9 |     def query_contexts(self, start: int, stop: int) -> Iterable[Tuple[int, int, int]]:
 10 |         """
 11 |         Return all non-empty query context definitions that fall in the given range of the distance matrix query axis.
 12 | 
 13 |         :param start: start of the range
 14 |         :param stop: end of the range
 15 |         :return: iterable of tuples (start of context, end of context, context id)
 16 |         """
 17 |         pass
 18 | 
 19 |     @abstractmethod
 20 |     def series_contexts(self, start: int, stop: int) -> Iterable[Tuple[int, int, int]]:
 21 |         """
 22 |         Return all non-empty series context definitions that fall in the given range of the distance matrix series axis.
 23 | 
 24 |         :param start: start of the range
 25 |         :param stop: end of the range
 26 |         :return: iterable of tuples (start of context, end of context, context id)
 27 |         """
 28 |         pass
 29 | 
 30 |     @abstractmethod
 31 |     def context_matrix_shape(self) -> (int, int):
 32 |         """
 33 |         Returns the shape of the contextual distance matrix
 34 | 
 35 |         :return: upper bound for any context id returned by this manager, for query and series axis
 36 |         """
 37 |         pass
 38 | 
 39 |     def shift_query(self, amount: int) -> int:
 40 |         """
 41 |         Informs the manager that the distance matrix has shifted along the query axis.
 42 | 
 43 |         :param amount: amount of values shifted
 44 |         :return: the amount of values that the contextual distance matrix should shift along the query axis
 45 |         """
 46 |         raise RuntimeError("This generator does not support query shifting.")
 47 | 
 48 |     def shift_series(self, amount: int) -> int:
 49 |         """
 50 |         Informs the manager that the distance matrix has shifted along the series axis.
 51 | 
 52 |         :param amount: amount of values shifted
 53 |         :return: the amount of values that the contextual distance matrix should shift along the series axis
 54 |         """
 55 |         raise RuntimeError("This generator does not support series shifting.")
 56 | 
 57 | 
 58 | class GeneralStaticManager(AbstractContextManager):
 59 |     """
 60 |     General purpose context manager for contextual matrix profile. This manager does not support streaming data.
 61 |     """
 62 | 
 63 |     def __init__(self, series_contexts, query_contexts=None):
 64 |         """
 65 |         Creates a new context manager.
 66 | 
 67 |         :param series_contexts: an iterable of ranges, each range defines one context. You can also
 68 |           use lists of ranges, to specify non-consecutive contexts.
 69 |         :param query_contexts: iterable of ranges, defaults to None, meaning to use the same contexts as the series
 70 |         """
 71 |         _verify_ranges([r for i, r in _enumerate_flattened(series_contexts)])
 72 | 
 73 |         if query_contexts is None:
 74 |             query_contexts = series_contexts
 75 |         else:
 76 |             _verify_ranges([r for i, r in _enumerate_flattened(query_contexts)])
 77 | 
 78 |         self._series_contexts = np.array(
 79 |             [(r.start, r.stop, i) for i, r in _filter_empty(_enumerate_flattened(series_contexts))], dtype=int)
 80 |         self._query_contexts = np.array(
 81 |             [(r.start, r.stop, i) for i, r in _filter_empty(_enumerate_flattened(query_contexts))], dtype=int)
 82 | 
 83 |         self._qc_sorted_start = self._query_contexts[np.argsort(self._query_contexts[:, 0])]
 84 |         self._qc_sorted_stop = self._query_contexts[np.argsort(self._query_contexts[:, 1])]
 85 | 
 86 |     def context_matrix_shape(self) -> (int, int):
 87 |         num_query_contexts = np.max(self._query_contexts[:, 2]) + 1
 88 |         num_series_contexts = np.max(self._series_contexts[:, 2]) + 1
 89 | 
 90 |         return num_query_contexts, num_series_contexts
 91 | 
 92 |     def series_contexts(self, start, stop):
 93 |         return self._series_contexts[np.logical_and(
 94 |             self._series_contexts[:, 0] < stop,  # Start of context is before stop
 95 |             self._series_contexts[:, 1] > start  # End of context is after start
 96 |         )]
 97 | 
 98 |     def query_contexts(self, start, stop):
 99 |         if start <= self._qc_sorted_start[0, 0] and stop >= self._qc_sorted_stop[-1, 1]:
100 |             return self._query_contexts
101 | 
102 |         if start == 0:
103 |             # All contexts that start before stop
104 |             contexts = self._qc_sorted_start[0: np.searchsorted(self._qc_sorted_start[:, 0], stop)]
105 |             return filter(lambda c: c[1] > 0, contexts)
106 |         elif stop >= self._qc_sorted_stop[-1, 1]:
107 |             # All contexts that end after start
108 |             contexts = self._qc_sorted_stop[np.searchsorted(self._qc_sorted_stop[:, 1], start, side="right"):]
109 |             return filter(lambda c: c[0] < stop, contexts)
110 |         else:
111 |             return self._query_contexts[np.logical_and(
112 |                 self._query_contexts[:, 0] < stop,  # Start of context is before stop
113 |                 self._query_contexts[:, 1] > start  # End of context is after start
114 |             )]
115 | 
116 | 
117 | def _verify_ranges(ranges):
118 |     for r in ranges:
119 |         if r.step != 1:
120 |             raise RuntimeError("Only ranges with step 1 supported.")
121 |         if r.start < 0:
122 |             raise RuntimeError("Range start should not be negative.")
123 | 
124 | 
125 | def _enumerate_flattened(l):
126 |     """
127 |     Converts a list of elements and lists into tuples (index, element), so that elements in nested lists
128 |     have the same index.
129 | 
130 |     Eg: [1, [2,3], 4] => (0, 1), (1, 2), (1, 3), (2, 4)
131 |     """
132 |     for i, el in enumerate(l):
133 |         if isinstance(el, collections.abc.Iterable) and not isinstance(el, range):
134 |             for r in el:
135 |                 yield i, r
136 |         else:
137 |             yield i, el
138 | 
139 | 
140 | def _filter_empty(iter):
141 |     for i, r in iter:
142 |         if r.start < r.stop:
143 |             yield (i, r)
144 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/contextual_matrix_profile.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from distancematrix.ringbuffer import RingBuffer
  4 | from distancematrix.consumer.abstract_consumer import AbstractStreamingConsumer
  5 | from distancematrix.consumer.contextmanager import AbstractContextManager
  6 | 
  7 | 
  8 | class ContextualMatrixProfile(AbstractStreamingConsumer):
  9 |     """
 10 |     A consumer that constructs the contextual matrix profile. The contextual matrix profile is formed by
 11 |     taking the minimum of rectangles across the full distance matrix (where the matrix profile takes the
 12 |     minimum across columns).
 13 | 
 14 |     This consumer supports streaming if the provided context manager does.
 15 |     """
 16 | 
 17 |     def __init__(self, context_manager: AbstractContextManager, rb_scale_factor=2.):
 18 |         """
 19 |         Creates a new consumer that calculates a contextual matrix profile,
 20 |         according to the contexts defined by the manager.
 21 | 
 22 |         :param context_manager: object responsible for defining the spans of each context over the query and series axis
 23 |         :param rb_scale_factor: scaling factor used for RingBuffers in case of streaming data (should be >= 1),
 24 |             this allows choosing a balance between less memory (low values) and reduced data copying (higher values)
 25 |         """
 26 |         if rb_scale_factor < 1.:
 27 |             raise ValueError("rb_scale_factor should be >= 1, it was: " + str(rb_scale_factor))
 28 | 
 29 |         self._num_series_subseq = None
 30 |         self._num_query_subseq = None
 31 |         self._range = None
 32 | 
 33 |         self._contexts = context_manager
 34 |         self._query_shift = 0
 35 |         self._series_shift = 0
 36 | 
 37 |         self._distance_matrix = None
 38 |         self._match_index_series = None
 39 |         self._match_index_query = None
 40 | 
 41 |         self._rb_scale_factor = rb_scale_factor
 42 | 
 43 |     def initialise(self, dims, query_subseq, series_subseq):
 44 |         self._num_series_subseq = series_subseq
 45 |         self._num_query_subseq = query_subseq
 46 |         self._range = np.arange(0, max(series_subseq, query_subseq), dtype=int)
 47 | 
 48 |         num_query_contexts, num_series_contexts = self._contexts.context_matrix_shape()
 49 | 
 50 |         self._distance_matrix = RingBuffer(np.full((num_query_contexts, num_series_contexts), np.Inf, dtype=float),
 51 |                                            scaling_factor=self._rb_scale_factor)
 52 |         self._match_index_series = RingBuffer(np.full((num_query_contexts, num_series_contexts), -1, dtype=int),
 53 |                                               scaling_factor=self._rb_scale_factor)
 54 |         self._match_index_query = RingBuffer(np.full((num_query_contexts, num_series_contexts), -1, dtype=int),
 55 |                                              scaling_factor=self._rb_scale_factor)
 56 | 
 57 |     def process_diagonal(self, diag, values):
 58 |         values = values[0]
 59 |         num_values = len(values)
 60 | 
 61 |         if diag >= 0:
 62 |             values_idx1_start = diag
 63 |             context0_idxs = self._contexts.query_contexts(0, num_values)
 64 |         else:
 65 |             values_idx1_start = 0
 66 |             context0_idxs = self._contexts.query_contexts(-diag, self._num_query_subseq)
 67 | 
 68 |         for c0_start, c0_end, c0_identifier in context0_idxs:
 69 |             # We now have a sub-sequence (ss) defined by the first context on the query axis
 70 |             # In absolute coordinates, start/end of this subsequence on 2nd axis (series axis)
 71 |             ss1_start = min(max(0, c0_start + diag), self._num_series_subseq)
 72 |             ss1_end = min(self._num_series_subseq, min(self._num_query_subseq, c0_end) + diag)
 73 | 
 74 |             if ss1_start == ss1_end:
 75 |                 continue
 76 | 
 77 |             context1_idxs = self._contexts.series_contexts(ss1_start, ss1_end)
 78 | 
 79 |             for c1_start, c1_end, c1_identifier in context1_idxs:
 80 |                 # In absolute coordinates, start/end of the subsequence on 2nd axis defined by both contexts
 81 |                 sss1_start = max(ss1_start, c1_start)
 82 |                 sss1_end = min(ss1_end, c1_end)
 83 | 
 84 |                 # Values that belong to both contexts
 85 |                 sss_values = values[sss1_start - values_idx1_start: sss1_end - values_idx1_start]
 86 | 
 87 |                 # Compare if better than current
 88 |                 min_sss_value = np.min(sss_values)
 89 |                 is_better = min_sss_value < self._distance_matrix[c0_identifier, c1_identifier]
 90 | 
 91 |                 if is_better:
 92 |                     self._distance_matrix[c0_identifier, c1_identifier] = min_sss_value
 93 |                     rel_indices = np.argmin(sss_values)
 94 |                     sss0_start = sss1_start - diag
 95 |                     self._match_index_query[c0_identifier, c1_identifier] = rel_indices + sss0_start + self._query_shift
 96 |                     self._match_index_series[c0_identifier, c1_identifier] = rel_indices + sss1_start + self._series_shift
 97 | 
 98 |     def process_column(self, column_index, values):
 99 |         values = values[0]
100 |         context1_idxs = self._contexts.series_contexts(column_index, column_index + 1)
101 | 
102 |         for _, _, c1_identifier in context1_idxs:
103 |             query_contexts = self._contexts.query_contexts(0, self._num_query_subseq)
104 | 
105 |             for c0_start, c0_end, c0_identifier in query_contexts:
106 |                 subseq = values[c0_start: c0_end]
107 |                 best_value = np.min(subseq)
108 | 
109 |                 if best_value < self._distance_matrix[c0_identifier, c1_identifier]:
110 |                     self._distance_matrix[c0_identifier, c1_identifier] = best_value
111 |                     self._match_index_query[c0_identifier, c1_identifier] = np.argmin(subseq) + c0_start + self._query_shift
112 |                     self._match_index_series[c0_identifier, c1_identifier] = column_index + self._series_shift
113 | 
114 |     def shift_series(self, amount):
115 |         context_shift = self._contexts.shift_series(amount)
116 |         self._series_shift += amount
117 | 
118 |         if context_shift > 0:
119 |             height = self._distance_matrix.max_shape[0]
120 |             self._distance_matrix.push(np.full((height, context_shift), np.Inf, dtype=float))
121 |             self._match_index_series.push(np.full((height, context_shift), -1, dtype=int))
122 |             self._match_index_query.push(np.full((height, context_shift), -1, dtype=int))
123 | 
124 |     def shift_query(self, amount):
125 |         context_shift = self._contexts.shift_query(amount)
126 |         self._query_shift += amount
127 | 
128 |         if context_shift > 0:
129 |             # Note: This could be more efficient using a 2D Ringbuffer.
130 |             height = min(context_shift, self._distance_matrix.max_shape[0])
131 |             self._distance_matrix.view = np.roll(self._distance_matrix.view, context_shift, axis=0)
132 |             self._distance_matrix[-height:, :] = np.Inf
133 |             self._match_index_series.view = np.roll(self._match_index_series.view, context_shift, axis=0)
134 |             self._match_index_series[-height:, :] = -1
135 |             self._match_index_query.view = np.roll(self._match_index_query.view, context_shift, axis=0)
136 |             self._match_index_query[-height:, :] = -1
137 | 
138 |     @property
139 |     def match_index_query(self):
140 |         return self._match_index_query.view
141 | 
142 |     @property
143 |     def match_index_series(self):
144 |         return self._match_index_series.view
145 | 
146 |     @property
147 |     def distance_matrix(self):
148 |         return self._distance_matrix.view
149 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/distance_matrix.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from ..util import diag_indices_of
 3 | from .abstract_consumer import AbstractStreamingConsumer
 4 | 
 5 | 
 6 | class DistanceMatrix(AbstractStreamingConsumer):
 7 |     def __init__(self):
 8 |         """
 9 |         Creates a new consumer that will store the complete distance matrix.
10 |         This consumer supports streaming.
11 | 
12 |         Note that the distance matrix requires quadratic memory, so it is unsuited for long time series.
13 |         """
14 | 
15 |         self.distance_matrix = None
16 | 
17 |     def initialise(self, dims, query_subseq, series_subseq):
18 |         if dims != 1:
19 |             raise RuntimeError("Input should be 1D")
20 | 
21 |         self.distance_matrix = np.full((query_subseq, series_subseq), np.nan, dtype=float)
22 | 
23 |     def process_diagonal(self, diagonal_index, values):
24 |         num_values = values.shape[1]
25 |         indices = diag_indices_of(self.distance_matrix, diagonal_index)
26 |         indices = (indices[0][:num_values], indices[1][:num_values])
27 |         self.distance_matrix[indices] = values
28 | 
29 |     def process_column(self, column_index, values):
30 |         num_values = values.shape[1]
31 |         self.distance_matrix[:num_values, column_index] = values
32 | 
33 |     def shift_series(self, amount):
34 |         if amount == 0:
35 |             return
36 | 
37 |         self.distance_matrix = np.roll(self.distance_matrix, -amount, axis=1)
38 |         self.distance_matrix[:, -amount:] = np.nan
39 | 
40 |     def shift_query(self, amount):
41 |         if amount == 0:
42 |             return
43 | 
44 |         self.distance_matrix = np.roll(self.distance_matrix, -amount, axis=0)
45 |         self.distance_matrix[-amount:, :] = np.nan
46 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/multidimensional_matrix_profile_lr.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from distancematrix.ringbuffer import RingBuffer
  3 | 
  4 | from .abstract_consumer import AbstractStreamingConsumer
  5 | 
  6 | 
  7 | class MultidimensionalMatrixProfileLR(AbstractStreamingConsumer):
  8 |     """
  9 |     A consumer that builds the multidimensional matrix profile. This consumer takes in distance measures from
 10 |     multiple channels (dimensions) at the same time and tracks the best distance, the index of this match and
 11 |     the dimensions used in this match.
 12 |     More specifically, if the input has N data channels, this consumer will select for each number of channels
 13 |     (1, 2, ..., N), the channels containing the best match, index and dimensions. It will not track matches for
 14 |     any possible combination of channels.
 15 | 
 16 |     This consumer keeps track of the left and right multidimensional profile, and can be used to create the
 17 |     (normal) multidimensional profile from it. The left profile, index and dimensions
 18 |     at index i contain information about a match whose index is less than or equal to i, while the right
 19 |     profile, index and dimensions track information about a match whose index is larger than i.
 20 | 
 21 |     The profile is an array with shape (num_dimensions, num_distances). The value at row i, j contains the best averaged
 22 |     distances encountered at index j for any i+1 dimensions. The index is similar, but tracks the index of the query
 23 |     series that had the best match.
 24 | 
 25 |     The dimensions being tracked is a list of length num_dimensions. Entry i of this list contains an
 26 |     (i+1, num_distances) array that lists the indices of the dimensions that contained the best match.
 27 | 
 28 |     This consumer supports streaming.
 29 |     """
 30 | 
 31 |     def __init__(self, rb_scale_factor=2.):
 32 |         """
 33 |         Creates a new consumer that calculates the left and right matrix profile, the corresponding
 34 |         indices and the used dimensions over multiple dimensions (data channels).
 35 | 
 36 |         :param rb_scale_factor: scaling factor used for RingBuffers in case of streaming data (should be >= 1),
 37 |             this allows choosing a balance between less memory (low values) and reduced data copying (higher values)
 38 |         """
 39 | 
 40 |         if rb_scale_factor < 1.:
 41 |             raise ValueError("rb_scale_factor should be >= 1, it was: " + str(rb_scale_factor))
 42 | 
 43 |         self._num_subseq = None
 44 |         self._range = None
 45 |         self._n_dim = None
 46 | 
 47 |         self._md_matrix_profile_left = None
 48 |         self._md_profile_index_left = None
 49 |         self._md_profile_dimension_left = None
 50 | 
 51 |         self._md_matrix_profile_right = None
 52 |         self._md_profile_index_right = None
 53 |         self._md_profile_dimension_right = None
 54 | 
 55 |         self._series_shift = 0
 56 |         self._query_shift = 0
 57 | 
 58 |         self._rb_scale_factor = rb_scale_factor
 59 | 
 60 |     def initialise(self, dims, query_subseq, series_subseq):
 61 |         self._n_dim = dims
 62 |         self._num_subseq = series_subseq
 63 |         self._range = RingBuffer(np.arange(0, self._num_subseq, dtype=int),
 64 |                                  scaling_factor=self._rb_scale_factor)
 65 | 
 66 |         self._md_matrix_profile_left = RingBuffer(np.full((dims, self._num_subseq), np.inf, dtype=float),
 67 |                                                   scaling_factor=self._rb_scale_factor)
 68 |         self._md_profile_index_left = RingBuffer(np.full((dims, self._num_subseq), -1, dtype=int),
 69 |                                                  scaling_factor=self._rb_scale_factor)
 70 |         self._md_profile_dimension_left = \
 71 |             [RingBuffer(np.full((i + 1, self._num_subseq), -1, dtype=int),
 72 |                         scaling_factor=self._rb_scale_factor) for i in range(dims)]
 73 | 
 74 |         self._md_matrix_profile_right = RingBuffer(np.full((dims, self._num_subseq), np.inf, dtype=float),
 75 |                                                    scaling_factor=self._rb_scale_factor)
 76 |         self._md_profile_index_right = RingBuffer(np.full((dims, self._num_subseq), -1, dtype=int),
 77 |                                                   scaling_factor=self._rb_scale_factor)
 78 |         self._md_profile_dimension_right = \
 79 |             [RingBuffer(np.full((i + 1, self._num_subseq), -1, dtype=int),
 80 |                         scaling_factor=self._rb_scale_factor) for i in range(dims)]
 81 | 
 82 |     def process_diagonal(self, diag, values):
 83 |         n_dim, num_values = values.shape
 84 |         shift_diff = self._series_shift - self._query_shift
 85 | 
 86 |         values_sort_order = np.argsort(values, axis=0)
 87 |         values_sorted = np.sort(values, axis=0)
 88 |         values_cumsum = np.zeros(num_values)
 89 | 
 90 |         if diag + shift_diff >= 0:
 91 |             # left MP
 92 |             if diag >= 0:
 93 |                 for dim in range(n_dim):
 94 |                     values_cumsum += values_sorted[dim, :]
 95 |                     values_mean_over_dim = values_cumsum / (dim + 1)
 96 | 
 97 |                     self._update_matrix_profile(values_mean_over_dim,
 98 |                                                 self._range[:num_values],
 99 |                                                 values_sort_order[:dim + 1, :],
100 |                                                 self._md_matrix_profile_left[dim, diag:diag + num_values],
101 |                                                 self._md_profile_index_left[dim, diag:diag + num_values],
102 |                                                 self._md_profile_dimension_left[dim][:, diag:diag + num_values])
103 |             else:
104 |                 for dim in range(n_dim):
105 |                     values_cumsum += values_sorted[dim, :]
106 |                     values_mean_over_dim = values_cumsum / (dim + 1)
107 | 
108 |                     self._update_matrix_profile(values_mean_over_dim,
109 |                                                 self._range[-diag:-diag + num_values],
110 |                                                 values_sort_order[:dim + 1, :],
111 |                                                 self._md_matrix_profile_left[dim, :num_values],
112 |                                                 self._md_profile_index_left[dim, :num_values],
113 |                                                 self._md_profile_dimension_left[dim][:, :num_values])
114 |         else:
115 |             # right MP
116 |             if diag >= 0:
117 |                 for dim in range(n_dim):
118 |                     values_cumsum += values_sorted[dim, :]
119 |                     values_mean_over_dim = values_cumsum / (dim + 1)
120 | 
121 |                     self._update_matrix_profile(values_mean_over_dim,
122 |                                                 self._range[num_values],
123 |                                                 values_sort_order[:dim + 1, :],
124 |                                                 self._md_matrix_profile_right[dim, diag:diag + num_values],
125 |                                                 self._md_profile_index_right[dim, diag:diag + num_values],
126 |                                                 self._md_profile_dimension_right[dim][:, diag:diag + num_values])
127 |             else:
128 |                 for dim in range(n_dim):
129 |                     values_cumsum += values_sorted[dim, :]
130 |                     values_mean_over_dim = values_cumsum / (dim + 1)
131 | 
132 |                     self._update_matrix_profile(values_mean_over_dim,
133 |                                                 self._range[-diag:-diag + num_values],
134 |                                                 values_sort_order[:dim + 1, :],
135 |                                                 self._md_matrix_profile_right[dim, :num_values],
136 |                                                 self._md_profile_index_right[dim, :num_values],
137 |                                                 self._md_profile_dimension_right[dim][:, :num_values])
138 | 
139 |         if diag >= 0:
140 |             for dim in range(n_dim):
141 |                 values_cumsum += values_sorted[dim, :]
142 |                 values_mean_over_dim = values_cumsum / (dim + 1)
143 | 
144 |                 self._update_matrix_profile(values_mean_over_dim,
145 |                                             self._range[:num_values],
146 |                                             values_sort_order[:dim + 1, :],
147 |                                             self._md_matrix_profile_left[dim, diag:diag + num_values],
148 |                                             self._md_profile_index_left[dim, diag:diag + num_values],
149 |                                             self._md_profile_dimension_left[dim][:, diag:diag + num_values])
150 | 
151 |         else:
152 |             for dim in range(n_dim):
153 |                 values_cumsum += values_sorted[dim, :]
154 |                 values_mean_over_dim = values_cumsum / (dim + 1)
155 | 
156 |                 self._update_matrix_profile(values_mean_over_dim,
157 |                                             self._range[-diag:-diag + num_values],
158 |                                             values_sort_order[:dim + 1, :],
159 |                                             self._md_matrix_profile_right[dim, :num_values],
160 |                                             self._md_profile_index_right[dim, :num_values],
161 |                                             self._md_profile_dimension_right[dim][:, :num_values])
162 | 
163 |     def _update_matrix_profile(self, new_distances, new_distance_indices, new_distance_dimensions,
164 |                                matrix_profile, matrix_profile_index, matrix_profile_dims):
165 |         update_pos = new_distances < matrix_profile
166 |         matrix_profile[update_pos] = new_distances[update_pos]
167 |         matrix_profile_index[update_pos] = new_distance_indices[update_pos]
168 |         matrix_profile_dims[:, update_pos] = new_distance_dimensions[:, update_pos]
169 | 
170 |     def process_column(self, column_index, values):
171 |         n_dim, num_values = values.shape
172 |         shift_diff = self._series_shift - self._query_shift
173 | 
174 |         border = max(0, column_index + 1 + shift_diff)
175 | 
176 |         values_sorted = np.sort(values, axis=0)
177 |         values_cumsum = np.zeros(num_values)
178 | 
179 |         for dim in range(n_dim):
180 |             values_cumsum += values_sorted[dim, :]
181 | 
182 |             if border > 0:
183 |                 min_position_l = np.argmin(values_cumsum[:border])
184 |                 new_min_value = values_cumsum[min_position_l] / (dim + 1)
185 | 
186 |                 if new_min_value < self._md_matrix_profile_left[dim, column_index]:
187 |                     self._md_matrix_profile_left[dim, column_index] = new_min_value
188 |                     self._md_profile_index_left[dim, column_index] = min_position_l + self._query_shift
189 |                     self._md_profile_dimension_left[dim][:, column_index] =\
190 |                         np.argsort(values[:, min_position_l])[:dim + 1]
191 | 
192 |             # Check if column crosses into the lower triangle of the distance matrix
193 |             if num_values > border:
194 |                 min_position_r = np.argmin(values_cumsum[border:]) + border
195 |                 new_min_value = values_cumsum[min_position_r] / (dim + 1)
196 | 
197 |                 # In case of shifting, a lower value could already be present
198 |                 if new_min_value < self._md_matrix_profile_right[dim, column_index]:
199 |                     self._md_matrix_profile_right[dim, column_index] = new_min_value
200 |                     self._md_profile_index_right[dim, column_index] = min_position_r + self._query_shift
201 |                     self._md_profile_dimension_right[dim][:, column_index] =\
202 |                         np.argsort(values[:, min_position_r])[:dim + 1]
203 | 
204 |     def shift_query(self, amount):
205 |         if amount == 0:
206 |             return
207 | 
208 |         self._query_shift += amount
209 |         self._range.push(np.arange(self._range[-1] + 1, self._range[-1] + 1 + amount))
210 | 
211 |     def shift_series(self, amount):
212 |         if amount == 0:
213 |             return
214 | 
215 |         self._series_shift += amount
216 | 
217 |         push_values = np.full((self._n_dim, amount), np.inf)
218 |         self._md_matrix_profile_left.push(push_values)
219 |         self._md_matrix_profile_right.push(push_values)
220 | 
221 |         push_values[:] = -1
222 |         self._md_profile_index_left.push(push_values)
223 |         self._md_profile_index_right.push(push_values)
224 | 
225 |         for dim in range(self._n_dim):
226 |             self._md_profile_dimension_left[dim].push(push_values[:dim + 1, :])
227 |             self._md_profile_dimension_right[dim].push(push_values[:dim + 1, :])
228 | 
229 |     def md_matrix_profile(self):
230 |         """
231 |         Merges the left and right multidimensional matrix profile, to create the multidimensional matrix profile.
232 |         :return: ndarray of shape (num_dimensions, num_subsequences)
233 |         """
234 |         left_best = self._md_matrix_profile_left.view < self._md_matrix_profile_right.view
235 |         return np.where(
236 |             left_best,
237 |             self._md_matrix_profile_left.view,
238 |             self._md_matrix_profile_right.view
239 |         )
240 | 
241 |     def md_profile_index(self):
242 |         """
243 |         Merges the left and right multidimensional matrix profile index, to create the multidimensional matrix profile
244 |         index.
245 |         :return: ndarray of shape (num_dimensions, num_subsequences)
246 |         """
247 |         left_best = self._md_matrix_profile_left.view < self._md_matrix_profile_right.view
248 |         return np.where(
249 |             left_best,
250 |             self._md_profile_index_left.view,
251 |             self._md_profile_index_right.view
252 |         )
253 | 
254 |     def md_profile_dimensions(self):
255 |         """
256 |         Merges the left and right dimensions, to create the dimensions for the multidimensional matrix profile.
257 |         :return: list of length num_dimensions, where the entry at index i is an ndarray of shape
258 |         (i+1, num_subsequences).
259 |         """
260 |         profile_dimension = [np.full((i + 1, self._num_subseq), -1, dtype=int) for i in range(self._n_dim)]
261 | 
262 |         for dim in range(self._n_dim):
263 |             left_best = self._md_matrix_profile_left[dim, :] < self._md_matrix_profile_right[dim, :]
264 |             profile_dimension[dim] = np.where(
265 |                 left_best,
266 |                 self._md_profile_dimension_left[dim].view,
267 |                 self._md_profile_dimension_right[dim].view
268 |             )
269 | 
270 |         return profile_dimension
271 | 
272 |     @property
273 |     def md_matrix_profile_left(self):
274 |         return self._md_matrix_profile_left.view
275 | 
276 |     @property
277 |     def md_matrix_profile_right(self):
278 |         return self._md_matrix_profile_right.view
279 | 
280 |     @property
281 |     def md_profile_index_left(self):
282 |         return self._md_profile_index_left.view
283 | 
284 |     @property
285 |     def md_profile_index_right(self):
286 |         return self._md_profile_index_right.view
287 | 
288 |     @property
289 |     def md_profile_dimension_left(self):
290 |         return [buffer.view for buffer in self._md_profile_dimension_left]
291 | 
292 |     @property
293 |     def md_profile_dimension_right(self):
294 |         return [buffer.view for buffer in self._md_profile_dimension_right]
295 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/radius_profile.py:
--------------------------------------------------------------------------------
  1 | from typing import Union, List
  2 | 
  3 | import numpy as np
  4 | 
  5 | from distancematrix.consumer.abstract_consumer import AbstractConsumer
  6 | from distancematrix.insights import lowest_value_idxs
  7 | 
  8 | 
  9 | class RadiusProfile0(AbstractConsumer):
 10 |     """
 11 |     Consumer that calculates (common-k) radius profiles.
 12 | 
 13 |     The (common-k) radius profile tracks the distance between each subsequence and its k-th best match.
 14 |     It can be used to find subsequences with at least k repetitions (so called common motifs).
 15 | 
 16 |     This class has been optimised for finding matches without ignoring trivial matches.
 17 |     In other words, it is not possible to define an exclusion zone for the matches.
 18 |     """
 19 |     def __init__(self, track_indices):
 20 |         """
 21 |         Creates a new radius profile consumer that tracks the distance between each subsequence and its
 22 |         k-th best matches.
 23 | 
 24 |         Note that the resulting radius profile will contain distances as if the given track_indices were sorted.
 25 | 
 26 |         :param track_indices: values of k to track
 27 |         """
 28 |         self.track_indices = np.array(track_indices, ndmin=1, dtype=int)
 29 | 
 30 |         if self.track_indices.ndim != 1:
 31 |             raise ValueError('Track_indices should be scalar or one-dimensional.')
 32 |         if len(self.track_indices) == 0:
 33 |             raise ValueError('At least one track index needed.')
 34 |         if np.any(self.track_indices < 0):
 35 |             raise ValueError('Only positive track_indices allowed.')
 36 | 
 37 |         self.track_indices.sort()
 38 |         self.values = None
 39 | 
 40 |     def initialise(self, dims, query_subseq, series_subseq):
 41 |         self.values = np.full((len(self.track_indices), series_subseq), np.nan, dtype=float)
 42 | 
 43 |     def process_diagonal(self, diag, values):
 44 |         raise NotImplementedError
 45 | 
 46 |     def process_column(self, column_index, values):
 47 |         values = values[0]
 48 | 
 49 |         sorted_values = np.empty(len(values) + 1, dtype=float)
 50 |         sorted_values[:-1] = np.sort(values)
 51 |         sorted_values[-1] = np.nan
 52 | 
 53 |         self.values[:, column_index] = np.take(sorted_values, self.track_indices, mode="clip")
 54 | 
 55 | 
 56 | class RadiusProfile(AbstractConsumer):
 57 |     """
 58 |     Consumer that calculates (common-k) radius profiles.
 59 | 
 60 |     The (common-k) radius profile tracks the distance between each subsequence and its k-th best match.
 61 |     It can be used to find subsequences with at least k repetitions (so called common motifs).
 62 |     """
 63 |     def __init__(self, track_indices: Union[int, List[int]], exclude_distance: int):
 64 |         """
 65 |         Creates a new radius profile consumer that tracks the distance between each subsequence and its
 66 |         k-th best matches.
 67 | 
 68 |         Note that the resulting radius profile will contain distances as if the given track_indices were sorted.
 69 | 
 70 |         .. seealso:: If excludedistance is zero,
 71 |          consider using :class:`distancematrix.consumer.radius_profile.RadiusProfile0`
 72 | 
 73 |         :param track_indices: values of k to track
 74 |         :param exclude_distance: trivial match exclusion distance, typical subsequence length / 2.
 75 |         """
 76 |         self.track_indices = np.array(track_indices, ndmin=1, dtype=int)
 77 | 
 78 |         if self.track_indices.ndim != 1:
 79 |             raise ValueError('Track_indices should be scalar or one-dimensional.')
 80 |         if len(self.track_indices) == 0:
 81 |             raise ValueError('At least one track index needed.')
 82 |         if np.any(self.track_indices < 0):
 83 |             raise ValueError('Only positive track_indices allowed.')
 84 |         if type(exclude_distance) is not int or exclude_distance < 0:
 85 |             raise RuntimeError('Exclude distance should be positive integer.')
 86 | 
 87 |         self.track_indices.sort()
 88 |         self.exclusion = exclude_distance
 89 |         self.values = None
 90 | 
 91 |     def initialise(self, dims, query_subseq, series_subseq):
 92 |         self.values = np.full((len(self.track_indices), series_subseq), np.nan, dtype=float)
 93 | 
 94 |     def process_diagonal(self, diag, values):
 95 |         raise NotImplementedError
 96 | 
 97 |     def process_column(self, column_index, values):
 98 |         values = values[0]
 99 | 
100 |         iterator = lowest_value_idxs(values, self.exclusion)
101 |         tracker_idx = 0
102 | 
103 |         # Iterate from best match to worst, ignoring trivial matches
104 |         for i, low_value_idx in enumerate(iterator):
105 |             # If we are interested in the i-th match
106 |             if i == self.track_indices[tracker_idx]:
107 |                 self.values[tracker_idx, column_index] = values[low_value_idx]
108 |                 tracker_idx += 1
109 | 
110 |                 # Abort if we found all matches we are tracking
111 |                 if tracker_idx >= len(self.track_indices):
112 |                     return
113 | 
114 |         return
115 | 


--------------------------------------------------------------------------------
/distancematrix/consumer/threshold_counter.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from .abstract_consumer import AbstractConsumer
 4 | 
 5 | 
 6 | class ThresholdCounter(AbstractConsumer):
 7 |     """
 8 |     Consumer that counts the number of values in each column of the distancematrix that are below
 9 |     or equal to specified thresholds.
10 | 
11 |     This consumer counts values as they are passed and does not extrapolate or keep information about which
12 |     values were already counted. Specifically: partial calculations will result in counts of the produced values,
13 |     and passing the same diagonals multiple time could result in double counts.
14 |     """
15 | 
16 |     def __init__(self, thresholds):
17 |         """
18 |         Creates a new counter.
19 | 
20 |         :param thresholds: scalar or 1D array of threshold values
21 |         """
22 |         self.thresholds = np.array(thresholds, ndmin=1, dtype=float)
23 |         if self.thresholds.ndim != 1:
24 |             raise ValueError('Thresholds should be scalar or one-dimensional.')
25 |         self.counts = None
26 | 
27 |     def initialise(self, dims, query_subseq, series_subseq):
28 |         self.counts = np.full((len(self.thresholds), series_subseq), 0, dtype=int)
29 | 
30 |     def process_diagonal(self, diag, values):
31 |         values = values[0]
32 |         num_values = len(values)
33 | 
34 |         if diag >= 0:
35 |             self.counts[:, diag:diag + num_values] += values <= self.thresholds[:, None]
36 |         else:
37 |             self.counts[:, :num_values] += values <= self.thresholds[:, None]
38 | 
39 |     def process_column(self, column_index, values):
40 |         values = values[0]
41 | 
42 |         self.counts[:, column_index] = np.count_nonzero(values <= self.thresholds[:, None], axis=1)
43 | 
44 | 
45 | class DistancedThresholdCounter(AbstractConsumer):
46 |     """
47 |     Consumer that counts the number of values in each column of the distancematrix that are below
48 |     or equal to specified thresholds, with the added restriction of only counting elements that are at least
49 |     a number of values apart from each other.
50 | 
51 |     This consumer does not support diagonal calculations.
52 |     """
53 | 
54 |     def __init__(self, thresholds, exclusion):
55 |         """
56 |         Creates a new counter.
57 | 
58 |         :param thresholds: scalar or 1D array of threshold values
59 |         :param exclusion: number of required spaces in between counted values
60 |         """
61 |         self.thresholds = np.array(thresholds, ndmin=1, dtype=float)
62 |         if self.thresholds.ndim != 1:
63 |             raise ValueError('Thresholds should be scalar or one-dimensional.')
64 |         self.thresholds.sort()
65 |         self.exclusion = exclusion
66 |         self.counts = None
67 | 
68 |     def initialise(self, dims, query_subseq, series_subseq):
69 |         self.counts = np.full((len(self.thresholds), series_subseq), 0, dtype=int)
70 | 
71 |     def process_diagonal(self, diag, values):
72 |         raise NotImplementedError("Diagonal processing is not supported.")
73 | 
74 |     def process_column(self, column_index, values):
75 |         values = values[0]
76 | 
77 |         threshold_idx = 0
78 |         current_thresh = self.thresholds[threshold_idx]
79 | 
80 |         # Todo: check performance if this is a class variable instead of a local one
81 |         exclusions = np.zeros(len(values), dtype=bool)
82 |         order = np.argsort(values)
83 | 
84 |         # Iterate over value indices from smallest to largest value
85 |         for i in order:
86 |             value = values[i]
87 |             while value > current_thresh:
88 |                 threshold_idx += 1
89 |                 if threshold_idx == len(self.thresholds):
90 |                     return
91 |                 current_thresh = self.thresholds[threshold_idx]
92 | 
93 |             if not exclusions[i]:
94 |                 self.counts[threshold_idx:, column_index] += 1
95 |                 exclusions[max(0, i - self.exclusion):i + self.exclusion + 1] = True
96 | 
97 |         return
98 | 


--------------------------------------------------------------------------------
/distancematrix/generator/__init__.py:
--------------------------------------------------------------------------------
1 | from distancematrix.generator.euclidean import Euclidean
2 | from distancematrix.generator.znorm_euclidean import ZNormEuclidean
3 | from distancematrix.generator.filter_generator import FilterGenerator
4 | 


--------------------------------------------------------------------------------
/distancematrix/generator/abstract_generator.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class AbstractGenerator(ABC):
 5 |     @abstractmethod
 6 |     def prepare(self, m, series, query=None):
 7 |         """
 8 |         Create a bound non-streaming generator for the given series and query sequences.
 9 | 
10 |         :param m: the size of the subsequences used to calculate distances between series and query
11 |         :param series: 1D array, used as the horizontal axis of a distance matrix
12 |         :param query: 1D array, used as the vertical axis of a distance matrix, or None to indicate a self-join
13 |         :return: a bound generator
14 |         """
15 |         pass
16 | 
17 |     @abstractmethod
18 |     def prepare_streaming(self, m, series_window, query_window=None):
19 |         """
20 |         Create a bound generator that supports streaming data.
21 |         The generator will need to receive data before any distances can be calculated.
22 | 
23 |         :param m: the size of the subsequences used to calculate distances between series and query
24 |         :param series_window: number of values to keep in memory for series, the length of the
25 |           horizontal axis of the distance matrix will be equal to (series_window - m + 1)
26 |         :param query_window: number of values to keep in memory for query, the length of the
27 |           vertical axis of the distance matrix will be equal to (query_window - m + 1),
28 |           or None to indicate a self-join.
29 |         :return: a bound generator that supports streaming
30 |         """
31 |         pass
32 | 
33 | 
34 | class AbstractBoundGenerator(ABC):
35 |     @abstractmethod
36 |     def calc_diagonal(self, diag):
37 |         """
38 |         Calculates all distances of the distance matrix diagonal with the given index for the available data.
39 | 
40 |         If diag is zero, this calculates the main diagonal, running from the top left to the bottom right.
41 |         Any positive value represents a diagonal above the main diagonal, and a negative value represents
42 |         a diagonal below the main diagonal.
43 | 
44 |         :param diag: the diagonal index
45 |         :return: 1D array, containing all values
46 |         """
47 |         pass
48 | 
49 |     @abstractmethod
50 |     def calc_column(self, column):
51 |         """
52 |         Calculates all distances of the distance matrix on the specified column for the available data.
53 | 
54 |         :param column: the column index (starting at 0)
55 |         :return: 1D array, containing all values
56 |         """
57 |         pass
58 | 
59 | 
60 | class AbstractBoundStreamingGenerator(ABC):
61 |     @abstractmethod
62 |     def append_series(self, values):
63 |         """
64 |         Adds more data points to the series sequence (and the query in case of a self-join).
65 |         Older data points will be dropped if the series would become larger than the foreseen capacity.
66 | 
67 |         :param values: 1D array, the new values to append to the series
68 |         :return: None
69 |         """
70 | 
71 |     @abstractmethod
72 |     def append_query(self, values):
73 |         """
74 |         Adds more data points to the query sequence.
75 |         Older data points will be dropped if the query would become larger than the foreseen capacity.
76 | 
77 |         :param values: 1D array, the new values to append to the query
78 |         :return: None
79 |         """


--------------------------------------------------------------------------------
/distancematrix/generator/euclidean.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from distancematrix.util import diag_length
  4 | from distancematrix.util import sliding_window_view
  5 | from distancematrix.ringbuffer import RingBuffer
  6 | from distancematrix.generator.abstract_generator import AbstractGenerator
  7 | from distancematrix.generator.abstract_generator import AbstractBoundStreamingGenerator
  8 | 
  9 | EPSILON = 1e-15
 10 | 
 11 | 
 12 | class Euclidean(AbstractGenerator):
 13 |     """
 14 |     Class capable of efficiently calculating parts of the euclidean distance matrix between two series,
 15 |     where each entry in the distance matrix equals the euclidean distance between 2 subsequences of both series.
 16 | 
 17 |     This generator can handle streaming data.
 18 |     """
 19 | 
 20 |     def __init__(self, rb_scale_factor=2.):
 21 |         """
 22 |         Creates a new instance.
 23 | 
 24 |         :param rb_scale_factor: scaling factor used for RingBuffers in case of streaming data (should be >= 1),
 25 |             this allows choosing a balance between less memory (low values) and reduced data copying (higher values)
 26 |         """
 27 |         if rb_scale_factor < 1.:
 28 |             raise ValueError("rb_scale_factor should be >= 1, it was: " + str(rb_scale_factor))
 29 | 
 30 |         self._rb_scale_factor = rb_scale_factor
 31 | 
 32 |     def prepare_streaming(self, m, series_window, query_window=None):
 33 |         series = RingBuffer(None, (series_window,), dtype=float, scaling_factor=self._rb_scale_factor)
 34 | 
 35 |         if query_window is not None:
 36 |             query = RingBuffer(None, (query_window,), dtype=float, scaling_factor=self._rb_scale_factor)
 37 |             self_join = False
 38 |         else:
 39 |             query = series
 40 |             self_join = True
 41 | 
 42 |         return BoundStreamingEuclidean(m, series, query, self_join)
 43 | 
 44 |     def prepare(self, m, series, query=None):
 45 |         if series.ndim != 1:
 46 |             raise RuntimeError("Series should be 1D")
 47 |         if query is not None and query.ndim != 1:
 48 |             raise RuntimeError("Query should be 1D")
 49 | 
 50 |         series = RingBuffer(series, dtype=float, scaling_factor=1)
 51 |         if query is not None:
 52 |             query = RingBuffer(query, dtype=float, scaling_factor=1)
 53 |             self_join = False
 54 |         else:
 55 |             query = series
 56 |             self_join = True
 57 |         return BoundStreamingEuclidean(m, series, query, self_join)
 58 | 
 59 | 
 60 | class BoundStreamingEuclidean(AbstractBoundStreamingGenerator):
 61 |     def __init__(self, m, series, query, self_join):
 62 |         self.m = m
 63 |         self.series = series
 64 |         self.query = query
 65 |         self.self_join = self_join
 66 | 
 67 |         self.first_row = None
 68 |         self.first_row_backlog = 0  # The number of values not yet processed for the first row cache
 69 |         self.prev_calc_column_index = None
 70 |         self.prev_calc_column_sq_dist = None
 71 | 
 72 |     def append_series(self, values):
 73 |         if len(values) == 0:
 74 |             return
 75 | 
 76 |         data_dropped = self.series.push(values)
 77 |         num_dropped = len(values) - (self.series.max_shape[0] - self.series.view.shape[0])
 78 |         self.first_row_backlog += len(values)
 79 | 
 80 |         if self.prev_calc_column_index is not None and num_dropped > 0:
 81 |             self.prev_calc_column_index -= num_dropped
 82 | 
 83 |         if self.self_join:
 84 |             if data_dropped:
 85 |                 self.first_row = None  # The first row was dropped by new data
 86 |             self.prev_calc_column_index = None
 87 | 
 88 |     def append_query(self, values):
 89 |         if self.self_join:
 90 |             raise RuntimeError("Cannot append query data in case of a self join.")
 91 | 
 92 |         if len(values) == 0:
 93 |             return
 94 | 
 95 |         if self.query.push(values):
 96 |             self.first_row = None  # The first row was dropped by new data
 97 |         self.prev_calc_column_index = None
 98 | 
 99 |     def calc_diagonal(self, diag):
100 |         dl = diag_length(len(self.query.view), len(self.series.view), diag)
101 |         cumsum = np.zeros(dl + 1, dtype=float)
102 | 
103 |         if diag >= 0:
104 |             # Eg: for diag = 2:
105 |             # D = (y0 - x2)², (y1 - x3)², (y2 - x4)²...
106 |             # cumsum = 0, D0, D0+D1, D0+D1+D2, ...
107 |             cumsum[1:] = np.cumsum(np.square(self.query[:dl] - self.series[diag: diag + dl]))
108 |         else:
109 |             # Eg: for diag = -2:
110 |             # D = (y2 - x0)², (y3 - x1)², (y4 - x2)²...
111 |             # cumsum = 0, D0, D0+D1, D0+D1+D2, ...
112 |             cumsum[1:] = np.cumsum(np.square(self.query[-diag: -diag + dl] - self.series[:dl]))
113 | 
114 |         return np.sqrt(cumsum[self.m:] - cumsum[:len(cumsum) - self.m])
115 | 
116 |     def calc_column(self, column):
117 |         if self.prev_calc_column_index != column - 1 or column == 0:
118 |             # Previous column not cached or data for incremental calculation not available: full calculation
119 |             sq_dist = _euclidean_distance_squared(self.query.view, self.series[column:column + self.m])
120 |         else:
121 |             # Previous column cached, reuse it
122 |             if self.first_row is None:
123 |                 self.first_row = RingBuffer(_euclidean_distance_squared(self.series.view, self.query[0: self.m]),
124 |                                             shape=(self.series.max_shape[0] - self.m + 1,))
125 |                 self.first_row_backlog = 0
126 |             elif self.first_row_backlog > 0:
127 |                 # Series has been updated since last calculation of first_row
128 |                 elems_to_recalc = self.first_row_backlog + self.m - 1
129 |                 self.first_row.push(_euclidean_distance_squared(self.series[-elems_to_recalc:], self.query[0: self.m]))
130 |                 self.first_row_backlog = 0
131 | 
132 |             sq_dist = self.prev_calc_column_sq_dist  # work in same array
133 |             sq_dist[1:] = (self.prev_calc_column_sq_dist[:-1]
134 |                            - np.square(self.series[column - 1] - self.query[:len(self.query.view)-self.m])
135 |                            + np.square(self.series[column + self.m - 1] - self.query[self.m:]))
136 |             sq_dist[0] = self.first_row[column]
137 | 
138 |         self.prev_calc_column_sq_dist = sq_dist
139 |         self.prev_calc_column_index = column
140 | 
141 |         return np.sqrt(sq_dist)
142 | 
143 | 
144 | def _euclidean_distance_squared(series, sequence):
145 |     """
146 |     Calculates the squared euclidean distance between the given sequence and each possible subsequence of the series
147 |     (using a sliding window of the same length as the sequence).
148 | 
149 |     :param series: 1D numpy array of length n
150 |     :param sequence: 1D numpy array of length m
151 |     :return: a 1D numpy array of length n-m+1 containing the squared euclidean distance
152 |     """
153 |     if series.ndim != 1:
154 |         raise RuntimeError("Series should be 1D")
155 |     if sequence.ndim != 1:
156 |         raise RuntimeError("Sequence should be 1D")
157 | 
158 |     m = len(sequence)
159 | 
160 |     sliding_view = sliding_window_view(series, [m])
161 | 
162 |     # (X - Y)^2 = X^2 - 2XY + Y^2
163 |     # Here, einsum is used to calculate dot products over sliding window to prevent memory copying.
164 |     # Using the normal euclidean distance calculation over the sliding window (x - y)^2 would result in copying
165 |     # each window, which leads to memory errors for long series.
166 |     dist = np.einsum('ij,ij->i', sliding_view, sliding_view)  # Dot product of every window with itself
167 |     dist -= 2 * np.einsum('ij,j->i', sliding_view, sequence)  # Dot product of every window with sequence
168 |     dist += np.dot(sequence, sequence)  # Dot product of sequence with itself
169 |     dist[dist < EPSILON] = 0  # Avoid very small negative numbers due to rounding
170 | 
171 |     # Simple implementation, this takes double as long to calculate as the einsum approach, though it contains
172 |     # no approximations. For very long series (100k when testing), suddenly takes 10 times as long, most likely
173 |     # due to cpu caching that cannot contain the entire series (could be circumvented by batching):
174 |     # num_sub_seq = len(series) - m + 1
175 |     # dist = np.zeros(num_sub_seq)
176 |     # for i in range(m):
177 |     #     dist += np.square(series[i:num_sub_seq + i] - sequence[i])
178 | 
179 |     return dist
180 | 


--------------------------------------------------------------------------------
/distancematrix/generator/filter_generator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from distancematrix.util import sliding_window_view
  4 | from distancematrix.ringbuffer import RingBuffer
  5 | from distancematrix.generator.abstract_generator import AbstractGenerator
  6 | from distancematrix.generator.abstract_generator import AbstractBoundGenerator
  7 | from distancematrix.generator.abstract_generator import AbstractBoundStreamingGenerator
  8 | 
  9 | 
 10 | def is_not_finite(data, subseq_length):
 11 |     """
 12 |     Marks infinite or nan values as invalid.
 13 |     """
 14 |     return ~np.isfinite(data)
 15 | 
 16 | 
 17 | class FilterGenerator(AbstractGenerator):
 18 |     def __init__(self, generator, invalid_data_function=is_not_finite, rb_scale_factor=2.):
 19 |         """
 20 |         Creates a new generator by wrapping another generator.
 21 | 
 22 |         :param generator: the generator whose results and input data will be filtered
 23 |         :param invalid_data_function: a function that takes in the original data (series or query) and
 24 |            subsequence length and returns a boolean array of the same size that has a True value for any invalid values.
 25 |            These values will be replaced by zeros before reaching the wrapped generator. Any distance values
 26 |            that were calculated using invalid data points will be positive infinite values.
 27 |         :param invalid_subseq_function: optional - a function that takes in the original data (series or query) and
 28 |            subsequence length and returns a boolean array of size matching the number of subsequences that has
 29 |            a True value for any invalid subsequence. Invalid subsequences will have positive infinte values
 30 |            as distance.
 31 |         """
 32 |         if rb_scale_factor < 1.:
 33 |             raise ValueError("rb_scale_factor should be >= 1, it was: " + str(rb_scale_factor))
 34 | 
 35 |         self._rb_scale_factor = rb_scale_factor
 36 |         self._generator = generator
 37 |         self._invalid_data_function = invalid_data_function
 38 | 
 39 |     def prepare_streaming(self, m, series_window, query_window=None):
 40 |         gen = self._generator.prepare_streaming(m, series_window, query_window)
 41 | 
 42 |         num_s_subseq = series_window - m + 1
 43 |         if query_window is None:
 44 |             num_q_subseq = None
 45 |         else:
 46 |             num_q_subseq = query_window - m + 1
 47 | 
 48 |         return BoundStreamingFilterGenerator(gen, m, num_s_subseq, num_q_subseq,
 49 |                                              self._invalid_data_function, self._rb_scale_factor)
 50 | 
 51 |     def prepare(self, m, series, query=None):
 52 |         new_series, invalid_series_subseq = _correct_data_and_create_masks(series, m, self._invalid_data_function)
 53 | 
 54 |         if query is not None:
 55 |             new_query, invalid_query_subseq = _correct_data_and_create_masks(query, m, self._invalid_data_function)
 56 |             num_q_subseq = len(query) - m + 1
 57 |         else:
 58 |             new_query = None
 59 |             invalid_query_subseq = invalid_series_subseq
 60 |             num_q_subseq = len(series) - m + 1
 61 | 
 62 |         generator = self._generator.prepare(m, new_series, new_query)
 63 |         return BoundFilterGenerator(generator, m, num_q_subseq, invalid_series_subseq, invalid_query_subseq)
 64 | 
 65 | 
 66 | class BoundFilterGenerator(AbstractBoundGenerator):
 67 |     """
 68 |         Wrapper around other generators that will replace values in the distance matrix marked as invalid
 69 |         by positive infinity. It can also perform a data pre-processing step before data reaches the wrapped generator,
 70 |         by setting values marked as invalid to zero, this can be useful for example to remove nan values for a generator
 71 |         that does not support nan values.
 72 |         """
 73 | 
 74 |     def __init__(self, generator, m, num_q_subseq, invalid_series_subseq, invalid_query_subseq):
 75 |         """
 76 |         Creates a new generator by wrapping another generator.
 77 | 
 78 |         :param generator: the generator whose results and input data will be filtered
 79 |         :param invalid_data_function: optional - a function that takes in the original data (series or query) and
 80 |            subsequence length and returns a boolean array of the same size that has a True value for any invalid values.
 81 |            These values will be replaced by zeros before reaching the wrapped generator. Any distance values
 82 |            that were calculated using invalid data points will be positive infinite values.
 83 |         :param invalid_subseq_function: optional - a function that takes in the original data (series or query) and
 84 |            subsequence length and returns a boolean array of size matching the number of subsequences that has
 85 |            a True value for any invalid subsequence. Invalid subsequences will have positive infinte values
 86 |            as distance.
 87 |         """
 88 |         self.generator = generator
 89 | 
 90 |         self.m = m
 91 |         self.num_q_subseq = num_q_subseq
 92 | 
 93 |         self.invalid_series_subseq = invalid_series_subseq
 94 |         self.invalid_query_subseq = invalid_query_subseq
 95 | 
 96 |     def calc_diagonal(self, diag):
 97 |         distances = self.generator.calc_diagonal(diag)
 98 | 
 99 |         if diag >= 0:
100 |             if self.invalid_series_subseq is not None:
101 |                 distances[self.invalid_series_subseq[diag: diag+len(distances)]] = np.Inf
102 |             if self.invalid_query_subseq is not None:
103 |                 distances[self.invalid_query_subseq[:len(distances)]] = np.Inf
104 |         else:
105 |             if self.invalid_series_subseq is not None:
106 |                 distances[self.invalid_series_subseq[:len(distances)]] = np.Inf
107 |             if self.invalid_query_subseq is not None:
108 |                 distances[self.invalid_query_subseq[-diag: -diag+len(distances)]] = np.Inf
109 | 
110 |         return distances
111 | 
112 |     def calc_column(self, column):
113 |         if self.invalid_series_subseq is not None and self.invalid_series_subseq[column]:
114 |             return np.full(self.num_q_subseq, np.Inf)
115 | 
116 |         distances = self.generator.calc_column(column)
117 | 
118 |         if self.invalid_query_subseq is not None:
119 |             distances[self.invalid_query_subseq] = np.Inf
120 | 
121 |         return distances
122 | 
123 | 
124 | class BoundStreamingFilterGenerator(BoundFilterGenerator, AbstractBoundStreamingGenerator):
125 |     """
126 |     Wrapper around other generators that will replace values in the distance matrix marked as invalid
127 |     by positive infinity. It can also perform a data pre-processing step before data reaches the wrapped generator,
128 |     by setting values marked as invalid to zero, this can be useful for example to remove nan values for a generator
129 |     that does not support nan values.
130 |     """
131 | 
132 |     def __init__(self, generator, m, num_s_subseq, num_q_subseq, invalid_data_function, rb_scale_factor):
133 |         """
134 |         Creates a new generator by wrapping another generator.
135 | 
136 |         :param generator: the generator whose results and input data will be filtered
137 |         :param invalid_data_function: optional - a function that takes in the original data (series or query) and
138 |            subsequence length and returns a boolean array of the same size that has a True value for any invalid values.
139 |            These values will be replaced by zeros before reaching the wrapped generator. Any distance values
140 |            that were calculated using invalid data points will be positive infinite values.
141 |         """
142 | 
143 |         self._invalid_data_function = invalid_data_function
144 | 
145 |         invalid_s_subseq_buffer = RingBuffer(None, shape=(num_s_subseq,),
146 |                                              dtype=bool, scaling_factor=rb_scale_factor)
147 | 
148 |         self.invalid_series = RingBuffer(None, shape=(num_s_subseq + m - 1,),
149 |                                          dtype=bool, scaling_factor=rb_scale_factor)
150 | 
151 |         if num_q_subseq is None:
152 |             self.self_join = True
153 |             invalid_q_subseq_buffer = invalid_s_subseq_buffer
154 |             num_q_subseq = num_s_subseq
155 |             self.invalid_query = self.invalid_series
156 |         else:
157 |             self.self_join = False
158 | 
159 |             invalid_q_subseq_buffer = RingBuffer(None, shape=(num_q_subseq,),
160 |                                                  dtype=bool, scaling_factor=rb_scale_factor)
161 |             self.invalid_query = RingBuffer(None, shape=(num_q_subseq + m - 1,),
162 |                                             dtype=bool, scaling_factor=rb_scale_factor)
163 | 
164 |         super().__init__(generator, m, num_q_subseq, invalid_s_subseq_buffer, invalid_q_subseq_buffer)
165 | 
166 |     def append_series(self, values):
167 |         invalid_points = _apply_data_validation(values, self.m, self._invalid_data_function)
168 |         self.invalid_series.push(invalid_points)
169 | 
170 |         if np.any(invalid_points):
171 |             values = values.copy()
172 |             values[invalid_points] = 0
173 | 
174 |         if len(self.invalid_series.view) >= self.m:
175 |             rel_values = self.invalid_series[-(len(values) + self.m - 1):]
176 |             self.invalid_series_subseq.push(np.any(sliding_window_view(rel_values, (self.m,)), axis=-1))
177 | 
178 |         self.generator.append_series(values)
179 | 
180 |     def append_query(self, values):
181 |         if self.self_join:
182 |             raise RuntimeError("Cannot append to query for a self-join.")
183 | 
184 |         invalid_points = _apply_data_validation(values, self.m, self._invalid_data_function)
185 |         self.invalid_query.push(invalid_points)
186 | 
187 |         if np.any(invalid_points):
188 |             values = values.copy()
189 |             values[invalid_points] = 0
190 | 
191 |         if len(self.invalid_query.view) >= self.m:
192 |             rel_values = self.invalid_query[-(len(values) + self.m - 1):]
193 |             self.invalid_query_subseq.push(np.any(sliding_window_view(rel_values, (self.m,)), axis=-1))
194 | 
195 |         self.generator.append_query(values)
196 | 
197 |     def calc_column(self, column):
198 |         if self.invalid_series_subseq[column]:
199 |             return np.full(len(self.invalid_query_subseq.view), np.Inf)
200 | 
201 |         distances = self.generator.calc_column(column)
202 |         distances[self.invalid_query_subseq.view] = np.Inf
203 | 
204 |         return distances
205 | 
206 | 
207 | def _apply_data_validation(data, m, invalid_data_function):
208 |     """
209 |     Returns a boolean array of the same size as data.
210 | 
211 |     :param data:
212 |     :param m:
213 |     :param invalid_data_function:
214 |     :return:
215 |     """
216 |     invalid_data = invalid_data_function(data, m)
217 |     if invalid_data.shape != data.shape:
218 |         raise RuntimeError("Invalid_data_function's output does not have expected dimension.")
219 | 
220 |     return invalid_data
221 | 
222 | 
223 | def _correct_data_and_create_masks(data, m, invalid_data_function):
224 |     """
225 |     Runs invalid_data_function and invalid_subseq_function, if they are defined.
226 |     Any invalid data points are set to zero value and returned in a copied array.
227 |     A boolean array is created to mark all invalid subsequence indices (= True values).
228 | 
229 |     :param data: 1D-array
230 |     :param m: subsequence length
231 |     :return: tuple of: data or a modified copy of data; None or a boolean 1D array containing at least 1 True
232 |       (= invalid subsequence) value
233 |     """
234 |     invalid_data = invalid_data_function(data, m)
235 |     if invalid_data.shape != data.shape:
236 |         raise RuntimeError("Invalid_data_function's output does not have expected dimension.")
237 | 
238 | 
239 |     # invalid_data = invalid_data and np.any(invalid_data)
240 |     # invalid_subseq = invalid_subseq and np.any(invalid_subseq)
241 | 
242 |     new_data = data
243 |     invalid_mask = None
244 |     if invalid_data is not None:
245 |         new_data = data.copy()
246 |         new_data[invalid_data] = 0
247 |         invalid_mask = _invalid_data_to_invalid_subseq(invalid_data, m)
248 | 
249 |     return new_data, invalid_mask
250 | 
251 | def _invalid_data_to_invalid_subseq(invalid_data, subseq_length):
252 |     """
253 |     Converts a boolean array marking invalid data points to a boolean array marking invalid subsequences.
254 |     (A subsequence is invalid if it contained any invalid data point.)
255 | 
256 |     :param invalid_data: 1D array of booleans, True indicating invalid data points
257 |     :param subseq_length: subsequence length
258 |     :return: 1D boolean array of length num-subsequences
259 |     """
260 |     data_length = invalid_data.shape[0]
261 |     result = np.zeros(data_length - subseq_length + 1, dtype=bool)
262 | 
263 |     impacted = 0
264 |     for i in range(0, subseq_length - 1):
265 |         if invalid_data[i]:
266 |             impacted = subseq_length
267 |         if impacted:
268 |             impacted -= 1
269 | 
270 |     for i in range(subseq_length-1, data_length):
271 |         if invalid_data[i]:
272 |             impacted = subseq_length
273 |         if impacted:
274 |             result[i - subseq_length + 1] = True
275 |             impacted -= 1
276 | 
277 |     return result
278 | 


--------------------------------------------------------------------------------
/distancematrix/insights.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def lowest_value_idxs(array, exclude_distance):
 5 |     """
 6 |     Creates a generator that returns the indices of the lowest elements, where each index differs by at least
 7 |     exclude_distance from every previously returned index. Non-finite values are ignored.
 8 | 
 9 |     :param array: 1D array
10 |     :param exclude_distance: a positive integer
11 |     :return: a generator
12 |     """
13 |     if not array.ndim == 1:
14 |         raise RuntimeError("Array should be 1-dimensional.")
15 |     if type(exclude_distance) is not int or exclude_distance < 0:
16 |         raise RuntimeError('Exclude distance should be positive integer.')
17 | 
18 |     array = array.astype(float, copy=True)
19 |     array[~np.isfinite(array)] = np.inf
20 | 
21 |     min_idx = np.argmin(array)
22 | 
23 |     while array[min_idx] != np.inf:
24 |         yield min_idx
25 | 
26 |         array[max(0, min_idx - exclude_distance): min_idx + exclude_distance + 1] = np.inf
27 |         min_idx = np.argmin(array)
28 | 
29 |     return
30 | 
31 | 
32 | def highest_value_idxs(array, exclude_distance):
33 |     """
34 |     Creates a generator that returns the indices of the highest elements, where each index differs by at least
35 |     exclude_distance from every previously returned index. Non-finite values are ignored.
36 | 
37 |     :param array: 1D array
38 |     :param exclude_distance: a positive integer
39 |     :return: a generator
40 |     """
41 |     return lowest_value_idxs(-array, exclude_distance)
42 | 


--------------------------------------------------------------------------------
/distancematrix/interrupt_util.py:
--------------------------------------------------------------------------------
 1 | import signal
 2 | from contextlib import contextmanager
 3 | 
 4 | 
 5 | @contextmanager
 6 | def interrupt_catcher():
 7 |     """
 8 |     A context that allows for gracefully terminating a calculation by catching interrupts
 9 |     and providing a method to check whether an interrupt has occurred.
10 | 
11 |     :return: None
12 |     """
13 |     interrupted = False
14 | 
15 |     def set_interrupted(signum, frame):
16 |         nonlocal interrupted
17 |         interrupted = True
18 | 
19 |     def is_interrupted():
20 |         nonlocal interrupted
21 |         return interrupted
22 | 
23 |     # Replace the current interrupt handler
24 |     original_sigint_handler = signal.getsignal(signal.SIGINT)
25 |     signal.signal(signal.SIGINT, set_interrupted)
26 | 
27 |     try:
28 |         yield is_interrupted
29 |     finally:
30 |         # Restore original interrupt handler
31 |         signal.signal(signal.SIGINT, original_sigint_handler)
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     import time
36 |     for i in range(5):
37 |         print(i)
38 |         time.sleep(1)
39 | 
40 |     print("-- Interrupts will now simply halt the loop --")
41 |     with interrupt_catcher() as is_interrupted:
42 |         for i in range(5):
43 |             if is_interrupted():
44 |                 break
45 |             print(i)
46 |             time.sleep(1)
47 |     print("-- Interrupts are back to normal --")
48 |     for i in range(5):
49 |         print(i)
50 |         time.sleep(1)


--------------------------------------------------------------------------------
/distancematrix/math_tricks.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from distancematrix.ringbuffer import RingBuffer
  3 | from distancematrix.util import sliding_window_view
  4 | 
  5 | 
  6 | def sliding_mean_std(series, m):
  7 |     """
  8 |     Calculates the sliding mean and standard deviation over the series using a window of size m.
  9 |     The series should only contain finite values.
 10 | 
 11 |     :param series: 1D numpy array
 12 |     :param m: sliding window size
 13 |     :return: tuple of 2 arrays, each of size (len(series) - m + 1)
 14 |     """
 15 |     if m <= 0 or not isinstance(m, int):
 16 |         raise RuntimeError('m should be an integer > 0.')
 17 | 
 18 |     if series.ndim != 1:
 19 |         raise RuntimeError('series should be one dimensional')
 20 | 
 21 |     if not np.isfinite(series).all():
 22 |         raise RuntimeError('Provided series contains nan or infinite values.')
 23 | 
 24 |     sliding_view = sliding_window_view(series, [m])
 25 |     return np.mean(sliding_view, axis=1), np.std(sliding_view, axis=1)
 26 | 
 27 | 
 28 | def sliding_mean_var(series, m):
 29 |     """
 30 |     Calculates the sliding mean and variance over the series using a window of size m.
 31 |     The series should only contain finite values.
 32 | 
 33 |     :param series: 1D numpy array
 34 |     :param m: sliding window size
 35 |     :return: tuple of 2 arrays, each of size (len(series) - m + 1)
 36 |     """
 37 |     if m <= 0 or not isinstance(m, int):
 38 |         raise RuntimeError('m should be an integer > 0.')
 39 | 
 40 |     if series.ndim != 1:
 41 |         raise RuntimeError('series should be one dimensional')
 42 | 
 43 |     if not np.isfinite(series).all():
 44 |         raise RuntimeError('Provided series contains nan or infinite values.')
 45 | 
 46 |     sliding_view = sliding_window_view(series, [m])
 47 |     return np.mean(sliding_view, axis=1), np.var(sliding_view, axis=1)
 48 | 
 49 | 
 50 | class StreamingStats(object):
 51 |     """
 52 |     Class that tracks a data stream and corresponding mean and standard deviation of a window over this data.
 53 | 
 54 |     The data stream has to be updated by the user, after which the mean/std stream will be updated automatically.
 55 | 
 56 |     This class uses RingBuffers internally, so any old view (data, mean, std) should be considered unreliable
 57 |     after new data was pushed to this class.
 58 |     """
 59 | 
 60 |     def __init__(self, series, m) -> None:
 61 |         """
 62 |         Creates a new instance. This instance will keep track of a data stream (with dimensions matching those of
 63 |         series) and a stream of moving mean and standard deviation using a window of length m.
 64 | 
 65 |         :param series: Starting data of the data stream
 66 |         :param m: window size for mean and variance
 67 |         """
 68 |         if m > series.shape[-1]:
 69 |             raise RuntimeError("M should be <= series.shape[-1].")
 70 | 
 71 |         self._data_buffer = RingBuffer(series)
 72 |         self._m = m
 73 | 
 74 |         sliding_avg, sliding_std = sliding_mean_std(series, m)
 75 |         self._mean_buffer = RingBuffer(sliding_avg)
 76 |         self._std_buffer = RingBuffer(sliding_std)
 77 | 
 78 |     def append(self, data):
 79 |         data_length = data.shape[-1]
 80 | 
 81 |         if data_length == 0:
 82 |             return
 83 | 
 84 |         self._data_buffer.push(data)
 85 |         new_means, new_stds = sliding_mean_std(self._data_buffer[max(-self._m - 1 - data_length, 0):], self._m)
 86 |         self._mean_buffer.push(new_means)
 87 |         self._std_buffer.push(new_stds)
 88 | 
 89 |         # Original implementation below, this approach might still be interesting if the current approach proves to be
 90 |         # too slow in practice. One issue that remains to be solved (why this method was replaced) is that
 91 |         # a mid-signal constant window will not result in variance of 0. One approach might be to simply check
 92 |         # for constant signals. A starting point might be:
 93 |         # https://stackoverflow.com/questions/1066758/find-length-of-sequences-of-identical-values-in-a-numpy-array-run-length-encodi?rq=1
 94 |         # The numerical stability test gives a use case where this method fails.
 95 |         #
 96 |         # buffer_length = self._data_buffer.view.shape[-1]
 97 |         # if data_length >= buffer_length:
 98 |         #     sliding_avg, sliding_var = sliding_mean_var(data[..., -buffer_length:], self._m)
 99 |         #     self._mean_buffer.push(sliding_avg)
100 |         #     self._var_buffer.push(sliding_var)
101 |         # else:
102 |         #     # Sliding variance formula: http://jonisalonen.com/2014/efficient-and-accurate-rolling-standard-deviation/
103 |         #     # First steps of derivation: http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
104 |         #     # (For non-online calculation, the formula used in sliding_mean_var is faster)
105 |         #
106 |         #     old_mean = self._mean_buffer.view[..., -1]
107 |         #     old_var = self._var_buffer.view[..., -1]
108 |         #     values_to_remove = self._data_buffer.view[..., -self._m: min(-1, -self._m + data_length)]
109 |         #     values_to_add = data[..., :values_to_remove.shape[-1]]
110 |         #     new_means = old_mean + np.cumsum(- values_to_remove + values_to_add) / self._m
111 |         #     old_means = np.concatenate((np.atleast_1d(old_mean), new_means[..., :-1]))
112 |         #     new_vars = old_var + np.cumsum((values_to_add - values_to_remove) * (
113 |         #        values_to_add - new_means + values_to_remove - old_means) / self._m)
114 |         #     new_vars[new_vars < 1e-12] = 0.  # Unreliable!
115 |         #
116 |         #     self._mean_buffer.push(new_means)
117 |         #     self._var_buffer.push(new_vars)
118 |         #
119 |         #     if data_length >= self._m:
120 |         #         sliding_avg, sliding_var = sliding_mean_var(data, self._m)
121 |         #         self._mean_buffer.push(sliding_avg)
122 |         #         self._var_buffer.push(sliding_var)
123 |         #
124 |         # self._data_buffer.push(data)
125 | 
126 |     @property
127 |     def data(self):
128 |         return self._data_buffer.view
129 | 
130 |     @property
131 |     def mean(self):
132 |         return self._mean_buffer.view
133 | 
134 |     @property
135 |     def std(self):
136 |         return self._std_buffer.view
137 | 


--------------------------------------------------------------------------------
/distancematrix/ostinato.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | import numpy as np
  3 | 
  4 | from distancematrix import AnytimeCalculator
  5 | from distancematrix.generator import ZNormEuclidean
  6 | from distancematrix.generator.znorm_euclidean import BoundZNormEuclidean, _CONSTANT_SUBSEQ_THRESHOLD
  7 | from distancematrix.consumer import MatrixProfileLR
  8 | from distancematrix.math_tricks import sliding_mean_std
  9 | from distancematrix.ringbuffer import RingBuffer
 10 | 
 11 | CMResult = namedtuple('CMResult', ['radius', 'series_index', 'subseq_index'])
 12 | 
 13 | 
 14 | def find_consensus_motif(series_list, m: int) -> CMResult:
 15 |     """
 16 |     Finds the top-1 consensus motif and corresponding distance for the given collection of series.
 17 |     The consensus motif is the subsequence (extracted from one of the series),
 18 |     that has a match to a subsequence from each other series within a certain distance,
 19 |     where that distance is minimal.
 20 | 
 21 |     This method implements the Ostinato algorithm, described in
 22 |     "Matrix Profile XV: Exploiting Time Series Consensus  Motifs to Find Structure in Time Series Sets"
 23 |     by K. Kamgar, S. Gharghabi and E. Keogh.
 24 | 
 25 |     :param series_list: list of 1-dimensional arrays
 26 |     :param m: length of the consensus motif
 27 |     :return: tuple containing radius, series index and subsequence index of the consensus motif
 28 |     """
 29 |     if len(series_list) < 2:
 30 |         raise RuntimeError("At least 2 series are required.")
 31 |     if m < 3:
 32 |         raise RuntimeError("Motif length should be >= 3.")
 33 |     for series in series_list:
 34 |         series = np.array(series)
 35 |         if len(series) < m:
 36 |             raise RuntimeError("One or more series are shorter than the desired motif length.")
 37 |         if series.ndim != 1:
 38 |             raise RuntimeError("One or more series are not one dimensional.")
 39 | 
 40 |     best_result = CMResult(np.inf, -1, -1)
 41 |     num_series = len(series_list)
 42 | 
 43 |     # Create a distance calculator for each series pair, but reuse mu/std calculations per series.
 44 |     # Step 1: mu/std calculation
 45 |     cached_generators = {}
 46 |     mus = []
 47 |     stds = []
 48 |     stdsz = []
 49 |     for series in series_list:
 50 |         mu, std = sliding_mean_std(series, m)
 51 |         mus.append(RingBuffer(mu, scaling_factor=1.))
 52 |         stds.append(RingBuffer(std, scaling_factor=1.))
 53 |         stdsz.append(RingBuffer(std > _CONSTANT_SUBSEQ_THRESHOLD, scaling_factor=1.))
 54 | 
 55 |     # Step 2: create the distance calculator
 56 |     for i, series1 in enumerate(series_list):
 57 |         for j, series2 in enumerate(series_list):
 58 |             if i == j:
 59 |                 continue
 60 |             gen = BoundZNormEuclidean(m, RingBuffer(series1, scaling_factor=1.), RingBuffer(series2, scaling_factor=1.),
 61 |                                       False, 0., mus[i], stds[i], stdsz[i], mus[j], stds[j], stdsz[j])
 62 |             cached_generators[i, j] = gen
 63 | 
 64 |     # Look for the consensus motif: iterator over all series
 65 |     for series_idx in range(num_series):
 66 |         next_series_idx = (series_idx + 1) % num_series
 67 |         active_series = series_list[series_idx]
 68 | 
 69 |         # Calculate a full matrix profile between the series and the next series
 70 |         dist_calc = cached_generators[(series_idx, next_series_idx)]
 71 |         num_subseq = len(active_series) - m + 1
 72 |         mp = np.empty(num_subseq, dtype=float)
 73 |         for col in range(num_subseq):
 74 |             mp[col] = np.min(dist_calc.calc_column(col))
 75 | 
 76 |         # Order the subsequences of the series from lowest to highest distances (as given by the Matrix Profile)
 77 |         candidates = np.argsort(mp)
 78 | 
 79 |         # Iterate over all candidate subsequences, starting from those that had the best match to next_series.
 80 |         for subseq_idx in candidates:
 81 |             candidate_radius = mp[subseq_idx]
 82 |             aborted = False
 83 | 
 84 |             # Abort if the distance (to next_series) is worse than best result so far
 85 |             if candidate_radius >= best_result.radius:
 86 |                 break
 87 | 
 88 |             # Check distance of the candidate subsequence to all other series.
 89 |             for other_series_idx in range(num_series):
 90 |                 # Skip the current and next_series, as we already considered those.
 91 |                 if other_series_idx in [series_idx, next_series_idx]:
 92 |                     continue
 93 | 
 94 |                 # Calculates the distance from the candidate subsequence to all subsequences in other_series.
 95 |                 other_gen = cached_generators[(series_idx, other_series_idx)]
 96 |                 distances = other_gen.calc_column(subseq_idx)
 97 |                 min_distance = np.min(distances)
 98 |                 candidate_radius = max(candidate_radius, min_distance)
 99 | 
100 |                 # Abort search if distance is greater than best so far.
101 |                 if candidate_radius >= best_result.radius:
102 |                     aborted = True
103 |                     break
104 | 
105 |             # Store the current candidate as best result so far.
106 |             if not aborted and candidate_radius < best_result.radius:
107 |                 best_result = CMResult(candidate_radius, series_idx, subseq_idx)
108 | 
109 |     return best_result
110 | 
111 | 
112 | def find_consensus_motif_subset(series_list, m: int, k: int) -> CMResult:
113 |     """
114 |     Finds the top-1 k of n consensus motif and corresponding distance for the given collection of series.
115 |     The consensus motif is the subsequence (extracted from one of the series),
116 |     that has a match to a subsequence from k other series within a certain distance,
117 |     where that distance is minimal.
118 | 
119 |     This method implements the k of n Ostinato algorithm, described in
120 |     "Matrix Profile XV: Exploiting Time Series Consensus  Motifs to Find Structure in Time Series Sets"
121 |     by K. Kamgar, S. Gharghabi and E. Keogh.
122 | 
123 |     Note: this algorithm has not yet been optimized for speed.
124 |     (Instead, consider using the Anytime Ostinato algorithm.)
125 | 
126 |     :param series_list: list of 1-dimensional arrays
127 |     :param m: length of the consensus motif
128 |     :return: tuple containing radius, series index and subsequence index of the consensus motif
129 |     """
130 |     if len(series_list) < 2:
131 |         raise RuntimeError("At least 2 series are required.")
132 |     if m < 3:
133 |         raise RuntimeError("Motif length should be >= 3.")
134 |     if k < 2 or k > len(series_list):
135 |         raise RuntimeError("Number of considered series should be >= 2 and <= len(series).")
136 |     for series in series_list:
137 |         series = np.array(series)
138 |         if len(series) < m:
139 |             raise RuntimeError("One or more series are shorter than the desired motif length.")
140 |         if series.ndim != 1:
141 |             raise RuntimeError("One or more series are not one dimensional.")
142 | 
143 |     best_result = CMResult(np.inf, -1, -1)
144 |     num_series = len(series_list)
145 |     num_ignored_series = num_series - k
146 | 
147 |     # Using streaming generators avoids having to recalculate the means/stds for calculating
148 |     # distance between the series and a single subsequence
149 |     cached_generators = []
150 |     for series in series_list:
151 |         gen = ZNormEuclidean().prepare_streaming(m, m, len(series))
152 |         gen.append_query(series)
153 |         cached_generators.append(gen)
154 | 
155 |     for series_idx in range(num_series):
156 |         active_series = series_list[series_idx]
157 |         num_subseqs = len(active_series) - m + 1
158 | 
159 |         # Calculate for each subsequence in active_series the best match to the next
160 |         # (num_ignored_series + 1) series
161 |         next_mps = np.empty((num_ignored_series + 1, num_subseqs))
162 |         for i in range(num_ignored_series + 1):
163 |             next_series_idx = (series_idx + 1 + i) % num_series
164 |             next_series = series_list[next_series_idx]
165 |             next_mps[i, :] = _calculate_mp(m, active_series, next_series)
166 | 
167 |         candidates = np.argsort(np.min(next_mps, axis=0))
168 | 
169 |         # Iterate over all candidate subsequences, starting from those that had the best match to any next_series.
170 |         for subseq_idx in candidates:
171 |             aborted = False
172 | 
173 |             # We track the (num_ignored_series + 1) biggest radii found,
174 |             # where only the smallest value determines the actual radius for the subsequence
175 |             # (since we can ignore the other values).
176 |             candidate_radii: np.ndarray = next_mps[:, subseq_idx].copy()
177 | 
178 |             # Iterate over all other, not yet calculated, series
179 |             for j in range(num_series - num_ignored_series - 2):
180 |                 other_series_idx = (series_idx + num_ignored_series + 2) % num_series
181 | 
182 |                 candidate_radii.sort()
183 |                 if candidate_radii[0] >= best_result.radius:
184 |                     aborted = True
185 |                     break
186 | 
187 |                 # Calculates the distance from the candidate subsequence to all subsequences in other_series.
188 |                 other_gen = cached_generators[other_series_idx]
189 |                 other_gen.append_series(active_series[subseq_idx: subseq_idx + m])
190 |                 min_distance = np.min(other_gen.calc_column(0))
191 |                 candidate_radii[0] = max(candidate_radii[0], min_distance)
192 | 
193 |             if not aborted:
194 |                 best_radius = np.min(candidate_radii)
195 |                 if best_radius < best_result.radius:
196 |                     best_result = CMResult(best_radius, series_idx, subseq_idx)
197 | 
198 |     return best_result
199 | 
200 | 
201 | def _calculate_mp(m, series, query) -> np.array:
202 |     """Calculates the z-norm-based Matrix Profile."""
203 | 
204 |     # Todo: MP_LR will have unneeded overhead, change to lightweight MP (only MP, no idx, no left/right)
205 |     calc = AnytimeCalculator(m, series, query)
206 |     calc.add_generator(0, ZNormEuclidean())
207 |     cons = calc.add_consumer([0], MatrixProfileLR())
208 |     calc.calculate_columns()
209 |     return cons.matrix_profile()
210 | 
211 | 
212 | class _MPReverse(MatrixProfileLR):
213 |     def __init__(self):
214 |         super().__init__()
215 | 
216 |     def initialise(self, dims, query_subseq, series_subseq):
217 |         super().initialise(dims, series_subseq, query_subseq)
218 | 
219 |     def process_diagonal(self, diag, values):
220 |         super().process_diagonal(-diag, values)
221 | 
222 | 
223 | class OstinatoAnytime(object):
224 |     """
225 |     Implementation of the Anytime Ostinato algorithm, which can be used to find the radius profile
226 |     for a collection of series. Since it is an anytime algorithm, the user can choose between more accurate results
227 |     or a shorter runtime.
228 | 
229 |     The radius profile contains for each subsequence the minimum distance needed to match a subsequence
230 |     from all other series.
231 |     Given the radius profile, the top-k minimal values correspond to the top-k consensus motifs.
232 | 
233 |     This algorithm is described in
234 |     "Mining Recurring Patterns in Real-Valued Time Series using the Radius Profile"
235 |     by D. De Paepe and S. Van Hoecke.
236 |     """
237 |     def __init__(self, series, m: int) -> None:
238 |         """
239 |         Creates a new instance that can be used to find the radius profile for the given series.
240 | 
241 |         :param series: the series for which to calculate the radius profile, a list of 1-D series
242 |         :param m: subsequence length
243 |         """
244 |         num_series = len(series)
245 | 
246 |         self.calculators = []
247 |         self.mps = [[] for i in range(num_series)]
248 | 
249 |         for i in range(num_series):
250 |             for j in range(i + 1, num_series):
251 |                 calc = AnytimeCalculator(m, series[j], series[i])
252 |                 calc.add_generator(0, ZNormEuclidean())
253 | 
254 |                 self.mps[j].append(calc.add_consumer([0], MatrixProfileLR()))
255 |                 self.mps[i].append(calc.add_consumer([0], _MPReverse()))
256 |                 self.calculators.append(calc)
257 | 
258 |     def calculate(self, fraction: float):
259 |         """
260 |         Calculates a given fraction of all distances.
261 | 
262 |         Experiments show that even for low fractions, the resulting radius profile will give representative
263 |         approximate results. The runtime of this method scales linear with the fraction.
264 | 
265 |         :param fraction: fraction of values to calculate, value in [0 .. 1]
266 |         """
267 |         for calc in self.calculators:
268 |             calc.calculate_diagonals(fraction)
269 | 
270 |     def get_radii(self, k_best: int = None):
271 |         """
272 |         Retrieves the radius profile for each series.
273 |         If the calculation was not performed completely, the returned profiles will overestimate the real
274 |         radius profile.
275 | 
276 |         :param k_best: If specified, calculates the radius using only the k_best best matching series
277 |          (instead of all series)
278 |         """
279 |         radii = []
280 | 
281 |         for serie_consumers in self.mps:
282 |             serie_mps = [cons.matrix_profile() for cons in serie_consumers]
283 | 
284 |             if k_best is None:
285 |                 radii.append(np.max(serie_mps, axis=0))
286 |             else:
287 |                 radii.append(np.sort(serie_mps, axis=0)[k_best-1, :])
288 | 
289 |         return radii
290 | 


--------------------------------------------------------------------------------
/distancematrix/ringbuffer.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from math import ceil
  3 | 
  4 | 
  5 | class RingBuffer(object):
  6 |     """
  7 |     A data structure that represents a sliding window over a data stream. Data can be pushed onto the buffer,
  8 |     thereby discarding the oldest data. The buffer is not resizable.
  9 | 
 10 |     Data is pushed onto the last dimension (in case of multidimensional data).
 11 | 
 12 |     Users should always reference the buffer instance, not the buffer view, as the view will be replaced
 13 |     as data is pushed onto the buffer. For user comfort, indexing and slicing on the buffer instance will
 14 |     immediately access the buffer view.
 15 |     """
 16 | 
 17 |     def __init__(self, data, shape=None, dtype=None, scaling_factor=2.) -> None:
 18 |         """
 19 |         Creates a new RingBuffer.
 20 | 
 21 |         :param data: data to initialize the buffer, data may be smaller or larger than shape, may be None to
 22 |             initialize an empty buffer
 23 |         :param shape: the shape of the buffer, if None, uses the shape of data
 24 |         :param dtype: the datatype for the buffer, if None, uses the dtype of data
 25 |         :param scaling_factor: determines internal buffer size (window size x scaling_factor)
 26 |         """
 27 |         super().__init__()
 28 | 
 29 |         if data is None and shape is None:
 30 |             raise RuntimeError("Data and shape may not both be None.")
 31 | 
 32 |         if data is None and dtype is None:
 33 |             raise RuntimeError("Data and dtype may not both be None.")
 34 | 
 35 |         if data is not None:
 36 |             data = np.asarray(data)
 37 | 
 38 |         if not shape:
 39 |             shape = list(data.shape)
 40 |         if not dtype:
 41 |             dtype = data.dtype
 42 | 
 43 |         self.max_shape = tuple(shape)
 44 |         self._view_start = 0  # Where view of the buffer starts
 45 |         self._view_max_length = shape[-1]  # Max length (last dimension) of the exposed view
 46 |         self._view_length = 0  # Current length of the exposed view
 47 | 
 48 |         buffer_shape = list(shape)
 49 |         buffer_shape[-1] = ceil(scaling_factor * shape[-1])
 50 |         self._buffer = np.empty(buffer_shape, dtype)
 51 | 
 52 |         self.view = self._buffer[..., self._view_start: self._view_start + self._view_length]
 53 |         if data is not None:
 54 |             self.push(data)
 55 | 
 56 |     def push(self, data) -> int:
 57 |         """
 58 |         Appends the given data to the buffer, discarding the oldest values.
 59 |         Data is appended to the last dimension of the data window.
 60 | 
 61 |         :param data: the data to append, all dimensions except the last should match those of the window
 62 |         :return: The number of data points (per dimension) dropped from the sliding window by this operation
 63 |         """
 64 |         data = np.atleast_1d(data)
 65 |         if not data.shape[:-1] == self._buffer.shape[:-1]:
 66 |             raise RuntimeError("Data shape does not match buffer size.")
 67 | 
 68 |         data_len = data.shape[-1]
 69 | 
 70 |         if data_len == 0:
 71 |             return 0
 72 | 
 73 |         # If the view does not has its target capacity, first fill until it does
 74 |         if self._view_length < self._view_max_length:
 75 |             delta = min(data_len, self._view_max_length - self._view_length)
 76 |             self._buffer[..., self._view_length: self._view_length+delta] = data[..., :delta]
 77 |             self._view_length += delta
 78 |             self.view = self._buffer[..., :self._view_length]
 79 | 
 80 |             if data_len == delta:
 81 |                 return 0
 82 | 
 83 |             # The buffer (its view) is now filled, continue the normal flow to process the remaining data.
 84 |             data = data[..., delta:]
 85 |             data_len = data.shape[-1]
 86 | 
 87 |         # The view is at target capacity at this point, we will start "dropping" data.
 88 | 
 89 |         # The data fits in the remaining pre-allocated memory
 90 |         if self._view_start + self._view_max_length + data_len <= self._buffer.shape[-1]:
 91 |             self._view_start += data_len
 92 |             self.view = self._buffer[..., self._view_start:self._view_start + self._view_max_length]
 93 |             self.view[..., -data_len:] = data
 94 | 
 95 |         # The data does not fit in the remaining memory, but is less than the view capacity:
 96 |         # we reset the view, copy enough old data to fill to capacity, and append the new data
 97 |         elif data_len < self._view_max_length:
 98 |             mem_len = self._view_max_length - data_len
 99 |             self._buffer[..., :mem_len] = \
100 |                 self._buffer[..., self._view_start+data_len:self._view_start+self._view_max_length]
101 |             self._buffer[..., mem_len:self._view_max_length] = data
102 |             self._view_start = 0
103 |             self.view = self._buffer[..., self._view_start:self._view_start + self._view_max_length]
104 | 
105 |         # The data does not fit in the remaining memory, and can (over)fill the view capacity:
106 |         # we reset the view and copy a part of the new data equal to the view capacity.
107 |         else:
108 |             self._buffer[..., :self._view_max_length] = data[..., -self._view_max_length:]
109 |             self._view_start = 0
110 |             self.view = self._buffer[..., self._view_start:self._view_start + self._view_max_length]
111 | 
112 |         return data_len
113 | 
114 |     def __setitem__(self, key, value):
115 |         self.view.__setitem__(key, value)
116 | 
117 |     def __getitem__(self, key):
118 |         return self.view.__getitem__(key)
119 | 
120 |     def __delitem__(self, key):
121 |         self.view.__delitem__(key)


--------------------------------------------------------------------------------
/distancematrix/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/seriesdistancematrix/c0e666d036f24184511e766cee9fdfa55f41df97/distancematrix/tests/__init__.py


--------------------------------------------------------------------------------
/distancematrix/tests/consumer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/seriesdistancematrix/c0e666d036f24184511e766cee9fdfa55f41df97/distancematrix/tests/consumer/__init__.py


--------------------------------------------------------------------------------
/distancematrix/tests/consumer/test_contextmanager.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | import numpy.testing as npt
 3 | from itertools import zip_longest
 4 | 
 5 | from distancematrix.consumer.contextmanager import GeneralStaticManager
 6 | 
 7 | 
 8 | class TestGeneralStaticManager(TestCase):
 9 |     def test_does_not_return_empty_contexts(self):
10 |         r = [range(1, 5), range(0, 0), range(5, 10)]
11 |         m = GeneralStaticManager(r)
12 | 
13 |         _assert_equal_iteration(m.series_contexts(0, 1), [])
14 |         _assert_equal_iteration(m.series_contexts(0, 4), [(1, 5, 0)])
15 |         _assert_equal_iteration(m.series_contexts(0, 8), [(1, 5, 0), (5, 10, 2)])
16 |         _assert_equal_iteration(m.series_contexts(0, 12), [(1, 5, 0), (5, 10, 2)])
17 |         _assert_equal_iteration(m.series_contexts(5, 12), [(5, 10, 2)])
18 | 
19 |         _assert_equal_iteration(m.query_contexts(0, 1), [])
20 |         _assert_equal_iteration(m.query_contexts(0, 4), [(1, 5, 0)])
21 |         _assert_equal_iteration(m.query_contexts(0, 8), [(1, 5, 0), (5, 10, 2)])
22 |         _assert_equal_iteration(m.query_contexts(0, 12), [(1, 5, 0), (5, 10, 2)])
23 |         _assert_equal_iteration(m.query_contexts(5, 12), [(5, 10, 2)])
24 | 
25 | 
26 | def _assert_equal_iteration(actual, expected, msg=''):
27 |     """
28 |     Assert function similar to TestCase.assertSequenceEqual, but that actually treats 2D numpy arrays as iterables.
29 |     """
30 |     sentinel = object()
31 |     for actual_value, expected_value in zip_longest(actual, expected, fillvalue=sentinel):
32 |         if sentinel is actual_value:
33 |             raise AssertionError("Actual iterator is shorter, does not include " + str(expected_value))
34 | 
35 |         if sentinel is expected_value:
36 |             raise AssertionError("Actual iterator is longer, contained " + str(actual_value))
37 | 
38 |         npt.assert_equal(actual_value, expected_value, msg)
39 | 


--------------------------------------------------------------------------------
/distancematrix/tests/consumer/test_distance_matrix.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | 
  5 | from distancematrix.util import diag_indices_of
  6 | from distancematrix.consumer.distance_matrix import DistanceMatrix
  7 | 
  8 | 
  9 | class TestContextualMatrixProfile(TestCase):
 10 | 
 11 |     def setUp(self):
 12 |         self.dist_matrix = np.array([
 13 |             [8.67, 1.10, 1.77, 1.26, 1.91, 4.29, 6.32, 4.24, 4.64, 5.06, 6.41, 4.07, 4.67, 9.32, 5.09],
 14 |             [4.33, 4.99, 0.14, 2.79, 2.10, 6.26, 9.40, 4.14, 5.53, 4.26, 8.21, 5.91, 6.83, 9.26, 6.19],
 15 |             [0.16, 9.05, 1.35, 4.78, 7.01, 4.36, 5.24, 8.81, 7.90, 5.84, 8.90, 7.88, 3.37, 4.70, 6.94],
 16 |             [0.94, 8.70, 3.87, 6.29, 0.32, 1.79, 5.80, 2.61, 1.43, 6.32, 1.62, 0.20, 2.28, 7.11, 2.15],
 17 |             [9.90, 4.51, 2.11, 2.83, 5.52, 8.55, 6.90, 0.24, 1.58, 4.26, 8.75, 3.71, 9.93, 8.33, 0.38],
 18 |             [7.30, 5.84, 9.63, 1.95, 3.76, 3.61, 9.42, 5.56, 5.09, 7.07, 1.90, 4.78, 1.06, 0.69, 3.67],
 19 |             [2.17, 8.37, 3.99, 4.28, 4.37, 2.86, 8.61, 3.39, 8.37, 6.95, 6.57, 1.79, 7.40, 4.41, 7.64],
 20 |             [6.26, 0.29, 6.44, 8.84, 1.24, 2.52, 6.25, 3.07, 5.55, 3.19, 8.16, 5.32, 9.01, 0.39, 9.],
 21 |             [4.67, 8.88, 3.05, 3.06, 2.36, 8.34, 4.91, 5.46, 9.25, 9.78, 0.03, 5.64, 5.10, 3.58, 6.92],
 22 |             [1.01, 0.91, 6.28, 7.79, 0.68, 5.50, 6.72, 5.11, 0.80, 9.30, 9.77, 4.71, 3.26, 7.29, 6.26]])
 23 | 
 24 |     def mock_initialise(self, dm):
 25 |         dm.initialise(1, self.dist_matrix.shape[0], self.dist_matrix.shape[1])
 26 | 
 27 |     def test_process_diagonal(self):
 28 |         dm = DistanceMatrix()
 29 |         self.mock_initialise(dm)
 30 | 
 31 |         for diag in range(-self.dist_matrix.shape[0] + 1, self.dist_matrix.shape[1]):
 32 |             diag_ind = diag_indices_of(self.dist_matrix, diag)
 33 |             dm.process_diagonal(diag, np.atleast_2d(self.dist_matrix[diag_ind]))
 34 | 
 35 |         npt.assert_equal(dm.distance_matrix, self.dist_matrix)
 36 | 
 37 |     def test_process_diagonal_partial_calculation(self):
 38 |         dm = DistanceMatrix()
 39 |         self.mock_initialise(dm)
 40 | 
 41 |         correct = np.full_like(self.dist_matrix, np.nan, dtype=float)
 42 | 
 43 |         for diag in range(-8, self.dist_matrix.shape[1], 3):
 44 |             diag_ind = diag_indices_of(self.dist_matrix, diag)
 45 |             dm.process_diagonal(diag, np.atleast_2d(self.dist_matrix[diag_ind]))
 46 |             correct[diag_ind] = self.dist_matrix[diag_ind]
 47 | 
 48 |         npt.assert_equal(dm.distance_matrix, correct)
 49 | 
 50 |     def test_process_column(self):
 51 |         dm = DistanceMatrix()
 52 |         self.mock_initialise(dm)
 53 | 
 54 |         for column in range(0, self.dist_matrix.shape[1]):
 55 |             dm.process_column(column, np.atleast_2d(self.dist_matrix[:, column]))
 56 | 
 57 |         npt.assert_equal(dm.distance_matrix, self.dist_matrix)
 58 | 
 59 |     def test_process_column_partial_calculation(self):
 60 |         dm = DistanceMatrix()
 61 |         self.mock_initialise(dm)
 62 | 
 63 |         correct = np.full_like(self.dist_matrix, np.nan, dtype=float)
 64 | 
 65 |         for column in [2, 3, 4, 5, 10, 11, 12]:
 66 |             dm.process_column(column, np.atleast_2d(self.dist_matrix[:, column]))
 67 |             correct[:, column] = self.dist_matrix[:, column]
 68 | 
 69 |         npt.assert_equal(dm.distance_matrix, correct)
 70 | 
 71 |     def test_streaming_process_column(self):
 72 |         dm = DistanceMatrix()
 73 |         dm.initialise(1, 5, 5)
 74 | 
 75 |         dm.process_column(0, np.atleast_2d(self.dist_matrix[0, 0]))
 76 |         dm.process_column(1, np.atleast_2d(self.dist_matrix[:2, 1]))
 77 |         expected = np.full((5, 5), np.nan)
 78 |         expected[0, 0] = self.dist_matrix[0, 0]
 79 |         expected[:2, 1] = self.dist_matrix[:2, 1]
 80 |         npt.assert_equal(dm.distance_matrix, expected)
 81 | 
 82 |         for column in range(0, 5):
 83 |             dm.process_column(column, np.atleast_2d(self.dist_matrix[:5, :5][:, column]))
 84 |         npt.assert_equal(dm.distance_matrix, self.dist_matrix[:5, :5])
 85 | 
 86 |         dm.shift_query(1)
 87 |         dm.shift_series(3)
 88 | 
 89 |         correct = np.full((5, 5), np.nan)
 90 |         correct[0:4, 0:2] = self.dist_matrix[1:5, 3:5]
 91 |         npt.assert_equal(dm.distance_matrix, correct)
 92 | 
 93 |         for column in range(0, 5):
 94 |             dm.process_column(column, np.atleast_2d(self.dist_matrix[1:6, 3:8][:, column]))
 95 |         npt.assert_equal(dm.distance_matrix, self.dist_matrix[1:6, 3:8])
 96 | 
 97 |         dm.shift_query(2)
 98 |         dm.shift_series(1)
 99 |         dm.process_column(4, np.atleast_2d(self.dist_matrix[3:8, 8]))
100 | 
101 |         correct = np.full((5, 5), np.nan)
102 |         correct[0:3, 0:4] = self.dist_matrix[3:6, 4:8]
103 |         correct[:, 4] = self.dist_matrix[3:8, 8]
104 |         npt.assert_equal(dm.distance_matrix, correct)
105 | 
106 |     def test_streaming_process_diagonal(self):
107 |         dm = DistanceMatrix()
108 |         dm.initialise(1, 5, 5)
109 | 
110 |         dm.process_diagonal(0, np.atleast_2d(self.dist_matrix[0, 0]))
111 |         diag_ind = diag_indices_of(self.dist_matrix[:3, :3], 1)
112 |         dm.process_diagonal(1, np.atleast_2d(np.atleast_2d(self.dist_matrix[diag_ind])))
113 |         expected = np.full((5, 5), np.nan)
114 |         expected[0, 0] = self.dist_matrix[0, 0]
115 |         expected[0, 1] = self.dist_matrix[0, 1]
116 |         expected[1, 2] = self.dist_matrix[1, 2]
117 |         npt.assert_equal(dm.distance_matrix, expected)
118 | 
119 |         for diag in range(-4,5):
120 |             diag_ind = diag_indices_of(self.dist_matrix[:5, :5], diag)
121 |             dm.process_diagonal(diag, np.atleast_2d(self.dist_matrix[diag_ind]))
122 | 
123 |         npt.assert_equal(dm.distance_matrix, self.dist_matrix[:5, :5])
124 | 
125 |         dm.shift_query(2)
126 |         dm.shift_series(1)
127 |         expected = self.dist_matrix[2:7, 1:6].copy()
128 |         expected[-2:, :] = np.nan
129 |         expected[:, -1:] = np.nan
130 |         npt.assert_equal(dm.distance_matrix, expected)
131 | 
132 |         for diag in range(-4,5):
133 |             diag_ind = diag_indices_of(self.dist_matrix[:5, :5], diag)
134 |             dm.process_diagonal(diag, np.atleast_2d(self.dist_matrix[diag_ind]))
135 |         npt.assert_equal(dm.distance_matrix, self.dist_matrix[:5, :5])
136 | 


--------------------------------------------------------------------------------
/distancematrix/tests/consumer/test_radius_profile.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | from itertools import takewhile
  5 | 
  6 | from distancematrix.consumer.radius_profile import RadiusProfile0
  7 | from distancematrix.consumer.radius_profile import RadiusProfile
  8 | from distancematrix.insights import lowest_value_idxs
  9 | 
 10 | 
 11 | class TestRadiusProfile0(TestCase):
 12 |     def setUp(self):
 13 |         self.dm = np.array([
 14 |             [8.67, 1.10, 1.77, 1.26, 1.91, 4.29, 6.32, 4.24, 4.64, 5.06, 6.41, 4.07, 4.67, 9.32, 5.09],
 15 |             [4.33, 4.99, 0.14, 2.79, 2.10, 6.26, 9.40, 4.14, 5.53, 4.26, 8.21, 5.91, 6.83, 9.26, 6.19],
 16 |             [0.16, 9.05, 1.35, 4.78, 7.01, 4.36, 5.24, 8.81, 7.90, 5.84, 8.90, 7.88, 3.37, 4.70, 6.94],
 17 |             [0.94, 8.70, 3.87, 6.29, 0.32, 1.79, 5.80, 2.61, 1.43, 6.32, 1.62, 0.20, 2.28, 7.11, 2.15],
 18 |             [9.90, 4.51, 2.11, 2.83, 5.52, 8.55, 6.90, 0.24, 1.58, 4.26, 8.75, 3.71, 9.93, 8.33, 0.38],
 19 |             [7.30, 5.84, 9.63, 1.95, 3.76, 3.61, 9.42, 5.56, 5.09, 7.07, 1.90, 4.78, 1.06, 0.69, 3.67],
 20 |             [2.17, 8.37, 3.99, 4.28, 4.37, 2.86, 8.61, 3.39, 8.37, 6.95, 6.57, 1.79, 7.40, 4.41, 7.64],
 21 |             [6.26, 0.29, 6.44, 8.84, 1.24, 2.52, 6.25, 3.07, 5.55, 3.19, 8.16, 5.32, 9.01, 0.39, 9.],
 22 |             [4.67, 8.88, 3.05, 3.06, 2.36, 8.34, 4.91, 5.46, 9.25, 9.78, 0.03, 5.64, 5.10, 3.58, 6.92],
 23 |             [1.01, 0.91, 6.28, 7.79, 0.68, 5.50, 6.72, 5.11, 0.80, 9.30, 9.77, 4.71, 3.26, 7.29, 6.26]])
 24 | 
 25 | 
 26 |     @staticmethod
 27 |     def bruteforce_calc(array, target_idxs):
 28 |         target_idxs = sorted(target_idxs)
 29 |         result = np.zeros((len(target_idxs), array.shape[1]), dtype=float)
 30 | 
 31 |         for col in range(array.shape[1]):
 32 |             sorted_col_values = np.sort(array[:, col])
 33 |             for i, target_idx in enumerate(target_idxs):
 34 |                 if target_idx < len(sorted_col_values):
 35 |                     result[i, col] = sorted_col_values[target_idx]
 36 |                 else:
 37 |                     result[i, col] = np.nan
 38 | 
 39 |         return result
 40 | 
 41 |     def test_process_diagonal(self):
 42 |         tracker = RadiusProfile0(0)
 43 |         tracker.initialise(1, self.dm.shape[0], self.dm.shape[1])
 44 | 
 45 |         with self.assertRaises(NotImplementedError):
 46 |             tracker.process_diagonal(0, np.zeros(10))
 47 | 
 48 |     def test_process_column_single_value(self):
 49 |         tracker = RadiusProfile0(2)
 50 |         tracker.initialise(1, self.dm.shape[0], self.dm.shape[1])
 51 | 
 52 |         for column in range(0, self.dm.shape[1]):
 53 |             tracker.process_column(column, np.atleast_2d(self.dm[:, column]))
 54 | 
 55 |         npt.assert_equal(tracker.values, self.bruteforce_calc(self.dm, [2]))
 56 | 
 57 |     def test_process_column_multiple_value(self):
 58 |         track_idxs = [2, 5, 0, 9, len(self.dm)]
 59 |         tracker = RadiusProfile0(track_idxs)
 60 |         tracker.initialise(1, self.dm.shape[0], self.dm.shape[1])
 61 | 
 62 |         for column in range(0, self.dm.shape[1]):
 63 |             tracker.process_column(column, np.atleast_2d(self.dm[:, column]))
 64 | 
 65 |         npt.assert_equal(tracker.values, self.bruteforce_calc(self.dm, track_idxs))
 66 |         npt.assert_equal(tracker.values[-1, :], np.full(self.dm.shape[1], np.nan))
 67 | 
 68 | 
 69 | class TestRadiusProfile(TestCase):
 70 |     def setUp(self):
 71 |         self.dm = np.array([
 72 |             [8.67, 1.10, 1.77, 1.26, 1.91, 4.29, 6.32, 4.24, 4.64, 5.06, 6.41, 4.07, 4.67, 9.32, 5.09],
 73 |             [4.33, 4.99, 0.14, 2.79, 2.10, 6.26, 9.40, 4.14, 5.53, 4.26, 8.21, 5.91, 6.83, 9.26, 6.19],
 74 |             [0.16, 9.05, 1.35, 4.78, 7.01, 4.36, 5.24, 8.81, 7.90, 5.84, 8.90, 7.88, 3.37, 4.70, 6.94],
 75 |             [0.94, 8.70, 3.87, 6.29, 0.32, 1.79, 5.80, 2.61, 1.43, 6.32, 1.62, 0.20, 2.28, 7.11, 2.15],
 76 |             [9.90, 4.51, 2.11, 2.83, 5.52, 8.55, 6.90, 0.24, 1.58, 4.26, 8.75, 3.71, 9.93, 8.33, 0.38],
 77 |             [7.30, 5.84, 9.63, 1.95, 3.76, 3.61, 9.42, 5.56, 5.09, 7.07, 1.90, 4.78, 1.06, 0.69, 3.67],
 78 |             [2.17, 8.37, 3.99, 4.28, 4.37, 2.86, 8.61, 3.39, 8.37, 6.95, 6.57, 1.79, 7.40, 4.41, 7.64],
 79 |             [6.26, 0.29, 6.44, 8.84, 1.24, 2.52, 6.25, 3.07, 5.55, 3.19, 8.16, 5.32, 9.01, 0.39, 9.],
 80 |             [4.67, 8.88, 3.05, 3.06, 2.36, 8.34, 4.91, 5.46, 9.25, 9.78, 0.03, 5.64, 5.10, 3.58, 6.92],
 81 |             [1.01, 0.91, 6.28, 7.79, 0.68, 5.50, 6.72, 5.11, 0.80, 9.30, 9.77, 4.71, 3.26, 7.29, 6.26]])
 82 | 
 83 |     @staticmethod
 84 |     def bruteforce_calc(array, target_idxs, exclusion):
 85 |         target_idxs = np.array(target_idxs)
 86 |         target_idxs.sort()
 87 | 
 88 |         result = np.zeros((len(target_idxs), array.shape[1]), dtype=float)
 89 | 
 90 |         for col in range(array.shape[1]):
 91 |             for i, target_idx in enumerate(target_idxs):
 92 |                 lowest_idxs = list(lowest_value_idxs(array[:, col], exclusion))  # takes care of the exclusion distance
 93 |                 lowest_values = array[:, col][lowest_idxs]
 94 | 
 95 |                 if target_idx < len(lowest_values):
 96 |                     result[i, col] = lowest_values[target_idx]
 97 |                 else:
 98 |                     result[i, col] = np.nan
 99 | 
100 |         return result
101 | 
102 |     def test_process_diagonal(self):
103 |         tracker = RadiusProfile(2, 2)
104 |         tracker.initialise(1, self.dm.shape[0], self.dm.shape[1])
105 | 
106 |         with self.assertRaises(NotImplementedError):
107 |             tracker.process_diagonal(0, np.zeros(10))
108 | 
109 |     def test_process_column_single_value(self):
110 |         tracker = RadiusProfile(2, 2)
111 |         tracker.initialise(1, self.dm.shape[0], self.dm.shape[1])
112 | 
113 |         for column in range(0, self.dm.shape[1]):
114 |             tracker.process_column(column, np.atleast_2d(self.dm[:, column]))
115 | 
116 |         npt.assert_equal(tracker.values, self.bruteforce_calc(self.dm, [2], 2))
117 | 
118 |     def test_process_column_multiple_value(self):
119 |         track_idxs = [2, 0, len(self.dm)]
120 |         tracker = RadiusProfile(track_idxs, 1)
121 |         tracker.initialise(1, self.dm.shape[0], self.dm.shape[1])
122 | 
123 |         for column in range(0, self.dm.shape[1]):
124 |             tracker.process_column(column, np.atleast_2d(self.dm[:, column]))
125 | 
126 |         npt.assert_equal(tracker.values, self.bruteforce_calc(self.dm, track_idxs, 1))
127 | 


--------------------------------------------------------------------------------
/distancematrix/tests/consumer/test_threshold_counter.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | from itertools import takewhile
  5 | 
  6 | from distancematrix.util import diag_indices_of
  7 | from distancematrix.insights import lowest_value_idxs
  8 | from distancematrix.consumer.threshold_counter import ThresholdCounter
  9 | from distancematrix.consumer.threshold_counter import DistancedThresholdCounter
 10 | 
 11 | 
 12 | class TestThresholdCounter(TestCase):
 13 |     def setUp(self):
 14 |         self.dm = np.array([
 15 |             [8.67, 1.10, 1.77, 1.26, 1.91, 4.29, 6.32, 4.24, 4.64, 5.06, 6.41, 4.07, 4.67, 9.32, 5.09],
 16 |             [4.33, 4.99, 0.14, 2.79, 2.10, 6.26, 9.40, 4.14, 5.53, 4.26, 8.21, 5.91, 6.83, 9.26, 6.19],
 17 |             [0.16, 9.05, 1.35, 4.78, 7.01, 4.36, 5.24, 8.81, 7.90, 5.84, 8.90, 7.88, 3.37, 4.70, 6.94],
 18 |             [0.94, 8.70, 3.87, 6.29, 0.32, 1.79, 5.80, 2.61, 1.43, 6.32, 1.62, 0.20, 2.28, 7.11, 2.15],
 19 |             [9.90, 4.51, 2.11, 2.83, 5.52, 8.55, 6.90, 0.24, 1.58, 4.26, 8.75, 3.71, 9.93, 8.33, 0.38],
 20 |             [7.30, 5.84, 9.63, 1.95, 3.76, 3.61, 9.42, 5.56, 5.09, 7.07, 1.90, 4.78, 1.06, 0.69, 3.67],
 21 |             [2.17, 8.37, 3.99, 4.28, 4.37, 2.86, 8.61, 3.39, 8.37, 6.95, 6.57, 1.79, 7.40, 4.41, 7.64],
 22 |             [6.26, 0.29, 6.44, 8.84, 1.24, 2.52, 6.25, 3.07, 5.55, 3.19, 8.16, 5.32, 9.01, 0.39, 9.],
 23 |             [4.67, 8.88, 3.05, 3.06, 2.36, 8.34, 4.91, 5.46, 9.25, 9.78, 0.03, 5.64, 5.10, 3.58, 6.92],
 24 |             [1.01, 0.91, 6.28, 7.79, 0.68, 5.50, 6.72, 5.11, 0.80, 9.30, 9.77, 4.71, 3.26, 7.29, 6.26]])
 25 | 
 26 |     @staticmethod
 27 |     def bruteforce_count(array, threshold_array):
 28 |         result = np.zeros((len(threshold_array), array.shape[1]), dtype=int)
 29 | 
 30 |         for i, threshold in enumerate(threshold_array):
 31 |             for col in range(array.shape[1]):
 32 |                 result[i, col] = np.count_nonzero(array[:, col] <= threshold)
 33 | 
 34 |         return result
 35 | 
 36 |     def test_process_diagonal_single_threshold(self):
 37 |         threshold = 2.83
 38 |         counter = ThresholdCounter(threshold)
 39 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
 40 | 
 41 |         for diag in range(-self.dm.shape[0] + 1, self.dm.shape[1]):
 42 |             diag_ind = diag_indices_of(self.dm, diag)
 43 |             counter.process_diagonal(diag, np.atleast_2d(self.dm[diag_ind]))
 44 | 
 45 |         npt.assert_equal(counter.counts, self.bruteforce_count(self.dm, [threshold]))
 46 | 
 47 |     def test_process_diagonal_multiple_thresholds(self):
 48 |         thresholds = [-1, 2.12, 2.83, 6]
 49 |         counter = ThresholdCounter(thresholds)
 50 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
 51 | 
 52 |         for diag in range(-self.dm.shape[0] + 1, self.dm.shape[1]):
 53 |             diag_ind = diag_indices_of(self.dm, diag)
 54 |             counter.process_diagonal(diag, np.atleast_2d(self.dm[diag_ind]))
 55 | 
 56 |         npt.assert_equal(counter.counts, self.bruteforce_count(self.dm, thresholds))
 57 | 
 58 |     def test_process_column_single_threshold(self):
 59 |         threshold = 5.09
 60 |         counter = ThresholdCounter(threshold)
 61 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
 62 | 
 63 |         for column in range(0, self.dm.shape[1]):
 64 |             counter.process_column(column, np.atleast_2d(self.dm[:, column]))
 65 | 
 66 |         npt.assert_equal(counter.counts, self.bruteforce_count(self.dm, [threshold]))
 67 | 
 68 |     def test_process_column_multiple_thresholds(self):
 69 |         thresholds = [-1, 0.68, 4.67, 5]
 70 |         counter = ThresholdCounter(thresholds)
 71 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
 72 | 
 73 |         for column in range(0, self.dm.shape[1]):
 74 |             counter.process_column(column, np.atleast_2d(self.dm[:, column]))
 75 | 
 76 |         npt.assert_equal(counter.counts, self.bruteforce_count(self.dm, thresholds))
 77 | 
 78 | 
 79 | class TestDistancedThresholdCounter(TestCase):
 80 |     def setUp(self):
 81 |         self.dm = np.array([
 82 |             [8.67, 1.10, 1.77, 1.26, 1.91, 4.29, 6.32, 4.24, 4.64, 5.06, 6.41, 4.07, 4.67, 9.32, 5.09],
 83 |             [4.33, 4.99, 0.14, 2.79, 2.10, 6.26, 9.40, 4.14, 5.53, 4.26, 8.21, 5.91, 6.83, 9.26, 6.19],
 84 |             [0.16, 9.05, 1.35, 4.78, 7.01, 4.36, 5.24, 8.81, 7.90, 5.84, 8.90, 7.88, 3.37, 4.70, 6.94],
 85 |             [0.94, 8.70, 3.87, 6.29, 0.32, 1.79, 5.80, 2.61, 1.43, 6.32, 1.62, 0.20, 2.28, 7.11, 2.15],
 86 |             [9.90, 4.51, 2.11, 2.83, 5.52, 8.55, 6.90, 0.24, 1.58, 4.26, 8.75, 3.71, 9.93, 8.33, 0.38],
 87 |             [7.30, 5.84, 9.63, 1.95, 3.76, 3.61, 9.42, 5.56, 5.09, 7.07, 1.90, 4.78, 1.06, 0.69, 3.67],
 88 |             [2.17, 8.37, 3.99, 4.28, 4.37, 2.86, 8.61, 3.39, 8.37, 6.95, 6.57, 1.79, 7.40, 4.41, 7.64],
 89 |             [6.26, 0.29, 6.44, 8.84, 1.24, 2.52, 6.25, 3.07, 5.55, 3.19, 8.16, 5.32, 9.01, 0.39, 9.],
 90 |             [4.67, 8.88, 3.05, 3.06, 2.36, 8.34, 4.91, 5.46, 9.25, 9.78, 0.03, 5.64, 5.10, 3.58, 6.92],
 91 |             [1.01, 0.91, 6.28, 7.79, 0.68, 5.50, 6.72, 5.11, 0.80, 9.30, 9.77, 4.71, 3.26, 7.29, 6.26]])
 92 | 
 93 |     @staticmethod
 94 |     def bruteforce_count(array, threshold_array, exclusion):
 95 |         threshold_array = np.array(threshold_array)
 96 |         threshold_array.sort()
 97 | 
 98 |         result = np.zeros((len(threshold_array), array.shape[1]), dtype=int)
 99 | 
100 |         for i, threshold in enumerate(threshold_array):
101 |             for col in range(array.shape[1]):
102 |                 _iter = lowest_value_idxs(array[:, col], exclusion)  # takes care of the exclusion distance
103 |                 value_iter = takewhile(lambda i: array[i, col] <= threshold, _iter)  # takes care of the threshold
104 |                 result[i, col] = len(list(value_iter))
105 | 
106 |         return result
107 | 
108 |     def test_process_diagonal_single_threshold(self):
109 |         threshold = 2.83
110 |         counter = DistancedThresholdCounter(threshold, 2)
111 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
112 | 
113 |         with self.assertRaises(NotImplementedError):
114 |             counter.process_diagonal(0, np.zeros(10))
115 | 
116 |     def test_process_column_single_threshold(self):
117 |         threshold = 5.09
118 |         counter = DistancedThresholdCounter(threshold, 2)
119 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
120 | 
121 |         for column in range(0, self.dm.shape[1]):
122 |             counter.process_column(column, np.atleast_2d(self.dm[:, column]))
123 | 
124 |         npt.assert_equal(counter.counts, self.bruteforce_count(self.dm, [threshold], 2))
125 | 
126 |     def test_process_column_multiple_thresholds(self):
127 |         thresholds = [-1, 0.68, 4.67, 5]
128 |         counter = DistancedThresholdCounter(thresholds, 2)
129 |         counter.initialise(1, self.dm.shape[0], self.dm.shape[1])
130 | 
131 |         for column in range(0, self.dm.shape[1]):
132 |             counter.process_column(column, np.atleast_2d(self.dm[:, column]))
133 | 
134 |         npt.assert_equal(counter.counts, self.bruteforce_count(self.dm, thresholds, 2))
135 | 


--------------------------------------------------------------------------------
/distancematrix/tests/generator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/predict-idlab/seriesdistancematrix/c0e666d036f24184511e766cee9fdfa55f41df97/distancematrix/tests/generator/__init__.py


--------------------------------------------------------------------------------
/distancematrix/tests/generator/mock_generator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from distancematrix.util import diag_indices_of
  4 | from distancematrix.generator.abstract_generator import AbstractGenerator
  5 | from distancematrix.generator.abstract_generator import AbstractBoundStreamingGenerator
  6 | 
  7 | 
  8 | class MockGenerator(AbstractGenerator):
  9 |     """
 10 |     Mock generator for testing purposes. Simply returns distances from a given distance matrix.
 11 |     """
 12 | 
 13 |     def __init__(self, dist_matrix):
 14 |         """
 15 |         Creates a new mock generator that will return distances from the provided distance matrix.
 16 | 
 17 |         :param dist_matrix: distances to return.
 18 |         """
 19 |         self._dist_matrix = dist_matrix
 20 | 
 21 |         # Storage for parameters used for prepare and prepare_streaming
 22 |         self.m = None
 23 |         self.series_window = None
 24 |         self.query_window = None
 25 |         self.series = None
 26 |         self.query = None
 27 |         self.bound_gen = None
 28 | 
 29 |     def prepare_streaming(self, m, series_window, query_window=None):
 30 |         self.m = m
 31 |         self.series_window = series_window
 32 |         self.query_window = query_window
 33 | 
 34 |         if query_window is None:
 35 |             query_window = series_window
 36 |             self_join = True
 37 |         else:
 38 |             self_join = False
 39 | 
 40 |         s_subseqs = series_window - m + 1
 41 |         q_subseqs = query_window - m + 1
 42 |         self.bound_gen = BoundMockGenerator(self._dist_matrix, s_subseqs, q_subseqs,
 43 |                                             self_join, -series_window, -query_window)
 44 | 
 45 |         return self.bound_gen
 46 | 
 47 |     def prepare(self, m, series, query=None):
 48 |         self.m = m
 49 |         self.series = series
 50 |         self.query = query
 51 | 
 52 |         s_win = len(series) - m + 1
 53 |         if query is None:
 54 |             q_win = s_win
 55 |             self_join = True
 56 |         else:
 57 |             q_win = len(query) - m + 1
 58 |             self_join = False
 59 | 
 60 |         self.bound_gen = BoundMockGenerator(self._dist_matrix, s_win, q_win, self_join, 0, 0)
 61 |         return self.bound_gen
 62 | 
 63 | 
 64 | class BoundMockGenerator(AbstractBoundStreamingGenerator):
 65 |     """
 66 |     Mock generator for testing purposes. Simply returns distances from a given distance matrix.
 67 |     """
 68 |     def __init__(self, dist_matrix, s_win, q_win, self_join, s_view_index, q_view_index):
 69 |         """
 70 |         Creates a new mock generator that will return distances from the provided distance matrix.
 71 | 
 72 |         :param dist_matrix: 2D matrix, base distance values to use, a view will be used to determine
 73 |             which values to return for mocked calculations
 74 |         :param s_win: window size of the view over the series axis
 75 |         :param q_win: window size of the view over the query axis
 76 |         :param self_join: are we doing a self-join (does adding series data also implicitly add query data)
 77 |         :param s_view_index: start index of the view of dist_matrix (for series)
 78 |         :param q_view_index: start index of the view of dist_matrix (for query)
 79 |         """
 80 |         self._dist_matrix = dist_matrix
 81 |         self._s_win = s_win
 82 |         self._q_win = q_win
 83 |         self._self_join = self_join
 84 | 
 85 |         self._s_index = s_view_index
 86 |         self._q_index = q_view_index
 87 | 
 88 |         self.appended_series = np.empty((0,), dtype=float)
 89 |         self.appended_query = np.empty((0,), dtype=float)
 90 | 
 91 |     def calc_diagonal(self, diag):
 92 |         view = self._dist_matrix[
 93 |                max(self._q_index, 0): max(self._q_index + self._q_win, 0),
 94 |                max(self._s_index, 0): max(self._s_index + self._s_win, 0)
 95 |                ]
 96 |         return view[diag_indices_of(view, diag)]
 97 | 
 98 |     def calc_column(self, column):
 99 |         view = self._dist_matrix[
100 |                max(self._q_index, 0): max(self._q_index + self._q_win, 0),
101 |                max(self._s_index, 0): max(self._s_index + self._s_win, 0)
102 |                ]
103 |         return view[:, column]
104 | 
105 |     def append_series(self, values):
106 |         self.appended_series = np.concatenate([self.appended_series, values])
107 |         self._s_index += len(values)
108 |         if self._self_join:
109 |             self._q_index += len(values)
110 | 
111 |     def append_query(self, values):
112 |         if self._self_join:
113 |             raise RuntimeError("Should not append query if self-joining.")
114 | 
115 |         self.appended_query = np.concatenate([self.appended_query, values])
116 |         self._q_index += len(values)
117 | 


--------------------------------------------------------------------------------
/distancematrix/tests/generator/test_euclidean.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | 
  5 | from distancematrix.util import diag_indices
  6 | from distancematrix.generator.euclidean import Euclidean
  7 | 
  8 | 
  9 | class TestEuclidean(TestCase):
 10 |     def setUp(self):
 11 |         self.series = np.array(
 12 |             [0.5578463, 0.4555404, 0.18124978, 0.252396, 0.60623881,
 13 |              0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
 14 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
 15 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107,
 16 |              0.85413687, 0.19725933, 0.39460891, 0.32650366, 0.35188833,
 17 |              0.92658149, 0.07503563, 0.37864432, 0.9415974, 0.62313779])
 18 | 
 19 |         self.query = np.array(
 20 |             [0.03737861, 0.53931239, 0.06194507, 0.0938707, 0.95875364,
 21 |              0.09495936, 0.12392364, 0.81358582, 0.56507776, 0.61620183,
 22 |              0.24720462, 0.83886639, 0.38130506, 0.13693176, 0.90555723,
 23 |              0.23274948, 0.31526678, 0.28504739, 0.45200344, 0.9867946])
 24 | 
 25 |     def test_calc_diagonal(self):
 26 |         m = 5
 27 |         euclid = Euclidean().prepare(m, self.series, self.query)
 28 |         _verify_diagonals_correct(self.series, self.query, m, euclid)
 29 | 
 30 |     def test_calc_column_no_cache(self):
 31 |         m = 5
 32 |         euclid = Euclidean().prepare(m, self.series, self.query)
 33 |         _verify_columns_correct(self.series, self.query, m, euclid, True)
 34 | 
 35 |     def test_calc_column_cache(self):
 36 |         m = 5
 37 |         euclid = Euclidean().prepare(m, self.series, self.query)
 38 |         _verify_columns_correct(self.series, self.query, m, euclid, False)
 39 | 
 40 | 
 41 | class TestEuclideanSelfJoin(TestCase):
 42 |     def setUp(self):
 43 |         self.series = np.array(
 44 |             [0.5578463, 0.4555404, 0.18124978, 0.252396, 0.60623881,
 45 |              0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
 46 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
 47 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107,
 48 |              0.85413687, 0.19725933, 0.39460891, 0.32650366, 0.35188833,
 49 |              0.92658149, 0.07503563, 0.37864432, 0.9415974, 0.62313779])
 50 | 
 51 |     def test_calc_diagonal(self):
 52 |         m = 5
 53 |         euclid = Euclidean().prepare(m, self.series)
 54 |         _verify_diagonals_correct(self.series, self.series, m, euclid)
 55 | 
 56 |     def test_calc_column_no_cache(self):
 57 |         m = 5
 58 |         euclid = Euclidean().prepare(m, self.series)
 59 |         _verify_columns_correct(self.series, self.series, m, euclid, True)
 60 | 
 61 |     def test_calc_column_cache(self):
 62 |         m = 5
 63 |         euclid = Euclidean().prepare(m, self.series)
 64 |         _verify_columns_correct(self.series, self.series, m, euclid, False)
 65 | 
 66 | 
 67 | class TestStreamingEuclidean(TestCase):
 68 |     def setUp(self):
 69 |         self.series = np.array(
 70 |             [0.5578463, 0.4555404, 0.18124978, 0.252396, 0.60623881,
 71 |              0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
 72 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
 73 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107,
 74 |              0.85413687, 0.19725933, 0.39460891, 0.32650366, 0.35188833,
 75 |              0.92658149, 0.07503563, 0.37864432, 0.9415974, 0.62313779])
 76 | 
 77 |         self.query = np.array(
 78 |             [0.03737861, 0.53931239, 0.06194507, 0.0938707, 0.95875364,
 79 |              0.09495936, 0.12392364, 0.81358582, 0.56507776, 0.61620183,
 80 |              0.24720462, 0.83886639, 0.38130506, 0.13693176, 0.90555723,
 81 |              0.23274948, 0.31526678, 0.28504739, 0.45200344, 0.9867946])
 82 | 
 83 |     def test_calc_diagonal(self):
 84 |         m = 5
 85 |         euclid = Euclidean().prepare_streaming(m, 20, 15)
 86 | 
 87 |         euclid.append_series(self.series[:10])
 88 |         euclid.append_query(self.query[:5])
 89 |         _verify_diagonals_correct(self.series[:10], self.query[:5], m, euclid)
 90 | 
 91 |         euclid.append_series(self.series[10: 15])
 92 |         euclid.append_query(self.query[5: 10])
 93 |         _verify_diagonals_correct(self.series[:15], self.query[:10], m, euclid)
 94 | 
 95 |         euclid.append_series(self.series[15: 25])
 96 |         euclid.append_query(self.query[10: 20])
 97 |         _verify_diagonals_correct(self.series[5: 25], self.query[5: 20], m, euclid)
 98 | 
 99 |         euclid.append_series(self.series[25:30])
100 |         _verify_diagonals_correct(self.series[10: 30], self.query[5: 20], m, euclid)
101 | 
102 |     def test_calc_column_no_cache(self):
103 |         m = 5
104 |         euclid = Euclidean().prepare_streaming(m, 20, 15)
105 | 
106 |         euclid.append_series(self.series[:10])
107 |         euclid.append_query(self.query[:5])
108 |         _verify_columns_correct(self.series[:10], self.query[:5], m, euclid, True)
109 | 
110 |         euclid.append_series(self.series[10: 15])
111 |         euclid.append_query(self.query[5: 10])
112 |         _verify_columns_correct(self.series[:15], self.query[:10], m, euclid, True)
113 | 
114 |         euclid.append_series(self.series[15: 25])
115 |         euclid.append_query(self.query[10: 18])
116 |         _verify_columns_correct(self.series[5: 25], self.query[3: 18], m, euclid, True)
117 | 
118 |         euclid.append_query(self.query[18: 20])
119 |         _verify_columns_correct(self.series[5: 25], self.query[5: 20], m, euclid, True)
120 | 
121 |         euclid.append_series(self.series[25:30])
122 |         _verify_columns_correct(self.series[10: 30], self.query[5: 20], m, euclid, True)
123 | 
124 |     def test_calc_column_cache(self):
125 |         m = 5
126 |         euclid = Euclidean().prepare_streaming(m, 20, 15)
127 | 
128 |         euclid.append_series(self.series[:10])
129 |         euclid.append_query(self.query[:5])
130 |         _verify_columns_correct(self.series[:10], self.query[:5], m, euclid, False)
131 | 
132 |         euclid.append_series(self.series[10: 15])
133 |         euclid.append_query(self.query[5: 10])
134 |         _verify_columns_correct(self.series[:15], self.query[:10], m, euclid, False)
135 | 
136 |         euclid.append_series(self.series[15: 25])
137 |         euclid.append_query(self.query[10: 18])
138 |         _verify_columns_correct(self.series[5: 25], self.query[3: 18], m, euclid, False)
139 | 
140 |         euclid.append_query(self.query[18: 20])
141 |         _verify_columns_correct(self.series[5: 25], self.query[5: 20], m, euclid, False)
142 | 
143 |         euclid.append_series(self.series[25:30])
144 |         _verify_columns_correct(self.series[10: 30], self.query[5: 20], m, euclid, False)
145 | 
146 |     def test_streaming_updates_cached_row(self):
147 |         # Override series & query to ensure there are no constant subsequences
148 |         self.series = np.array(
149 |             [0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
150 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
151 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107])
152 | 
153 |         self.query = np.array(
154 |             [0.03737861, 0.53931239, 0.06194507, 0.0938707, 0.95875364,
155 |              0.09495936, 0.12392364, 0.81358582, 0.56507776, 0.61620183])
156 | 
157 |         gen = Euclidean().prepare_streaming(5, 10, 10)
158 |         gen.append_series(self.series[:10])
159 |         gen.append_query(self.query[:10])
160 |         bf_dist_matrix = _bruteforce_euclidean_distance_matrix(self.series[:15], self.query[:10], 5)
161 | 
162 |         # Test shifted behaviour
163 |         npt.assert_allclose(bf_dist_matrix[:, 0], gen.calc_column(0))
164 |         gen.append_series(self.series[10:11])
165 |         npt.assert_allclose(bf_dist_matrix[:, 1], gen.calc_column(0))
166 | 
167 |         # Test shifted but off-by-one behaviour
168 |         npt.assert_allclose(bf_dist_matrix[:, 4], gen.calc_column(3))
169 |         gen.append_series(self.series[11:12])
170 |         npt.assert_allclose(bf_dist_matrix[:, 6], gen.calc_column(4))
171 | 
172 | 
173 | class TestStreamingEuclideanSelfJoin(TestCase):
174 |     def setUp(self):
175 |         self.series = np.array(
176 |             [0.5578463, 0.4555404, 0.18124978, 0.252396, 0.60623881,
177 |              0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
178 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
179 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107,
180 |              0.85413687, 0.19725933, 0.39460891, 0.32650366, 0.35188833,
181 |              0.92658149, 0.07503563, 0.37864432, 0.9415974, 0.62313779])
182 | 
183 |     def test_calc_diagonal(self):
184 |         m = 5
185 |         euclid = Euclidean().prepare_streaming(m, 20)
186 | 
187 |         euclid.append_series(self.series[:10])
188 |         _verify_diagonals_correct(self.series[:10], self.series[:10], m, euclid)
189 | 
190 |         euclid.append_series(self.series[10: 15])
191 |         _verify_diagonals_correct(self.series[:15], self.series[:15], m, euclid)
192 | 
193 |         euclid.append_series(self.series[15: 25])
194 |         _verify_diagonals_correct(self.series[5: 25], self.series[5: 25], m, euclid)
195 | 
196 |         euclid.append_series(self.series[25:30])
197 |         _verify_diagonals_correct(self.series[10: 30], self.series[10: 30], m, euclid)
198 | 
199 |     def test_calc_column_no_cache(self):
200 |         m = 5
201 |         euclid = Euclidean().prepare_streaming(m, 20)
202 | 
203 |         euclid.append_series(self.series[:10])
204 |         _verify_columns_correct(self.series[:10], self.series[:10], m, euclid, True)
205 | 
206 |         euclid.append_series(self.series[10: 15])
207 |         _verify_columns_correct(self.series[:15], self.series[:15], m, euclid, True)
208 | 
209 |         euclid.append_series(self.series[15: 25])
210 |         _verify_columns_correct(self.series[5: 25], self.series[5: 25], m, euclid, True)
211 | 
212 |         euclid.append_series(self.series[25:30])
213 |         _verify_columns_correct(self.series[10: 30], self.series[10: 30], m, euclid, True)
214 | 
215 |     def test_calc_column_cache(self):
216 |         m = 5
217 |         euclid = Euclidean().prepare_streaming(m, 20)
218 | 
219 |         euclid.append_series(self.series[:10])
220 |         _verify_columns_correct(self.series[:10], self.series[:10], m, euclid, False)
221 | 
222 |         euclid.append_series(self.series[10: 15])
223 |         _verify_columns_correct(self.series[:15], self.series[:15], m, euclid, False)
224 | 
225 |         euclid.append_series(self.series[15: 25])
226 |         _verify_columns_correct(self.series[5: 25], self.series[5: 25], m, euclid, False)
227 | 
228 |         euclid.append_series(self.series[25:30])
229 |         _verify_columns_correct(self.series[10: 30], self.series[10: 30], m, euclid, False)
230 | 
231 | 
232 | def _verify_diagonals_correct(series, query, m, euclid):
233 |     h = len(query) - m + 1
234 |     w = len(series) - m + 1
235 |     bf_distance_matrix = _bruteforce_euclidean_distance_matrix(series, query, m)
236 | 
237 |     for i in range(-h + 1, w):
238 |         result = euclid.calc_diagonal(i)
239 |         expected = bf_distance_matrix[diag_indices(h, w, i)]
240 |         npt.assert_allclose(result, expected)
241 | 
242 | 
243 | def _verify_columns_correct(series, query, m, euclid, backwards):
244 |     w = len(series) - m + 1
245 |     bf_distance_matrix = _bruteforce_euclidean_distance_matrix(series, query, m)
246 | 
247 |     if backwards:
248 |         r = range(w - 1, -1, -1)
249 |     else:
250 |         r = range(w)
251 | 
252 |     for i in r:
253 |         result = euclid.calc_column(i)
254 |         expected = bf_distance_matrix[:, i]
255 |         npt.assert_allclose(result, expected, err_msg="Mismatch for row {row}".format(row=i))
256 | 
257 | 
258 | def _bruteforce_euclidean_distance_matrix(series, query, m):
259 |     num_cols = len(series) - m + 1
260 |     num_rows = len(query) - m + 1
261 |     distance_matrix = np.zeros((num_rows, num_cols))
262 | 
263 |     for row in range(num_rows):
264 |         for col in range(num_cols):
265 |             distance_matrix[row, col] = _euclidean_distance(
266 |                 query[row: row + m],
267 |                 series[col: col + m])
268 | 
269 |     return distance_matrix
270 | 
271 | 
272 | def _euclidean_distance(s1, s2):
273 |     return np.sqrt(np.sum(np.square(s1 - s2)))
274 | 


--------------------------------------------------------------------------------
/distancematrix/tests/generator/test_znorm_euclidean.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | from abc import abstractmethod
  5 | 
  6 | from distancematrix.util import diag_indices
  7 | from distancematrix.generator.znorm_euclidean import ZNormEuclidean
  8 | 
  9 | 
 10 | class AbstractGeneratorTest(object):
 11 |     def setUp(self):
 12 |         self.series = np.array(
 13 |             [0.2488674, 0.1547179, 2, 2, 2,
 14 |              2, 2, 2, 0.02841, 0.371845,
 15 |              0.5578463, 0.4555404, 0.18124978, 0.252396, 0.60623881,
 16 |              0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
 17 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
 18 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107,
 19 |              0.85413687, 0.19725933, 0.39460891, 0.32650366, 0.35188833,
 20 |              0.92658149, 0.07503563, 0.37864432, 0.9415974, 0.62313779])
 21 | 
 22 |         self.query = np.array(
 23 |             [6., 6., 6., 6., 6.,
 24 |              0.03737861, 0.53931239, 0.06194507, 0.0938707, 0.95875364,
 25 |              0.09495936, 0.12392364, 0.81358582, 0.56507776, 0.61620183,
 26 |              0.24720462, 0.83886639, 0.38130506, 0.13693176, 0.90555723,
 27 |              0.23274948, 0.31526678, 0.28504739, 0.45200344, 0.9867946])
 28 | 
 29 |         self.m = 5
 30 | 
 31 |     @abstractmethod
 32 |     def create_generator(self):
 33 |         pass
 34 | 
 35 |     @abstractmethod
 36 |     def bruteforce_matrix(self, m, series, query):
 37 |         pass
 38 | 
 39 |     def test_non_streaming_calc_diagonal(self):
 40 |         gen = self.create_generator().prepare(self.m, self.series, self.query)
 41 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.query)
 42 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 43 | 
 44 |     def test_non_streaming_calc_column_no_cache(self):
 45 |         gen = self.create_generator().prepare(self.m, self.series, self.query)
 46 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.query)
 47 |         _verify_columns_correct(bf_dist_matrix, gen, True)
 48 | 
 49 |     def test_non_streaming_calc_column_cache(self):
 50 |         gen = self.create_generator().prepare(self.m, self.series, self.query)
 51 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.query)
 52 |         _verify_columns_correct(bf_dist_matrix, gen, False)
 53 | 
 54 |     def test_non_streaming_self_join_calc_diagonal(self):
 55 |         gen = self.create_generator().prepare(self.m, self.series)
 56 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.series)
 57 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 58 | 
 59 |     def test_non_streaming_self_join_calc_column_no_cache(self):
 60 |         gen = self.create_generator().prepare(self.m, self.series)
 61 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.series)
 62 |         _verify_columns_correct(bf_dist_matrix, gen, True)
 63 | 
 64 |     def test_non_streaming_self_join_calc_column_cache(self):
 65 |         gen = self.create_generator().prepare(self.m, self.series, self.series)
 66 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.series)
 67 |         _verify_columns_correct(bf_dist_matrix, gen, False)
 68 | 
 69 |     def test_streaming_calc_diagonal(self):
 70 |         gen = self.create_generator().prepare_streaming(self.m, 20, 15)
 71 | 
 72 |         gen.append_series(self.series[:10])
 73 |         gen.append_query(self.query[:5])
 74 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:10], self.query[:5])
 75 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 76 | 
 77 |         gen.append_series(self.series[10: 15])
 78 |         gen.append_query(self.query[5: 10])
 79 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:10])
 80 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 81 | 
 82 |         gen.append_query(self.query[10: 15])
 83 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:15])
 84 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 85 | 
 86 |         gen.append_series(self.series[15: 25])
 87 |         gen.append_query(self.query[15: 25])
 88 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[5: 25], self.query[10: 25])
 89 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 90 | 
 91 |         gen.append_series(self.series[25:40])
 92 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[20: 40], self.query[10: 25])
 93 |         _verify_diagonals_correct(bf_dist_matrix, gen)
 94 | 
 95 |     def test_streaming_calc_column_no_cache(self):
 96 |         gen = self.create_generator().prepare_streaming(self.m, 20, 15)
 97 | 
 98 |         gen.append_series(self.series[:10])
 99 |         gen.append_query(self.query[:5])
100 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:10], self.query[:5])
101 |         _verify_columns_correct(bf_dist_matrix, gen, True)
102 | 
103 |         gen.append_series(self.series[10: 15])
104 |         gen.append_query(self.query[5: 10])
105 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:10])
106 |         _verify_columns_correct(bf_dist_matrix, gen, True)
107 | 
108 |         gen.append_query(self.query[10: 15])
109 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:15])
110 |         _verify_columns_correct(bf_dist_matrix, gen, True)
111 | 
112 |         gen.append_series(self.series[15: 25])
113 |         gen.append_query(self.query[15: 25])
114 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[5: 25], self.query[10: 25])
115 |         _verify_columns_correct(bf_dist_matrix, gen, True)
116 | 
117 |         gen.append_series(self.series[25:40])
118 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[20: 40], self.query[10: 25])
119 |         _verify_columns_correct(bf_dist_matrix, gen, True)
120 | 
121 |     def test_streaming_calc_column_cache(self):
122 |         gen = self.create_generator().prepare_streaming(self.m, 20, 15)
123 | 
124 |         gen.append_series(self.series[:10])
125 |         gen.append_query(self.query[:5])
126 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:10], self.query[:5])
127 |         _verify_columns_correct(bf_dist_matrix, gen, False)
128 | 
129 |         gen.append_series(self.series[10: 15])
130 |         gen.append_query(self.query[5: 10])
131 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:10])
132 |         _verify_columns_correct(bf_dist_matrix, gen, False)
133 | 
134 |         gen.append_query(self.query[10: 15])
135 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:15])
136 |         _verify_columns_correct(bf_dist_matrix, gen, False)
137 | 
138 |         gen.append_series(self.series[15: 25])
139 |         gen.append_query(self.query[15: 25])
140 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[5: 25], self.query[10: 25])
141 |         _verify_columns_correct(bf_dist_matrix, gen, False)
142 | 
143 |         gen.append_series(self.series[25:40])
144 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[20: 40], self.query[10: 25])
145 |         _verify_columns_correct(bf_dist_matrix, gen, False)
146 | 
147 |     def test_streaming_updates_cached_row(self):
148 |         # Override series & query to ensure there are no constant subsequences
149 |         self.series = np.array(
150 |             [0.5546021, 0.13714127, 0.903246, 0.03695094, 0.23420792,
151 |              0.27482897, 0.57765821, 0.23571178, 0.65772705, 0.00292154,
152 |              0.87258653, 0.29869269, 0.91492178, 0.69096235, 0.6786107])
153 | 
154 |         self.query = np.array(
155 |             [0.03737861, 0.53931239, 0.06194507, 0.0938707, 0.95875364,
156 |              0.09495936, 0.12392364, 0.81358582, 0.56507776, 0.61620183])
157 | 
158 |         gen = self.create_generator().prepare_streaming(self.m, 10, 10)
159 |         gen.append_series(self.series[:10])
160 |         gen.append_query(self.query[:10])
161 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.query[:10])
162 | 
163 |         # Test shifted behaviour
164 |         npt.assert_allclose(bf_dist_matrix[:, 0], gen.calc_column(0))
165 |         gen.append_series(self.series[10:11])
166 |         npt.assert_allclose(bf_dist_matrix[:, 1], gen.calc_column(0))
167 | 
168 |         # Test shifted but off-by-one behaviour
169 |         npt.assert_allclose(bf_dist_matrix[:, 4], gen.calc_column(3))
170 |         gen.append_series(self.series[11:12])
171 |         npt.assert_allclose(bf_dist_matrix[:, 6], gen.calc_column(4))
172 | 
173 |     def test_streaming_self_join_calc_diagonal(self):
174 |         gen = self.create_generator().prepare_streaming(self.m, 20)
175 | 
176 |         gen.append_series(self.series[:10])
177 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:10], self.series[:10])
178 |         _verify_diagonals_correct(bf_dist_matrix, gen)
179 | 
180 |         gen.append_series(self.series[10: 15])
181 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.series[:15])
182 |         _verify_diagonals_correct(bf_dist_matrix, gen)
183 | 
184 |         gen.append_series(self.series[15: 16])
185 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:16], self.series[:16])
186 |         _verify_diagonals_correct(bf_dist_matrix, gen)
187 | 
188 |         gen.append_series(self.series[16:40])
189 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[20: 40], self.series[20: 40])
190 |         _verify_diagonals_correct(bf_dist_matrix, gen)
191 | 
192 |     def test_streaming_self_join_calc_column_no_cache(self):
193 |         gen = self.create_generator().prepare_streaming(self.m, 20)
194 | 
195 |         gen.append_series(self.series[:10])
196 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:10], self.series[:10])
197 |         _verify_columns_correct(bf_dist_matrix, gen, True)
198 | 
199 |         gen.append_series(self.series[10: 15])
200 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.series[:15])
201 |         _verify_columns_correct(bf_dist_matrix, gen, True)
202 | 
203 |         gen.append_series(self.series[15: 16])
204 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:16], self.series[:16])
205 |         _verify_columns_correct(bf_dist_matrix, gen, True)
206 | 
207 |         gen.append_series(self.series[16:40])
208 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[20: 40], self.series[20: 40])
209 |         _verify_columns_correct(bf_dist_matrix, gen, True)
210 | 
211 |     def test_streaming_self_join_calc_column_cache(self):
212 |         gen = self.create_generator().prepare_streaming(self.m, 20)
213 | 
214 |         gen.append_series(self.series[:10])
215 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:10], self.series[:10])
216 |         _verify_columns_correct(bf_dist_matrix, gen, False)
217 | 
218 |         gen.append_series(self.series[10: 15])
219 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:15], self.series[:15])
220 |         _verify_columns_correct(bf_dist_matrix, gen, False)
221 | 
222 |         gen.append_series(self.series[15: 16])
223 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[:16], self.series[:16])
224 |         _verify_columns_correct(bf_dist_matrix, gen, False)
225 | 
226 |         gen.append_series(self.series[16:40])
227 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series[20: 40], self.series[20: 40])
228 |         _verify_columns_correct(bf_dist_matrix, gen, False)
229 | 
230 |     def test_non_streaming_calc_single(self):
231 |         gen = self.create_generator().prepare(self.m, self.series, self.query)
232 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.query)
233 |         num_cols = len(self.series) - self.m + 1
234 |         num_rows = len(self.query) - self.m + 1
235 |         result = np.full((num_rows, num_cols), np.nan, dtype=float)
236 |         for col in range(num_cols):
237 |             for row in range(num_rows):
238 |                 result[row, col] = gen.calc_single(row, col)
239 | 
240 |         npt.assert_allclose(result, bf_dist_matrix, atol=1e-10)
241 | 
242 |     def test_numerical_stability(self):
243 |         self.series = np.array([9.859169023394657, 18.026092617400675, 1.6423838253843416e-24, 0.0, 0.0])
244 |         self.m = 3
245 |         gen = self.create_generator().prepare(self.m, self.series, self.series)
246 | 
247 |         col0 = gen.calc_column(0)
248 |         col1 = gen.calc_column(1)
249 |         col2 = gen.calc_column(2)
250 | 
251 |         # These assertions failed when using FFT convolve
252 |         npt.assert_(np.max(col0) <= 2 * np.sqrt(self.m), f"Max value was: {np.max(col0)}")
253 |         npt.assert_(np.max(col1) <= 2 * np.sqrt(self.m), f"Max value was: {np.max(col1)}")
254 |         npt.assert_(np.max(col2) <= 2 * np.sqrt(self.m), f"Max value was: {np.max(col2)}")
255 | 
256 |         # These assertions check that calculations for a single value (not using bulk-calculated dot products)
257 |         # do not differ. They focus on the checking the more complex dot-product based calculation.
258 |         for col_i, col in enumerate([col0, col1, col2]):
259 |             for row_i in range(3):
260 |                 npt.assert_allclose(col[row_i], gen.calc_single(row_i, col_i), atol=1e-10)
261 | 
262 |         # These assertions check against the more simple euclidean-of-znormalized calculation.
263 |         bf_dist_matrix = self.bruteforce_matrix(self.m, self.series, self.series)
264 |         npt.assert_allclose(col0, bf_dist_matrix[:, 0], atol=1e-10)
265 |         npt.assert_allclose(col1, bf_dist_matrix[:, 1], atol=1e-10)
266 |         npt.assert_allclose(col2, bf_dist_matrix[:, 2], atol=1e-10)
267 | 
268 | 
269 | class TestZnormEuclidean(AbstractGeneratorTest, TestCase):
270 |     def create_generator(self):
271 |         return ZNormEuclidean()
272 | 
273 |     def bruteforce_matrix(self, m, series, query):
274 |         return _bruteforce_zeuclidean_distance_matrix(series, query, m, 0.)
275 | 
276 | 
277 | class TestZnormEuclideanNoiseElimination(AbstractGeneratorTest, TestCase):
278 |     def create_generator(self):
279 |         return ZNormEuclidean(noise_std=0.2)
280 | 
281 |     def bruteforce_matrix(self, m, series, query):
282 |         return _bruteforce_zeuclidean_distance_matrix(series, query, m, 0.2)
283 | 
284 | 
285 | def _verify_diagonals_correct(bf_distance_matrix, zeuclid):
286 |     h, w = bf_distance_matrix.shape
287 | 
288 |     for i in range(-h + 1, w):
289 |         result = zeuclid.calc_diagonal(i)
290 |         expected = bf_distance_matrix[diag_indices(h, w, i)]
291 |         npt.assert_allclose(result, expected, atol=1e-10)
292 | 
293 | 
294 | def _verify_columns_correct(bf_distance_matrix, euclid, backwards):
295 |     w = bf_distance_matrix.shape[1]
296 | 
297 |     if backwards:
298 |         r = range(w - 1, -1, -1)
299 |     else:
300 |         r = range(w)
301 | 
302 |     for i in r:
303 |         result = euclid.calc_column(i)
304 |         expected = bf_distance_matrix[:, i]
305 |         npt.assert_allclose(result, expected, atol=1e-10, err_msg="Mismatch for row {row}".format(row=i))
306 | 
307 | 
308 | def _bruteforce_zeuclidean_distance_matrix(series, query, m, noise_std=0.):
309 |     num_cols = len(series) - m + 1
310 |     num_rows = len(query) - m + 1
311 |     distance_matrix = np.zeros((num_rows, num_cols))
312 | 
313 |     for row in range(num_rows):
314 |         for col in range(num_cols):
315 |             distance_matrix[row, col] = _euclidean_znorm_distance(
316 |                 query[row: row + m],
317 |                 series[col: col + m],
318 |                 m,
319 |                 noise_std
320 |             )
321 | 
322 |     return distance_matrix
323 | 
324 | 
325 | def _euclidean_znorm_distance(s1, s2, m, noise_std=0.):
326 |     sq_dist = np.sum(
327 |         np.square(_znorm(s1) - _znorm(s2)))
328 | 
329 |     if noise_std != 0.:
330 |         std1 = np.std(s1)
331 |         std2 = np.std(s2)
332 | 
333 |         if std1 != 0. or std2 != 0.:
334 |             max_std = np.maximum(np.std(s1), np.std(s2))
335 |             sq_dist -= (2 * (m + 1) * np.square(noise_std) /
336 |                         np.square(max_std))
337 |             sq_dist = np.maximum(sq_dist, 0)
338 | 
339 |     return np.sqrt(sq_dist)
340 | 
341 | 
342 | def _znorm(a):
343 |     std = np.std(a)
344 |     if std < 1e-6:
345 |         std = 1
346 |     return (a - np.mean(a)) / std
347 | 


--------------------------------------------------------------------------------
/distancematrix/tests/test_insights.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from unittest import TestCase
 3 | import numpy.testing as npt
 4 | 
 5 | from distancematrix.insights import lowest_value_idxs
 6 | from distancematrix.insights import highest_value_idxs
 7 | 
 8 | 
 9 | class TestSlidingMeanStd(TestCase):
10 |     def test_lowest_value_idxs(self):
11 |         a = np.array([1, 5, 3, 9, 4, 7, 6, 0, 2, 8], dtype=float)
12 | 
13 |         npt.assert_equal(list(lowest_value_idxs(a, 0)), np.argsort(a))
14 |         npt.assert_equal(list(lowest_value_idxs(a, 1)), [7, 0, 2, 4, 9])
15 |         npt.assert_equal(list(lowest_value_idxs(a, 2)), [7, 0, 4])
16 |         npt.assert_equal(list(lowest_value_idxs(a, 3)), [7, 0])
17 | 
18 |     def test_highest_value_idxs(self):
19 |         a = np.array([4, 8, 6, 1, 0, 3, 7, 9, 2, 5], dtype=float)
20 | 
21 |         npt.assert_equal(list(highest_value_idxs(a, 0)), np.argsort(a)[::-1])
22 |         npt.assert_equal(list(highest_value_idxs(a, 1)), [7, 1, 9, 5, 3])
23 |         npt.assert_equal(list(highest_value_idxs(a, 2)), [7, 1, 4])
24 |         npt.assert_equal(list(highest_value_idxs(a, 3)), [7, 1])
25 | 


--------------------------------------------------------------------------------
/distancematrix/tests/test_math_tricks.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | 
  5 | import distancematrix.math_tricks as math_tricks
  6 | 
  7 | 
  8 | def brute_sliding_mean(data, m):
  9 |     return np.array([np.mean(data[i:i + m]) for i in range(len(data) - m + 1)])
 10 | 
 11 | 
 12 | def brute_sliding_var(data, m):
 13 |     return np.array([np.var(data[i:i + m]) for i in range(len(data) - m + 1)])
 14 | 
 15 | 
 16 | def brute_sliding_std(data, m):
 17 |     return np.array([np.std(data[i:i + m]) for i in range(len(data) - m + 1)])
 18 | 
 19 | MEAN_STABILITY_DATA = np.array([
 20 |     0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 21 |     0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 22 |     0., 43.33, 69.39, 76.01, 76.03, 75.19, 82.21, 91.37, 86.44, 88.09, 88.56, 98.88, 91.62, 93.97, 90.81, 88.25,
 23 |     95.3, 100., 95.96, 98.13, 97.57, 94.02, 95.24, 92.59, 98.98, 100., 100., 100., 97.88, 96.33, 98.07, 95.18,
 24 |     93.52, 79.99, 37.08, 13.9, 17.43, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 25 |     0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 26 |     0., 0., 0., 0., 0., 0., 0., 0., 58.58, 70.16, 83.06, 82.79, 85.38, 100., 100., 100.,
 27 |     100., 100., 85.97, 56.18, 0., 0., 18.69, 0., 0., 13.9, 13.94, 25.69, 34.33, 65.06, 80.1, 85.65,
 28 |     84.57, 83.74, 94.75, 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 29 |     100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 30 |     100., 100., 100., 100., 100., 100., 100., 100., 90.1, 79.01, 65.47, 54.24, 25.05, 15.01, 0., 0.])
 31 | 
 32 | # For a subsequence length of 24, this data array provided a lot of approximation errors for various techniques
 33 | # that were tested to calculate sliding variance/std.
 34 | STD_VAR_STABILITY_DATA = np.array([
 35 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 36 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 37 |     12., 12., 12., 12., 12., 43.33, 69.39, 76.01, 76.03, 75.19, 82.21, 91.37, 86.44, 88.09,
 38 |     88.56, 98.88, 91.62, 93.97, 90.81, 88.25, 95.3, 100., 95.96, 98.13, 97.57, 94.02, 95.24, 92.59,
 39 |     98.98, 100., 100., 100., 97.88, 96.33, 98.07, 95.18, 93.52, 79.99, 37.08, 13.9, 17.43, 12.,
 40 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 41 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 42 |     12., 12., 12., 12., 12., 12., 58.58, 70.16, 83.06, 82.79, 85.38, 100., 100., 100.,
 43 |     100., 100., 85.97, 56.18, 12., 12., 18.69, 12., 12., 13.9, 13.94, 25.69, 34.33, 65.06,
 44 |     80.1, 85.65, 84.57, 83.74, 94.75, 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 45 |     100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 46 |     100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 47 |     90.1, 79.01, 65.47, 54.24, 25.05, 15.01, 12., 12., 12., 12., 12., 12., 12., 12.,
 48 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 49 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 50 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 51 |     12., 12., 15.94, 42.61, 71.12, 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 52 |     100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 53 |     100., 100., 100., 120., 120., 120., 120., 120., 120., 120., 14.69, 12., 12., 12.,
 54 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 55 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 56 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 57 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 58 |     12., 12., 15.19, 14.81, 22.67, 31.61, 32.21, 39.68, 47.36, 52.63, 61.79, 62.49, 67.66, 120.,
 59 |     120., 120., 120., 109.44, 87.13, 51.72, 55.24, 57.78, 62.97, 66.43, 120., 120., 120., 120.,
 60 |     110.46, 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 12.,
 61 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 62 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 63 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 64 |     12., 12., 12., 12., 12., 12., 31.04, 52.73, 49.78, 57.56, 66.5, 66.92, 75.89, 88.17,
 65 |     97.98, 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 66 |     100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
 67 |     100., 100., 100., 49.6, 45.2, 13.15, 12., 12., 12., 12., 12., 12., 12., 12.,
 68 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 69 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.,
 70 |     12., 12., 12., 12., 12., 12., 12., 12., 12., 12.])
 71 | 
 72 | 
 73 | class TestSlidingMeanStdVar(TestCase):
 74 |     def test_sliding_mean_std(self):
 75 |         random_gen = np.random.RandomState(0)
 76 | 
 77 |         data_array = [
 78 |             np.array([5.15, 2.15, 1.05, -9.2, 0.01, 7.14, 4.18, 10.2, 3.25, 14.1, -9.85, 5.12, 0.11, 0.14, 0.98]),
 79 |             np.array([0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -50, -50, -50, -50, -50, -50]),
 80 |             np.array([1e8, 1.6e9, 0.9e8, 6e4, 5.6e2, 9.9e6, 9e7, 6.48e4, 9.2e4, 1e8, 3.14e7]),
 81 |             random_gen.rand(1000)
 82 |         ]
 83 |         m = 5
 84 | 
 85 |         for data in data_array:
 86 |             correct_mean = [np.mean(data[i:i + m]) for i in range(len(data) - m + 1)]
 87 |             correct_std = [np.std(data[i:i + m]) for i in range(len(data) - m + 1)]
 88 | 
 89 |             mean, std = math_tricks.sliding_mean_std(data, m)
 90 | 
 91 |             npt.assert_allclose(mean, correct_mean)
 92 |             npt.assert_allclose(std, correct_std)
 93 | 
 94 |     def test_sliding_mean_numerical_stability(self):
 95 |         npt.assert_allclose(
 96 |             math_tricks.sliding_mean_std(MEAN_STABILITY_DATA, 24)[0],
 97 |             brute_sliding_mean(MEAN_STABILITY_DATA, 24), )
 98 | 
 99 |     def test_sliding_std_numerical_stability(self):
100 |         npt.assert_allclose(
101 |             math_tricks.sliding_mean_std(STD_VAR_STABILITY_DATA, 24)[1],
102 |             brute_sliding_std(STD_VAR_STABILITY_DATA, 24))
103 | 
104 |     def test_sliding_var_numerical_stability(self):
105 |         npt.assert_allclose(
106 |             math_tricks.sliding_mean_var(STD_VAR_STABILITY_DATA, 24)[1],
107 |             brute_sliding_var(STD_VAR_STABILITY_DATA, 24))
108 | 
109 | 
110 | class TestStreamingStatistics(TestCase):
111 |     def test_different_m(self):
112 |         data = np.array([
113 |             5.15, 2.15, 1.05, -9.2, 0.01, 7.14, 4.18, 10.2, 3.25, 14.1,
114 |             -9.85, 5.12, 0.11, 0.14, 0.98, 0., 0., 0., 0., 0.,
115 |             0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
116 |             1., -50, -50, -50, -50, -50, -50, 1e8, 1.6e9, 0.9e8,
117 |             6e4, 5.6e2, 9.9e6, 9e7, 6.48e4, 9.2e4, 1e8, 3.14e7, 42., 1.
118 |         ])
119 | 
120 |         self._test_for_params(data, 10, 5)
121 |         self._test_for_params(data, 10, 4)
122 |         self._test_for_params(data, 10, 3)
123 |         self._test_for_params(data, 10, 2)
124 |         self._test_for_params(data, 10, 1)
125 |         self._test_for_params(data, 5, 5)
126 |         self._test_for_params(data, 5, 4)
127 |         self._test_for_params(data, 5, 3)
128 |         self._test_for_params(data, 5, 2)
129 |         self._test_for_params(data, 5, 1)
130 | 
131 |     def test_different_stepsize(self):
132 |         data = np.array([
133 |             5.15, 2.15, 1.05, -9.2, 0.01, 7.14, 4.18, 10.2, 3.25, 14.1,
134 |             -9.85, 5.12, 0.11, 0.14, 0.98, 0., 0., 0., 0., 0.,
135 |             0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
136 |             1., -50, -50, -50, -50, -50, -50, 1e8, 1.6e9, 0.9e8,
137 |             6e4, 5.6e2, 9.9e6, 9e7, 6.48e4, 9.2e4, 1e8, 3.14e7, 42., 1.
138 |         ])
139 | 
140 |         self._test_for_params(data, 10, 5, 1)
141 |         self._test_for_params(data, 10, 5, 2)
142 |         self._test_for_params(data, 10, 5, 3)
143 |         self._test_for_params(data, 10, 5, 4)
144 |         self._test_for_params(data, 10, 5, 5)
145 |         self._test_for_params(data, 10, 5, 6)
146 |         self._test_for_params(data, 10, 5, 7)
147 |         self._test_for_params(data, 10, 5, 9)
148 |         self._test_for_params(data, 10, 5, 10)
149 |         self._test_for_params(data, 10, 5, 11)
150 |         self._test_for_params(data, 10, 5, 12)
151 | 
152 |     def _test_for_params(self, data, data_len, m, stepsize=1):
153 |         start = 0
154 |         ss = math_tricks.StreamingStats(data[:data_len], m)
155 | 
156 |         npt.assert_equal(ss.data, data[start: start + data_len])
157 |         npt.assert_allclose(ss.mean, [np.mean(data[start + i: start + i + m]) for i in range(data_len - m + 1)])
158 |         npt.assert_allclose(ss.std, [np.std(data[start + i: start + i + m]) for i in range(data_len - m + 1)])
159 | 
160 |         while start + data_len + stepsize < len(data):
161 |             ss.append(data[start + data_len: start + data_len + stepsize])
162 |             start += stepsize
163 |             npt.assert_equal(ss.data, data[start: start + data_len])
164 |             npt.assert_allclose(
165 |                 ss.mean, [np.mean(data[start + i: start + i + m]) for i in range(data_len - m + 1)],
166 |                 atol=2e-15, err_msg="Different for window starting at " + str(start))
167 | 
168 |             expected_std = [np.std(data[start + i: start + i + m]) for i in range(data_len - m + 1)]
169 |             npt.assert_allclose(
170 |                 ss.std, expected_std,
171 |                 atol=2e-15,
172 |                 err_msg="Different for window starting at " + str(start) + ": " + str(ss.std - expected_std))
173 | 
174 |     def test_stability(self):
175 |         self._test_for_params(STD_VAR_STABILITY_DATA, 50, 24)
176 | 


--------------------------------------------------------------------------------
/distancematrix/tests/test_ostinato.py:
--------------------------------------------------------------------------------
  1 | from itertools import permutations
  2 | from unittest import TestCase
  3 | 
  4 | import numpy as np
  5 | import numpy.testing as npt
  6 | 
  7 | from distancematrix.generator import ZNormEuclidean
  8 | from distancematrix.consumer import MatrixProfileLR
  9 | from distancematrix.calculator import AnytimeCalculator
 10 | from distancematrix.ostinato import find_consensus_motif, CMResult
 11 | 
 12 | 
 13 | class TestOstinato(TestCase):
 14 |     def test_exact_match(self):
 15 |         # Each series contains a shifted/scaled version of [1, 1, 0, 2, 2]
 16 |         series_list = np.array([
 17 |             np.array([0.04, 0.45, 0.45, 0.00, 0.90, 0.90, 0.74, 0.72, 0.48, 0.82, 0.49, 0.36, 0.02, 0.37, 0.21]),
 18 |             np.array([0.08, 0.19, 0.25, 0.59, 0.50, 0.72, 0.16, 0.45, 1.49, 1.49, 0.49, 2.49, 2.49, 0.92, 0.16]),
 19 |             np.array([0.29, 0.42, 0.96, 1.68, 1.68, 1.00, 2.36, 2.36, 0.14, 0.22, 0.51, 0.45, 0.01, 0.66, 0.53]),
 20 |             np.array([0.84, 0.01, 0.01, 0.00, 0.02, 0.02, 0.51, 0.53, 0.91, 0.94, 0.47, 0.36, 0.28, 0.15, 0.08])
 21 |         ])
 22 | 
 23 |         correct_subseq_idx = [1, 8, 3, 1]
 24 | 
 25 |         for perm in permutations(range(len(series_list))):
 26 |             perm = list(perm)  # Tuple to list for indexing
 27 |             calc_result = find_consensus_motif(series_list[perm], 5)
 28 |             bf_result = find_consensus_motif_bruteforce(series_list[perm], 5)
 29 | 
 30 |             npt.assert_almost_equal(bf_result.radius, 0)
 31 |             npt.assert_equal(bf_result.series_index, 0)
 32 |             npt.assert_equal(bf_result.subseq_index, correct_subseq_idx[perm[0]])
 33 | 
 34 |             npt.assert_almost_equal(calc_result.radius, 0)
 35 |             npt.assert_equal(calc_result.series_index, 0)
 36 |             npt.assert_equal(calc_result.subseq_index, correct_subseq_idx[perm[0]])
 37 | 
 38 |     def test_near_match(self):
 39 |         # Fourth series contains shifted/scaled [1, 1, 1, 2, 2],
 40 |         # all other series contain shifted/scaled versions with slight noise.
 41 |         series_list = np.array([
 42 |             np.array([0.04, 0.40, 0.50, 0.45, 0.90, 0.90, 0.74, 0.72, 0.48, 0.82, 0.49, 0.36, 0.02, 0.37, 0.21]),
 43 |             np.array([0.08, 0.19, 0.25, 0.59, 0.50, 0.72, 0.16, 0.45, 1.53, 1.44, 1.49, 2.49, 2.49, 0.92, 0.16]),
 44 |             np.array([0.29, 0.42, 0.96, 1.68, 1.78, 1.58, 2.36, 2.36, 0.14, 0.22, 0.51, 0.45, 0.01, 0.66, 0.53]),
 45 |             np.array([0.84, 0.01, 0.01, 0.01, 0.02, 0.02, 0.51, 0.53, 0.91, 0.94, 0.47, 0.36, 0.28, 0.15, 0.08])
 46 |         ])
 47 | 
 48 |         for perm in permutations(range(len(series_list))):
 49 |             perm = list(perm)  # Tuple to list for indexing
 50 |             calc_result = find_consensus_motif(series_list[perm], 5)
 51 |             bf_result = find_consensus_motif_bruteforce(series_list[perm], 5)
 52 | 
 53 |             npt.assert_almost_equal(calc_result.radius, bf_result.radius)
 54 |             npt.assert_equal(bf_result.series_index, perm.index(3))
 55 |             npt.assert_equal(calc_result.series_index, perm.index(3))
 56 |             npt.assert_equal(bf_result.subseq_index, 1)
 57 |             npt.assert_equal(calc_result.subseq_index, 1)
 58 | 
 59 |     def test_on_random_data(self):
 60 |         data = np.array([
 61 |             [0.292, 0.183, 0.509, 0.128, 0.718, 0.054, 0.7, 0.532, 0.178, 0.076, 0.46, 0.027, 0.882, 0.288, 0.746],
 62 |             [0.57, 0.539, 0.239, 0.328, 0.784, 0.614, 0.288, 0.696, 0.12, 0.337, 0.54, 0.401, 0.589, 0.461, 0.666],
 63 |             [0.454, 0.487, 0.687, 0.981, 0.24, 0.863, 0.458, 0.203, 0.798, 0.917, 0.336, 0.562, 0.266, 0.325, 0.818],
 64 |             [0.749, 0.886, 0.095, 0.335, 0.247, 0.403, 0.063, 0.047, 0.804, 0.976, 0.836, 0.065, 0.27, 0.59, 0.747],
 65 |             [0.196, 0.924, 0.968, 0.19, 0.999, 0.31, 0.908, 0.576, 0.521, 0.246, 0.444, 0.319, 0.781, 0.628, 0.183],
 66 |             [0.136, 0.444, 0.115, 0.954, 0.231, 0.876, 0.566, 0.886, 0.898, 0.287, 0.544, 0.365, 0.108, 0.345, 0.03],
 67 |             [0.813, 0.324, 0.465, 0.459, 0.565, 0.28, 0.334, 0.169, 0.479, 0.957, 0.621, 0.026, 0.998, 0.732, 0.365],
 68 |             [0.176, 0.072, 0.288, 0.915, 0.867, 0.215, 0.566, 0.555, 0.602, 0.943, 0.786, 0.404, 0.271, 0.579, 0.362],
 69 |             [0.7, 0.113, 0.159, 0.701, 0.476, 0.216, 0.359, 0.613, 0.358, 0.871, 0.888, 0.668, 0.604, 0.574, 0.555],
 70 |             [0.745, 0.298, 0.213, 0.669, 0.303, 0.737, 0.93, 0.998, 0.529, 0.215, 0.839, 0.666, 0.669, 0.583, 0.168]])
 71 | 
 72 |         calc_result = find_consensus_motif(data, 5)
 73 |         bf_result = find_consensus_motif_bruteforce(data, 5)
 74 | 
 75 |         npt.assert_almost_equal(calc_result.radius, bf_result.radius)
 76 |         npt.assert_equal(calc_result.series_index, bf_result.series_index)
 77 |         npt.assert_equal(calc_result.subseq_index, bf_result.subseq_index)
 78 | 
 79 | 
 80 | def find_consensus_motif_bruteforce(series_list, m) -> CMResult:
 81 |     result = CMResult(np.inf, -1, -1)
 82 | 
 83 |     for series_idx, series in enumerate(series_list):
 84 |         radii = np.zeros(len(series) - m + 1)
 85 |         for series2_idx, series2 in enumerate(series_list):
 86 |             if series_idx == series2_idx:
 87 |                 continue
 88 | 
 89 |             calc = AnytimeCalculator(m, series, series2)
 90 |             calc.add_generator(0, ZNormEuclidean())
 91 |             mp_cons = calc.add_consumer([0], MatrixProfileLR())
 92 |             calc.calculate_columns()
 93 |             mp = mp_cons.matrix_profile()
 94 | 
 95 |             radii = np.maximum(radii, mp)
 96 | 
 97 |         subseq_idx = np.argmin(radii)
 98 |         subseq_radius = radii[subseq_idx]
 99 |         if subseq_radius < result.radius:
100 |             result = CMResult(subseq_radius, series_idx, subseq_idx)
101 | 
102 |     return result
103 | 
104 | 
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/distancematrix/tests/test_ringbuffer.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from unittest import TestCase
  3 | import numpy.testing as npt
  4 | 
  5 | from distancematrix.ringbuffer import RingBuffer
  6 | 
  7 | 
  8 | class TestRingBuffer(TestCase):
  9 |     def test_one_dimensional(self):
 10 |         buffer = RingBuffer([0, 1, 2, 3, 4])
 11 |         npt.assert_equal(buffer.view, np.array([0, 1, 2, 3, 4]))
 12 |         npt.assert_equal(buffer.max_shape, (5,))
 13 | 
 14 |         self.assertEqual(buffer.push([]), 0)
 15 |         npt.assert_equal(buffer.view, np.array([0, 1, 2, 3, 4]))
 16 |         self.assertEqual(buffer[0], 0)
 17 | 
 18 |         self.assertEqual(buffer.push(5), 1)
 19 |         npt.assert_equal(buffer.view, np.array([1, 2, 3, 4, 5]))
 20 |         self.assertEqual(buffer[0], 1)
 21 | 
 22 |         self.assertEqual(buffer.push([6]), 1)
 23 |         self.assertEqual(buffer.push([7]), 1)
 24 |         npt.assert_equal(buffer.view, np.array([3, 4, 5, 6, 7]))
 25 |         self.assertEqual(buffer[0], 3)
 26 | 
 27 |         self.assertEqual(buffer.push([8, 9, 10]), 3)
 28 |         npt.assert_equal(buffer.view, np.array([6, 7, 8, 9, 10]))
 29 |         self.assertEqual(buffer[0], 6)
 30 | 
 31 |         self.assertEqual(buffer.push([11, 12, 13, 14]), 4)
 32 |         npt.assert_equal(buffer.view, np.array([10, 11, 12, 13, 14]))
 33 |         self.assertEqual(buffer[0], 10)
 34 | 
 35 |         self.assertEqual(buffer.push([15, 16, 17, 18, 19]), 5)
 36 |         npt.assert_equal(buffer.view, np.array([15, 16, 17, 18, 19]))
 37 |         self.assertEqual(buffer[0], 15)
 38 | 
 39 |         self.assertEqual(buffer.push([20, 21, 22, 23, 24, 25]), 6)
 40 |         npt.assert_equal(buffer.view, np.array([21, 22, 23, 24, 25]))
 41 |         self.assertEqual(buffer[0], 21)
 42 | 
 43 |     def test_multi_dimensional(self):
 44 |         buffer = RingBuffer([[0, 1, 2, 3, 4], [0, -1, -2, -3, -4]])
 45 |         npt.assert_equal(buffer.view, np.array([[0, 1, 2, 3, 4], [0, -1, -2, -3, -4]]))
 46 |         npt.assert_equal(buffer.max_shape, (2, 5))
 47 | 
 48 |         self.assertEqual(buffer.push([[], []]), 0)
 49 |         npt.assert_equal(buffer.view, np.array([[0, 1, 2, 3, 4], [0, -1, -2, -3, -4]]))
 50 |         npt.assert_equal(buffer[:, 0], [0, 0])
 51 | 
 52 |         self.assertEqual(buffer.push([[5], [-5]]), 1)
 53 |         npt.assert_equal(buffer.view, np.array([[1, 2, 3, 4, 5], [-1, -2, -3, -4, -5]]))
 54 |         npt.assert_equal(buffer[:, 0], [1, -1])
 55 | 
 56 |         self.assertEqual(buffer.push([[6, 7], [-6, -7]]), 2)
 57 |         npt.assert_equal(buffer.view, np.array([[3, 4, 5, 6, 7], [-3, -4, -5, -6, -7]]))
 58 |         npt.assert_equal(buffer[:, 0], [3, -3])
 59 | 
 60 |         self.assertEqual(buffer.push([[8, 9, 10], [-8, -9, -10]]), 3)
 61 |         npt.assert_equal(buffer.view, np.array([[6, 7, 8, 9, 10], [-6, -7, -8, -9, -10]]))
 62 |         npt.assert_equal(buffer[:, 0], [6, -6])
 63 | 
 64 |         self.assertEqual(buffer.push([[11, 12, 13, 14], [-11, -12, -13, -14]]), 4)
 65 |         npt.assert_equal(buffer.view, np.array([[10, 11, 12, 13, 14], [-10, -11, -12, -13, -14]]))
 66 |         npt.assert_equal(buffer[:, 0], [10, -10])
 67 | 
 68 |         self.assertEqual(buffer.push([[15, 16, 17, 18, 19], [-15, -16, -17, -18, -19]]), 5)
 69 |         npt.assert_equal(buffer.view, np.array([[15, 16, 17, 18, 19], [-15, -16, -17, -18, -19]]))
 70 |         npt.assert_equal(buffer[:, 0], [15, -15])
 71 | 
 72 |         self.assertEqual(buffer.push([[20, 21, 22, 23, 24, 25], [-20, -21, -22, -23, -24, -25]]), 6)
 73 |         npt.assert_equal(buffer.view, np.array([[21, 22, 23, 24, 25], [-21, -22, -23, -24, -25]]))
 74 |         npt.assert_equal(buffer[:, 0], [21, -21])
 75 | 
 76 |     def test_empty_intialization(self):
 77 |         buffer = RingBuffer(None, shape=(5,), dtype=int)
 78 |         npt.assert_equal(buffer.max_shape, (5,))
 79 | 
 80 |         npt.assert_equal(buffer.view, np.array([]))
 81 | 
 82 |         self.assertEqual(buffer.push([1]), 0)
 83 |         npt.assert_equal(buffer.view, np.array([1]))
 84 |         self.assertEqual(buffer[0], 1)
 85 | 
 86 |         self.assertEqual(buffer.push([2, 3]), 0)
 87 |         npt.assert_equal(buffer.view, np.array([1, 2, 3]))
 88 |         self.assertEqual(buffer[0], 1)
 89 | 
 90 |         self.assertEqual(buffer.push([4, 5, 6]), 1)
 91 |         npt.assert_equal(buffer.view, np.array([2, 3, 4, 5, 6]))
 92 |         self.assertEqual(buffer[0], 2)
 93 | 
 94 |     def test_partial_intialization(self):
 95 |         buffer = RingBuffer([1, 2], shape=(5,), dtype=int)
 96 |         npt.assert_equal(buffer.max_shape, (5,))
 97 | 
 98 |         npt.assert_equal(buffer.view, np.array([1, 2]))
 99 |         self.assertEqual(buffer[0], 1)
100 | 
101 |         self.assertEqual(buffer.push([3]), 0)
102 |         npt.assert_equal(buffer.view, np.array([1, 2, 3]))
103 |         self.assertEqual(buffer[0], 1)
104 | 
105 |         self.assertEqual(buffer.push([4, 5, 6]), 1)
106 |         npt.assert_equal(buffer.view, np.array([2, 3, 4, 5, 6]))
107 |         self.assertEqual(buffer[0], 2)
108 | 
109 |     def test_oversized_initialization(self):
110 |         buffer = RingBuffer([1, 2, 3, 4, 5, 6], shape=(5,), dtype=int)
111 |         npt.assert_equal(buffer.max_shape, (5,))
112 | 
113 |         npt.assert_equal(buffer.view, np.array([2, 3, 4, 5, 6]))
114 |         self.assertEqual(buffer[0], 2)
115 | 
116 |         self.assertEqual(buffer.push([7]), 1)
117 |         npt.assert_equal(buffer.view, np.array([3, 4, 5, 6, 7]))
118 |         self.assertEqual(buffer[0], 3)
119 | 
120 |         self.assertEqual(buffer.push([8, 9, 10]), 3)
121 |         npt.assert_equal(buffer.view, np.array([6, 7, 8, 9, 10]))
122 |         self.assertEqual(buffer[0], 6)
123 | 


--------------------------------------------------------------------------------
/distancematrix/tests/test_util.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | import numpy as np
  3 | import numpy.testing as npt
  4 | 
  5 | from distancematrix.util import diag_length
  6 | from distancematrix.util import diag_indices
  7 | from distancematrix.util import diag_indices_of
  8 | from distancematrix.util import cut_indices_of
  9 | from distancematrix.util import shortest_path_distances
 10 | from distancematrix.util import shortest_path
 11 | from distancematrix.util import sliding_min
 12 | from distancematrix.util import sliding_max
 13 | from distancematrix.util import sliding_window_view
 14 | 
 15 | 
 16 | class TestUtil(TestCase):
 17 |     def test_diag_length_square_matrix(self):
 18 |         self.assertEqual(diag_length(5, 5, 0), 5)
 19 |         self.assertEqual(diag_length(5, 5, 1), 4)
 20 |         self.assertEqual(diag_length(5, 5, -2), 3)
 21 |         self.assertEqual(diag_length(5, 5, 4), 1)
 22 |         self.assertEqual(diag_length(5, 5, 5), 0)
 23 |         self.assertEqual(diag_length(5, 5, 6), 0)
 24 | 
 25 |     def test_diag_length_rect_matrix(self):
 26 |         self.assertEqual(diag_length(5, 3, 0), 3)
 27 |         self.assertEqual(diag_length(5, 3, 1), 2)
 28 |         self.assertEqual(diag_length(5, 3, 2), 1)
 29 |         self.assertEqual(diag_length(5, 3, 3), 0)
 30 |         self.assertEqual(diag_length(5, 3, 4), 0)
 31 | 
 32 |         self.assertEqual(diag_length(5, 3, -1), 3)
 33 |         self.assertEqual(diag_length(5, 3, -2), 3)
 34 |         self.assertEqual(diag_length(5, 3, -3), 2)
 35 |         self.assertEqual(diag_length(5, 3, -4), 1)
 36 |         self.assertEqual(diag_length(5, 3, -5), 0)
 37 |         self.assertEqual(diag_length(5, 3, -6), 0)
 38 | 
 39 |         self.assertEqual(diag_length(3, 5, 0), 3)
 40 |         self.assertEqual(diag_length(3, 5, 1), 3)
 41 |         self.assertEqual(diag_length(3, 5, 2), 3)
 42 |         self.assertEqual(diag_length(3, 5, 3), 2)
 43 |         self.assertEqual(diag_length(3, 5, 4), 1)
 44 |         self.assertEqual(diag_length(3, 5, 5), 0)
 45 |         self.assertEqual(diag_length(3, 5, 6), 0)
 46 | 
 47 |         self.assertEqual(diag_length(3, 5, -1), 2)
 48 |         self.assertEqual(diag_length(3, 5, -2), 1)
 49 |         self.assertEqual(diag_length(3, 5, -3), 0)
 50 |         self.assertEqual(diag_length(3, 5, -4), 0)
 51 | 
 52 |     def test_diag_indices_square(self):
 53 |         data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
 54 |         npt.assert_equal(data[diag_indices(3, 3, -3)], [])
 55 |         npt.assert_equal(data[diag_indices(3, 3, -2)], [7])
 56 |         npt.assert_equal(data[diag_indices(3, 3, -1)], [4, 8])
 57 |         npt.assert_equal(data[diag_indices(3, 3, 0)], [1, 5, 9])
 58 |         npt.assert_equal(data[diag_indices(3, 3, 1)], [2, 6])
 59 |         npt.assert_equal(data[diag_indices(3, 3, 2)], [3])
 60 |         npt.assert_equal(data[diag_indices(3, 3, 3)], [])
 61 | 
 62 |     def test_diag_indices_rect(self):
 63 |         data = np.array([[1, 2, 3], [4, 5, 6]])
 64 |         npt.assert_equal(data[diag_indices(2, 3, -2)], [])
 65 |         npt.assert_equal(data[diag_indices(2, 3, -1)], [4])
 66 |         npt.assert_equal(data[diag_indices(2, 3, 0)], [1, 5])
 67 |         npt.assert_equal(data[diag_indices(2, 3, 1)], [2, 6])
 68 |         npt.assert_equal(data[diag_indices(2, 3, 2)], [3])
 69 |         npt.assert_equal(data[diag_indices(2, 3, 3)], [])
 70 | 
 71 |     def test_diag_indices_of_rect(self):
 72 |         data = np.array([[1, 2, 3], [4, 5, 6]])
 73 |         npt.assert_equal(data[diag_indices_of(data, -2)], [])
 74 |         npt.assert_equal(data[diag_indices_of(data, -1)], [4])
 75 |         npt.assert_equal(data[diag_indices_of(data, 0)], [1, 5])
 76 |         npt.assert_equal(data[diag_indices_of(data, 1)], [2, 6])
 77 |         npt.assert_equal(data[diag_indices_of(data, 2)], [3])
 78 |         npt.assert_equal(data[diag_indices_of(data, 3)], [])
 79 | 
 80 |     def test_cut_indices_of(self):
 81 |         data = np.array([
 82 |             [1, 2, 3],
 83 |             [4, 5, 6],
 84 |             [7, 8, 9],
 85 |             [10, 11, 12]
 86 |         ])
 87 | 
 88 |         npt.assert_equal(data[cut_indices_of(data, 0)], [1])
 89 |         npt.assert_equal(data[cut_indices_of(data, 1)], [4, 2])
 90 |         npt.assert_equal(data[cut_indices_of(data, 2)], [7, 5, 3])
 91 |         npt.assert_equal(data[cut_indices_of(data, 3)], [10, 8, 6])
 92 |         npt.assert_equal(data[cut_indices_of(data, 4)], [11, 9])
 93 |         npt.assert_equal(data[cut_indices_of(data, 5)], [12])
 94 | 
 95 |         npt.assert_equal(data[cut_indices_of(data, 6)], [])
 96 | 
 97 |         data = np.array([
 98 |             [0, 1, 2, 3, 4],
 99 |             [5, 6, 7, 8, 9]
100 |         ])
101 | 
102 |         npt.assert_equal(data[cut_indices_of(data, 0)], [0])
103 |         npt.assert_equal(data[cut_indices_of(data, 1)], [5, 1])
104 |         npt.assert_equal(data[cut_indices_of(data, 2)], [6, 2])
105 |         npt.assert_equal(data[cut_indices_of(data, 3)], [7, 3])
106 |         npt.assert_equal(data[cut_indices_of(data, 4)], [8, 4])
107 |         npt.assert_equal(data[cut_indices_of(data, 5)], [9])
108 | 
109 |     def test_shortest_path_distances(self):
110 |         data = np.array([
111 |             [1, 2, 1, 0, 3],
112 |             [1, 3, 0, 1, 1],
113 |             [0, 1, 1, 4, 0],
114 |             [2, 5, 5, 2, 2],
115 |             [0, 1, 2, 3, 9]
116 |         ], dtype=float)
117 | 
118 |         expected = np.array([
119 |             [1, 3, 4, 4, 7],
120 |             [2, 4, 3, 4, 5],
121 |             [2, 3, 4, 7, 4],
122 |             [4, 7, 8, 6, 6],
123 |             [4, 5, 7, 9, 15]
124 |         ], dtype=float)
125 | 
126 |         result = shortest_path_distances(data)
127 |         npt.assert_equal(result, expected)
128 | 
129 |         result = shortest_path_distances(data[:3, :])
130 |         npt.assert_equal(result, expected[:3, :])
131 | 
132 |         result = shortest_path_distances(data[:, :3])
133 |         npt.assert_equal(result, expected[:, :3])
134 | 
135 |     def test_shortest_path(self):
136 |         data = np.array([
137 |             [1, 2, 1, 0, 3],
138 |             [1, 3, 3, 1, 1],
139 |             [4, 3, 8, 4, 0],
140 |             [2, 2, 5, 2, 5],
141 |             [0, 1, 1, 3, 2],
142 |             [0, 1, 1, 5, 9]
143 |         ], dtype=float)
144 | 
145 |         result = shortest_path(data)
146 |         npt.assert_equal(result, [[0, 0], [1, 0], [2, 1], [3, 1], [4, 2], [4, 3], [5, 4]])
147 | 
148 |     def test_sliding_min(self):
149 |         data = np.array([1, 2, 3, 4, 5, 6, 7, 8])
150 |         npt.assert_equal(
151 |             sliding_min(data, 3),
152 |             [1, 2, 3, 4, 5, 6]
153 |         )
154 | 
155 |         data = np.array([8, 7, 6, 5, 4, 3, 2, 1])
156 |         npt.assert_equal(
157 |             sliding_min(data, 3),
158 |             [6, 5, 4, 3, 2, 1]
159 |         )
160 | 
161 |         data = np.array([8, 3, 4, 0, 6, 1, 1, 1, 2, 7, 6, 4, 3, 4])
162 |         npt.assert_equal(
163 |             sliding_min(data, 3),
164 |             [3, 0, 0, 0, 1, 1, 1, 1, 2, 4, 3, 3]
165 |         )
166 | 
167 |     def test_sliding_max(self):
168 |         data = np.array([1, 2, 3, 4, 5, 6, 7, 8])
169 |         npt.assert_equal(
170 |             sliding_max(data, 3),
171 |             [3, 4, 5, 6, 7, 8]
172 |         )
173 | 
174 |         data = np.array([8, 7, 6, 5, 4, 3, 2, 1])
175 |         npt.assert_equal(
176 |             sliding_max(data, 3),
177 |             [8, 7, 6, 5, 4, 3]
178 |         )
179 | 
180 |         data = np.array([8, 3, 4, 0, 6, 1, 1, 1, 2, 7, 6, 4, 3, 4])
181 |         npt.assert_equal(
182 |             sliding_max(data, 3),
183 |             [8, 4, 6, 6, 6, 1, 2, 7, 7, 7, 6, 4]
184 |         )
185 | 
186 |     def test_sliding_window_view(self):
187 |         data = np.array([1, 2, 3, 4, 5, 6, 7, 8])
188 |         npt.assert_equal(
189 |             sliding_window_view(data, [3]),
190 |             [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, 8]]
191 |         )
192 | 
193 |         npt.assert_equal(
194 |             sliding_window_view(data, [3], step=[2]),
195 |             [[1, 2, 3], [3, 4, 5], [5, 6, 7]]
196 |         )
197 | 
198 |         data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
199 |         npt.assert_equal(
200 |             sliding_window_view(data, [2, 2]),
201 |             [[[[1, 2], [4, 5]], [[2, 3], [5, 6]]], [[[4, 5], [7, 8]], [[5, 6], [8, 9]]]]
202 |         )
203 | 
204 |         npt.assert_equal(
205 |             sliding_window_view(data, [1, 3], step=[2, 1]),
206 |             [[[[1, 2, 3]]], [[[7, 8, 9]]]]
207 |         )
208 | 


--------------------------------------------------------------------------------
/distancematrix/tests/test_valmod.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | import numpy as np
  4 | from unittest import TestCase
  5 | import numpy.testing as npt
  6 | 
  7 | from distancematrix.valmod import _find_all_motifs_full_matrix_iteration
  8 | from distancematrix.valmod import LowerBoundEntry
  9 | from distancematrix.generator.znorm_euclidean import ZNormEuclidean
 10 | 
 11 | 
 12 | class TestValmod(TestCase):
 13 |     def _test_find_all_motifs_full_matrix_iteration(self, data, m, lb_list_size):
 14 |         dist_gen = ZNormEuclidean(0.).prepare(m, data)
 15 | 
 16 |         calc_lb_lists, calc_motif_idxs = _find_all_motifs_full_matrix_iteration(dist_gen, lb_list_size, int(np.ceil(m / 2)))
 17 |         bf_lb_lists, bf_motif_idxs = bruteforce_full_matrix_iteration(data, m, lb_list_size)
 18 | 
 19 |         npt.assert_equal(set(bf_motif_idxs), set(calc_motif_idxs))
 20 | 
 21 |         npt.assert_equal(len(bf_lb_lists), len(calc_lb_lists))
 22 |         for iteration, (bf_lb_list, calc_lb_list) in enumerate(zip(bf_lb_lists, calc_lb_lists)):
 23 |             # Ensure lower bounds match
 24 |             bf_lower_bounds = [e.lower_bound_base for e in bf_lb_list]
 25 |             calc_lower_bounds = [e.lower_bound_base for e in calc_lb_list]
 26 | 
 27 |             npt.assert_allclose(bf_lower_bounds, calc_lower_bounds, err_msg="Mismatch for iteration " + str(iteration))
 28 | 
 29 |             if len(bf_lower_bounds) == 0:
 30 |                 continue
 31 | 
 32 |             # Since multiple entries may have the same lower bound for different dot products: sort again
 33 |             bf_lb_list.sort(key=lambda e: (e.lower_bound_base, e.dot_prod, e.q_index))
 34 |             calc_lb_list.sort(key=lambda e: (e.lower_bound_base, e.dot_prod, e.q_index))
 35 |             lists_match_upto = bf_lower_bounds.index(bf_lower_bounds[-1])
 36 | 
 37 |             npt.assert_allclose(
 38 |                 [e.dot_prod for e in bf_lb_list[:lists_match_upto]],
 39 |                 [e.dot_prod for e in calc_lb_list[:lists_match_upto]])
 40 | 
 41 |             npt.assert_equal(
 42 |                 [(e.q_index, e.s_index) for e in bf_lb_list[:lists_match_upto]],
 43 |                 [(e.q_index, e.s_index) for e in calc_lb_list[:lists_match_upto]])
 44 | 
 45 |             for entry in calc_lb_list[lists_match_upto:]:
 46 |                 subseq_1 = data[entry.q_index: entry.q_index + m ]
 47 |                 subseq_2 = data[entry.s_index: entry.s_index + m]
 48 |                 npt.assert_almost_equal(np.sum(subseq_1 * subseq_2), entry.dot_prod)
 49 | 
 50 |     def test_find_all_motifs_full_matrix_iteration_normal_data(self):
 51 |         # Random data, 20 points
 52 |         data = np.array(
 53 |             [-1.61, -0.43, -0.43, 0.82, 0.42, 1.58, -0.46, 1.41, 1.31,
 54 |              -0.13, -0.05, 0.59, 1.76, -0.43, -0.14, -0.14, 1.07, 1.1, 0.84, -1.49])
 55 | 
 56 |         self._test_find_all_motifs_full_matrix_iteration(data, 4, 1)
 57 |         self._test_find_all_motifs_full_matrix_iteration(data, 4, 5)
 58 | 
 59 |         # Due to the large subseq length, some lower bound arrays will be empty
 60 |         self._test_find_all_motifs_full_matrix_iteration(data, 10, 5)
 61 | 
 62 |     # Because the division by zero can result in inf or -inf, results for the lower bound are not deterministic,
 63 |     # which is a pain to test. Behavior should be correct though.
 64 |     @unittest.skip("VALMOD: Flat signals have undefined lower bounds.")
 65 |     def test_find_all_motifs_full_matrix_iteration_data_with_flats(self):
 66 |         # Random data, 20 points, with flat signals
 67 |         data = np.array(
 68 |             [-1.61, -0.43, -0.43, -0.43, -0.43, 1.58, -0.46, 1.41, 1.31,
 69 |              -0.13, -0.05, 0.59, 1.76, -0.43, 0.84, 0.84, 0.84, 0.84, 0.84, -1.49])
 70 | 
 71 |         self._test_find_all_motifs_full_matrix_iteration(data, 4, 1)
 72 |         self._test_find_all_motifs_full_matrix_iteration(data, 4, 5)
 73 | 
 74 |         # Due to the large subseq length, some lower bound arrays will be empty
 75 |         self._test_find_all_motifs_full_matrix_iteration(data, 10, 5)
 76 | 
 77 | 
 78 | def bruteforce_full_matrix_iteration(series, subseq_length, lb_list_size):
 79 |     """
 80 |     Brute force implementation of _find_all_motifs_full_matrix_iteration
 81 | 
 82 |     :param series: 1D series
 83 |     :param subseq_length: subsequence length to use
 84 |     :param lb_list_size: max size of lower bound lists
 85 |     :return: tuple of: list of all lb_lists per column, indices of the best motif for the entire distance matrix
 86 |     """
 87 |     num_subseq = series.shape[0] - subseq_length + 1
 88 |     triv_match_buffer = int(np.ceil(subseq_length / 2))
 89 | 
 90 |     means = np.array([np.mean(series[i: i + subseq_length]) for i in range(num_subseq)])
 91 |     stds = np.array([np.std(series[i: i + subseq_length]) for i in range(num_subseq)])
 92 | 
 93 |     # Finding the best motif
 94 |     motif_dist2 = np.inf
 95 |     motif_idxs = None
 96 | 
 97 |     # Lower bounds
 98 |     lb_lists = []
 99 | 
100 |     for s_i in range(num_subseq):
101 |         subseq_1 = series[s_i: s_i + subseq_length]
102 | 
103 |         lb_list = []
104 | 
105 |         for q_i in range(num_subseq):
106 |             # Avoid trivial match
107 |             if abs(s_i - q_i) <= triv_match_buffer:
108 |                 continue
109 | 
110 |             subseq_2 = series[q_i: q_i + subseq_length]
111 |             dot_prod = np.sum(subseq_1 * subseq_2)
112 | 
113 |             # Calculate z-normalised distance (squared)
114 |             if stds[s_i] == 0 and stds[q_i] == 0:
115 |                 z_dist2 = 0
116 |             elif stds[s_i] == 0 or stds[q_i] == 0:
117 |                 z_dist2 = np.square(subseq_length)
118 |             else:
119 |                 z_dist2 = 2 * (subseq_length - (dot_prod - subseq_length * means[s_i] * means[q_i]) /
120 |                                (stds[s_i] * stds[q_i]))
121 | 
122 |             if z_dist2 < motif_dist2:
123 |                 motif_dist2 = z_dist2
124 |                 motif_idxs = (s_i, q_i)
125 | 
126 |             # Calculate lower bound
127 |             if stds[s_i] != 0:
128 |                 std_q = stds[q_i]
129 |                 lower_bound_q = np.clip(
130 |                     (dot_prod / subseq_length - means[s_i] * means[q_i]) / (stds[s_i] * std_q), 0, 1)
131 |                 lower_bound = np.sqrt(subseq_length * (1 - np.square(lower_bound_q))) * stds[s_i]
132 | 
133 |                 lb_list.append(LowerBoundEntry(q_i, s_i, lower_bound, dot_prod))
134 | 
135 |         # Trim lower bound lists
136 |         lb_lists.append(sorted(lb_list, key=lambda e: e.lower_bound_base)[:lb_list_size])
137 | 
138 |     return lb_lists, motif_idxs


--------------------------------------------------------------------------------
/distancematrix/util.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | 
  4 | 
  5 | def diag_length(h, w, diagonal=0):
  6 |     """
  7 |     Returns the number of elements on the specified diagonal of a matrix with dimensions (h, w).
  8 | 
  9 |     :param h: int, height of the matrix
 10 |     :param w: int, width of the matrix
 11 |     :param diagonal: int, diagonal index of the matrix
 12 |     :return: a positive integer, zero if diagonal fall completely outside the matrix
 13 |     """
 14 |     if diagonal >= 0:
 15 |         return max(min(h, w - diagonal), 0)
 16 |     else:
 17 |         return max(min(w, h + diagonal), 0)
 18 | 
 19 | 
 20 | def diag_indices(h, w, diagonal=0):
 21 |     """
 22 |     Returns the indices of the elements on the specified diagonal of a matrix with dimensions (h, w).
 23 | 
 24 |     :param h: int, height of the matrix
 25 |     :param w: int, width of the matrix
 26 |     :param diagonal: int, diagonal index of the matrix
 27 |     :return: a tuple of ranges, serving as indices of the elements
 28 |     """
 29 |     dl = diag_length(h, w, diagonal)
 30 | 
 31 |     if diagonal >= 0:
 32 |         return range(0, dl), range(diagonal, diagonal + dl)
 33 |     else:
 34 |         return range(-diagonal, -diagonal + dl), range(0, dl)
 35 | 
 36 | 
 37 | def diag_indices_of(array, diagonal=0):
 38 |     """
 39 |     Returns the indices of the elements on the specified diagonal of the given matrix.
 40 | 
 41 |     :param array: 2D array
 42 |     :param diagonal: int, diagonal index of the matrix
 43 |     :return: a tuple of ranges, serving as indices of the elements
 44 |     """
 45 |     if array.ndim != 2:
 46 |         raise RuntimeError("array should be 2D")
 47 | 
 48 |     return diag_indices(array.shape[0], array.shape[1], diagonal)
 49 | 
 50 | 
 51 | def cut_indices_of(array, cut):
 52 |     """
 53 |     Calculates the indices of the elements on the given cut for the given matrix.
 54 |     Where a diagonal runs from top left to bottom right, a cut runs from bottom left to top right.
 55 | 
 56 |     :param array: 2D array
 57 |     :param cut: index of the cut (cut 0 is the single element of the top left)
 58 |     :return: the indices to retrieve the cut
 59 |     """
 60 |     if array.ndim != 2:
 61 |         raise RuntimeError("array should be 2D")
 62 | 
 63 |     h, w = array.shape
 64 | 
 65 |     if cut < 0 or cut >= w + h - 1:
 66 |         return range(0, 0), range(0, 0)
 67 | 
 68 |     cut_length = cut + 1 - max(0, cut - h + 1) - max(0, cut - w + 1)
 69 | 
 70 |     if cut < h:
 71 |         return range(cut, cut - cut_length, -1), range(0, cut_length)
 72 |     else:
 73 |         return range(h-1, h-cut_length-1, -1), range(cut - h + 1, cut - h + 1 + cut_length)
 74 | 
 75 | 
 76 | def shortest_path_distances(cost_array):
 77 |     """
 78 |     Creates a new array of the same shape, where each entry contains the lowest sum of elements on the path
 79 |     from (0, 0) to that entry. Steps in the path can go horizontal, vertical and diagonal.
 80 | 
 81 |     :param cost_array: 2D array containing only positives
 82 |     :return: a new array
 83 |     """
 84 |     if cost_array.ndim != 2:
 85 |         raise RuntimeError("array should be 2D")
 86 | 
 87 |     dist = np.empty_like(cost_array, dtype=float)
 88 | 
 89 |     # Borders can only come from previous step
 90 |     dist[0, :] = np.cumsum(cost_array[0, :])
 91 |     dist[:, 0] = np.cumsum(cost_array[:, 0])
 92 | 
 93 |     # This operation could be vectorised by calculating one cut at a time, but the index juggling becomes quite
 94 |     # complex for rectangular arrays.
 95 |     for c in range(1, dist.shape[0]):
 96 |         for r in range(1, dist.shape[1]):
 97 |             dist[c, r] = min(dist[c-1, r], dist[c, r-1], dist[c-1, r-1]) + cost_array[c, r]
 98 | 
 99 |     return dist
100 | 
101 | 
102 | def shortest_path(cost_array):
103 |     """
104 |     Finds the shortest (= least summed cost) path from the top left of the array to the bottom right.
105 | 
106 |     :param cost_array: 2D array containing only positives
107 |     :return: array of indices, starting from the top left (index: [0, 0])
108 |     """
109 |     if cost_array.ndim != 2:
110 |         raise RuntimeError("array should be 2D")
111 | 
112 |     row = cost_array.shape[0] - 1
113 |     col = cost_array.shape[1] - 1
114 | 
115 |     walk_dist_matrix = shortest_path_distances(cost_array)
116 | 
117 |     path = [(row, col)]
118 |     while row != 0 or col != 0:
119 |         best_cost = np.inf
120 |         if row != 0 and col != 0:
121 |             delta_step = (-1, -1)
122 |             best_cost = walk_dist_matrix[row - 1, col - 1]
123 |         if row != 0 and walk_dist_matrix[row - 1, col] < best_cost:
124 |             delta_step = (-1, 0)
125 |             best_cost = walk_dist_matrix[row - 1, col]
126 |         if col != 0 and walk_dist_matrix[row, col -1] < best_cost:
127 |             delta_step = (0, -1)
128 | 
129 |         row += delta_step[0]
130 |         col += delta_step[1]
131 |         path.append((row, col))
132 | 
133 |     return path[::-1] # TODO: other indices order
134 | 
135 | 
136 | def sliding_min(array, window_size):
137 |     #result = np.empty(array.shape[0] - window_size + 1, array.dtype)
138 |     #d = collections.deque()  # d is always sorted
139 |     #
140 |     #for i in range(array.shape[0]):
141 |     #    while len(d) > 0 and d[-1][0] >= array[i]:
142 |     #        d.pop()
143 |     #    d.append((array[i], i))
144 |     #
145 |     #    if d[0][1] <= i - window_size:
146 |     #        d.popleft()
147 |     #
148 |     #    if i >= window_size - 1:
149 |     #        result[i - window_size + 1] = d[0][0]
150 |     #
151 |     #return result
152 | 
153 |     # Pandas has implemented this in native code, speedup of about 10 times
154 |     return pd.Series(array).rolling(window_size).min().values[window_size - 1:]
155 | 
156 | 
157 | def sliding_max(array, window_size):
158 |     return pd.Series(array).rolling(window_size).max().values[window_size - 1:]
159 | 
160 | 
161 | def sliding_window_view(x, shape, step=None, subok=False, writeable=False):
162 |     """
163 |     Create sliding window views of the N dimensions array with the given window
164 |     shape. Window slides across each dimension of `x` and provides subsets of `x`
165 |     at any window position.
166 | 
167 |     ``sliding_window_view`` create sliding window views of the N dimensions array
168 |     with the given window shape and its implementation based on ``as_strided``.
169 |     Please note that if writeable set to False, the return is views, not copies
170 |     of array. In this case, write operations could be unpredictable, so the return
171 |     views is readonly. Bear in mind, return copies (writeable=True), could possibly
172 |     take memory multiple amount of origin array, due to overlapping windows.
173 | 
174 |     For some cases, there may be more efficient approaches
175 | 
176 |     :param x: ndarray
177 |         Array to create sliding window views.
178 |     :param shape: sequence of int
179 |         The shape of the window. Must have same length as number of input array dimensions.
180 |     :param step: sequence of int, optional
181 |         The steps of window shifts for each dimension on input array at a time.
182 |         If given, must have same length as number of input array dimensions.
183 |         Defaults to 1 on all dimensions.
184 |     :param subok: bool, optional
185 |         If True, then sub-classes will be passed-through, otherwise the returned
186 |         array will be forced to be a base-class array (default).
187 |     :param writeable: bool, optional
188 |         If set to False, the returned array will always be readonly view.
189 |         Otherwise it will return writable copies(see Notes).
190 |     :return: ndarray
191 |         Sliding window views (or copies) of `x`. view.shape = (x.shape - shape) // step + 1
192 |     """
193 | 
194 |     # MIT License
195 |     # Copyright (c) 2018 Fanjin Zeng
196 |     # This work is licensed under the terms of the MIT license, see <https://opensource.org/licenses/MIT>.
197 |     # https://gist.github.com/Fnjn/b061b28c05b5b0e768c60964d2cafa8d
198 | 
199 |     # first convert input to array, possibly keeping subclass
200 |     x = np.array(x, copy=False, subok=subok)
201 | 
202 |     try:
203 |         shape = np.array(shape, int)
204 |     except:
205 |         raise TypeError('`shape` must be a sequence of integer')
206 |     else:
207 |         if shape.ndim > 1:
208 |             raise ValueError('`shape` must be one-dimensional sequence of integer')
209 |         if len(x.shape) != len(shape):
210 |             raise ValueError("`shape` length doesn't match with input array dimensions")
211 |         if np.any(shape <= 0):
212 |             raise ValueError('`shape` cannot contain non-positive value')
213 | 
214 |     if step is None:
215 |         step = np.ones(len(x.shape), np.intp)
216 |     else:
217 |         try:
218 |             step = np.array(step, np.intp)
219 |         except:
220 |             raise TypeError('`step` must be a sequence of integer')
221 |         else:
222 |             if step.ndim > 1:
223 |                 raise ValueError('`step` must be one-dimensional sequence of integer')
224 |             if len(x.shape) != len(step):
225 |                 raise ValueError("`step` length doesn't match with input array dimensions")
226 |             if np.any(step <= 0):
227 |                 raise ValueError('`step` cannot contain non-positive value')
228 | 
229 |     o = (np.array(x.shape) - shape) // step + 1  # output shape
230 |     if np.any(o <= 0):
231 |         raise ValueError('window shape cannot larger than input array shape')
232 | 
233 |     strides = x.strides
234 |     view_strides = strides * step
235 | 
236 |     view_shape = np.concatenate((o, shape), axis=0)
237 |     view_strides = np.concatenate((view_strides, strides), axis=0)
238 |     view = np.lib.stride_tricks.as_strided(x, view_shape, view_strides, subok=subok, writeable=writeable)
239 | 
240 |     if writeable:
241 |         return view.copy()
242 |     else:
243 |         return view
244 | 


--------------------------------------------------------------------------------
/distancematrix/valmod.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from distancematrix.generator.znorm_euclidean import ZNormEuclidean
  3 | import time
  4 | 
  5 | 
  6 | def find_variable_length_motifs(series, min_motif_length, max_motif_length, cache_size=3, noise_std=0.):
  7 |     """
  8 |     Finds the top motif for each subsequence length in the given range. The top motif is defined as the
  9 |     subsequence (for a given length) for which the z-normalized euclidean distance is minimal, excluding any
 10 |     trivial matches.
 11 | 
 12 |     This method implements the VALMOD algorithm described in "Matrix Profile X: VALMOD - Scalable Discovery of
 13 |     Variable-Length Motifs in Data Series" by M. Linardi et al.
 14 | 
 15 |     :param series: one dimensional time series
 16 |     :param min_motif_length: minimum motif length
 17 |     :param max_motif_length: maximum motif length (inclusive)
 18 |     :param cache_size: number of entries kept in memory per subsequence (can only affect performance, default should
 19 |       be okay)
 20 |     :param noise_std: standard deviation of noise on the signal, used for correcting the z-normalized euclidean distance
 21 |     :return: a list of tuples of length (max_motif_length - min_motif_length + 1), containing the indices of the
 22 |       motif and its match
 23 |     """
 24 | 
 25 |     if series.ndim != 1:
 26 |         raise RuntimeError("Series should be 1D")
 27 |     if min_motif_length < 2 or not np.isfinite(min_motif_length):
 28 |         raise RuntimeError("Invalid min_motif_length: " + str(min_motif_length))
 29 |     if max_motif_length < min_motif_length or not np.isfinite(max_motif_length):
 30 |         raise RuntimeError("Invalid max_motif_length: " + str(min_motif_length))
 31 |     if cache_size < 0:
 32 |         raise RuntimeError("Invalid p: " + str(min_motif_length))
 33 | 
 34 |     # Stores for each motif length a tuple of the indices of the motif
 35 |     motifs_found = []
 36 | 
 37 |     dist_generator = ZNormEuclidean(noise_std=noise_std).prepare(min_motif_length, series)
 38 | 
 39 |     # Full distance matrix calculation for first motif length
 40 |     lb_lists, best_motif_idxs = _find_all_motifs_full_matrix_iteration(dist_generator, cache_size,
 41 |                                                                        int(np.ceil(min_motif_length / 2)))
 42 |     motifs_found.append(best_motif_idxs)
 43 | 
 44 |     # For all following motif lengths: try exploiting the lower bound to avoid calculations
 45 |     for m in range(min_motif_length + 1, max_motif_length + 1):
 46 |         # Note: might be possible to simply update the existing generator?
 47 |         dist_generator = ZNormEuclidean(noise_std=noise_std).prepare(m, series)
 48 | 
 49 |         num_subseq = len(series) - m + 1
 50 |         trivial_match_buffer = int(np.ceil(min_motif_length / 2))
 51 | 
 52 |         best_candidate_motif_distance = np.inf
 53 |         best_candidate_motif_idxs = None
 54 |         invalid_subseq_idxs = []  # Indices of subsequences for which lower bound pruning did not work
 55 |         invalid_subseq_lbs = []  # Lower bound for the match on subsequences where pruning did not work
 56 | 
 57 |         for i in range(num_subseq):
 58 |             subseq_lb_list = lb_lists[i]
 59 | 
 60 |             best_match_entry = None
 61 |             best_match_distance = np.inf
 62 |             subseq_lower_bound = -1
 63 | 
 64 |             for entry in subseq_lb_list:
 65 |                 # As motif length grows, some lowerbound entries may have become trivial matches
 66 |                 if abs(entry.q_index - i) <= trivial_match_buffer:
 67 |                     continue
 68 | 
 69 |                 # Or they may no longer contain valid indices
 70 |                 if entry.q_index >= num_subseq or entry.s_index >= num_subseq:
 71 |                     continue
 72 | 
 73 |                 entry.dot_prod += series[entry.q_index + m - 1] * series[entry.s_index + m - 1]
 74 | 
 75 |                 # Calculate actual distance for these indices
 76 |                 dist = dist_generator.calc_single(entry.q_index, entry.s_index, dot_prod=entry.dot_prod)
 77 |                 if dist < best_match_distance:
 78 |                     best_match_distance = dist
 79 |                     best_match_entry = entry
 80 | 
 81 |                 # Calculate lower bound using last (highest) non-trivial entry
 82 |                 # (all previous entries should have lower bound)
 83 |                 subseq_lower_bound = max(subseq_lower_bound, entry.lower_bound_base / dist_generator.std_s[i])
 84 | 
 85 |             # if minimum of actual distances < largest lower bound
 86 |             if best_match_distance < subseq_lower_bound:
 87 |                 # best match for this subseq found
 88 |                 if best_match_distance < best_candidate_motif_distance:
 89 |                     best_candidate_motif_distance = best_match_distance
 90 |                     best_candidate_motif_idxs = (best_match_entry.q_index, best_match_entry.s_index)
 91 |             else:
 92 |                 # best match may be outside the lowerbound entries, but we have a lower bound for its distance
 93 |                 invalid_subseq_idxs.append(i)
 94 |                 invalid_subseq_lbs.append(subseq_lower_bound)
 95 | 
 96 |         # If the best candidate motif has a lower distance than all lower bounds, we have the motif
 97 |         if best_candidate_motif_idxs and best_candidate_motif_distance <= np.min(invalid_subseq_lbs):
 98 |             motifs_found.append(best_candidate_motif_idxs)
 99 |             continue
100 | 
101 |         # if not, we need to calculate all those whose lower bound was lower than the candidate motif to be sure
102 |         if len(invalid_subseq_idxs) > num_subseq * np.log(num_subseq):
103 |             # If too many columns have to be recalculated, recalculate the entire matrix and update the lb_lists.
104 |             # A clear boundary for when this should happen isn't available,
105 |             # different strategies might affect performance (but not correctness)
106 |             lb_lists, best_candidate_motif_idxs = _find_all_motifs_full_matrix_iteration(
107 |                 dist_generator, cache_size, trivial_match_buffer)
108 |         else:
109 |             # Recalculate all columns that might have a better match
110 |             for invalid_idx, lower_bound in zip(invalid_subseq_idxs, invalid_subseq_lbs):
111 |                 if lower_bound < best_candidate_motif_distance:
112 |                     distances = dist_generator.calc_column(invalid_idx)
113 |                     trivial_match_start = max(0, invalid_idx - trivial_match_buffer)
114 |                     trivial_match_end = invalid_idx + trivial_match_buffer + 1
115 |                     distances[trivial_match_start: trivial_match_end] = np.inf
116 |                     best_match_distance = np.min(distances)
117 | 
118 |                     if best_match_distance < best_candidate_motif_distance:
119 |                         best_candidate_motif_distance = best_match_distance
120 |                         best_candidate_motif_idxs = (np.argmin(distances), invalid_idx)
121 | 
122 |         # We now have the best motif for sure
123 |         motifs_found.append(best_candidate_motif_idxs)
124 | 
125 |     return motifs_found
126 | 
127 | 
128 | def _find_all_motifs_full_matrix_iteration(dist_generator, lb_list_size, trivial_match_buffer):
129 |     """
130 |     Calculates the entire distance matrix using the provided distance generator.
131 |     For each column, lower bounds are calculated (as described in the VALMOD paper) and the lb_list_size best entries
132 |     are stored (ordered by ascending distance).
133 | 
134 |     :param dist_generator: z-normalized distance generator
135 |     :param lb_list_size: max number of lower bound entries to store
136 |     :param trivial_match_buffer: trivial match buffer, the lb_list will not contain any entries that fall inside
137 |       this buffer
138 |     :return: tuple of: list of all lb_lists per column, indices of the best motif for the entire distance matrix
139 |     """
140 |     num_subseq = dist_generator.mu_s.view.shape[0]
141 |     subseq_length = dist_generator.m
142 | 
143 |     lb_lists = []
144 |     best_motif_dist = np.Inf
145 |     best_motif_idxs = None
146 | 
147 |     for column_idx in range(num_subseq):
148 |         distances = dist_generator.calc_column(column_idx)
149 | 
150 |         # Find best match, while avoiding trivial matches
151 |         trivial_match_start = max(0, column_idx - trivial_match_buffer)
152 |         trivial_match_end = column_idx + trivial_match_buffer + 1
153 |         distances[trivial_match_start: trivial_match_end] = np.inf
154 | 
155 |         best_dist = np.min(distances)
156 |         if best_dist < best_motif_dist:
157 |             best_motif_dist = best_dist
158 |             best_motif_idxs = (np.argmin(distances), column_idx)
159 | 
160 |         # Determine lower boundaries
161 |         dotprod = dist_generator.prev_calc_column_dot_prod
162 |         mu = dist_generator.mu_s.view
163 |         std = dist_generator.std_s.view
164 | 
165 |         if std[column_idx] == 0:
166 |             # In case one of the stds is zero, there is no defined formula for a lower bound (not found yet at least).
167 |             # So we simply return no empty bounds, so this column will always be calculated.
168 |             lb_list = []
169 |             # We can get away with only checking std[column_idx] and not every entry of std (in the else clause):
170 |             # if a lower bound is underestimated, it can only result in unneeded calculation, which is ok
171 |             # if a lower bound is overestimated, a motif for a stable signal may go undetected, but since the entire
172 |             # column will be calculated, it will be found this way.
173 |         else:
174 |             lower_bound_q = np.clip((dotprod / subseq_length - mu * mu[column_idx]) / (std * std[column_idx]), 0, 1)
175 |             lower_bound_base = np.sqrt(subseq_length * (1 - np.square(lower_bound_q))) * std[column_idx]
176 |             lower_bound_base[trivial_match_start: trivial_match_end] = np.inf
177 | 
178 |             closest_indices = np.argsort(lower_bound_base)[:lb_list_size]
179 | 
180 |             # Cover corner case where there may not be enough non-trivial matches to fill the lb_list
181 |             if lower_bound_base[closest_indices[-1]] == np.inf:
182 |                 first_inf_idx = np.searchsorted(lower_bound_base[closest_indices], np.inf)
183 |                 closest_indices = closest_indices[:first_inf_idx]
184 | 
185 |             lb_list = []
186 |             for i in range(len(closest_indices)):
187 |                 lb_list.append(LowerBoundEntry(closest_indices[i], column_idx, lower_bound_base[closest_indices[i]],
188 |                                                dotprod[closest_indices[i]]))
189 |         lb_lists.append(lb_list)
190 | 
191 |     return lb_lists, best_motif_idxs
192 | 
193 | 
194 | class LowerBoundEntry:
195 |     def __init__(self, q_index, s_index, lower_bound_base, dot_prod):
196 |         self.q_index = q_index
197 |         self.s_index = s_index
198 |         self.lower_bound_base = lower_bound_base
199 |         self.dot_prod = dot_prod
200 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _autosummary/
2 | _build/
3 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | 
14 | import os
15 | import sys
16 | sys.path.insert(0, os.path.abspath('..'))
17 | 
18 | 
19 | # -- Project information -----------------------------------------------------
20 | 
21 | project = 'Series Distance Matrix'
22 | copyright = '2021, Dieter De Paepe'
23 | author = 'Dieter De Paepe'
24 | 
25 | 
26 | # -- General configuration ---------------------------------------------------
27 | 
28 | # Add any Sphinx extension module names here, as strings. They can be
29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
30 | # ones.
31 | extensions = [
32 |   'sphinx.ext.githubpages', # Create a .no_jekyll file in output
33 |   'autoapi.extension', # Automatically generate an API overview
34 |   'nbsphinx', # Convert jupyter notebooks
35 |   'myst_parser' # Accept markdown files
36 | ]
37 | autoapi_type = 'python'
38 | autoapi_dirs = ['../distancematrix']
39 | autoapi_options = [ 'members', 'undoc-members', 'show-inheritance', 'show-module-summary', 'imported-members', ]
40 | autoapi_ignore = ['*test*']
41 | 
42 | # Add any paths that contain templates here, relative to this directory.
43 | templates_path = ['_templates']
44 | 
45 | # List of patterns, relative to source directory, that match files and
46 | # directories to ignore when looking for source files.
47 | # This pattern also affects html_static_path and html_extra_path.
48 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
49 | 
50 | 
51 | # -- Options for HTML output -------------------------------------------------
52 | 
53 | # The theme to use for HTML and HTML Help pages.  See the documentation for
54 | # a list of builtin themes.
55 | #
56 | # html_theme = 'alabaster'
57 | html_theme = 'sphinx_rtd_theme'  # https://sphinx-themes.org/sample-sites/sphinx-rtd-theme/
58 | 
59 | # Add any paths that contain custom static files (such as style sheets) here,
60 | # relative to this directory. They are copied after the builtin static files,
61 | # so a file named "default.css" will overwrite the builtin "default.css".
62 | html_static_path = ['_static']


--------------------------------------------------------------------------------
/docs/doc_environment.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | # Create this anaconda environment using the following command:
 3 | #   conda env create -f doc_environment.yml
 4 | 
 5 | name: doc_env
 6 | 
 7 | channels:
 8 |   - conda-forge
 9 |   - defaults
10 | 
11 | dependencies:
12 |   - python>=3.6
13 |   - sphinx-autoapi
14 |   - myst-parser
15 |   - sphinx_rtd_theme
16 |   - nbsphinx  # Enable notebook conversion
17 |   - ipython  # Includes ipython lexer for converting notebooks
18 | 
19 | 


--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Examples
3 | ========
4 | 
5 | .. toctree::
6 |     :maxdepth: 2
7 |     :glob:
8 | 
9 |     Example_matrix_profile.ipynb


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Welcome to Series Distance Matrix's documentation!
 3 | ==================================================
 4 | 
 5 | 
 6 | .. toctree::
 7 |    :hidden:
 8 |    :maxdepth: 2
 9 |    :caption: Contents:
10 | 
11 |    examples.rst
12 |    install.md
13 | 
14 | 
15 | 
16 | Indices and tables
17 | ==================
18 | 
19 | * :ref:`genindex`
20 | * :ref:`modindex`
21 | 


--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
 1 | # Installing
 2 | 
 3 | Using pip:
 4 | ```bash
 5 | pip install seriesdistancematrix
 6 | ```
 7 | 
 8 | Alternatively, clone this repositor and run:
 9 | ```bash
10 | python setup.py clean build install
11 | ```
12 | 
13 | For local development (this allows you to edit code without having to reinstall the library):
14 | ```bash
15 | python setup.py develop
16 | ```


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from setuptools import find_packages
 3 | 
 4 | def readme():
 5 |     with open("README.md") as readme:
 6 |         return readme.read()
 7 | 
 8 | setup(name='seriesdistancematrix',
 9 |       version='0.3.1', # Also update distancematrix/__init__.py!
10 |       description=(
11 |         'Flexible time series analysis library'
12 |         'implementing Matrix Profile related functionality.'
13 |       ),
14 |       long_description_content_type="text/markdown",
15 |       long_description=readme(),
16 |       keywords=[
17 |         'time series',
18 |         'matrix profile',
19 |         'contextual matrix profile',
20 |         'radius profile',
21 |         'series distance matrix',
22 |         'motif',
23 |         'discord'
24 |       ],
25 |       url='https://github.com/predict-idlab/seriesdistancematrix/',
26 |       project_urls={
27 |         'Documentation': 'https://predict-idlab.github.io/seriesdistancematrix/',
28 |         'Source': 'https://github.com/predict-idlab/seriesdistancematrix/'
29 |       },
30 |       author='Dieter De Paepe',
31 |       author_email='dieter.depaepe@gmail.com',
32 |       license='MIT',
33 |       packages=find_packages(exclude=["distancematrix.tests*"]),
34 |       classifiers=(
35 |           'License :: OSI Approved :: MIT License',
36 |           'Intended Audience :: Science/Research',
37 |           'Intended Audience :: Developers',
38 |           'Topic :: Software Development',
39 |           'Topic :: Scientific/Engineering',
40 |           'Programming Language :: Python',
41 |           'Programming Language :: Python :: 3',
42 |           'Operating System :: OS Independent'
43 |       ),
44 |       install_requires=['numpy', 'scipy', 'pandas']
45 | )
46 | 


--------------------------------------------------------------------------------
/test_environment.yml:
--------------------------------------------------------------------------------
1 | name: test_env
2 | 
3 | dependencies:
4 |   - python>=3.6
5 |   - nose
6 |   - numpy
7 |   - scipy
8 |   - pandas


--------------------------------------------------------------------------------