├── __init__.py
├── requirements
│   ├── requirements-dev.txt
│   ├── requirements-docs.txt
│   └── requirements.txt
├── setup.cfg
├── docs
│   ├── authors.rst
│   ├── changelog.rst
│   ├── imgs
│   │   ├── logo.jpg
│   │   └── fhp_logo.png
│   ├── license.rst
│   ├── _templates
│   │   └── module_functions_template.rst
│   ├── descriptions
│   │   ├── modules.rst
│   │   ├── lockstep_distances.rst
│   │   ├── modules
│   │   │   ├── tssearch_search.rst
│   │   │   └── tssearch_distances.rst
│   │   ├── segmentation_search.rst
│   │   └── elastic_distances.rst
│   ├── index.rst
│   └── conf.py
├── tssearch
│   ├── examples
│   │   ├── __init__.py
│   │   ├── ecg.pickle
│   │   └── ecg_example_data.py
│   ├── __init__.py
│   ├── search
│   │   ├── __init__.py
│   │   ├── segmentation.py
│   │   ├── query_search.py
│   │   └── search_utils.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── preprocessing.py
│   │   ├── distances_settings.py
│   │   ├── add_personal_distance.py
│   │   └── visualisation.py
│   └── distances
│       ├── __init__.py
│       ├── lockstep_utils.py
│       ├── time_distances.py
│       ├── distances.json
│       ├── compute_distance.py
│       ├── elastic_distances.py
│       ├── lockstep_distances.py
│       └── elastic_utils.py
├── CHANGELOG.rst
├── .gitattributes
├── tests
│   ├── test_segmentation.py
│   ├── search_example.py
│   ├── test_query_search.py
│   ├── test_distances.py
│   └── main_example.py
├── AUTHORS.rst
├── .flake8
├── LICENSE.txt
├── setup.py
├── .gitignore
├── pyproject.toml
├── README.md
└── .pre-commit-config.yaml
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from tssearch import *
--------------------------------------------------------------------------------
/requirements/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pre-commit >= 3.7.1
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
--------------------------------------------------------------------------------
/docs/authors.rst:
--------------------------------------------------------------------------------
1 | .. _authors:
2 | .. include:: ../AUTHORS.rst
--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | .. _changelog:
2 | ..
include:: ../CHANGELOG.rst -------------------------------------------------------------------------------- /requirements/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | jinja2 < 3.1.0 2 | Sphinx == 1.8.6 3 | -------------------------------------------------------------------------------- /tssearch/examples/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.examples.ecg_example_data import * 2 | -------------------------------------------------------------------------------- /docs/imgs/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fraunhoferportugal/tssearch/HEAD/docs/imgs/logo.jpg -------------------------------------------------------------------------------- /docs/imgs/fhp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fraunhoferportugal/tssearch/HEAD/docs/imgs/fhp_logo.png -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | ======= 4 | License 5 | ======= 6 | 7 | .. literalinclude:: ../LICENSE.txt -------------------------------------------------------------------------------- /tssearch/examples/ecg.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fraunhoferportugal/tssearch/HEAD/tssearch/examples/ecg.pickle -------------------------------------------------------------------------------- /tssearch/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.utils import * 2 | from tssearch.search import * 3 | from tssearch.distances import * 4 | from tssearch.examples import * 5 | -------------------------------------------------------------------------------- /tssearch/search/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.search.query_search import * 2 | from tssearch.search.segmentation import * 3 | from tssearch.search.search_utils import * 4 | -------------------------------------------------------------------------------- /tssearch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.utils.preprocessing import * 2 | from tssearch.utils.visualisation import * 3 | from tssearch.utils.distances_settings import * 4 | -------------------------------------------------------------------------------- /docs/_templates/module_functions_template.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: {{ fullname }} 2 | 3 | {% block functions %} 4 | 5 | .. autosummary:: 6 | {% for item in functions %} 7 | {{ item }} 8 | {%- endfor %} 9 | 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /docs/descriptions/modules.rst: -------------------------------------------------------------------------------- 1 | Module Reference 2 | ================ 3 | 4 | .. automodule:: tssearch 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. 
toctree::
10 |
11 |     modules/tssearch_search
12 |     modules/tssearch_distances
--------------------------------------------------------------------------------
/requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | # (Pseudo) Automatically generated by https://github.com/damnever/pigar.
2 | h5py >= 3.6.0
3 | matplotlib >= 3.5.0
4 | numba >= 0.54.1
5 | numpy >= 1.20.3
6 | pandas >= 1.3.4
7 | scipy >= 1.7.1
8 | seaborn >= 0.11.2
9 | setuptools >= 47.1.1
10 |
--------------------------------------------------------------------------------
/tssearch/examples/ecg_example_data.py:
--------------------------------------------------------------------------------
1 | import tssearch
2 | import pickle
3 | import numpy as np
4 |
5 |
6 | def load_ecg_example():
7 |
8 |     filename = tssearch.__path__[0] + "/examples/ecg.pickle"
9 |     with open(filename, "rb") as handle:
10 |         data = pickle.load(handle)
11 |
12 |     return data
13 |
--------------------------------------------------------------------------------
/tssearch/distances/__init__.py:
--------------------------------------------------------------------------------
1 | from tssearch.distances.lockstep_distances import *
2 | from tssearch.distances.elastic_distances import *
3 | from tssearch.distances.lockstep_utils import *
4 | from tssearch.distances.elastic_utils import *
5 | from tssearch.distances.time_distances import *
6 | from tssearch.distances.compute_distance import *
7 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Changelog
3 | =========
4 |
5 | Version 0.1.3
6 | =============
7 | - Fixed a bug on the setup.py to correctly build the PyPI package
8 | - Removed novainstrumentation from dependencies
9 | - Fixed a bug on TWED distance (`#7 `_)
10 |
11 | Version 0.1.0
12 | =============
13 |
14 | - Release of TSSEARCH with documentation
15 |
--------------------------------------------------------------------------------
/docs/descriptions/lockstep_distances.rst:
--------------------------------------------------------------------------------
1 | ==================
2 | Lockstep Distances
3 | ==================
4 |
5 | Distance measures that compare the :math:`i`-th point of one time series to the :math:`i`-th point of another are referred to as lock-step measures (e.g., the Euclidean distance and the other Lp norms).
6 | If the two series have different lengths, the shorter series is linearly interpolated so that both series have the same length before the distance is computed.
7 |
8 |
--------------------------------------------------------------------------------
/docs/descriptions/modules/tssearch_search.rst:
--------------------------------------------------------------------------------
1 | Search
2 | ======
3 |
4 | .. automodule:: tssearch.search
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 |
9 | query_search
10 | ------------
11 |
12 | .. automodule:: tssearch.search.query_search
13 |     :members:
14 |     :undoc-members:
15 |     :show-inheritance:
16 |
17 | segmentation
18 | ------------
19 |
20 | .. automodule:: tssearch.search.segmentation
21 |     :members:
22 |     :undoc-members:
23 |     :show-inheritance:
24 |
25 | utils
26 | -----
27 |
28 | ..
automodule:: tssearch.search.search_utils 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: -------------------------------------------------------------------------------- /docs/descriptions/modules/tssearch_distances.rst: -------------------------------------------------------------------------------- 1 | Distances 2 | ========= 3 | 4 | .. automodule:: tssearch.distances 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Lockstep Distances 10 | ------------------ 11 | 12 | .. automodule:: tssearch.distances.lockstep_distances 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | Elastic Distances 19 | ----------------- 20 | 21 | .. automodule:: tssearch.distances.elastic_distances 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | 27 | Time Distances 28 | -------------- 29 | 30 | .. automodule:: tssearch.distances.time_distances 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Basic .gitattributes for a python repo. 2 | 3 | # Source files 4 | # ============ 5 | *.pxd text diff=python 6 | *.py text diff=python 7 | *.py3 text diff=python 8 | *.pyw text diff=python 9 | *.pyx text diff=python 10 | *.pyz text diff=python 11 | 12 | # Binary files 13 | # ============ 14 | *.db binary 15 | *.p binary 16 | *.pkl binary 17 | *.pickle binary 18 | *.pyc binary 19 | *.pyd binary 20 | *.pyo binary 21 | 22 | # Jupyter notebook 23 | *.ipynb text 24 | 25 | # Consider notebook files as support documentation 26 | *.ipynb linguist-documentation 27 | 28 | # Note: .db, .p, and .pkl files are associated 29 | # with the python modules ``pickle``, ``dbm.*``, 30 | # ``shelve``, ``marshal``, ``anydbm``, & ``bsddb`` 31 | # (among others). 32 | -------------------------------------------------------------------------------- /tests/test_segmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from tssearch import load_ecg_example, get_distance_dict, time_series_segmentation 4 | 5 | 6 | def segmentation(): 7 | data = load_ecg_example() 8 | cfg = get_distance_dict(["Dynamic Time Warping", "Euclidean Distance"]) 9 | out = time_series_segmentation(cfg, data["query"], data["sequence"], data["tq"], data["ts"], weight=data["weight"]) 10 | 11 | np.testing.assert_almost_equal( 12 | out["Dynamic Time Warping"], [7, 51, 120, 161, 210, 263, 318, 394, 444, 510, 584, 666, 740, 804, 878] 13 | ) 14 | 15 | np.testing.assert_almost_equal( 16 | out["Euclidean Distance"], [10, 58, 127, 193, 257, 320, 384, 452, 527, 596, 667, 736, 807] 17 | ) 18 | 19 | return out 20 | 21 | 22 | if __name__ == "__main__": 23 | out = segmentation() 24 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | Authors 2 | ========== 3 | This package is being developed and maintained by `Fraunhofer AICOS `_. 4 | 5 | .. image:: imgs/fhp_logo.png 6 | :align: center 7 | :scale: 25 % 8 | :alt: FhP-AICOS 9 | 10 | TSSEARCH was written in collaboration with `Cognitive Systems Lab of University of Bremen `_. 
11 | 12 | Lead Development Team 13 | --------------------- 14 | 15 | - Duarte Folgado (`duarte.folgado@fraunhofer.pt `_) 16 | - Hugo Gamboa (`hugo.gamboa@fraunhofer.pt `_) 17 | - Marília Barandas (`marilia.barandas@fraunhofer.pt `_) 18 | - Maria Lua Nunes (`maria.nunes@fraunhofer.pt `_) 19 | - Margarida Antunes (`maria.antunes@fraunhofer.pt `_) 20 | 21 | 22 | Contributors 23 | ------------ 24 | - Hui Liu 25 | - Tanja Schultz 26 | - Yale Hartmann 27 | -------------------------------------------------------------------------------- /tests/search_example.py: -------------------------------------------------------------------------------- 1 | from tssearch import * 2 | 3 | 4 | if __name__ == "__main__": 5 | 6 | # Example of a sequence to search in 7 | t = np.arange(0, 20 * np.pi, 0.1) 8 | sequence = np.sin(t) 9 | 10 | # Example of a sequence to search for 11 | tq = t[:70] 12 | query = np.sin(tq) 13 | 14 | dict_distances = get_distances_by_type() 15 | 16 | result = time_series_search(dict_distances, query, sequence, tq, t, output=("number", 1)) 17 | 18 | plt.figure() 19 | plt.title("Dynamic Time Warping") 20 | plot_alignment(query, sequence, result["Dynamic Time Warping"]["path"][0]) 21 | 22 | plt.figure() 23 | plt.title("Longest Common Subsequence") 24 | plot_alignment(query, sequence, result["Longest Common Subsequence"]["path"][0]) 25 | 26 | plt.figure() 27 | plt.title("Time Warp Edit Distance") 28 | plot_alignment(query, sequence, result["Time Warp Edit Distance"]["path"][0]) 29 | 30 | plt.figure() 31 | plt.title("Euclidean Distance") 32 | start = result["Euclidean Distance"]["start"][0] 33 | end = result["Euclidean Distance"]["end"][0] 34 | path = [np.arange(len(query)), np.arange(start, end)] 35 | plot_alignment(query, sequence, path, hoffset=start) 36 | 37 | plt.show() 38 | -------------------------------------------------------------------------------- /tests/test_query_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from tssearch import load_ecg_example, get_distance_dict, time_series_search 4 | 5 | 6 | def query_search(): 7 | data = load_ecg_example() 8 | cfg = get_distance_dict(["Dynamic Time Warping", "Longest Common Subsequence", "Euclidean Distance"]) 9 | out = time_series_search(cfg, data["query"], data["sequence"], tq=data["tq"], ts=data["ts"], weight=data["weight"]) 10 | 11 | np.testing.assert_almost_equal(out["Dynamic Time Warping"]["path_dist"][0], 0.09974783) 12 | np.testing.assert_almost_equal(out["Dynamic Time Warping"]["start"][0], 445) 13 | np.testing.assert_almost_equal(out["Dynamic Time Warping"]["end"][0], 510) 14 | 15 | np.testing.assert_almost_equal(out["Longest Common Subsequence"]["path_dist"][0], 1.0) 16 | np.testing.assert_almost_equal(out["Longest Common Subsequence"]["start"][0], 844) 17 | np.testing.assert_almost_equal(out["Longest Common Subsequence"]["end"][0], 921) 18 | 19 | np.testing.assert_almost_equal(out["Euclidean Distance"]["path_dist"][0], 0.05480903) 20 | np.testing.assert_almost_equal(out["Euclidean Distance"]["start"][0], 596) 21 | np.testing.assert_almost_equal(out["Euclidean Distance"]["end"][0], 674) 22 | 23 | return out 24 | 25 | 26 | if __name__ == "__main__": 27 | out = query_search() 28 | -------------------------------------------------------------------------------- /tests/test_distances.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from tssearch import * 4 | 5 | 6 | def test_distances(dist): 7 
| np.testing.assert_almost_equal(dist["Time Warp Edit Distance"], 223.85651832411503) 8 | np.testing.assert_almost_equal(dist["Dynamic Time Warping"], 0.2509773694532439) 9 | np.testing.assert_almost_equal(dist["Longest Common Subsequence"], 0.7774244833068362) 10 | np.testing.assert_almost_equal(dist["Time Alignment Measurement"], 1.492823) 11 | np.testing.assert_almost_equal(dist["Euclidean Distance"], 25.066280) 12 | # np.testing.assert_almost_equal(dist['Minkowski Distance'], ) 13 | np.testing.assert_almost_equal(dist["Chebyshev Distance"], 1.760120) 14 | np.testing.assert_almost_equal(dist["Cross Correlation Distance"], 1.0000008394102025) 15 | np.testing.assert_almost_equal(dist["Pearson Correlation Distance"], 2.000001678820405) 16 | np.testing.assert_almost_equal(dist["Short Time Series Distance"], 3.9573142706233573) 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | # Example of a sequence to search in 22 | t = np.arange(0, 20 * np.pi, 0.1) 23 | ts1 = np.sin(t) 24 | ts2 = np.sin(2 * t) 25 | 26 | dict_distances = get_distances_by_type() 27 | dist = time_series_distance(dict_distances, ts1, ts2, t, t) 28 | 29 | test_distances(dist.to_dict()["Distance"]) 30 | -------------------------------------------------------------------------------- /tssearch/distances/lockstep_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit, prange 3 | 4 | 5 | @njit(parallel=True, fastmath=True) 6 | def _lnorm_multidimensional(x, y, weight, p=2): 7 | """ 8 | 9 | Parameters 10 | ---------- 11 | x : nd-array 12 | Time series x. 13 | y : nd-array 14 | Time series y. 15 | weight: nd-array (Default: None) 16 | query weight values. 17 | p: int 18 | Lp norm distance degree. 19 | 20 | Returns 21 | ------- 22 | The Lp norm distance. 23 | """ 24 | l1 = x.shape[0] 25 | l3 = x.shape[1] 26 | 27 | distance = np.zeros_like(x, dtype=float) 28 | for i in prange(l1): 29 | dist = 0.0 30 | for di in range(l3): 31 | diff = x[i, di] - y[i, di] 32 | dist += weight[i, di] * (diff ** p) 33 | distance[i] = dist ** (1 / p) 34 | 35 | return distance 36 | 37 | 38 | def _lnorm_unidimensional(x, y, weight, p=2): 39 | """ 40 | 41 | Parameters 42 | ---------- 43 | x : nd-array 44 | Time series x. 45 | y : nd-array 46 | Time series y. 47 | weight: nd-array (Default: None) 48 | query weight values. 49 | p: int 50 | Lp norm distance degree. 51 | 52 | Returns 53 | ------- 54 | The Lp norm distance. 55 | """ 56 | distance = weight * np.power(np.power(np.abs(x - y), p), (1 / p)) 57 | 58 | return distance 59 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # Unfortunately, flake8 does not support pyproject.toml configuration. 
2 | # https://github.com/PyCQA/flake8/issues/234
3 | [flake8]
4 | per-file-ignores =
5 |     __init__.py:F401
6 | show-source = True
7 | count = True
8 | statistics = True
9 | # https://www.flake8rules.com
10 | # E203 = Whitespace before ':'
11 | # E265 = comment blocks like @{ section, which it can't handle
12 | # E266 = too many leading '#' for block comment
13 | # E731 = do not assign a lambda expression, use a def
14 | # W293 = Blank line contains whitespace
15 | # W503 = Line break before binary operator
16 | # E704 = multiple statements in one line - used for @override
17 | # TC002 = move third party import to TYPE_CHECKING
18 | # ANN = flake8-annotations
19 | # TC, TC2 = flake8-type-checking
20 | # B = flake8-bugbear
21 | # S = flake8-bandit
22 | # D = flake8-docstrings
23 | # S = flake8-bandit
24 | # F are errors reported by pyflakes
25 | # E and W are warnings and errors reported by pycodestyle
26 | # C are violations reported by mccabe
27 | # BLK = flake8-black
28 | # DAR = darglint
29 | # SC = flake8-spellcheck
30 | ignore = E203, E211, E265, E501, E999, F401, F821, W503, W505, SC100, SC200, C400, C401, C402, B008, E800, E741, F403, F405, C901, B028, E226
31 | max-line-length = 120
32 | max-doc-length = 120
33 | import-order-style = google
34 | docstring-convention = google
35 | inline-quotes = "
36 | strictness=short
37 | dictionaries=en_US,python,technical,pandas
38 | min-python-version = 3.8.0
39 | exclude = .git,.tox,.nox,venv,.venv,.venv-docs,.venv-dev,.venv-note,.venv-dempy,docs,test
40 | max-complexity = 10
41 | #spellcheck-targets=comments
42 |
--------------------------------------------------------------------------------
/tssearch/distances/time_distances.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from tssearch.distances.elastic_distances import dtw
4 | from tssearch.distances.elastic_utils import traceback
5 |
6 |
7 | def tam(x, y):
8 |     """Calculates the Time Alignment Measurement (TAM) based on an optimal warping path
9 |     between two time series.
10 |
11 |     Reference: Folgado et al., Time Alignment Measurement for Time Series, 2016.
12 |
13 |     Parameters
14 |     ----------
15 |     x : nd-array
16 |         Time series x.
17 |     y : nd-array
18 |         Time series y.
19 |
20 |     Returns
21 |     -------
22 |     float
23 |         The TAM between ``x`` and ``y``, combining the ratios of advance, delay
24 |         and phase along the optimal warping path. It ranges from 0 (series
25 |         completely in phase) to 3 (series completely out of phase).
26 |
27 |     """
28 |     ac = dtw(x, y, report="cost_matrix")
29 |
30 |     path = traceback(ac)
31 |
32 |     # Delay and advance counting
33 |     delay = len(np.where(np.diff(path[0]) == 0)[0])
34 |     advance = len(np.where(np.diff(path[1]) == 0)[0])
35 |
36 |     # Phase counting
37 |     incumbent = np.where((np.diff(path[0]) == 1) * (np.diff(path[1]) == 1))[0]
38 |     phase = len(incumbent)
39 |
40 |     # Estimated and reference time series duration.
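  |     # The last path coordinates give the aligned durations of each series,
  |     # which normalize the advance, delay and phase counts into the ratios
  |     # combined below (TAM = p_advance + p_delay + (1 - p_phase)).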
41 | len_estimation = path[1][-1] 42 | len_ref = path[0][-1] 43 | 44 | p_advance = advance * 1.0 / len_ref 45 | p_delay = delay * 1.0 / len_estimation 46 | p_phase = phase * 1.0 / np.min([len_ref, len_estimation]) 47 | 48 | distance = p_advance + p_delay + (1 - p_phase) 49 | return distance 50 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Associação Fraunhofer Portugal Research 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import setuptools
4 |
5 | ROOT = Path(__file__).parent
6 |
7 | with open("README.md") as fh:
8 |     long_description = fh.read()
9 |
10 |
11 | def find_requirements(filename):
12 |     with (ROOT / "requirements" / filename).open() as f:
13 |         return [s for s in [line.strip(" \n") for line in f] if not s.startswith("#") and s != ""]
14 |
15 |
16 | install_requirements = find_requirements("requirements.txt")
17 | docs_requirements = find_requirements("requirements-docs.txt")
18 |
19 | setuptools.setup(
20 |     name="tssearch",
21 |     version="0.1.3",
22 |     author="Fraunhofer Portugal",
23 |     description="Library for time series subsequence search",
24 |     long_description=long_description,
25 |     long_description_content_type="text/markdown",
26 |     download_url="https://github.com/fraunhoferportugal/tssearch/archive/refs/tags/v0.1.3.tar.gz",
27 |     package_data={"tssearch": ["distances/distances.json", "examples/ecg.pickle"]},
28 |     packages=setuptools.find_packages(),
29 |     classifiers=[
30 |         "License :: OSI Approved :: BSD License",
31 |         "Operating System :: Microsoft :: Windows",
32 |         "Operating System :: POSIX",
33 |         "Operating System :: Unix",
34 |         "Operating System :: MacOS",
35 |         "Programming Language :: Python :: 3",
36 |         "Programming Language :: Python :: 3.8",
37 |         "Programming Language :: Python :: 3.9",
38 |         "Programming Language :: Python :: 3.10",
39 |         "Programming Language :: Python :: 3.11",
40 |         "Programming Language :: Python :: 3.12",
41 |     ],
42 |     install_requires=install_requirements,
43 |     extras_require={
44 |         "docs": docs_requirements,
45 |     },
46 | )
47 |
--------------------------------------------------------------------------------
/tssearch/search/segmentation.py:
--------------------------------------------------------------------------------
1 | from scipy.signal import find_peaks
2 | from tssearch.search.search_utils import lockstep_search, elastic_search
3 |
4 |
5 | def time_series_segmentation(dict_distances, query, sequence, tq=None, ts=None, weight=None):
6 |     """
7 |     Time series segmentation locates the time instants between consecutive query repetitions on a longer,
8 |     repetitive sequence.
9 |
10 |     Parameters
11 |     ----------
12 |     dict_distances: dict
13 |         Configuration file with distances.
14 |     query: nd-array
15 |         Query time series.
16 |     sequence: nd-array
17 |         Sequence time series.
18 |     tq: nd-array
19 |         Time stamps of the query time series.
20 |     ts: nd-array
21 |         Time stamps of the sequence time series.
22 |     weight: nd-array (Default: None)
23 |         Query weight values.
24 |     Returns
25 |     -------
26 |     segment_results: dict
27 |         Segmented time instants for each given distance.
28 |     """
29 |
30 |     l_query = len(query)
31 |     segment_results = {}
32 |
33 |     for d_type in dict_distances:
34 |         for dist in dict_distances[d_type]:
35 |
36 |             if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes":
37 |                 segment_results[dist] = {}
38 |                 if d_type == "lockstep":
39 |                     distance = lockstep_search(dict_distances[d_type][dist], query, sequence, weight)
40 |                 elif d_type == "elastic":
41 |                     distance, ac = elastic_search(dict_distances[d_type][dist], query, sequence, tq, ts, weight)
42 |                 else:
43 |                     print("WARNING")
44 |                     continue
45 |
46 |                 pks, _ = find_peaks(-distance, distance=l_query / 2)
47 |                 segment_results[dist] = pks
48 |
49 |     return segment_results
50 |
--------------------------------------------------------------------------------
/tssearch/utils/preprocessing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def standardization(signal, fit=False, param=None):
5 |     """Normalizes a given signal by subtracting the mean and dividing by the standard deviation.
6 |
7 |     Parameters
8 |     ----------
9 |     signal : nd-array
10 |         input signal
  |     fit : bool
  |         If True, the statistics of the signal and of its first difference are
  |         also returned so they can be reused on other signals.
  |     param : nd-array
  |         Previously computed [mean, std] statistics used to normalize the
  |         signal instead of computing them from ``signal`` itself.
11 |
12 |     Returns
13 |     -------
14 |     nd-array
15 |         standardized signal
  |         (if ``fit`` is True, a tuple with the standardized signal and the
  |         computed statistics is returned instead)
16 |
17 |     """
18 |     if param is not None:
19 |         s_mean = param[0]
20 |         s_std = param[1]
21 |     else:
22 |         s_mean = np.mean(signal, axis=0)
23 |         s_std = np.std(signal, axis=0)
24 |
25 |     if fit:
26 |         d_mean = np.mean(np.diff(signal, axis=0), axis=0)
27 |         d_std = np.std(np.diff(signal, axis=0), axis=0)
28 |         return (signal - s_mean) / s_std, np.array([s_mean, s_std, d_mean, d_std])
29 |     else:
30 |         return (signal - s_mean) / s_std
31 |
32 |
33 | def interpolation(x, y):
34 |     """Computes the interpolation given two time series of different lengths.
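  |     The shorter of the two series is linearly interpolated up to the length
  |     of the longer one, so that lockstep distances can afterwards be computed
  |     point by point.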
35 | 36 | 37 | Parameters 38 | ---------- 39 | x : nd-array 40 | Time series x 41 | y : nd-array 42 | Time series y 43 | 44 | Returns 45 | ------- 46 | interp_signal (nd-array) 47 | Interpolated signal 48 | nd-array 49 | Time series 50 | 51 | """ 52 | 53 | lx = len(x) 54 | ly = len(y) 55 | if lx > ly: 56 | t_old = np.linspace(0, lx, ly) 57 | t_new = np.linspace(0, lx, lx) 58 | if len(np.shape(x)) == 1: 59 | y_new = np.interp(t_new, t_old, y) 60 | else: 61 | y_new = np.array([np.interp(t_new, t_old, y[:, ax]) for ax in range(np.shape(x)[1])]).T 62 | return x, y_new 63 | else: 64 | t_old = np.linspace(0, ly, lx) 65 | t_new = np.linspace(0, ly, ly) 66 | 67 | if len(np.shape(x)) == 1: 68 | x_new = np.interp(t_new, t_old, x) 69 | else: 70 | x_new = np.array([np.interp(t_new, t_old, x[:, ax]) for ax in range(np.shape(x)[1])]).T 71 | return x_new, y 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | Makefile 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | .idea/ 118 | 119 | *.pdf 120 | 121 | # documentation 122 | docs/_twed.py 123 | docs/_lcss.py 124 | docs/_dtw.py 125 | -------------------------------------------------------------------------------- /tssearch/utils/distances_settings.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tssearch 3 | 4 | 5 | def load_json(json_path): 6 | """Loads the json file given by filename. 
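  |     The file is expected to follow the structure of TSSEARCH's ``distances.json``,
  |     i.e., distance settings grouped under the top-level domains "elastic",
  |     "lockstep" and "time".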
7 |     Parameters
8 |     ----------
9 |     json_path : string
10 |         Path to the json file.
11 |     Returns
12 |     -------
13 |     Dict
14 |         Dictionary with the json file content.
15 |     """
16 |
17 |     return json.load(open(json_path))
18 |
19 |
20 | def get_distances_by_type(domain=None, json_path=None):
21 |     """Creates a dictionary with the distances settings by domain.
22 |     Parameters
23 |     ----------
24 |     domain : string
25 |         Available domains: "lockstep"; "elastic"; "time"
26 |         If domain equals None, then the distances settings from all domains are returned.
27 |     json_path : string
28 |         Directory of json file. Default: package distances.json directory
29 |     Returns
30 |     -------
31 |     Dict
32 |         Dictionary with the distances settings
33 |     """
34 |
35 |     if json_path is None:
36 |         json_path = tssearch.__path__[0] + "/distances/distances.json"
37 |
38 |     if domain not in ["elastic", "lockstep", "time", None]:
39 |         raise SystemExit("No valid domain. Choose: lockstep, elastic, time or None (for all distances settings).")
40 |
41 |     dict_features = load_json(json_path)
42 |     if domain is None:
43 |         return dict_features
44 |     else:
45 |         return {domain: dict_features[domain]}
46 |
47 |
48 | def get_distance_dict(dist_list):
  |     """Creates a dictionary with the settings of the given distances.
  |
  |     Parameters
  |     ----------
  |     dist_list : list
  |         Names of the distances to select, e.g., ["Dynamic Time Warping"].
  |
  |     Returns
  |     -------
  |     Dict
  |         Dictionary with the selected distances settings, grouped by domain.
  |     """
49 |
50 |     json_path = tssearch.__path__[0] + "/distances/distances.json"
51 |
52 |     dict_features = load_json(json_path)
53 |
54 |     select_distances = {}
55 |     for d in dist_list:
56 |         if d in dict_features["elastic"]:
57 |             d_type = "elastic"
58 |         elif d in dict_features["lockstep"]:
59 |             d_type = "lockstep"
60 |         elif d in dict_features["time"]:
61 |             d_type = "time"
62 |         else:
63 |             continue
64 |
65 |         if d_type not in select_distances:
66 |             select_distances[d_type] = {}
67 |         select_distances[d_type][d] = dict_features[d_type][d]
68 |
69 |     return select_distances
70 |
--------------------------------------------------------------------------------
/tests/main_example.py:
--------------------------------------------------------------------------------
1 | from tssearch import *
2 |
3 | import matplotlib.pyplot as plt
4 |
5 |
6 | if __name__ == "__main__":
7 |     # time
8 |     t = np.arange(0, 20 * np.pi, 0.1)
9 |
10 |     # 1. 1D, sample, euclidean distance
11 |     sequence = np.sin(t)
12 |     query = np.sin(t[:70])
13 |
14 |     dict_distances = {
15 |         "lockstep": {"Euclidean Distance": {"function": "euclidean_distance", "parameters": "", "use": "yes"}}
16 |     }
17 |
18 |     result1 = time_series_search(dict_distances, query, sequence, output=("number", 1))
19 |
20 |     plt.figure(1)
21 |     start = result1["Euclidean Distance"]["start"][0]
22 |     end = result1["Euclidean Distance"]["end"][0]
23 |     path = [np.arange(len(query)), np.arange(start, end)]
24 |     plot_alignment(query, sequence, path, hoffset=start)
25 |
26 |     # 2. 3-axis, reference, sdtw, equal weight 3 axis fw = [1,1,1]
27 |     sequence = np.array([np.sin(t), np.sin(2 * t), np.cos(t)]).T
28 |     query = sequence[70:140]
29 |
30 |     dict_distances = {
31 |         "elastic": {"Dynamic Time Warping": {"function": "dtw", "parameters": {"dtw_type": "sub-dtw"}, "use": "yes"}}
32 |     }
33 |
34 |     result2 = time_series_search(dict_distances, query, sequence, output=("number", 1))
35 |
36 |     path = result2["Dynamic Time Warping"]["path"][0]
37 |     plt.figure(2)
38 |     plot_alignment(query[:, 1], sequence[:, 1], path, hoffset=path[1][0])
39 |
40 |     # 3. 3-axis, reference, sdtw, different axes weights
41 |     # derivative and abs with different weight fw = [.7,.7,.7,.3,.3,.3]
42 |     sequence = np.array([np.sin(t), np.sin(2 * t), np.cos(t)]).T
43 |     query = np.array([np.sin(t[:70]), np.sin(2 * t[10:80]), np.cos(t[30:100])]).T
44 |     weight = np.ones_like(query)
45 |     weight[:, 2] = 0.5
46 |     weight[:, 1] = 0.8
47 |
48 |     dict_distances = {
49 |         "elastic": {"Dynamic Time Warping": {"function": "dtw", "parameters": {"dtw_type": "sub-dtw"}, "use": "yes"}}
50 |     }
51 |
52 |     result3 = time_series_search(dict_distances, query, sequence, weight=weight, output=("number", 1))
53 |
54 |     path = result3["Dynamic Time Warping"]["path"][0]
55 |     plt.figure(3)
56 |     plot_alignment(query[:, 0], sequence[:, 0], path, hoffset=path[1][0])
57 |
58 |     # 4. with 4 points to be forced in time and amplitude qw = [10000010000...0011]
59 |
60 |     # 5. Emulate gaussian process
61 |
62 |     plt.show()
63 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. image:: imgs/logo.jpg
2 |     :width: 70 %
3 |     :alt: Logo
4 |     :align: center
5 |
6 | |
7 | |
8 |
9 | Welcome to TSSEARCH documentation!
10 | ==================================
11 |
12 | The Time Series Subsequence Search package (TSSEARCH for short) is a Python package that assists researchers in exploratory analysis for query search and time series segmentation without requiring significant programming effort. It contains curated routines for query and subsequence search. TSSEARCH installation is straightforward and goes along with startup code examples. Our goal is to provide the tools to get faster insights for your time series.
13 |
14 | Highlights
15 | ==========
16 |
17 | - **Search**: we provide methods for time series query search and segmentation
18 | - **Weights**: the relative contribution of each point of the query to the overall distance can be expressed using a user-defined weight vector
19 | - **Visualization**: we provide visualizations to present the results of the segmentation and query search
20 | - **Unit tested**: we provide unit tests for each distance
21 | - **Easily extended**: adding new distances is easy, and we encourage you to contribute with your custom distances or search methods
22 |
23 | Contents
24 | ========
25 |
26 | In development
27 |
28 | .. toctree::
29 |     :maxdepth: 2
30 |
31 |     Lockstep Distances
32 |     Elastic Distances
33 |     Segmentation and Search
34 |     Module Reference
35 |     Authors
36 |     Changelog
37 |     License
38 |
39 | Installation
40 | ============
41 |
42 | This package is available on PyPI:
43 |
44 | .. code:: bash
45 |
46 |     $ pip install tssearch
47 |
48 | Get started
49 | ===========
50 |
51 | The code below segments a 10 s electrocardiography record:
52 |
53 | .. code:: python
54 |
55 |     import tssearch
56 |
57 |     # Load the query, (optional) weight vector and sequence
58 |     data = tssearch.load_ecg_example()
59 |
60 |     # Selects the Dynamic Time Warping (DTW) as the distance for the segmentation
61 |     cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
62 |
63 |     # Performs the segmentation
64 |     out = tssearch.time_series_segmentation(cfg, data['query'], data['sequence'], weight=data['weight'])
65 |
66 | Indices and tables
67 | ==================
68 |
69 | * :ref:`genindex`
70 | * :ref:`modindex`
71 | * :ref:`search`
72 |
--------------------------------------------------------------------------------
/tssearch/distances/distances.json:
--------------------------------------------------------------------------------
1 | {
2 |   "elastic": {
3 |     "Time Warp Edit Distance": {
4 |       "multivariate": "no",
5 |       "description": "",
6 |       "function": "twed",
7 |       "parameters": {
8 |         "nu": 1e-3,
9 |         "lmbda": 0,
10 |         "p": 2,
11 |         "time": "true"
12 |       },
13 |       "use": "yes"
14 |     },
15 |     "Dynamic Time Warping": {
16 |       "multivariate": "yes",
17 |       "description": "",
18 |       "function": "dtw",
19 |       "parameters": {
20 |         "dtw_type": "dtw",
21 |         "alpha": 1
22 |       },
23 |       "use": "yes"
24 |     },
25 |     "Longest Common Subsequence": {
26 |       "multivariate": "yes",
27 |       "description": "",
28 |       "function": "lcss",
29 |       "parameters": {
30 |         "eps": 1,
31 |         "report": "distance"
32 |       },
33 |       "use": "yes"
34 |     }
35 |   },
36 |   "time": {
37 |     "Time Alignment Measurement": {
38 |       "multivariate": "yes",
39 |       "description": "",
40 |       "function": "tam",
41 |       "parameters": "",
42 |       "use": "yes"
43 |     }
44 |   },
45 |   "lockstep": {
46 |     "Euclidean Distance": {
47 |       "multivariate": "yes",
48 |       "description": "",
49 |       "function": "euclidean_distance",
50 |       "parameters": "",
51 |       "use": "yes"
52 |     },
53 |     "Minkowski Distance": {
54 |       "multivariate": "yes",
55 |       "description": "",
56 |       "function": "minkowski_distance",
57 |       "parameters":
58 |         {"p": 3},
59 |       "use": "yes"
60 |     },
61 |     "Manhattan Distance": {
62 |       "multivariate": "yes",
63 |       "description": "",
64 |       "function": "manhattan_distance",
65 |       "parameters": "",
66 |       "use": "yes"
67 |     },
68 |     "Chebyshev Distance": {
69 |       "multivariate": "yes",
70 |       "description": "",
71 |       "function": "chebyshev_distance",
72 |       "parameters": "",
73 |       "use": "yes"
74 |     },
75 |     "Cross Correlation Distance": {
76 |       "multivariate": "no",
77 |       "description": "",
78 |       "function": "correlation_distance",
79 |       "parameters": "",
80 |       "use": "yes"
81 |     },
82 |     "Pearson Correlation Distance": {
83 |       "multivariate": "no",
84 |       "description": "",
85 |       "function": "pearson_correlation",
86 |       "parameters": "",
87 |       "use": "yes"
88 |     },
89 |     "Short Time Series Distance": {
90 |       "multivariate": "no",
91 |       "description": "",
92 |       "function": "short_time_series_distance",
93 |       "parameters": "",
94 |       "use": "yes"
95 |     }
96 |   }
97 | }
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 120
3 | color = true
4 | target-version = ['py37', 'py38']
5 | include = '\.pyi?$'
6 | exclude = '''
7 | (
8 |   \.egg
9 |   | \.eggs
10 |   | \.git
11 |   | \.hg
12 |   | \.dvc
13 |   | \.mypy_cache
14 |   | \.pytest_cache
15 |   | \.nox
16 |   | \.tox
17 |   | \.venv
18 |   | \.venv-docs
19 |   | \.venv-dev
20 |   | \.venv-note
21 |   | \.venv-dempy
22 |   | _build
23 |   | build
24 |   | dist
25 |   | setup.py
26 | )
27 | '''
28 |
29 | [tool.isort]
30 | # https://github.com/timothycrosley/isort
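  | # The "black" profile below keeps isort's import formatting compatible with
  | # black, so the two formatters do not disagree about import layout.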
31 | py_version = 38 32 | profile = "black" 33 | multi_line_output = 3 34 | include_trailing_comma = true 35 | force_grid_wrap = 0 36 | use_parentheses = true 37 | line_length = 120 38 | skip_gitignore = true 39 | color_output = true 40 | #known_typing = ["typing", "types", "typing_extensions", "mypy", "mypy_extensions"] 41 | 42 | [tool.coverage.report] 43 | exclude_lines = [ 44 | "pragma: nocover", 45 | "raise NotImplementedError", 46 | "if __name__ == .__main__.:", 47 | "if TYPE_CHECKING:", 48 | "raise AssertionError", 49 | ] 50 | show_missing = true 51 | ignore_errors = true 52 | skip_covered = true 53 | #fail_under = 100 54 | #precision = 1 55 | omit = [ 56 | "test/*", 57 | ".venv*", 58 | ] 59 | 60 | # `pytest` configurations 61 | [tool.pytest.ini_options] 62 | minversion = "6.0" 63 | addopts = ["-vv", "--doctest-modules"] 64 | doctest_optionflags = "NORMALIZE_WHITESPACE" 65 | testpaths = ["test"] 66 | filterwarnings = ["ignore::DeprecationWarning"] 67 | 68 | [tool.mypy] 69 | # https://mypy.readthedocs.io/en/latest/config_file.html 70 | python_version = 3.8 71 | pretty = true 72 | show_traceback = true 73 | color_output = true 74 | warn_return_any = true 75 | warn_no_return = true 76 | warn_unused_configs = true 77 | warn_unused_ignores = true 78 | warn_redundant_casts = true 79 | warn_unreachable = true 80 | 81 | [tool.vulture] 82 | paths = ["src"] 83 | min_confidence = 65 84 | 85 | [tool.pydocstyle] 86 | convention = "google" 87 | #ignore = "D205,D415" 88 | 89 | [tool.interrogate] 90 | # https://github.com/econchick/interrogate#configuration 91 | ignore-init-method = true 92 | fail-under = 95 93 | color = true 94 | # possible values: 0 (minimal output), 1 (-v), 2 (-vv) 95 | verbose = 0 96 | quiet = false 97 | exclude = ["setup.py", "docs", "build"] 98 | 99 | [tool.nbqa.config] 100 | black = "pyproject.toml" 101 | isort = "pyproject.toml" 102 | 103 | [tool.nbqa.mutate] 104 | isort = 1 105 | black = 1 106 | pyupgrade = 1 107 | 108 | [tool.nbqa.addopts] 109 | pyupgrade = ["--py36-plus"] 110 | 111 | [tool.nbqa.files] 112 | isort = "^notebooks/" 113 | black = "^notebooks/" 114 | flake8 = "^notebooks/" 115 | mypy = "^notebooks/" 116 | pydocstyle = "^notebooks/" 117 | pyupgrade = "^notebooks/" 118 | 119 | [tool.bandit] 120 | targets = ["src"] 121 | # (optional) list included test IDs here, eg '[B101, B406]': 122 | tests = ["B201", "B301"] 123 | # (optional) list skipped test IDs here, eg '[B101, B406]': 124 | skips = ["B101", "B601"] 125 | 126 | [tool.bandit.assert_used] 127 | exclude = ["*_test.py", "test_*.py"] 128 | 129 | [tool.cruft] 130 | skip = [".git"] 131 | -------------------------------------------------------------------------------- /docs/descriptions/segmentation_search.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | Segmentation and Search 3 | ======================= 4 | 5 | ************ 6 | Segmentation 7 | ************ 8 | 9 | 10 | The :class:`~tssearch.search.segmentation.time_series_segmentation` locates the time instants between consecutive query repetitions on a longer and repetitive sequence. 11 | You will need to define the distance used for segmentation and provide a query and a sequence as inputs to :class:`~tssearch.search.segmentation.time_series_segmentation`, as follows: 12 | 13 | .. 
code:: python
14 |
15 |     import tssearch
16 |
17 |     data = tssearch.load_ecg_example()
18 |     cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
19 |
20 |     out = tssearch.time_series_segmentation(cfg, data["query"], data["sequence"], weight=data["weight"])
21 |
22 | In the code above, a ten-second segment from an electrocardiography record is used to define the query and the sequence, and the DTW is defined as the distance for the segmentation. Then, the segmentation is calculated and the output is assigned to a variable. The method receives as inputs the configuration file, the query, and the sequence. Additionally, an optional vector input that assigns weights to each time instance of the query is also given as input.
23 |
24 | .. image:: https://i.postimg.cc/4yfGJJVB/Fig-4-1.png
25 |     :alt: Example ECG segmentation output
26 |
27 | In this example, the specified weights vector assigned less contribution to the second local maximum of the ECG (T wave).
28 |
29 | If you are interested in further characterizing each subsequence, this could be accomplished using the distance values calculated for each segment and/or using `TSFEL
30 | `_ to extract temporal, statistical, and spectral features as data representations for classification algorithms.
31 |
32 | ******
33 | Search
34 | ******
35 |
36 | The :class:`~tssearch.search.query_search.time_series_search` method locates the k-best occurrences of a given query on a longer sequence based on a distance measurement. By default, k is set to retrieve the maximum number of matches. The user can also explicitly define the value of k to retrieve the k-best occurrences.
37 |
38 | An illustrative example is provided below:
39 |
40 | .. code:: python
41 |
42 |     import tssearch
43 |     import numpy as np
44 |
45 |     query = np.loadtxt("query.txt")
46 |     sequence = np.loadtxt("sequence.txt")
47 |
48 |     cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
49 |     cfg['elastic']['Dynamic Time Warping']['parameters']['alpha'] = 0.5
50 |
51 |     out = tssearch.time_series_search(cfg, query, sequence)
52 |
53 | In the above code, the DTW is configured with an additional parameter :math:`{\alpha}` that weights the contribution of the cost in amplitude against the cost in its first derivative. Then, the query search is calculated, and the output is assigned to a variable. The method receives as inputs the configuration file, the query, and the sequence. Since the number of matches is not defined, the method retrieves the maximum number of matches.
54 |
55 | To illustrate this example, a wearable sensor-based human activity dataset with multidimensional data was used and the following visualization was obtained:
56 |
57 | .. image:: https://i.postimg.cc/rmrp3Fcb/Fig-6-1.png
58 |     :alt: Example of query search in stride segmentation
--------------------------------------------------------------------------------
/tssearch/utils/add_personal_distance.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | import json
4 | import os
5 | import sys
6 | import warnings
7 | from inspect import getmembers, isfunction
8 |
9 | from tssearch.utils.distances_settings import load_json
10 |
11 |
12 | def add_distance_json(distances_path, json_path):
13 |     """Adds new distances to a personal distances .json file.
14 |     Parameters
15 |     ----------
16 |     distances_path: string
17 |         Personal Python module directory containing new distances implementation.
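  |         Each distance function in this module should be decorated with
  |         ``@set_domain`` to declare its domain; functions without it are
  |         skipped (a warning is raised if no decorated function is found).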
18 |     json_path: string
19 |         Personal .json file directory containing existing distances from TSSEARCH.
20 |         New customised distances will be added to the file in this directory.
21 |     """
22 |
23 |     sys.path.append(distances_path[: -len(distances_path.split(os.sep)[-1]) - 1])
24 |     exec("import " + distances_path.split(os.sep)[-1][:-3])
25 |
26 |     # Reload module containing the new distances
27 |     importlib.reload(sys.modules[distances_path.split(os.sep)[-1][:-3]])
28 |     exec("import " + distances_path.split(os.sep)[-1][:-3] + " as pymodule")
29 |
30 |     # Functions from module containing the new distances
31 |     functions_list = [o for o in getmembers(locals()["pymodule"]) if isfunction(o[1])]
32 |     function_names = [fname[0] for fname in functions_list]
33 |
34 |     # Check if @set_domain was declared on the distances module
35 |     vset_domain = False
36 |
37 |     for fname, f in list(locals()["pymodule"].__dict__.items()):
38 |
39 |         if getattr(f, "domain", None) is not None:
40 |
41 |             vset_domain = True
42 |
43 |             # Access to the personal distances .json file
44 |             feat_json = load_json(json_path)
45 |
46 |             # Assign domain and tag
47 |             domain = getattr(f, "domain", None)
48 |
49 |             # Distance specifications
50 |             # Description
51 |             if f.__doc__ is not None:
52 |                 descrip = f.__doc__.split("\n")[0]
53 |             else:
54 |                 descrip = ""
55 |             # Distance usage
56 |             use = "yes"
57 |             # Distance function arguments
58 |             args_name = inspect.getfullargspec(f)[0]
59 |
60 |             # Access distance parameters
61 |             if args_name != "":
62 |                 # Retrieve default values of arguments
63 |                 spec = inspect.getfullargspec(f)
64 |                 defaults = dict(zip(spec.args[::-1], (spec.defaults or ())[::-1]))
65 |                 defaults.update(spec.kwonlydefaults or {})
66 |
67 |                 for p in args_name[1:]:
68 |                     if p not in list(defaults.keys()):
69 |                         defaults[p] = None
70 |                 if len(defaults) == 0:
71 |                     defaults = ""
72 |             else:
73 |                 defaults = ""
74 |
75 |             # Settings of the new distance
76 |             new_feature = {"description": descrip, "parameters": defaults, "function": fname, "use": use}
77 |
78 |             # Check if domain exists
79 |             try:
80 |                 feat_json[domain][fname] = new_feature
81 |             except KeyError:
82 |                 feat_json[domain] = {fname: new_feature}
83 |
84 |             # Write the new distance to the json file
85 |             with open(json_path, "w") as fout:
86 |                 json.dump(feat_json, fout, indent=" ")
87 |
88 |             print("Distance " + str(fname) + " was added.")
89 |
90 |     if vset_domain is False:
91 |         warnings.warn("No distances were added. Please declare @set_domain.", stacklevel=2)
92 |
--------------------------------------------------------------------------------
/tssearch/search/query_search.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tssearch.distances.elastic_utils import traceback_adj, lcss_path, lcss_score
3 | from tssearch.search.search_utils import lockstep_search, elastic_search, start_sequences_index
4 |
5 |
6 | def time_series_search(dict_distances, query, sequence, tq=None, ts=None, weight=None, output=("number", 1)):
7 |     """
8 |     The time series search method locates the k-best occurrences of a given query on a longer sequence based on a
9 |     distance measurement.
10 |
11 |     Parameters
12 |     ----------
13 |     dict_distances: dict
14 |         Configuration file with distances.
15 |     query: nd-array
16 |         Query time series.
17 |     sequence: nd-array
18 |         Sequence time series.
19 |     tq: nd-array
20 |         Time stamps of the query time series.
21 |     ts: nd-array
22 |         Time stamps of the sequence time series.
23 |     weight: nd-array (Default: None)
24 |         Query weight values.
25 | output: tuple 26 | number of occurrences. 27 | 28 | Returns 29 | ------- 30 | distance_results: dict 31 | time instants, optimal alignment path and distance for each occurrence per distance. 32 | """ 33 | 34 | l_query = len(query) 35 | distance_results = {} 36 | 37 | for d_type in dict_distances: 38 | for dist in dict_distances[d_type]: 39 | 40 | if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes": 41 | distance_results[dist] = {} 42 | if d_type == "lockstep": 43 | distance = lockstep_search(dict_distances[d_type][dist], query, sequence, weight) 44 | 45 | start_index = start_sequences_index(distance, output=output, overlap=l_query) 46 | end_index, path = [], [] 47 | for start in start_index: 48 | end_index += [start + l_query] 49 | path += [(np.arange(l_query), np.arange(start, end_index[-1]))] 50 | distance_results[dist]["path_dist"] = distance[start_index] 51 | elif d_type == "elastic": 52 | distance, ac = elastic_search(dict_distances[d_type][dist], query, sequence, tq, ts, weight) 53 | 54 | if dist == "Longest Common Subsequence": 55 | eps = dict_distances[d_type][dist]["parameters"]["eps"] 56 | if len(np.shape(query)) == 1: 57 | query_copy = query.reshape(-1, 1) 58 | sequence_copy = sequence.reshape(-1, 1) 59 | path = [lcss_path(query_copy, sequence_copy, ac, eps)] 60 | else: 61 | path = [lcss_path(query, sequence, ac, eps)] 62 | distance_results[dist]["path_dist"] = [lcss_score(ac)] 63 | end_index = [path_i[1][-1] for path_i in path] 64 | else: 65 | end_index = start_sequences_index(distance, output=output, overlap=l_query / 2) 66 | # check if traceback_adj is equal to other elastic measures 67 | path = [traceback_adj(ac[:, : int(pk) + 1]) for pk in end_index] 68 | distance_results[dist]["path_dist"] = distance[end_index] 69 | start_index = [path_i[1][0] for path_i in path] 70 | 71 | else: 72 | print("WARNING") 73 | continue 74 | 75 | distance_results[dist]["distance"] = distance 76 | distance_results[dist]["start"] = start_index 77 | distance_results[dist]["end"] = end_index 78 | distance_results[dist]["path"] = path 79 | 80 | return distance_results 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 | -----------------
6 |
7 | [![license](https://img.shields.io/badge/License-BSD%203-brightgreen)](https://github.com/fraunhoferportugal/tssearch/blob/master/LICENSE.txt)
8 | [![Documentation Status](https://readthedocs.org/projects/tssearch/badge/?version=latest)](https://tssearch.readthedocs.io/en/latest/?badge=latest)
9 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tssearch)
10 | ![PyPI](https://img.shields.io/pypi/v/tssearch?logo=pypi&color=blue)
11 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
12 | [![Downloads](https://pepy.tech/badge/tssearch)](https://pepy.tech/project/tssearch)
13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/fraunhoferportugal/tssearch/blob/master/notebooks/Query_search_unidimensional.ipynb)
14 |
15 | # Time Series Subsequence Search Library
16 |
17 | ## Intuitive time series subsequence search
18 | This repository hosts the **TSSEARCH - Time Series Subsequence Search** Python package. TSSEARCH assists researchers in exploratory analysis for query search and time series segmentation without requiring significant programming effort.
19 |
20 | ## Functionalities
21 |
22 | * **Search**: We provide methods for time series query search and segmentation
23 | * **Weights**: The relative contribution of each point of the query to the overall distance can be expressed using a user-defined weight vector.
24 | * **Visualization**: We provide visualizations to present the results of the
25 | segmentation and query search
26 | * **Unit tested**: We provide unit tests for each distance
27 | * **Easily extended**: Adding new distances is easy, and we encourage you to contribute with your custom distances or search methods
28 |
29 | ## Get started
30 |
31 | ### ⚙️ Installation
32 | TSSEARCH supports Python 3.8 or greater. You can easily install via PyPI:
33 |
34 | ```bash
35 | pip install tssearch
36 | ```
37 |
38 | ### Example
39 | The code below segments a 10 s electrocardiography record:
40 |
41 | ```python
42 | import tssearch
43 |
44 | # Load the query, (optional) weight vector and sequence
45 | data = tssearch.load_ecg_example()
46 |
47 | # Selects the Dynamic Time Warping (DTW) as the distance for the segmentation
48 | cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
49 |
50 | # Performs the segmentation
51 | out = tssearch.time_series_segmentation(cfg, data['query'], data['sequence'], weight=data['weight'])
52 | ```
53 |
54 | ### Documentation
55 | The documentation is available [here](https://tssearch.readthedocs.io/en/latest/).
56 |
57 | ## Available distances
58 |
59 | | Lockstep                             |
60 | |--------------------------------------|
61 | | Lp Distances                         |
62 | | Pearson Correlation Distance         |
63 | | Short Time Series Distance (STS)     |
64 |
65 | | Elastic                              |
66 | |--------------------------------------|
67 | | Dynamic Time Warping (DTW)           |
68 | | Longest Common Subsequence (LCSS)    |
69 | | Time Warp Edit Distance (TWED)       |
70 |
71 | | Time                                 |
72 | |--------------------------------------|
73 | | Time Alignment Measurement (TAM)     |
74 |
75 | ## Citing
76 | When using TSSEARCH please cite the following publication:
77 |
78 | Folgado, Duarte and Barandas, Marília, et al. "*TSSEARCH: Time Series Subsequence Search Library*" SoftwareX 11 (2022).
[https://doi.org/10.1016/j.softx.2022.101049](https://doi.org/10.1016/j.softx.2022.101049) 79 | 80 | 81 | ## Acknowledgements 82 | This work is a result of the project ConnectedHealth (n.º 46858), supported by Competitiveness and Internationalisation Operational Programme (POCI) and Lisbon Regional Operational Programme (LISBOA 2020), under the PORTUGAL 2020 Partnership Agreement, through the European Regional Development Fund (ERDF) 83 | -------------------------------------------------------------------------------- /tssearch/search/search_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.signal import find_peaks 3 | 4 | 5 | def elastic_search(dict_distances, query, sequence, tq=None, ts=None, weight=None): 6 | """ 7 | Query search for elastic measures 8 | 9 | Parameters 10 | ---------- 11 | dict_distances: dict 12 | Configuration file with distances 13 | query: nd-array 14 | Query time series. 15 | sequence: nd-array 16 | Sequence time series. 17 | tq: nd-array 18 | Time stamp time series query. 19 | ts: nd-array 20 | Time stamp time series sequence. 21 | weight: nd-array (Default: None) 22 | query weight values 23 | 24 | Returns 25 | ------- 26 | distance: nd-array 27 | distance value between query and sequence 28 | ac: nd-array 29 | accumulated cost matrix 30 | """ 31 | 32 | exec("from tssearch import *") 33 | 34 | # distance function 35 | func_total = dict_distances["function"] 36 | 37 | # Check for parameters 38 | parameters_total = {} 39 | if dict_distances["parameters"] != "": 40 | parameters_total = dict_distances["parameters"] 41 | parameters_total["report"] = "search" 42 | 43 | if "dtw_type" in parameters_total: 44 | if parameters_total["dtw_type"] == "dtw": 45 | parameters_total["dtw_type"] = "sub-dtw" 46 | 47 | if "time" in parameters_total: 48 | parameters_total_copy = parameters_total.copy() 49 | del parameters_total_copy["time"] 50 | distances, ac = locals()[func_total](query, sequence, tq, ts, **parameters_total_copy) 51 | else: 52 | distances, ac = locals()[func_total](query, sequence, **parameters_total) 53 | 54 | return distances, ac 55 | 56 | 57 | def lockstep_search(dict_distances, query, sequence, weight): 58 | """ 59 | Query search for lockstep measures 60 | 61 | Parameters 62 | ---------- 63 | dict_distances: dict 64 | Configuration file with distances 65 | query: nd-array 66 | Query time series. 67 | sequence: nd-array 68 | Sequence time series. 
69 | weight: nd-array (Default: None) 70 | query weight values 71 | 72 | Returns 73 | ------- 74 | res: nd-array 75 | distance value between query and sequence 76 | """ 77 | 78 | exec("from tssearch import *") 79 | 80 | # distance function 81 | func_total = dict_distances["function"] 82 | 83 | # Check for parameters 84 | parameters_total = {} 85 | if dict_distances["parameters"] != "": 86 | parameters_total = dict_distances["parameters"] 87 | 88 | lw = len(query) 89 | res = np.zeros(len(sequence) - lw, "d") 90 | for i in range(len(sequence) - lw): 91 | seq_window = sequence[i : i + lw] 92 | 93 | eval_result = locals()[func_total](seq_window, query, weight, **parameters_total) 94 | 95 | res[i] = eval_result / lw # default normalization 96 | 97 | return res 98 | 99 | 100 | def start_sequences_index(distance, output=("number", 1), overlap=1.0): 101 | """ 102 | Method to retrieve the k-best occurrences from a given vector distance 103 | 104 | Parameters 105 | ---------- 106 | distance: nd-array 107 | distance values 108 | output: tuple 109 | number of occurrences 110 | overlap: float 111 | minimum distance between occurrences 112 | 113 | Returns 114 | ------- 115 | id_s: nd-array 116 | indexes of k-best occurrences 117 | """ 118 | 119 | # pks - min 120 | pks, _ = find_peaks(-distance, distance=overlap) # TODO if necessary add first and last sequence 121 | pks_val = distance[pks] 122 | 123 | if output[0] == "number": 124 | num_events = output[1] 125 | pks_val_sort = np.argsort(pks_val) 126 | id_s = pks[pks_val_sort[:num_events]] 127 | elif output[0] == "percentile": 128 | perct = output[1] 129 | perct_val = np.percentile(distance, 100 - perct) 130 | pks_perct = np.where(pks_val < perct_val)[0] 131 | id_s = pks[pks_perct] 132 | elif output[0] == "threshold": 133 | thres = output[1] 134 | pks_thres = np.where(pks_val < thres)[0] 135 | id_s = pks[pks_thres] 136 | else: 137 | id_s = pks[np.argmin(pks_val)] 138 | 139 | return id_s 140 | -------------------------------------------------------------------------------- /tssearch/distances/compute_distance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from tssearch.search.segmentation import time_series_segmentation 4 | 5 | 6 | def time_series_distance(dict_distances, x, y, tx=None, ty=None): 7 | """ 8 | 9 | Parameters 10 | ---------- 11 | dict_distances: dict 12 | Dictionary of distances parameters. 13 | x: nd-array 14 | Time series x (query). 15 | y: nd-array 16 | Time series y. 17 | tx: nd-array 18 | Time stamp time series x. 19 | ty: nd-array 20 | Time stamp time series y. 21 | 22 | Returns 23 | ------- 24 | distances: pandas DataFrame 25 | Distances values. 
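    Example
    -------
    A minimal usage sketch (illustrative only: it assumes a configuration dict in the
    format produced by ``tssearch.get_distance_dict`` and that "Euclidean Distance" is
    one of the available distance names)::

        import numpy as np
        import tssearch

        cfg = tssearch.get_distance_dict(["Euclidean Distance"])
        dists = time_series_distance(cfg, np.sin(np.arange(10)), np.cos(np.arange(10)))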
26 | 27 | """ 28 | 29 | exec("from tssearch import *") 30 | 31 | distance_results = [] 32 | distance_names = [] 33 | 34 | multivariate = True if len(np.shape(x)) > 1 else False 35 | 36 | for d_type in dict_distances: 37 | for dist in dict_distances[d_type]: 38 | 39 | # Only returns used functions 40 | if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes": 41 | # remove unidimensional distances 42 | if multivariate and dict_distances[d_type][dist]["multivariate"] == "no": 43 | continue 44 | 45 | func_total = dict_distances[d_type][dist]["function"] 46 | 47 | # Check for parameters 48 | parameters_total = {} 49 | if dict_distances[d_type][dist]["parameters"] != "": 50 | parameters_total = dict_distances[d_type][dist]["parameters"] 51 | 52 | if "time" in parameters_total: 53 | parameters_total_copy = parameters_total.copy() 54 | del parameters_total_copy["time"] 55 | eval_result = locals()[func_total](x, y, tx, ty, **parameters_total_copy) 56 | else: 57 | eval_result = locals()[func_total](x, y, **parameters_total) 58 | 59 | distance_results += [eval_result] 60 | distance_names += [dist] 61 | 62 | distances = pd.DataFrame(data=np.array(distance_results), index=np.array(distance_names), columns=["Distance"]) 63 | 64 | return distances 65 | 66 | 67 | def time_series_distance_windows(dict_distances, x, y, tx=None, ty=None, segmentation=None): 68 | """ 69 | 70 | Parameters 71 | ---------- 72 | dict_distances: dict 73 | Dictionary of distances parameters. 74 | x: nd-array 75 | Time series x (query). 76 | y: nd-array 77 | Time series y (windows). 78 | tx: nd-array 79 | Time stamp time series x. 80 | ty: nd-array 81 | Time stamp time series y (windows). 82 | segmentation: dict 83 | Dictionary of distances parameters. 84 | 85 | Returns 86 | ------- 87 | dist_windows: pandas DataFrame 88 | Distances values per window. 
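    Example
    -------
    A minimal usage sketch (illustrative only: the distance name "Euclidean Distance" is
    an assumption, and ``y`` is passed directly as a list of pre-segmented windows, so no
    ``segmentation`` dict is required)::

        import numpy as np
        import tssearch

        cfg = tssearch.get_distance_dict(["Euclidean Distance"])
        query = np.sin(np.arange(10))
        windows = [np.cos(np.arange(10)), np.sin(np.arange(10)) + 0.1]
        df = time_series_distance_windows(cfg, query, windows)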
89 |
90 |     """
91 |
92 |     if segmentation is not None:
93 |         results = time_series_segmentation(segmentation, x, y, tx, ty)
94 |         func_name = list(segmentation[list(dict_distances.keys())[0]].keys())[0]
95 |
96 |         ts_w = None if ty is None else []
97 |         windows = []
98 |         for i in range(len(results[func_name]) - 1):
99 |             if ty is not None:
100 |                 ts_w += [ty[results[func_name][i] : results[func_name][i + 1]]]
101 |             windows += [y[results[func_name][i] : results[func_name][i + 1]]]
102 |     else:
103 |         windows = y
104 |         ts_w = ty
105 |
106 |     multivariate = True if len(np.shape(x)) > 1 else False
107 |
108 |     exec("from tssearch import *")
109 |
110 |     dist_windows = pd.DataFrame()
111 |     for d_type in dict_distances:
112 |         for dist in dict_distances[d_type]:
113 |
114 |             # Only returns used functions
115 |             if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes":
116 |
117 |                 if multivariate and dict_distances[d_type][dist]["multivariate"] == "no":
118 |                     continue
119 |
120 |                 func_total = dict_distances[d_type][dist]["function"]
121 |
122 |                 # Check for parameters
123 |                 parameters_total = {}
124 |                 if dict_distances[d_type][dist]["parameters"] != "":
125 |                     parameters_total = dict_distances[d_type][dist]["parameters"]
126 |
127 |                 distance_results = []
128 |                 if "time" in parameters_total:
129 |                     parameters_total_copy = parameters_total.copy()
130 |                     del parameters_total_copy["time"]
131 |                     for ty_window, window in zip(ts_w, windows):
132 |                         eval_result = locals()[func_total](x, window, tx, ty_window, **parameters_total_copy)
133 |                         distance_results += [eval_result]
134 |                 else:
135 |                     for window in windows:
136 |                         eval_result = locals()[func_total](x, window, **parameters_total)
137 |                         distance_results += [eval_result]
138 |
139 |                 dist_windows[dist] = distance_results
140 |
141 |     return dist_windows
142 |
--------------------------------------------------------------------------------
/tssearch/utils/visualisation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 |
5 | from matplotlib.collections import LineCollection
6 |
7 |
8 | # Visualisation
9 | def plot_alignment(ref_signal, estimated_signal, path, **kwargs):
10 |     """
11 |     This function plots the resulting alignment of two sequences given the path
12 |     calculated by the Dynamic Time Warping algorithm.
13 |
14 |     :param ref_signal: (array-like)
15 |         The reference sequence.
16 |     :param estimated_signal: (array-like)
17 |         The estimated sequence.
18 |     :param path: (array-like)
19 |         A 2D array containing the path resulting from the algorithm
20 |     :param \**kwargs:
21 |         See below:
22 |
23 |         * *offset* (``double``) --
24 |             The offset used to move the reference signal to an upper position for
25 |             visualization purposes.
26 |             (default: ``2``)
27 |
28 |         * *linewidths* (``list``) --
29 |             A list containing the linewidth for the reference, estimated and connection
30 |             plots, respectively.
31 |             (default: ``[3, 3, 0.5]``)
32 |
33 |         * *step* (``int``) --
34 |             The subsampling step used when drawing the connection lines between the signals.
35 |             (default: ``2``)
36 |
37 |         * *colors* (``list``) --
38 |             A list containing the colors for the reference, estimated and connection
39 |             plots, respectively.
40 |             (default: ``[sns.color_palette()[0], sns.color_palette()[1], 'k']``)
41 |
42 |         * *label* (``list``) --
43 |             A list containing the labels for the reference and estimated signals.
44 |             (default: ``['Reference', 'Estimated']``)
45 |     """
46 |
47 |     step = kwargs.get("step", 2)
48 |     hoffset = kwargs.get("hoffset", 0)
49 |     voffset = kwargs.get("offset", 2) * np.max(ref_signal)
50 |     linewidths = kwargs.get("linewidths", [3, 3, 0.5])
51 |     colors = kwargs.get("colors", [sns.color_palette()[0], sns.color_palette()[1], "k"])
52 |     label = kwargs.get("label", ["Reference", "Estimated"])
53 |
54 |     # Copy to prevent unexpected changes in the reference signal, then
55 |     # set a vertical offset for visualization
56 |     copy_ref = np.copy(ref_signal) + voffset
57 |     xref = np.arange(len(copy_ref)) + hoffset
58 |
59 |     # Actual plot occurs here
60 |     # Get current axis
61 |     ax = plt.gca()
62 |
63 |     # Create secondary axis to the right, that counteracts the offset
64 |     ax2 = ax.secondary_yaxis('right', functions=(lambda x: x - voffset, lambda x: x + voffset))
65 |
66 |     # plot offset/reference signal and adjust tick colors
67 |     ax.plot(xref, copy_ref, color=sns.color_palette()[0], lw=linewidths[0], label=label[0])
68 |     plt.setp(ax2.get_yticklabels(), color=sns.color_palette()[0])
69 |
70 |     # plot non-offset/estimated signal and adjust tick colors
71 |     ax.plot(estimated_signal, color=sns.color_palette()[1], lw=linewidths[1], label=label[1])
72 |     plt.setp(ax.get_yticklabels(), color=sns.color_palette()[1])
73 |
74 |     plt.legend(fontsize=17)
75 |
76 |     [
77 |         plt.plot(
78 |             [[path[0][i] + hoffset], [path[1][i]]],
79 |             [copy_ref[path[0][i]], estimated_signal[path[1][i]]],
80 |             color=colors[2],
81 |             lw=linewidths[2],
82 |         )
83 |         for i in range(len(path[0]))[::step]
84 |     ]
85 |
86 |
87 | def plot_costmatrix(matrix, path):
88 |     """
89 |     This function overlays the optimal warping path on the cost matrix
90 |     :param matrix: (ndarray-like)
91 |         The cost matrix (local cost or accumulated)
92 |     :param path: (ndarray-like)
93 |         The optimal warping path
94 |     :return: (void)
95 |         Plots the optimal warping path with an overlay of the cost matrix.
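    Usage sketch (illustrative; it assumes the ``dtw`` function from
    ``tssearch.distances.elastic_distances``, whose ``report`` keyword can return
    the accumulated cost matrix and the optimal path):

        ac = dtw(query, sequence, report="cost_matrix")
        path = dtw(query, sequence, report="path")
        plot_costmatrix(ac, path)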
96 | """ 97 | plt.imshow(matrix.T, cmap="viridis", origin="lower", interpolation="None") 98 | plt.colorbar() 99 | plt.plot(path[0], path[1], "w.-") 100 | plt.xlim((-0.5, matrix.shape[0] - 0.5)) 101 | plt.ylim((-0.5, matrix.shape[1] - 0.5)) 102 | 103 | 104 | def plot_search_distance_result(res, sequence, ts=None, cmap_name="viridis"): 105 | 106 | if ts is None: 107 | ts = np.arange(len(sequence)) 108 | # set distance scale 109 | cmap = plt.cm.get_cmap(cmap_name) 110 | colors = cmap(np.arange(cmap.N)) 111 | 112 | if len(np.shape(sequence)) > 1: 113 | sequence_shape = np.shape(sequence)[1] 114 | else: 115 | sequence_shape = 1 116 | 117 | all_axs = [] 118 | for k in res.keys(): 119 | max_dist = np.max(res[k]["path_dist"]) 120 | min_dist = np.min(res[k]["path_dist"]) 121 | if max_dist == min_dist: 122 | max_dist += 1 123 | min_dist -= 1 124 | delta_dist = max_dist - min_dist 125 | 126 | fig, axs = plt.subplots(sequence_shape + 1, 1, figsize=(15, 5)) 127 | axs[0].set_title(k) 128 | for i in range(sequence_shape): 129 | plot_seq = sequence if sequence_shape == 1 else sequence[:, i] 130 | axs[i].plot(ts, plot_seq, "lightgray") 131 | for s, e, d in zip(res[k]["start"], res[k]["end"], res[k]["path_dist"]): 132 | d_idx = int((d - min_dist) * cmap.N / delta_dist) - 1 133 | axs[i].plot(ts[np.arange(s, e)], plot_seq[s:e], c=colors[d_idx]) 134 | if i < sequence_shape - 2: 135 | axs[i].sharex(axs[i + 1]) 136 | axs[i].set_xticks([]) 137 | # [axs[i-1].sharex(axs[i]) for i in range(1, sequence_shape)] 138 | axs[sequence_shape].set_xlabel("Distance") 139 | axs[sequence_shape].imshow([colors], extent=[min_dist, max_dist, 0, 0.02 * delta_dist]) 140 | axs[sequence_shape].set_yticks([]) 141 | 142 | all_axs += [axs] 143 | 144 | return all_axs 145 | 146 | 147 | def plot_weight_query(x, query, weight, cmap="viridis", axs=None, fig=None): 148 | 149 | points = np.array([x, query]).T.reshape(-1, 1, 2) 150 | segments = np.concatenate([points[:-1], points[1:]], axis=1) 151 | 152 | if axs is None: 153 | fig = plt.figure() 154 | axs = plt.subplot() 155 | 156 | lc = LineCollection(segments, cmap=cmap, norm=plt.Normalize(0, weight.max())) 157 | lc.set_array(weight) 158 | lc.set_linewidth(2) 159 | line = axs.add_collection(lc) 160 | 161 | cbar = fig.colorbar(line, ax=axs) 162 | cbar.set_label("weight") 163 | 164 | axs.set_xlim(x.min(), x.max()) 165 | axs.set_ylim(query.min() - 1, query.max() + 1) 166 | plt.show() 167 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # TSSEARCH documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Dez 28 18:37:41 2021. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
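# For example (illustrative snippet; the relative path is hypothetical):
#
# import sys
# sys.path.insert(0, os.path.abspath('..'))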
18 | 19 | import os 20 | 21 | if os.environ.get('READTHEDOCS', None) == 'True': 22 | import inspect 23 | from sphinx import apidoc 24 | 25 | __location__ = os.path.join(os.getcwd(), os.path.dirname(inspect.getfile(inspect.currentframe()))) 26 | 27 | output_dir = os.path.join(__location__, "../docs/descriptions/modules") 28 | module_dir = os.path.join(__location__, "../tssearch") 29 | cmd_line_template = "sphinx-apidoc -f -o {outputdir} {moduledir}" 30 | cmd_line = cmd_line_template.format(outputdir=output_dir, moduledir=module_dir) 31 | apidoc.main(cmd_line.split(" ")) 32 | 33 | # -- General configuration ------------------------------------------------ 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | # 37 | # needs_sphinx = '1.0' 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 42 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 43 | 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', 'sphinx.ext.coverage', 44 | 'sphinx.ext.doctest', 'sphinx.ext.ifconfig', 'sphinx.ext.imgmath', 45 | 'sphinx.ext.napoleon' 46 | ] 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = '.rst' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # General information about the project. 61 | import datetime 62 | now = datetime.datetime.today() 63 | project = u'TSSEARCH' 64 | copyright = u'2022, Fraunhofer AICOS' 65 | author = u'Fraunhofer AICOS' 66 | 67 | # The version info for the project you're documenting, acts as replacement for 68 | # |version| and |release|, also used in various other places throughout the 69 | # built documents. 70 | # 71 | # The short X.Y version. 72 | version = u'0.1.3' 73 | # The full version, including alpha/beta/rc tags. 74 | release = u'0.1.3' 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = None 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | # This patterns also effect to html_static_path and html_extra_path 86 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 87 | 88 | # If true, keep warnings as "system message" paragraphs in the built documents. 89 | keep_warnings = True 90 | 91 | # Boolean indicating whether to scan all found documents for autosummary 92 | # directives, and to generate stub pages for each 93 | autosummary_generate = True 94 | 95 | # The name of the Pygments (syntax highlighting) style to use. 96 | pygments_style = 'default' 97 | 98 | # -- Options for HTML output ---------------------------------------------- 99 | 100 | # The theme to use for HTML and HTML Help pages. See the documentation for 101 | # a list of builtin themes. 102 | # 103 | html_theme = 'sphinx_rtd_theme' 104 | 105 | # Theme options are theme-specific and customize the look and feel of a theme 106 | # further. For a list of options available for each theme, see the 107 | # documentation. 
108 | # 109 | # html_theme_options = {} 110 | 111 | # Add any paths that contain custom static files (such as style sheets) here, 112 | # relative to this directory. They are copied after the builtin static files, 113 | # so a file named "default.css" will overwrite the builtin "default.css". 114 | html_static_path = [] 115 | 116 | # -- Options for HTMLHelp output ------------------------------------------ 117 | 118 | # Output file base name for HTML help builder. 119 | htmlhelp_basename = 'TSSEARCHdoc' 120 | 121 | 122 | # -- Options for LaTeX output --------------------------------------------- 123 | 124 | latex_elements = { 125 | # The paper size ('letterpaper' or 'a4paper'). 126 | # 127 | # 'papersize': 'letterpaper', 128 | 129 | # The font size ('10pt', '11pt' or '12pt'). 130 | # 131 | # 'pointsize': '10pt', 132 | 133 | # Additional stuff for the LaTeX preamble. 134 | # 135 | # 'preamble': '', 136 | 137 | # Latex figure (float) alignment 138 | # 139 | # 'figure_align': 'htbp', 140 | } 141 | 142 | # Grouping the document tree into LaTeX files. List of tuples 143 | # (source start file, target name, title, 144 | # author, documentclass [howto, manual, or own class]). 145 | latex_documents = [ 146 | (master_doc, 'TSSEARCH.tex', u'TSSEARCH Documentation', 147 | u'Fraunhofer AICOS', 'manual'), 148 | ] 149 | 150 | 151 | # -- Options for manual page output --------------------------------------- 152 | 153 | # One entry per manual page. List of tuples 154 | # (source start file, name, description, authors, manual section). 155 | man_pages = [ 156 | (master_doc, 'tssearch', u'TSSEARCH Documentation', 157 | [author], 1) 158 | ] 159 | 160 | 161 | # -- Options for Texinfo output ------------------------------------------- 162 | 163 | # Grouping the document tree into Texinfo files. List of tuples 164 | # (source start file, target name, title, author, 165 | # dir menu entry, description, category) 166 | texinfo_documents = [ 167 | (master_doc, 'TSSEARCH', u'TSSEARCH Documentation', 168 | author, 'TSSEARCH', 'One line description of project.', 169 | 'Miscellaneous'), 170 | ] 171 | 172 | 173 | # -- Options for Epub output ---------------------------------------------- 174 | 175 | # Bibliographic Dublin Core info. 176 | epub_title = project 177 | epub_author = author 178 | epub_publisher = author 179 | epub_copyright = copyright 180 | 181 | # The unique identifier of the text. This can be a ISBN number 182 | # or the project homepage. 183 | # 184 | # epub_identifier = '' 185 | 186 | # A unique identification for the text. 187 | # 188 | # epub_uid = '' 189 | 190 | # A list of files that should not be packed into the epub file. 191 | epub_exclude_files = ['search.html'] 192 | 193 | 194 | # Example configuration for intersphinx: refer to the Python standard library. 195 | intersphinx_mapping = {'https://docs.python.org/': None} 196 | -------------------------------------------------------------------------------- /docs/descriptions/elastic_distances.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Elastic Distances 3 | ================= 4 | 5 | Distance measures that perform a non-linear mapping to align the time series and allow flexible comparison of one-to-many or one-to-none points (e.g., Dynamic Time Warping, Longest Common Subsequence). These measures produce elastic adjustment to compensate for potential localized misalignment. 
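As a quick illustration of this flexibility, the sketch below compares a lockstep and an elastic measure on two identical pulses shifted in time (illustrative only: it assumes that "Euclidean Distance" and "Dynamic Time Warping" are available distance names in the configuration returned by ``tssearch.get_distance_dict``).

.. code:: python

    import numpy as np
    import tssearch
    from tssearch.distances.compute_distance import time_series_distance

    # two identical pulses, one of them shifted in time
    ts1 = np.concatenate([np.zeros(20), np.hanning(40), np.zeros(20)])
    ts2 = np.concatenate([np.zeros(30), np.hanning(40), np.zeros(10)])

    cfg = tssearch.get_distance_dict(["Euclidean Distance", "Dynamic Time Warping"])

    # the lockstep distance is inflated by the shift, while the elastic
    # alignment of DTW largely compensates for it
    print(time_series_distance(cfg, ts1, ts2))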
6 |
7 | **************************
8 | Dynamic Time Warping (DTW)
9 | **************************
10 |
11 | The DTW algorithm computes the stretch of the time axis that optimally maps one time series onto another. It measures the remaining cumulative distance after the alignment and the pairwise correspondence between each sample.
12 |
13 | .. code:: python
14 |
15 |     import numpy as np
16 |     import matplotlib.pyplot as plt
17 |     from tssearch.search.query_search import time_series_search
18 |     from tssearch.utils.visualisation import plot_alignment
19 |
20 |     # generates signals
21 |     freq = 2
22 |     amp = 2
23 |     time = np.linspace(0, 2, 100)
24 |     ts1 = np.concatenate([amp * np.sin(np.pi * time), np.zeros(100), amp * np.sin(np.pi * time), np.zeros(10)])
25 |     ts2 = np.concatenate([np.zeros(10), amp * np.sin(np.pi * time), np.zeros(150), amp * np.sin(np.pi * time), np.zeros(5)])
26 |
27 |     dict_distances = {
28 |         "elastic": {"Dynamic Time Warping": {
29 |             "multivariate": "yes",
30 |             "description": "",
31 |             "function": "dtw",
32 |             "parameters": {"dtw_type": "dtw", "alpha": 1},
33 |             "use": "yes"}
34 |         }
35 |     }
36 |
37 |     result = time_series_search(dict_distances, ts1, ts2, output=("number", 1))
38 |
39 |     plt.figure()
40 |     plt.title("Dynamic Time Warping")
41 |     plot_alignment(ts1, ts2, result["Dynamic Time Warping"]["path"][0])
42 |     plt.legend(fontsize=17, loc="lower right")
43 |
44 |
45 | .. image:: https://i.postimg.cc/sgQKCBfj/dtw-search.png
46 |     :alt: An example of DTW.
47 |
48 | *********************************
49 | Longest Common Subsequence (LCSS)
50 | *********************************
51 |
52 | The Longest Common Subsequence (LCSS) measures the similarity between two time series whose lengths might be different. Since it is formulated based on edit distances, gaps or unmatched regions are permitted, and they are penalized with a value proportional to their length. It can be useful to identify similarities between time series whose lengths differ greatly or that contain noise [1]_.
53 |
54 | In the example below, we compute the LCSS alignment between two time series, one of them with added noise.
55 |
56 | .. code:: python
57 |
58 |     import numpy as np
59 |     import matplotlib.pyplot as plt
60 |     from tssearch.search.query_search import time_series_search
61 |     from tssearch.utils.visualisation import plot_alignment
62 |
63 |     ts1 = np.sin(np.arange(0, 4*np.pi, 0.1))
64 |     noise = np.random.normal(0, 0.1, ts1.shape)
65 |     ts2 = 1 + np.sin(np.arange(0, 4*np.pi, 0.1) + 2) + noise
66 |
67 |     ts1 = ts1.reshape(-1, 1)
68 |     ts2 = ts2.reshape(-1, 1)
69 |
70 |     dict_distances = {
71 |         "elastic": {"Longest Common Subsequence": {
72 |             "multivariate": "yes",
73 |             "description": "",
74 |             "function": "lcss",
75 |             "parameters": {"eps": 1, "report": "distance"},
76 |             "use": "yes"}
77 |         }
78 |     }
79 |
80 |     result = time_series_search(dict_distances, ts1, ts2, output=("number", 1))
81 |
82 |     plt.figure()
83 |     plt.title("Longest Common Subsequence")
84 |     plot_alignment(ts1, ts2, result["Longest Common Subsequence"]["path"][0])
85 |
86 |
87 | .. image:: https://i.postimg.cc/43Rx3ZBV/lcss-search.png
88 |     :alt: An example of LCSS.
89 |
90 |
91 | ******************************
92 | Time Warp Edit Distance (TWED)
93 | ******************************
94 |
95 | Time Warp Edit Distance (TWED) uses the difference between sample indexes/timestamps to linearly penalize matches between samples whose indexes/timestamps are far apart, and to favor matches between samples whose indexes/timestamps are close.
In contrast to other elastic measures, TWED provides a time-shift tolerance controlled by the stiffness parameter of the measure. Moreover, it involves a second parameter defining a constant penalty for insert or delete operations. If stiffness > 0, TWED is a distance (i.e., verifies the triangle inequality) in both space and time [2]_.
96 |
97 | TWED has been used in time series classification, with classification performance assessed while varying its input parameters [2]_, [3]_. In the example below, we calculate TWED between two time series while varying its parameters.
98 |
99 | .. code:: python
100 |
101 |     import numpy as np
102 |     import pandas as pd
103 |     import seaborn as sns
104 |     import matplotlib.pyplot as plt
105 |     from tssearch.distances.compute_distance import time_series_distance
106 |
107 |     # generates signals
108 |     freq = 2
109 |     amp = 2
110 |     time = np.linspace(0, 2, 1000)
111 |     ts1 = amp * np.sin(2 * np.pi * freq * time)
112 |     ts2 = amp * np.sin(6 * np.pi * freq * time)[::50]
113 |
114 |     # visualize original and downsampled sequence
115 |     plt.figure()
116 |     plt.plot(time, ts1, color=sns.color_palette("Greens")[2], label="Time series 1", lw=3.)
117 |     plt.plot(time[::50], ts2, color=sns.color_palette("Greens")[5], label="Time series 2", lw=3.)
118 |     plt.ylabel('Space')
119 |     plt.xlabel('Time')
120 |     plt.legend(fontsize=17, loc="lower right")
121 |
122 |     stiffness = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]
123 |     penalty = [0, .25, .5, .75, 1.0]
124 |
125 |     distance = list()
126 |     for s in stiffness:
127 |         for p in penalty:
128 |             # calculate distances
129 |             dict_distances = {
130 |                 "elastic": {"Time Warp Edit Distance": {"multivariate": "no",
131 |                                                         "description": "",
132 |                                                         "function": "twed",
133 |                                                         "parameters": {"nu": s, "lmbda": p, "p": 2, "time": "true"},
134 |                                                         "use": "yes"}}}
135 |
136 |             distance.append({'stiffness': s,
137 |                              'penalty': p,
138 |                              'distance': time_series_distance(dict_distances,
139 |                                                               ts1, ts2,
140 |                                                               time, time[::50]).values[0][0]})
141 |
142 |     df = pd.DataFrame(distance)
143 |     df_pivot = df.pivot(index="stiffness", columns="penalty", values="distance")
144 |
145 |     plt.figure()
146 |     sns.heatmap(df_pivot, annot=True, cbar_kws={'label': "TWED"}, cmap="viridis")
147 |
148 |
149 | .. image:: https://i.postimg.cc/tJp6nWkd/twed-time-series-originals.png
150 |     :alt: Two example series
151 |
152 | .. image:: https://i.postimg.cc/bryGw8Y3/twed-heatmap.png
153 |     :alt: Resulting TWED distances
154 |
155 |
156 | .. [1] M. Vlachos, G. Kollios and D. Gunopulos, "Discovering similar multidimensional trajectories," Proceedings 18th International Conference on Data Engineering, 2002, pp. 673-684, doi: 10.1109/ICDE.2002.994784.
157 |
158 | .. [2] P. Marteau, "Time Warp Edit Distance with Stiffness Adjustment for Time Series Matching," in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 31, no. 2, pp. 306-318, Feb. 2009, doi: 10.1109/TPAMI.2008.76.
159 |
160 | .. [3] Joan Serrà, Josep Ll. Arcos, An empirical evaluation of similarity measures for time series classification, Knowledge-Based Systems, Volume 67, 2014, Pages 305-314, ISSN 0950-7051, https://doi.org/10.1016/j.knosys.2014.04.035.
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/tssearch/distances/elastic_distances.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from tssearch.distances.elastic_utils import (
4 |     cost_matrix,
5 |     accumulated_cost_matrix,
6 |     acc_initialization,
7 |     lcss_accumulated_matrix,
8 |     lcss_path,
9 |     lcss_score,
10 |     traceback_adj,
11 |     backtracking,
12 | )
13 |
14 |
15 | def dtw(x, y, weight=None, **kwargs):
16 |     """Computes Dynamic Time Warping (DTW) of two time series.
17 |
18 |     Parameters
19 |     ----------
20 |     x: nd-array
21 |         Time series x (query).
22 |     y: nd-array
23 |         Time series y.
24 |     weight: nd-array (Default: None)
25 |         Query weight values used in the local cost computation.
26 |     \**kwargs:
27 |         See below:
28 |
29 |         * *alpha* (``float``) --
30 |             Weighting factor passed to the local cost matrix computation.
31 |             (default: ``1``)
32 |
33 |         * *dist_norm* (``bool``) --
34 |             If ``True`` the DTW distance is normalized by the query length.
35 |             (default: ``True``)
36 |
37 |         * *window* (``String``) --
38 |             Selects the global window constraints. Available options are ``None`` and ``sakoe-chiba``.
39 |             (default: ``None``)
40 |
41 |         * *factor* (``Float``) --
42 |             Selects the global constraint factor.
43 |             (default: ``min(xl, yl) * .50``)
44 |
45 |
46 |     Returns
47 |     -------
48 |     d: float
49 |         The DTW distance (the value returned depends on the ``report`` keyword).
50 |     ac: nd-array
51 |         The accumulated cost matrix.
52 |     path: nd-array
53 |         The optimal warping path between the two sequences.
54 |     """
55 |
56 |     xl, yl = len(x), len(y)
57 |
58 |     alpha = kwargs.get("alpha", 1)
59 |     do_dist_norm = kwargs.get("dist_norm", True)
60 |     window = kwargs.get("window", None)
61 |     factor = kwargs.get("factor", np.min((xl, yl)) * 0.50)
62 |     dtw_type = kwargs.get("dtw_type", "dtw")
63 |     tolerance = kwargs.get("tolerance", 0)
64 |     report = kwargs.get("report", "distance")
65 |
66 |     # cost matrix
67 |     c = cost_matrix(x, y, alpha, weight=weight)
68 |     # Acc cost matrix
69 |     ac = accumulated_cost_matrix(c, window=window, factor=factor, dtw_type=dtw_type, tolerance=tolerance)
70 |
71 |     # Distance
72 |     if report == "cost_matrix":
73 |         return ac
74 |     elif report == "search":
75 |         d = ac[-1, :]
76 |         return d, ac
77 |     elif report == "path":
78 |         path = traceback_adj(ac)
79 |         return path
80 |     else:  # report = "distance" default
81 |         d = ac[-1, -1] / xl if do_dist_norm else ac[-1, -1]
82 |         return d
83 |
84 |
85 | def lcss(x, y, eps=1, **kwargs):
86 |     """Computes the Longest Common Subsequence (LCSS) distance between two numeric time series.
87 |
88 |     Parameters
89 |     ----------
90 |     x: nd-array
91 |         Time series x (query).
92 |     y: nd-array
93 |         Time series y.
94 |     eps : float
95 |         Amplitude matching threshold.
96 |     \**kwargs:
97 |         See below:
98 |
99 |         * *window* (``String``) --
100 |             Selects the global window constraints. Available options are ``None`` and ``sakoe-chiba``.
101 |             (default: ``None``)
102 |
103 |     Returns
104 |     -------
105 |     d: float
106 |         The LCSS similarity score.
107 |     ac: nd-array
108 |         The similarity matrix.
109 |     path: nd-array
110 |         The optimal path between the two sequences.
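    Example
    -------
    Minimal illustrative sketch (two phase-shifted sine waves)::

        import numpy as np

        x = np.sin(np.arange(0, 6, 0.1))
        y = np.sin(np.arange(0, 6, 0.1) + 0.3)
        sim = lcss(x, y, eps=0.5)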
111 |     """
112 |
113 |     window = kwargs.get("window", None)
114 |     report = kwargs.get("report", "distance")
115 |
116 |     dim = len(np.shape(x))  # should raise an error if two time series with different dimensionality are given
117 |     if dim == 1:
118 |         x = x.reshape(-1, 1)
119 |         y = y.reshape(-1, 1)
120 |
121 |     ac = lcss_accumulated_matrix(x, y, eps=eps)
122 |     path = lcss_path(x, y, ac, eps=eps)
123 |     sim_score = lcss_score(ac)
124 |
125 |     if report == "cost_matrix":
126 |         return ac
127 |     elif report == "search":
128 |         return sim_score, ac
129 |     elif report == "path":
130 |         return path
131 |     else:
132 |         return sim_score
133 |
134 |
135 | def dlp(x, y, p=2):
136 |     """Computes Lp norm distance between two time series.
137 |
138 |     Parameters
139 |     ----------
140 |     x: nd-array
141 |         Time series x (query).
142 |     y: nd-array
143 |         Time series y.
144 |     p: int
145 |         Lp norm distance degree for local cost computation.
146 |
147 |     Returns
148 |     -------
149 |     The Lp distance.
150 |     """
151 |
152 |     cost = np.sum(np.power(np.abs(x - y), p))
153 |     return np.power(cost, 1 / p)
154 |
155 |
156 | def twed(x, y, tx, ty, nu=0.001, lmbda=1.0, p=2, report="distance"):
157 |     """Computes Time Warp Edit Distance (TWED) of two time series.
158 |
159 |     Reference :
160 |         Marteau, P.; F. (2009). "Time Warp Edit Distance with Stiffness Adjustment for Time Series Matching".
161 |         IEEE Transactions on Pattern Analysis and Machine Intelligence. 31 (2): 306–318. arXiv:cs/0703033
162 |         http://people.irisa.fr/Pierre-Francois.Marteau/
163 |
164 |     Parameters
165 |     ----------
166 |     x: nd-array
167 |         Time series x (query).
168 |     y: nd-array
169 |         Time series y.
170 |     tx: nd-array
171 |         Time stamp time series x.
172 |     ty: nd-array
173 |         Time stamp time series y.
174 |     nu: float
175 |         Stiffness parameter (nu >= 0).
176 |         nu = 0, TWED distance measure on amplitude.
177 |         nu > 0, TWED distance measure on amplitude x time.
178 |     lmbda: float
179 |         Penalty for deletion operation (lmbda >= 0).
180 |     p: int
181 |         Lp norm distance degree for local cost computation.
182 |     report: str
183 |         One of "distance", "cost_matrix", "search" or "path".
184 |
185 |     Returns
186 |     -------
187 |     d: float
188 |         The TWED distance.
189 |     ac: nd-array
190 |         The accumulated cost matrix.
191 |     path: nd-array
192 |         The optimal warping path between the two sequences.
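    Example
    -------
    Minimal illustrative sketch (both series share the same time stamps)::

        import numpy as np

        t = np.arange(100.0)
        d = twed(np.sin(t / 10), np.cos(t / 10), t, t, nu=0.001, lmbda=1.0)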
193 |     """
194 |
195 |     # Validate input arguments
196 |     if len(x) != len(tx):
197 |         print("The length of x is not equal to the length of tx")
198 |         return None, None
199 |
200 |     if len(y) != len(ty):
201 |         print("The length of y is not equal to the length of ty")
202 |         return None, None
203 |
204 |     if nu < 0:
205 |         print("nu is negative; it must be >= 0")
206 |         return None, None
207 |
208 |     # Dynamical programming
209 |     ac = acc_initialization(len(x), len(y), report)
210 |
211 |     # Add padding
212 |     query = np.array([0] + list(x))
213 |     tq = np.array([0] + list(tx))
214 |     sequence = np.array([0] + list(y))
215 |     ts = np.array([0] + list(ty))
216 |
217 |     n = len(query)
218 |     m = len(sequence)
219 |
220 |     # Compute minimal cost
221 |     for i in range(1, n):
222 |         for j in range(1, m):
223 |             # Calculate and save cost of various operations
224 |             C = np.ones((3, 1)) * np.inf
225 |             # Deletion in A (the query)
226 |             C[0] = ac[i - 1, j] + dlp(query[i - 1], query[i], p) + nu * (tq[i] - tq[i - 1]) + lmbda
227 |             # Deletion in B (the sequence)
228 |             C[1] = ac[i, j - 1] + dlp(sequence[j - 1], sequence[j], p) + nu * (ts[j] - ts[j - 1]) + lmbda
229 |             # Keep data points in both time series
230 |             C[2] = (
231 |                 ac[i - 1, j - 1]
232 |                 + dlp(query[i], sequence[j], p)
233 |                 + dlp(query[i - 1], sequence[j - 1], p)
234 |                 + nu * (abs(tq[i] - ts[j]) + abs(tq[i - 1] - ts[j - 1]))
235 |             )
236 |             # Choose the operation with the minimal cost and update the cost matrix
237 |             ac[i, j] = np.min(C)
238 |
239 |     if report == "cost_matrix":
240 |         return ac
241 |     elif report == "search":
242 |         d = ac[n - 1, :]
243 |         return d, ac
244 |     elif report == "path":
245 |         path = backtracking(ac)
246 |         return path
247 |     else:  # report == "distance" (default)
248 |         return ac[n - 1, m - 1]
--------------------------------------------------------------------------------
/tssearch/distances/lockstep_distances.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import stats
3 | from scipy.spatial import distance
4 | from tssearch.utils.preprocessing import interpolation
5 | from tssearch.distances.lockstep_utils import _lnorm_multidimensional, _lnorm_unidimensional
6 |
7 |
8 | def euclidean_distance(x, y, weight=None):
9 |     """Computes the Euclidean distance between two time series.
10 |
11 |     If the time series do not have the same length, an interpolation is performed.
12 |
13 |     Parameters
14 |     ----------
15 |     x : nd-array
16 |         Time series x.
17 |     y : nd-array
18 |         Time series y.
19 |     weight: nd-array (Default: None)
20 |         query weight values.
21 |
22 |     Returns
23 |     -------
24 |     float
25 |         Euclidean distance value.
26 |
27 |     """
28 |     p = 2
29 |
30 |     if len(x) != len(y):
31 |         x, y = interpolation(x, y)
32 |
33 |     if weight is None:
34 |         ed = np.linalg.norm(x - y, p)
35 |     else:
36 |         if len(np.shape(x)) > 1:
37 |             distance = _lnorm_multidimensional(x, y, weight, p=p)
38 |         else:
39 |             distance = _lnorm_unidimensional(x, y, weight, p=p)
40 |         ed = np.sum(distance)
41 |     return ed
42 |
43 |
44 | def minkowski_distance(x, y, weight=None, p=3):
45 |     """Computes the Minkowski distance between two time series.
46 |
47 |     If the time series do not have the same length, an interpolation is performed.
48 |
49 |     Parameters
50 |     ----------
51 |     x : nd-array
52 |         Time series x.
53 |     y : nd-array
54 |         Time series y.
55 |     weight: nd-array (Default: None)
56 |         query weight values.
57 |     p: int
58 |         Lp norm distance degree.
59 |
60 |     Returns
61 |     -------
62 |     float
63 |         Minkowski distance value.
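    Example
    -------
    Minimal illustrative sketch::

        import numpy as np

        d = minkowski_distance(np.ones(5), np.zeros(5), p=3)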
64 | 65 | """ 66 | if len(x) != len(y): 67 | x, y = interpolation(x, y) 68 | 69 | if weight is None and (p < 3 or p == np.inf): 70 | distance = np.linalg.norm(x - y, p) 71 | else: 72 | if weight is None: 73 | weight = np.ones_like(x) 74 | if len(np.shape(x)) > 1: 75 | distance = _lnorm_multidimensional(x, y, weight, p=p) 76 | else: 77 | distance = _lnorm_unidimensional(x, y, weight, p=p) 78 | distance = np.sum(distance) 79 | 80 | return distance 81 | 82 | 83 | def manhattan_distance(x, y, weight=None): 84 | """Computes the Manhattan distance between two time series. 85 | 86 | If the time series do not have the same length, an interpolation is performed. 87 | 88 | Parameters 89 | ---------- 90 | x : nd-array 91 | Time series x. 92 | y : nd-array 93 | Time series y. 94 | weight: nd-array (Default: None) 95 | query weight values. 96 | 97 | Returns 98 | ------- 99 | float 100 | Manhattan distance value. 101 | 102 | """ 103 | p = 1 104 | 105 | if len(x) != len(y): 106 | x, y = interpolation(x, y) 107 | 108 | if weight is None: 109 | distance = np.linalg.norm(x - y, p) 110 | else: 111 | if len(np.shape(x)) > 1: 112 | distance = _lnorm_multidimensional(x, y, weight, p=p) 113 | else: 114 | distance = _lnorm_unidimensional(x, y, weight, p=p) 115 | distance = np.sum(distance) 116 | 117 | return distance 118 | 119 | 120 | def chebyshev_distance(x, y, weight=None): 121 | """Computes the Chebyshev distance between two time series. 122 | 123 | If the time series do not have the same length, an interpolation is performed. 124 | 125 | Parameters 126 | ---------- 127 | x : nd-array 128 | Time series x. 129 | y : nd-array 130 | Time series y. 131 | weight: nd-array (Default: None) 132 | query weight values. 133 | 134 | Returns 135 | ------- 136 | float 137 | Chebyshev distance value. 138 | 139 | """ 140 | p = np.inf 141 | 142 | if len(x) != len(y): 143 | x, y = interpolation(x, y) 144 | 145 | if weight is None: 146 | d = np.linalg.norm(x - y, p) 147 | else: 148 | if len(np.shape(x)) > 1: 149 | distance = _lnorm_multidimensional(x, y, weight, p=p) 150 | else: 151 | distance = _lnorm_unidimensional(x, y, weight, p=p) 152 | d = np.sum(distance) 153 | return d 154 | 155 | 156 | def correlation_distance(x, y, weight=None): 157 | """Computes the correlation distance between two time series. 158 | 159 | If the time series do not have the same length, an interpolation is performed. 160 | 161 | Parameters 162 | ---------- 163 | x : nd-array 164 | Time series x. 165 | y : nd-array 166 | Time series y. 167 | weight: nd-array (Default: None) 168 | query weight values. 169 | 170 | Returns 171 | ------- 172 | float 173 | Correlation distance value. 174 | 175 | """ 176 | if len(x) != len(y): 177 | x, y = interpolation(x, y) 178 | 179 | correlation_d = distance.correlation(x, y, weight) 180 | 181 | return correlation_d 182 | 183 | 184 | def pearson_correlation(x, y, beta=None): 185 | """Computes the Pearson correlation between two time series. 186 | 187 | If the time series do not have the same length, an interpolation is performed. 188 | 189 | Parameters 190 | ---------- 191 | x : nd-array 192 | Time series x. 193 | y : nd-array 194 | Time series y. 195 | beta: float 196 | Beta coefficient. 197 | 198 | Returns 199 | ------- 200 | float 201 | Pearson correlation value. 
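    Note that the value returned is a correlation-based distance rather than the raw
    Pearson coefficient r: it equals 2 * (1 - r) when beta is None, and
    ((1 - r) / (1 + r)) ** beta otherwise.

    Example
    -------
    Minimal illustrative sketch::

        import numpy as np

        d = pearson_correlation(np.arange(10.0), 2 * np.arange(10.0))  # r = 1, so d = 0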
202 |
203 |     """
204 |     if len(x) != len(y):
205 |         x, y = interpolation(x, y)
206 |
207 |     r, p = stats.pearsonr(x, y)
208 |
209 |     if beta is None:
210 |         d = 2 * (1 - r)
211 |     else:
212 |         d = ((1 - r) / (1 + r)) ** beta
213 |     return d
214 |
215 |
216 | def short_time_series_distance(x, y, tx=None, ty=None):
217 |     """Computes the short time series distance (STS) between two time series.
218 |
219 |     Reference: Möller-Levet, C. S., Klawonn, F., Cho, K., and Wolkenhauer, O. (2003). "Fuzzy Clustering of Short Time-Series and Unevenly Distributed Sampling Points."
220 |
221 |     Parameters
222 |     ----------
223 |     x : nd-array
224 |         Time series x.
225 |     y : nd-array
226 |         Time series y.
227 |     tx : nd-array
228 |         Sampling index of time series x.
229 |     ty : nd-array
230 |         Sampling index of time series y.
231 |
232 |     Returns
233 |     -------
234 |     float
235 |         Short time series distance value.
236 |
237 |     """
238 |     if len(x) != len(y):
239 |         x, y = interpolation(x, y)
240 |
241 |     if tx is None:
242 |         tx = np.arange(len(x))
243 |     if ty is None:
244 |         ty = np.arange(len(y))
245 |     # the slope of each series is computed with respect to its own sampling index
246 |     sts = np.sqrt(np.sum((np.diff(y) / np.diff(ty) - np.diff(x) / np.diff(tx)) ** 2))
247 |
248 |     return sts
249 |
250 |
251 | def braycurtis_distance(x, y, weight=None):
252 |     """Computes the Bray-Curtis distance between two time series.
253 |
254 |     If the time series do not have the same length, an interpolation is performed.
255 |
256 |     Parameters
257 |     ----------
258 |     x : nd-array
259 |         Time series x.
260 |     y : nd-array
261 |         Time series y.
262 |     weight: nd-array (Default: None)
263 |         query weight values.
264 |
265 |     Returns
266 |     -------
267 |     float
268 |         Bray-Curtis distance value.
269 |
270 |     """
271 |     if len(x) != len(y):
272 |         x, y = interpolation(x, y)
273 |
274 |     braycurtis_d = distance.braycurtis(x, y, weight)
275 |
276 |     return braycurtis_d
277 |
278 |
279 | def canberra_distance(x, y, weight=None):
280 |     """Computes the Canberra distance between two time series.
281 |
282 |     If the time series do not have the same length, an interpolation is performed.
283 |
284 |     Parameters
285 |     ----------
286 |     x : nd-array
287 |         Time series x.
288 |     y : nd-array
289 |         Time series y.
290 |     weight: nd-array (Default: None)
291 |         query weight values.
292 |
293 |     Returns
294 |     -------
295 |     float
296 |         Canberra distance value.
297 |
298 |     """
299 |     if len(x) != len(y):
300 |         x, y = interpolation(x, y)
301 |
302 |     canberra_d = distance.canberra(x, y, weight)
303 |
304 |     return canberra_d
305 |
306 |
307 | def cosine_distance(x, y, weight=None):
308 |     """Computes the cosine distance between two time series.
309 |
310 |     If the time series do not have the same length, an interpolation is performed.
311 |
312 |     Parameters
313 |     ----------
314 |     x : nd-array
315 |         Time series x.
316 |     y : nd-array
317 |         Time series y.
318 |     weight: nd-array (Default: None)
319 |         query weight values.
320 |
321 |     Returns
322 |     -------
323 |     float
324 |         Cosine distance value.
325 |
326 |     """
327 |     if len(x) != len(y):
328 |         x, y = interpolation(x, y)
329 |
330 |     cosine_d = distance.cosine(x, y, weight)
331 |
332 |     return cosine_d
333 |
334 |
335 | def mahalanobis_distance(x, y, weight=None):
336 |     """Computes the Mahalanobis distance between two time series.
337 |
338 |     If the time series do not have the same length, an interpolation is performed.
339 |
340 |     Parameters
341 |     ----------
342 |     x : nd-array
343 |         Time series x.
344 |     y : nd-array
345 |         Time series y.
346 |     weight: nd-array
347 |         The inverse of the covariance matrix, passed to ``scipy.spatial.distance.mahalanobis`` (required; ``None`` will raise an error).
348 |
349 |     Returns
350 |     -------
351 |     float
352 |         Mahalanobis distance value.
353 |
354 |     """
355 |     if len(x) != len(y):
356 |         x, y = interpolation(x, y)
357 |
358 |     mahalanobis_d = distance.mahalanobis(x, y, weight)
359 |
360 |     return mahalanobis_d
361 |
362 |
363 | def sqeuclidean_distance(x, y, weight=None):
364 |     """Computes the squared Euclidean distance between two time series.
365 |
366 |     If the time series do not have the same length, an interpolation is performed.
367 |
368 |     Parameters
369 |     ----------
370 |     x : nd-array
371 |         Time series x.
372 |     y : nd-array
373 |         Time series y.
374 |     weight: nd-array (Default: None)
375 |         query weight values.
376 |
377 |     Returns
378 |     -------
379 |     float
380 |         Squared Euclidean distance value.
381 |
382 |     """
383 |     if len(x) != len(y):
384 |         x, y = interpolation(x, y)
385 |
386 |     sqeuclidean_d = distance.sqeuclidean(x, y, weight)
387 |
388 |     return sqeuclidean_d
389 |
390 |
391 | def hamming_distance(x, y, weight=None):
392 |     """Computes the Hamming distance between two time series.
393 |
394 |     If the time series do not have the same length, an interpolation is performed.
395 |
396 |     Parameters
397 |     ----------
398 |     x : nd-array
399 |         Time series x.
400 |     y : nd-array
401 |         Time series y.
402 |     weight: nd-array (Default: None)
403 |         query weight values.
404 |
405 |     Returns
406 |     -------
407 |     float
408 |         Hamming distance value.
409 |
410 |     """
411 |     if len(x) != len(y):
412 |         x, y = interpolation(x, y)
413 |
414 |     hamming_d = distance.hamming(x, y, weight)
415 |
416 |     return hamming_d
417 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | ##########################################################################################
3 | #                                                                                        #
4 | #                           Pre-commit configuration file                               #
5 | #                                                                                        #
6 | #                                                                                        #
7 | #              See https://pre-commit.com for more information                          #
8 | #              See https://pre-commit.com/hooks.html for more hooks                     #
9 | #                                                                                        #
10 | #              To install the git pre-commit hook run:                                  #
11 | #                  pre-commit install                                                   #
12 | #                  pre-commit autoupdate                                                #
13 | #              To update the pre-commit hooks run:                                      #
14 | #                  pre-commit install --install-hooks -t pre-commit -t commit-msg       #
15 | #              To run all hooks against current changes in your repository              #
16 | #                  pre-commit run --all-files                                           #
17 | #              If you wish to execute an individual hook use pre-commit run <hook_id>.
Example: # 18 | # pre-commit run black # 19 | # # 20 | ########################################################################################## 21 | default_language_version: 22 | python: python3 23 | default_stages: [commit, push] 24 | fail_fast: false 25 | repos: 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v4.5.0 28 | hooks: 29 | - id: fix-byte-order-marker 30 | name: fix-byte-order-marker 31 | description: removes UTF-8 byte order marker 32 | - id: trailing-whitespace 33 | name: trailing-whitespace 34 | description: Trims trailing whitespace 35 | args: [--markdown-linebreak-ext=md] 36 | - id: end-of-file-fixer 37 | name: end-of-file-fixer 38 | description: Makes sure files end in a newline and only a newline 39 | - id: check-json 40 | name: check-json 41 | description: Attempts to load all json files to verify syntax 42 | - id: check-toml 43 | name: check-toml 44 | description: Attempts to load all TOML files to verify syntax 45 | - id: check-symlinks 46 | name: check-symlinks 47 | description: Checks for symlinks which do not point to anything 48 | - id: check-added-large-files 49 | name: check-added-large-files 50 | description: Prevent files larger than 1 MB from being committed 51 | args: [ "--maxkb=1024", '--enforce-all' ] 52 | - id: check-case-conflict 53 | name: check-case-conflict 54 | description: Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT 55 | - id: end-of-file-fixer 56 | name: end-of-file-fixer 57 | description: Makes sure files end in a newline and only a newline 58 | - id: mixed-line-ending 59 | name: mixed-line-ending 60 | description: Replaces or checks mixed line ending 61 | - id: check-ast 62 | name: check-ast 63 | description: Simply check whether files parse as valid python 64 | - id: debug-statements 65 | name: debug-statements 66 | description: Check for debugger imports and py37+ breakpoint() calls in python source 67 | - id: detect-aws-credentials 68 | name: detect-aws-credentials 69 | description: Checks for the existence of AWS/Minio secrets that you have set up 70 | args: [--allow-missing-credentials] 71 | - id: detect-private-key 72 | name: detect-private-key 73 | description: Checks for the existence of private keys. 74 | - id: requirements-txt-fixer 75 | name: requirements-txt-fixer 76 | description: Sorts entries in requirements.txt and removes incorrect entries 77 | #- id: no-commit-to-branch 78 | # name: no-commit-to-master-branch 79 | # description: Prevent commits to master/main branch 80 | # language: python 81 | # args: ["-b", master, "-b", main] 82 | # pass_filenames: false 83 | - id: check-merge-conflict 84 | name: check-merge-conflict 85 | description: Check for files that contain merge conflict strings 86 | - repo: https://github.com/pre-commit/pygrep-hooks 87 | rev: v1.10.0 88 | hooks: 89 | - id: python-check-mock-methods 90 | name: check-mock-methods 91 | description: Prevent common mistakes of assert mck.not_called(), assert mck.called_once_with(...) and mck.assert_called. 92 | - id: python-use-type-annotations 93 | name: python-use-type-annotations 94 | description: Enforce that python3.6+ type annotations are used instead of type comments 95 | - id: python-check-blanket-noqa 96 | name: python-check-blanket-noqa 97 | description: Enforce that noqa annotations always occur with specific codes. 
98 | # - id: python-no-eval 99 | # name: python-no-eval 100 | # description: A quick check for the eval() built-in function 101 | # - repo: https://github.com/pre-commit/mirrors-mypy 102 | # rev: v1.5.1 103 | # hooks: 104 | # - id: mypy 105 | # name: mypy - static type checker for Python 106 | # description: Static type checker for Python 107 | # files: ^src/ 108 | # exclude: ^tests/ 109 | # args: [--ignore-missing-imports] 110 | # additional_dependencies: [types-all] 111 | # not working really well 112 | # - repo: https://github.com/asottile/yesqa 113 | # rev: v1.4.0 114 | # hooks: 115 | # - id: yesqa 116 | # name: yesqa - remove unnecessary `# noqa` comments 117 | # description: Automatically remove unnecessary `# noqa` comments 118 | - repo: https://github.com/asottile/add-trailing-comma 119 | rev: v3.1.0 120 | hooks: 121 | - id: add-trailing-comma 122 | name: add-trailing-comma 123 | description: Automatically add trailing commas to calls and literals. 124 | - repo: https://github.com/PyCQA/flake8 125 | rev: 7.0.0 126 | hooks: 127 | - id: flake8 128 | name: flake8 - check the style and quality of some python code 129 | description: Python tool that glues together pycodestyle, pyflakes, mccabe, and third-party plugins to check the style and quality of some python code 130 | additional_dependencies: 131 | - flake8-bugbear 132 | # - flake8-variables-names 133 | # - pep8-naming 134 | # - flake8-print 135 | - flake8-quotes 136 | - flake8-broken-line 137 | - flake8-comprehensions 138 | - flake8-spellcheck # ignored by now 139 | - flake8-eradicate 140 | #- flake8-walrus==1.1.0 141 | - flake8-typing-imports==1.12.0 142 | #- flake8-match==1.0.0 143 | - repo: https://github.com/psf/black 144 | rev: 24.1.1 145 | hooks: 146 | - id: black 147 | name: black - consistent Python code formatting 148 | description: The uncompromising Python code formatter 149 | - repo: https://github.com/PyCQA/isort 150 | rev: 5.13.2 151 | hooks: 152 | - id: isort 153 | name: isort - sort Python imports 154 | description: Library to sort imports 155 | - repo: https://github.com/asottile/pyupgrade 156 | rev: v3.15.0 157 | hooks: 158 | - id: pyupgrade 159 | name: pyupgrade - upgrade syntax for newer versions of the language 160 | description: Automatically upgrade syntax for newer versions of the language 161 | args: [--py36-plus] 162 | # - repo: https://github.com/jendrikseipp/vulture 163 | # rev: v2.9.1 164 | # hooks: 165 | # - id: vulture 166 | # name: vulture - finds unused code in Python programs 167 | # description: Finds unused code in Python programs 168 | ########################################################################################## 169 | # Notebooks 170 | ########################################################################################## 171 | - repo: https://github.com/nbQA-dev/nbQA 172 | rev: 1.7.1 173 | hooks: 174 | # - id: nbqa-flake8 175 | # name: nbqa-flake8 - Python linting (notebooks) 176 | # additional_dependencies: [flake8] 177 | #- id: nbqa-mypy 178 | # name: nbqa-mypy - Static type checker for Python (notebooks) 179 | # additional_dependencies: [mypy] 180 | # args: [--ignore-missing-imports] 181 | - id: nbqa-isort 182 | name: nbqa-isort - Sort Python imports (notebooks) 183 | additional_dependencies: [isort] 184 | - id: nbqa-pyupgrade 185 | name: nbqa-pyupgrade - Upgrade syntax for newer versions of Python (notebooks) 186 | additional_dependencies: [pyupgrade] 187 | args: [--py36-plus] 188 | - id: nbqa-black 189 | name: nbqa-black - consistent Python code formatting (notebooks) 190 
| additional_dependencies: [black] 191 | # - id: nbqa-pydocstyle 192 | # additional_dependencies: [pydocstyle, toml==0.10.2] 193 | - repo: https://github.com/kynan/nbstripout 194 | rev: 0.6.1 195 | hooks: 196 | - id: nbstripout 197 | name: nbstripout - strip outputs from notebooks 198 | description: Strip output from Jupyter and IPython notebooks 199 | args: 200 | - --extra-keys 201 | - "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId" 202 | files: .ipynb 203 | ########################################################################################## 204 | # Shell Scripting 205 | ########################################################################################## 206 | - repo: local 207 | hooks: 208 | - id: shellcheck 209 | name: shellcheck - static analysis tool for shell scripts 210 | description: A static analysis tool for shell scripts 211 | language: script 212 | entry: scripts/shellcheck.sh 213 | types: [shell] 214 | args: [-e, SC1091] 215 | - repo: https://github.com/lovesegfault/beautysh 216 | rev: v6.2.1 217 | hooks: 218 | - id: beautysh 219 | name: beautysh - Autoformat shell scripts 220 | description: Autoformat shell scripts 221 | ########################################################################################## 222 | # Tests 223 | ########################################################################################## 224 | - repo: local 225 | hooks: 226 | - id: pytest 227 | name: pytest 228 | description: Run pytest 229 | entry: pytest -sv test 230 | language: system 231 | always_run: true 232 | types: [python] 233 | stages: [push] 234 | pass_filenames: false 235 | ########################################################################################## 236 | # Security 237 | ########################################################################################## 238 | # - repo: local 239 | # hooks: 240 | # - id: safety 241 | # name: safety 242 | # description: Analyze your Python requirements for known security vulnerabilities 243 | # entry: safety check --short-report -r 244 | # language: system 245 | # files: requirements/*.txt 246 | - repo: https://github.com/PyCQA/bandit 247 | rev: 1.7.7 248 | hooks: 249 | - id: bandit 250 | args: ["-c", "pyproject.toml"] 251 | additional_dependencies: [".[toml]"] 252 | # - repo: https://github.com/PyCQA/bandit 253 | # rev: 1.7.5 254 | # hooks: 255 | # - id: bandit 256 | # name: bandit - find common security issues in Python code. 
257 | # description: Tool designed to find common security issues in Python code 258 | # args: ["-c", "pyproject.toml"] 259 | # additional_dependencies: [toml==0.10.2] 260 | ########################################################################################## 261 | # Git 262 | ########################################################################################## 263 | # - repo: https://github.com/commitizen-tools/commitizen 264 | # rev: 3.6.0 265 | # hooks: 266 | # - id: commitizen 267 | # stages: [commit-msg] 268 | # additional_dependencies: [git+https://bitbucket.fraunhofer.pt/scm/is2020/mlops-commit-drafter.git] 269 | ########################################################################################## 270 | # Documentation 271 | ########################################################################################## 272 | - repo: https://github.com/executablebooks/mdformat 273 | rev: 0.7.17 274 | hooks: 275 | - id: mdformat 276 | name: mdformat - Markdown formatter that can be used to enforce a consistent style in Markdown files 277 | description: Markdown formatter that can be used to enforce a consistent style in Markdown files 278 | additional_dependencies: 279 | - mdformat-black 280 | - mdformat-beautysh 281 | exclude: CHANGELOG.md 282 | - repo: https://github.com/myint/docformatter 283 | rev: v1.7.5 284 | hooks: 285 | - id: docformatter 286 | name: docformatter - formats docstrings to follow PEP 257 287 | description: Formats docstrings to follow PEP 257 288 | args: [--in-place] 289 | - repo: https://github.com/terrencepreilly/darglint 290 | rev: v1.8.1 291 | hooks: 292 | - id: darglint 293 | name: darglint - Python documentation linter 294 | description: A python documentation linter which checks that the docstring description matches the definition. 
295 | args: ["-z", long] 296 | # - repo: https://github.com/econchick/interrogate 297 | # rev: 1.5.0 298 | # hooks: 299 | # - id: interrogate 300 | # name: interrogate - interrogate a codebase for docstring coverage 301 | # description: Interrogate a codebase for docstring coverage 302 | # WIP 303 | # - repo: https://github.com/PyCQA/prospector 304 | # rev: 1.5.3.1 305 | # hooks: 306 | # - id: prospector 307 | ########################################################################################## 308 | # DVC 309 | ########################################################################################## 310 | # https://dvc.org/doc/command-reference/install#--use-pre-commit-tool 311 | # - repo: https://github.com/iterative/dvc 312 | # hooks: 313 | # - id: dvc-pre-commit 314 | # language_version: python3 315 | # stages: 316 | # - commit 317 | # - id: dvc-pre-push 318 | # # use s3/gs/etc instead of all to only install specific cloud support 319 | # additional_dependencies: ['.[all]'] 320 | # language_version: python3 321 | # stages: 322 | # - push 323 | # - always_run: true 324 | # - id: dvc-post-checkout 325 | # language_version: python3 326 | # stages: 327 | # - post-checkout 328 | ########################################################################################## 329 | # Docker 330 | ########################################################################################## 331 | - repo: local 332 | hooks: 333 | - id: hadolint 334 | name: hadolint - Lint Dockerfile for errors and enforce best practices 335 | description: Lint Dockerfile for errors and enforce best practices 336 | language: script 337 | entry: scripts/hadolint.sh 338 | files: Dockerfile 339 | -------------------------------------------------------------------------------- /tssearch/distances/elastic_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit, prange 3 | from tssearch.utils.preprocessing import standardization 4 | 5 | 6 | @njit(parallel=True, fastmath=True) 7 | def _cost_matrix(x, y): 8 | """ 9 | 10 | Parameters 11 | ---------- 12 | x: nd-array 13 | Time series x (query). 14 | y: nd-array 15 | Time series y. 16 | 17 | Returns 18 | ------- 19 | c: nd-array 20 | The cost matrix. 21 | """ 22 | l1 = x.shape[0] 23 | l2 = y.shape[0] 24 | c = np.zeros((l1, l2), dtype=np.float32) 25 | 26 | for i in prange(l1): 27 | for j in prange(l2): 28 | c[i, j] = (x[i] - y[j]) ** 2 29 | 30 | return c 31 | 32 | 33 | @njit(parallel=True, fastmath=True) 34 | def _multidimensional_cost_matrix(x, y, weight): 35 | """Helper function for fast computation of cost matrix in cost_matrix_diff_vec. 36 | Defined outside to prevent recompilation from numba 37 | 38 | Parameters 39 | ---------- 40 | x: nd-array 41 | Time series x (query). 42 | y: nd-array 43 | Time series y. 44 | 45 | Returns 46 | ------- 47 | c: nd-array 48 | The cost matrix. 49 | """ 50 | l1 = x.shape[0] 51 | l2 = y.shape[0] 52 | l3 = x.shape[1] 53 | c = np.zeros((l1, l2), dtype=np.float32) 54 | 55 | for i in prange(l1): 56 | for j in prange(l2): 57 | dist = 0.0 58 | for di in range(l3): 59 | diff = x[i, di] - y[j, di] 60 | dist += weight[i, di] * (diff * diff) 61 | c[i, j] = dist ** 0.5 62 | 63 | return c 64 | 65 | 66 | @njit(nogil=True, fastmath=True) 67 | def _accumulated_cost_matrix(ac): 68 | """Fast computation of accumulated cost matrix using cost matrix. 69 | 70 | Parameters 71 | ---------- 72 | ac: nd-array 73 | Given cost matrix c, ac = acc_initialization(...), ac[1:, 1:] = c. 
74 | 75 | Returns 76 | ------- 77 | The accumulated cost matrix. 78 | """ 79 | for i in range(ac.shape[0] - 1): 80 | for j in range(ac.shape[1] - 1): 81 | ac[i + 1, j + 1] += min(ac[i, j + 1], ac[i + 1, j], ac[i, j]) 82 | return ac 83 | 84 | 85 | def acc_initialization(x, y, _type, tolerance=0): 86 | """Initializes the accumulated cost matrix according to the DTW variant. 87 | 88 | Parameters 89 | ---------- 90 | x: int 91 | Length of time series x (query). 92 | y: int 93 | Length of time series y. 94 | _type: string 95 | Name of the DTW variant. 96 | tolerance: int 97 | Tolerance value (used by "psi-dtw"). 98 | 99 | Returns 100 | ------- 101 | ac: nd-array 102 | The initialized accumulated cost matrix. 103 | """ 104 | ac = np.zeros((x + 1, y + 1)) 105 | if _type == "dtw": 106 | ac[0, 1:] = np.inf 107 | ac[1:, 0] = np.inf 108 | elif _type == "oe-dtw": 109 | ac[0, 1:] = np.inf 110 | ac[1:, 0] = np.inf 111 | elif _type == "obe-dtw" or _type == "sub-dtw" or _type == "search": 112 | ac[1:, 0] = np.inf 113 | elif _type == "psi-dtw": 114 | ac[0, tolerance + 1 :] = np.inf 115 | ac[tolerance + 1 :, 0] = np.inf 116 | else: 117 | ac[0, 1:] = np.inf 118 | ac[1:, 0] = np.inf 119 | 120 | return ac 121 | 122 | 123 | def cost_matrix(x, y, alpha=1, weight=None): 124 | """Computes the cost matrix between two time series, optionally blending amplitude and first-derivative costs. 125 | 126 | Parameters 127 | ---------- 128 | x: nd-array 129 | Time series x (query). 130 | y: nd-array 131 | Time series y. 132 | alpha: float 133 | A factor between 0 and 1 which weights the amplitude and derivative contributions. 134 | With ``alpha == 1`` (default) only the amplitude cost is used; lower values add a first-derivative cost term. 135 | weight: nd-array, optional 136 | Query-aligned local weights applied to the cost. Defaults to ones. 137 | 138 | Returns 139 | ------- 140 | c: nd-array 141 | The cost matrix.
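Examples
--------
A minimal sketch (values are illustrative, not from the package's test suite):

>>> import numpy as np
>>> x = np.array([0.0, 1.0, 2.0])
>>> y = np.array([0.0, 1.0, 2.0, 3.0])
>>> cost_matrix(x, y).shape
(3, 4)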
158 | """ 159 | if weight is None: 160 | weight = np.ones_like(x) 161 | 162 | if len(np.shape(weight)) == 1: 163 | weight = weight.reshape(-1, 1) 164 | 165 | if alpha == 1: 166 | C_d = 1 167 | if len(np.shape(x)) == 1: 168 | C_n = (_cost_matrix(x, y) * weight) / np.max(weight) 169 | else: 170 | C_n = _multidimensional_cost_matrix(x, y, weight) 171 | else: 172 | # standardization parameters 173 | abs_norm = np.mean(x, axis=0), np.std(x, axis=0) 174 | diff_norm = np.mean(np.diff(x, axis=0), axis=0), np.std(np.diff(x, axis=0), axis=0) 175 | 176 | # Derivative calculation and standardization 177 | _x = standardization(np.diff(x, axis=0), param=diff_norm) 178 | _y = standardization(np.diff(y, axis=0), param=diff_norm) 179 | # same length of derivative 180 | x = standardization(x[:-1], param=abs_norm) 181 | y = standardization(y[:-1], param=abs_norm) 182 | 183 | weight = weight[:-1] 184 | 185 | if len(np.shape(x)) == 1: 186 | C_d = _cost_matrix(_x, _y) * weight 187 | C_n = _cost_matrix(x, y) * weight 188 | else: 189 | C_d = _multidimensional_cost_matrix(_x, _y, weight) 190 | C_n = _multidimensional_cost_matrix(x, y, weight) 191 | 192 | c = alpha * C_n + (1 - alpha) * C_d 193 | 194 | return c 195 | 196 | 197 | def accumulated_cost_matrix(c, **kwargs): 198 | """ 199 | 200 | Parameters 201 | ---------- 202 | c: nd-array 203 | The cost matrix. 204 | 205 | \**kwargs: 206 | 207 | Returns 208 | ------- 209 | ac: nd-array 210 | The accumulated cost matrix. 211 | """ 212 | xl, yl = np.shape(c) 213 | 214 | window = kwargs.get("window", None) 215 | factor = kwargs.get("factor", np.min((xl, yl)) * 0.50) 216 | dtw_type = kwargs.get("dtw_type", "dtw") 217 | tolerance = kwargs.get("tolerance", 0) 218 | 219 | if window == "sakoe-chiba": 220 | c[np.abs(np.diff(np.indices(c.shape), axis=0))[0] > factor] = np.inf 221 | 222 | ac = acc_initialization(xl, yl, dtw_type, tolerance) 223 | ac[1:, 1:] = c.copy() 224 | ac = _accumulated_cost_matrix(ac)[1:, 1:] 225 | 226 | return ac 227 | 228 | 229 | @njit(nogil=True, fastmath=True) 230 | def traceback(ac): 231 | """Computes the traceback path of the matrix c. 232 | 233 | Parameters 234 | ---------- 235 | ac: nd-array 236 | The accumulated cost matrix. 237 | 238 | Returns 239 | ------- 240 | Coordinates p and q of the minimum path. 241 | 242 | """ 243 | 244 | i, j = np.array(ac.shape) - 2 245 | p, q = [i], [j] 246 | while (i > 0) and (j > 0): 247 | tb = 0 248 | if ac[i, j + 1] < ac[i, j]: 249 | tb = 1 250 | if ac[i + 1, j] < ac[i, j + tb]: 251 | tb = 2 252 | if tb == 0: 253 | i -= 1 254 | j -= 1 255 | elif tb == 1: 256 | i -= 1 257 | else: 258 | j -= 1 259 | p.insert(0, i) 260 | q.insert(0, j) 261 | while j > 0: 262 | j -= 1 263 | p.insert(0, i) 264 | q.insert(0, j) 265 | while i > 0: 266 | i -= 1 267 | p.insert(0, i) 268 | q.insert(0, j) 269 | 270 | return np.array(p), np.array(q) 271 | 272 | 273 | @njit(nogil=True, fastmath=True) 274 | def traceback_adj(ac): 275 | """Computes the adjusted traceback path of the matrix c. 276 | 277 | Parameters 278 | ---------- 279 | ac: nd-array 280 | The accumulated cost matrix. 281 | 282 | Returns 283 | ------- 284 | Coordinates p and q of the minimum path adjusted. 
285 | 286 | """ 287 | i, j = np.array(ac.shape) - 2 288 | p, q = [i], [j] 289 | while (i > 0) and (j > 0): 290 | tb = 0 291 | if ac[i, j + 1] < ac[i, j]: 292 | tb = 1 293 | if ac[i + 1, j] < ac[i, j + tb]: 294 | tb = 2 295 | if tb == 0: 296 | i -= 1 297 | j -= 1 298 | elif tb == 1: 299 | i -= 1 300 | else: # tb == 2 301 | j -= 1 302 | p.insert(0, i) 303 | q.insert(0, j) 304 | while i > 0: 305 | i -= 1 306 | p.insert(0, i) 307 | q.insert(0, j) 308 | return np.array(p), np.array(q) 309 | 310 | 311 | def backtracking(ac): 312 | """Compute the most cost-efficient path. 313 | 314 | Parameters 315 | ---------- 316 | ac: nd-array 317 | The accumulated cost matrix. 318 | 319 | Returns 320 | ------- 321 | Coordinates of the most cost-efficient path. 322 | """ 323 | x = np.shape(ac) 324 | i = x[0] - 1 325 | j = x[1] - 1 326 | 327 | # The path indices are collected in reverse order 328 | # path = np.ones((i + j, 2)) * np.inf 329 | best_path = [] 330 | 331 | steps = 0 332 | while i != 0 or j != 0: 333 | 334 | best_path.append((i - 1, j - 1)) 335 | 336 | C = np.ones((3, 1)) * np.inf 337 | 338 | # Keep data points in both time series 339 | C[0] = ac[i - 1, j - 1] 340 | # Deletion in A 341 | C[1] = ac[i - 1, j] 342 | # Deletion in B 343 | C[2] = ac[i, j - 1] 344 | 345 | # Find the index for the lowest cost 346 | idx = np.argmin(C) 347 | 348 | if idx == 0: 349 | # Keep data points in both time series 350 | i = i - 1 351 | j = j - 1 352 | elif idx == 1: 353 | # Deletion in A 354 | i = i - 1 355 | j = j 356 | else: 357 | # Deletion in B 358 | i = i 359 | j = j - 1 360 | steps = steps + 1 361 | 362 | best_path.append((i - 1, j - 1)) 363 | 364 | best_path.reverse() 365 | best_path = np.array(best_path[1:]) 366 | 367 | return best_path[:, 0], best_path[:, 1] 368 | 369 | 370 | # DTW SW 371 | def dtw_sw(x, y, winlen, alpha=0.5, **kwargs): 372 | """Computes Dynamic Time Warping (DTW) of two time series using a sliding window. 373 | TODO: Check whether this needs to be sped up. 374 | 375 | Parameters 376 | ---------- 377 | x: nd-array 378 | Time series x (query). 379 | y: nd-array 380 | Time series y. 381 | winlen: int 382 | The sliding window length. 383 | alpha: float 384 | A factor between 0 and 1 which weights the amplitude and derivative contributions. 385 | A higher value favors amplitude and a lower value favors the first derivative. 386 | 387 | \**kwargs: 388 | See below: 389 | 390 | * *normalize* (``bool``) -- 391 | If ``True`` the signals will be standardized before computing the DTW. 392 | (default: ``False``) 393 | 394 | * *dist_norm* (``bool``) -- 395 | If ``True`` the DTW distance will be normalized by dividing by the summation of the path dimensions. 396 | (default: ``True``) 397 | 398 | * *window* (``String``) -- 399 | Selects the global window constraint. Available options are ``None`` and ``sakoe-chiba``. 400 | (default: ``None``) 401 | 402 | * *factor* (``Float``) -- 403 | Selects the global constraint factor. 404 | (default: ``min(xl, yl) * .50``) 405 | 406 | 407 | Returns 408 | ------- 409 | d: float 410 | The SW-DTW distance. 411 | c: nd-array 412 | The local cost matrix. 413 | ac: nd-array 414 | The accumulated cost matrix. 415 | path: nd-array 416 | The optimal warping path between the two sequences.
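Examples
--------
A minimal sketch (signals are illustrative; ``winlen=5`` is an arbitrary choice):

>>> import numpy as np
>>> t = np.linspace(0, 2 * np.pi, 64)
>>> d, c, ac, path = dtw_sw(np.sin(t), np.sin(t + 0.3), winlen=5)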
417 | 418 | """ 419 | xl, yl = len(x), len(y) 420 | 421 | do_sign_norm = kwargs.get("normalize", False) 422 | do_dist_norm = kwargs.get("dist_norm", True) 423 | window = kwargs.get("window", None) 424 | factor = kwargs.get("factor", np.min((xl, yl)) * 0.50) 425 | 426 | if do_sign_norm: 427 | x, y = standardization(x), standardization(y) 428 | 429 | ac = np.zeros((xl + 1, yl + 1)) 430 | ac[0, 1:] = np.inf 431 | ac[1:, 0] = np.inf 432 | tmp_ac = ac[1:, 1:] 433 | 434 | nx = get_mirror(x, winlen) 435 | ny = get_mirror(y, winlen) 436 | 437 | dnx = np.diff(nx) 438 | dny = np.diff(ny) 439 | 440 | nx = nx[:-1] 441 | ny = ny[:-1] 442 | 443 | # Workaround to deal with even window sizes 444 | if winlen % 2 == 0: 445 | winlen -= 1 446 | 447 | swindow = np.hamming(winlen) 448 | swindow = swindow / np.sum(swindow) 449 | 450 | for i in range(xl): 451 | for j in range(yl): 452 | pad_i, pad_j = i + winlen, j + winlen 453 | # No window selected 454 | if window is None: 455 | tmp_ac[i, j] = sliding_dist( 456 | nx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 457 | ny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 458 | dnx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 459 | dny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 460 | alpha, 461 | swindow, 462 | ) 463 | 464 | # Sakoe-Chiba band 465 | elif window == "sakoe-chiba": 466 | if abs(i - j) < factor: 467 | tmp_ac[i, j] = sliding_dist( 468 | nx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 469 | ny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 470 | dnx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 471 | dny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 472 | alpha, 473 | swindow, 474 | ) 475 | else: 476 | tmp_ac[i, j] = np.inf 477 | 478 | # As a last resort, any other window option falls back to the full computation 479 | else: 480 | tmp_ac[i, j] = sliding_dist( 481 | nx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 482 | ny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 483 | dnx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 484 | dny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 485 | alpha, 486 | swindow, 487 | ) 488 | 489 | c = tmp_ac.copy() 490 | 491 | for i in range(xl): 492 | for j in range(yl): 493 | tmp_ac[i, j] += min([ac[i, j], ac[i, j + 1], ac[i + 1, j]]) 494 | 495 | path = traceback(ac) 496 | 497 | if do_dist_norm: 498 | d = ac[-1, -1] / np.sum(np.shape(path)) 499 | else: 500 | d = ac[-1, -1] 501 | 502 | return d, c, ac, path 503 | 504 | 505 | def sliding_dist(xw, yw, dxw, dyw, alpha, win): 506 | """Computes the sliding distance. 507 | 508 | Parameters 509 | ---------- 510 | xw: nd-array 511 | x coords window. 512 | yw: nd-array 513 | y coords window. 514 | dxw: nd-array 515 | x coords diff window. 516 | dyw: nd-array 517 | y coords diff window. 518 | alpha: float 519 | Weighting between amplitude and derivative contributions (1 - amplitude only, 0 - derivative only). 520 | win: nd-array 521 | Signal window used for the sliding distance. 522 | 523 | Returns 524 | ------- 525 | The sliding distance. 526 | """ 527 | return (1 - alpha) * np.sqrt(np.sum((((dxw - dyw) * win) ** 2.0))) + alpha * np.sqrt( 528 | np.sum((((xw - yw) * win) ** 2.0)) 529 | ) 530 | 531 | 532 | def get_mirror(s, ws): 533 | """Extends a signal by mirroring its start and end segments around the first and last samples. 534 | 535 | Parameters 536 | ---------- 537 | s: nd-array 538 | The input signal. 539 | ws: int 540 | The window size.
541 | 542 | Returns 543 | ------- 544 | The mirrored signal. 545 | """ 546 | 547 | return np.r_[2 * s[0] - s[ws:0:-1], s, 2 * s[-1] - s[-2 : -ws - 2 : -1]] 548 | 549 | 550 | @njit() 551 | def _lcss_point_dist(x, y): 552 | """Computes the pointwise Euclidean distance between two samples. 553 | 554 | Parameters 555 | ---------- 556 | x: nd-array 557 | A sample of time series x (query). 558 | y: nd-array 559 | A sample of time series y. 560 | 561 | Returns 562 | ------- 563 | The Euclidean distance between the two samples. 564 | """ 565 | dist = 0.0 566 | for di in range(x.shape[0]): 567 | diff = x[di] - y[di] 568 | dist += diff * diff 569 | 570 | return dist ** 0.5 571 | 572 | 573 | def lcss_accumulated_matrix(x, y, eps): 574 | """Computes the LCSS accumulated matrix using the Euclidean point distance between two time series. 575 | 576 | Parameters 577 | ---------- 578 | x: nd-array 579 | Time series x (query). 580 | y: nd-array 581 | Time series y. 582 | eps : float 583 | Amplitude matching threshold. 584 | 585 | Returns 586 | ------- 587 | ac : nd-array 588 | The accumulated cost matrix. 589 | """ 590 | 591 | xl, yl = len(x), len(y) 592 | 593 | ac = np.zeros((xl + 1, yl + 1)) 594 | 595 | for i in range(1, xl + 1): 596 | for j in range(1, yl + 1): 597 | if _lcss_point_dist(x[i - 1, :], y[j - 1, :]) <= eps: 598 | ac[i, j] = 1 + ac[i - 1, j - 1] 599 | else: 600 | ac[i, j] = max(ac[i, j - 1], ac[i - 1, j]) 601 | 602 | return ac 603 | 604 | 605 | def lcss_path(x, y, c, eps): 606 | """Computes the LCSS path between two time series. 607 | 608 | Parameters 609 | ---------- 610 | x: nd-array 611 | Time series x (query). 612 | y: nd-array 613 | Time series y. 614 | c : nd-array 615 | The accumulated cost matrix (as returned by lcss_accumulated_matrix). 616 | eps : float 617 | Amplitude matching threshold. 618 | 619 | Returns 620 | ------- 621 | Coordinates of the minimum LCSS path. 622 | """ 623 | i, j = len(x), len(y) 624 | path = [] 625 | 626 | while i > 0 and j > 0: 627 | if _lcss_point_dist(x[i - 1, :], y[j - 1, :]) <= eps: 628 | path.append((i - 1, j - 1)) 629 | i -= 1 630 | j -= 1 631 | elif c[i - 1, j] > c[i, j - 1]: 632 | i -= 1 633 | else: 634 | j -= 1 635 | 636 | path = np.array(path[::-1]) 637 | return path[1:, 0], path[1:, 1] 638 | 639 | 640 | def lcss_score(c): 641 | """Computes the LCSS similarity score between two time series. 642 | 643 | Parameters 644 | ---------- 645 | c : nd-array 646 | The accumulated cost matrix. 647 | 648 | Returns 649 | ------- 650 | The LCSS score, between 0 and 1. 651 | """ 652 | 653 | xl = c.shape[0] - 1 654 | yl = c.shape[1] - 1 655 | 656 | return float(c[-1, -1]) / min([xl, yl]) 657 | --------------------------------------------------------------------------------
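A minimal end-to-end sketch of the LCSS helpers above (the array values and the eps threshold are illustrative, not taken from the package's examples):

    import numpy as np

    from tssearch.distances.elastic_utils import lcss_accumulated_matrix, lcss_path, lcss_score

    # The LCSS helpers index samples as x[i, :], so inputs are 2-D (samples x dimensions).
    x = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([[0.1], [1.1], [2.1], [3.1], [4.0]])

    ac = lcss_accumulated_matrix(x, y, eps=0.5)  # (len(x)+1, len(y)+1) accumulated matrix
    px, py = lcss_path(x, y, ac, eps=0.5)        # indices of the matched samples
    score = lcss_score(ac)                       # LCSS length normalized by min(len(x), len(y))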