├── __init__.py
├── requirements
│   ├── requirements-dev.txt
│   ├── requirements-docs.txt
│   └── requirements.txt
├── setup.cfg
├── docs
│   ├── authors.rst
│   ├── changelog.rst
│   ├── imgs
│   │   ├── logo.jpg
│   │   └── fhp_logo.png
│   ├── license.rst
│   ├── _templates
│   │   └── module_functions_template.rst
│   ├── descriptions
│   │   ├── modules.rst
│   │   ├── lockstep_distances.rst
│   │   ├── modules
│   │   │   ├── tssearch_search.rst
│   │   │   └── tssearch_distances.rst
│   │   ├── segmentation_search.rst
│   │   └── elastic_distances.rst
│   ├── index.rst
│   └── conf.py
├── tssearch
│   ├── examples
│   │   ├── __init__.py
│   │   ├── ecg.pickle
│   │   └── ecg_example_data.py
│   ├── __init__.py
│   ├── search
│   │   ├── __init__.py
│   │   ├── segmentation.py
│   │   ├── query_search.py
│   │   └── search_utils.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── preprocessing.py
│   │   ├── distances_settings.py
│   │   ├── add_personal_distance.py
│   │   └── visualisation.py
│   └── distances
│       ├── __init__.py
│       ├── lockstep_utils.py
│       ├── time_distances.py
│       ├── distances.json
│       ├── compute_distance.py
│       ├── elastic_distances.py
│       ├── lockstep_distances.py
│       └── elastic_utils.py
├── CHANGELOG.rst
├── .gitattributes
├── tests
│   ├── test_segmentation.py
│   ├── search_example.py
│   ├── test_query_search.py
│   ├── test_distances.py
│   └── main_example.py
├── AUTHORS.rst
├── .flake8
├── LICENSE.txt
├── setup.py
├── .gitignore
├── pyproject.toml
├── README.md
└── .pre-commit-config.yaml
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from tssearch import *
--------------------------------------------------------------------------------
/requirements/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pre-commit >= 3.7.1
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
--------------------------------------------------------------------------------
/docs/authors.rst:
--------------------------------------------------------------------------------
1 | .. _authors:
2 | .. include:: ../AUTHORS.rst
--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | .. _changelog:
2 | ..
include:: ../CHANGELOG.rst -------------------------------------------------------------------------------- /requirements/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | jinja2 < 3.1.0 2 | Sphinx == 1.8.6 3 | -------------------------------------------------------------------------------- /tssearch/examples/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.examples.ecg_example_data import * 2 | -------------------------------------------------------------------------------- /docs/imgs/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fraunhoferportugal/tssearch/HEAD/docs/imgs/logo.jpg -------------------------------------------------------------------------------- /docs/imgs/fhp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fraunhoferportugal/tssearch/HEAD/docs/imgs/fhp_logo.png -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | ======= 4 | License 5 | ======= 6 | 7 | .. literalinclude:: ../LICENSE.txt -------------------------------------------------------------------------------- /tssearch/examples/ecg.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fraunhoferportugal/tssearch/HEAD/tssearch/examples/ecg.pickle -------------------------------------------------------------------------------- /tssearch/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.utils import * 2 | from tssearch.search import * 3 | from tssearch.distances import * 4 | from tssearch.examples import * 5 | -------------------------------------------------------------------------------- /tssearch/search/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.search.query_search import * 2 | from tssearch.search.segmentation import * 3 | from tssearch.search.search_utils import * 4 | -------------------------------------------------------------------------------- /tssearch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from tssearch.utils.preprocessing import * 2 | from tssearch.utils.visualisation import * 3 | from tssearch.utils.distances_settings import * 4 | -------------------------------------------------------------------------------- /docs/_templates/module_functions_template.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: {{ fullname }} 2 | 3 | {% block functions %} 4 | 5 | .. autosummary:: 6 | {% for item in functions %} 7 | {{ item }} 8 | {%- endfor %} 9 | 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /docs/descriptions/modules.rst: -------------------------------------------------------------------------------- 1 | Module Reference 2 | ================ 3 | 4 | .. automodule:: tssearch 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. 
toctree::
10 |
11 |     modules/tssearch_search
12 |     modules/tssearch_distances
--------------------------------------------------------------------------------
/requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | # (Pseudo) Automatically generated by https://github.com/damnever/pigar.
2 | h5py >= 3.6.0
3 | matplotlib >= 3.5.0
4 | numba >= 0.54.1
5 | numpy >= 1.20.3
6 | pandas >= 1.3.4
7 | scipy >= 1.7.1
8 | seaborn >= 0.11.2
9 | setuptools >= 47.1.1
10 |
--------------------------------------------------------------------------------
/tssearch/examples/ecg_example_data.py:
--------------------------------------------------------------------------------
1 | import tssearch
2 | import pickle
3 | import numpy as np
4 |
5 |
6 | def load_ecg_example():
7 |
8 |     filename = tssearch.__path__[0] + "/examples/ecg.pickle"
9 |     with open(filename, "rb") as handle:
10 |         data = pickle.load(handle)
11 |
12 |     return data
13 |
--------------------------------------------------------------------------------
/tssearch/distances/__init__.py:
--------------------------------------------------------------------------------
1 | from tssearch.distances.lockstep_distances import *
2 | from tssearch.distances.elastic_distances import *
3 | from tssearch.distances.lockstep_utils import *
4 | from tssearch.distances.elastic_utils import *
5 | from tssearch.distances.time_distances import *
6 | from tssearch.distances.compute_distance import *
7 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Changelog
3 | =========
4 |
5 | Version 0.1.3
6 | =============
7 | - Fixed a bug on the setup.py to correctly build the PyPI package
8 | - Removed novainstrumentation from dependencies
9 | - Fixed a bug on TWED distance (`#7 `_)
10 |
11 | Version 0.1.0
12 | =============
13 |
14 | - Release of TSSEARCH with documentation
15 |
--------------------------------------------------------------------------------
/docs/descriptions/lockstep_distances.rst:
--------------------------------------------------------------------------------
1 | ==================
2 | Lockstep Distances
3 | ==================
4 |
5 | Distance measures that compare the :math:`i`-th point of one time series to the :math:`i`-th point of another are referred to as lock-step measures (e.g., the Euclidean distance and the other Lp norms).
6 | If the two series have different lengths, the shorter series is linearly interpolated so that both series have the same length before the distance is computed.
7 |
8 |
--------------------------------------------------------------------------------
/docs/descriptions/modules/tssearch_search.rst:
--------------------------------------------------------------------------------
1 | Search
2 | ======
3 |
4 | .. automodule:: tssearch.search
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 |
9 | query_search
10 | ------------
11 |
12 | .. automodule:: tssearch.search.query_search
13 |     :members:
14 |     :undoc-members:
15 |     :show-inheritance:
16 |
17 | segmentation
18 | ------------
19 |
20 | .. automodule:: tssearch.search.segmentation
21 |     :members:
22 |     :undoc-members:
23 |     :show-inheritance:
24 |
25 | utils
26 | -----
27 |
28 | ..
automodule:: tssearch.search.search_utils 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: -------------------------------------------------------------------------------- /docs/descriptions/modules/tssearch_distances.rst: -------------------------------------------------------------------------------- 1 | Distances 2 | ========= 3 | 4 | .. automodule:: tssearch.distances 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Lockstep Distances 10 | ------------------ 11 | 12 | .. automodule:: tssearch.distances.lockstep_distances 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | 18 | Elastic Distances 19 | ----------------- 20 | 21 | .. automodule:: tssearch.distances.elastic_distances 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | 27 | Time Distances 28 | -------------- 29 | 30 | .. automodule:: tssearch.distances.time_distances 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Basic .gitattributes for a python repo. 2 | 3 | # Source files 4 | # ============ 5 | *.pxd text diff=python 6 | *.py text diff=python 7 | *.py3 text diff=python 8 | *.pyw text diff=python 9 | *.pyx text diff=python 10 | *.pyz text diff=python 11 | 12 | # Binary files 13 | # ============ 14 | *.db binary 15 | *.p binary 16 | *.pkl binary 17 | *.pickle binary 18 | *.pyc binary 19 | *.pyd binary 20 | *.pyo binary 21 | 22 | # Jupyter notebook 23 | *.ipynb text 24 | 25 | # Consider notebook files as support documentation 26 | *.ipynb linguist-documentation 27 | 28 | # Note: .db, .p, and .pkl files are associated 29 | # with the python modules ``pickle``, ``dbm.*``, 30 | # ``shelve``, ``marshal``, ``anydbm``, & ``bsddb`` 31 | # (among others). 32 | -------------------------------------------------------------------------------- /tests/test_segmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from tssearch import load_ecg_example, get_distance_dict, time_series_segmentation 4 | 5 | 6 | def segmentation(): 7 | data = load_ecg_example() 8 | cfg = get_distance_dict(["Dynamic Time Warping", "Euclidean Distance"]) 9 | out = time_series_segmentation(cfg, data["query"], data["sequence"], data["tq"], data["ts"], weight=data["weight"]) 10 | 11 | np.testing.assert_almost_equal( 12 | out["Dynamic Time Warping"], [7, 51, 120, 161, 210, 263, 318, 394, 444, 510, 584, 666, 740, 804, 878] 13 | ) 14 | 15 | np.testing.assert_almost_equal( 16 | out["Euclidean Distance"], [10, 58, 127, 193, 257, 320, 384, 452, 527, 596, 667, 736, 807] 17 | ) 18 | 19 | return out 20 | 21 | 22 | if __name__ == "__main__": 23 | out = segmentation() 24 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | Authors 2 | ========== 3 | This package is being developed and maintained by `Fraunhofer AICOS `_. 4 | 5 | .. image:: imgs/fhp_logo.png 6 | :align: center 7 | :scale: 25 % 8 | :alt: FhP-AICOS 9 | 10 | TSSEARCH was written in collaboration with `Cognitive Systems Lab of University of Bremen `_. 
11 | 12 | Lead Development Team 13 | --------------------- 14 | 15 | - Duarte Folgado (`duarte.folgado@fraunhofer.pt `_) 16 | - Hugo Gamboa (`hugo.gamboa@fraunhofer.pt `_) 17 | - Marília Barandas (`marilia.barandas@fraunhofer.pt `_) 18 | - Maria Lua Nunes (`maria.nunes@fraunhofer.pt `_) 19 | - Margarida Antunes (`maria.antunes@fraunhofer.pt `_) 20 | 21 | 22 | Contributors 23 | ------------ 24 | - Hui Liu 25 | - Tanja Schultz 26 | - Yale Hartmann 27 | -------------------------------------------------------------------------------- /tests/search_example.py: -------------------------------------------------------------------------------- 1 | from tssearch import * 2 | 3 | 4 | if __name__ == "__main__": 5 | 6 | # Example of a sequence to search in 7 | t = np.arange(0, 20 * np.pi, 0.1) 8 | sequence = np.sin(t) 9 | 10 | # Example of a sequence to search for 11 | tq = t[:70] 12 | query = np.sin(tq) 13 | 14 | dict_distances = get_distances_by_type() 15 | 16 | result = time_series_search(dict_distances, query, sequence, tq, t, output=("number", 1)) 17 | 18 | plt.figure() 19 | plt.title("Dynamic Time Warping") 20 | plot_alignment(query, sequence, result["Dynamic Time Warping"]["path"][0]) 21 | 22 | plt.figure() 23 | plt.title("Longest Common Subsequence") 24 | plot_alignment(query, sequence, result["Longest Common Subsequence"]["path"][0]) 25 | 26 | plt.figure() 27 | plt.title("Time Warp Edit Distance") 28 | plot_alignment(query, sequence, result["Time Warp Edit Distance"]["path"][0]) 29 | 30 | plt.figure() 31 | plt.title("Euclidean Distance") 32 | start = result["Euclidean Distance"]["start"][0] 33 | end = result["Euclidean Distance"]["end"][0] 34 | path = [np.arange(len(query)), np.arange(start, end)] 35 | plot_alignment(query, sequence, path, hoffset=start) 36 | 37 | plt.show() 38 | -------------------------------------------------------------------------------- /tests/test_query_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from tssearch import load_ecg_example, get_distance_dict, time_series_search 4 | 5 | 6 | def query_search(): 7 | data = load_ecg_example() 8 | cfg = get_distance_dict(["Dynamic Time Warping", "Longest Common Subsequence", "Euclidean Distance"]) 9 | out = time_series_search(cfg, data["query"], data["sequence"], tq=data["tq"], ts=data["ts"], weight=data["weight"]) 10 | 11 | np.testing.assert_almost_equal(out["Dynamic Time Warping"]["path_dist"][0], 0.09974783) 12 | np.testing.assert_almost_equal(out["Dynamic Time Warping"]["start"][0], 445) 13 | np.testing.assert_almost_equal(out["Dynamic Time Warping"]["end"][0], 510) 14 | 15 | np.testing.assert_almost_equal(out["Longest Common Subsequence"]["path_dist"][0], 1.0) 16 | np.testing.assert_almost_equal(out["Longest Common Subsequence"]["start"][0], 844) 17 | np.testing.assert_almost_equal(out["Longest Common Subsequence"]["end"][0], 921) 18 | 19 | np.testing.assert_almost_equal(out["Euclidean Distance"]["path_dist"][0], 0.05480903) 20 | np.testing.assert_almost_equal(out["Euclidean Distance"]["start"][0], 596) 21 | np.testing.assert_almost_equal(out["Euclidean Distance"]["end"][0], 674) 22 | 23 | return out 24 | 25 | 26 | if __name__ == "__main__": 27 | out = query_search() 28 | -------------------------------------------------------------------------------- /tests/test_distances.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from tssearch import * 4 | 5 | 6 | def test_distances(dist): 7 
| np.testing.assert_almost_equal(dist["Time Warp Edit Distance"], 223.85651832411503) 8 | np.testing.assert_almost_equal(dist["Dynamic Time Warping"], 0.2509773694532439) 9 | np.testing.assert_almost_equal(dist["Longest Common Subsequence"], 0.7774244833068362) 10 | np.testing.assert_almost_equal(dist["Time Alignment Measurement"], 1.492823) 11 | np.testing.assert_almost_equal(dist["Euclidean Distance"], 25.066280) 12 | # np.testing.assert_almost_equal(dist['Minkowski Distance'], ) 13 | np.testing.assert_almost_equal(dist["Chebyshev Distance"], 1.760120) 14 | np.testing.assert_almost_equal(dist["Cross Correlation Distance"], 1.0000008394102025) 15 | np.testing.assert_almost_equal(dist["Pearson Correlation Distance"], 2.000001678820405) 16 | np.testing.assert_almost_equal(dist["Short Time Series Distance"], 3.9573142706233573) 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | # Example of a sequence to search in 22 | t = np.arange(0, 20 * np.pi, 0.1) 23 | ts1 = np.sin(t) 24 | ts2 = np.sin(2 * t) 25 | 26 | dict_distances = get_distances_by_type() 27 | dist = time_series_distance(dict_distances, ts1, ts2, t, t) 28 | 29 | test_distances(dist.to_dict()["Distance"]) 30 | -------------------------------------------------------------------------------- /tssearch/distances/lockstep_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit, prange 3 | 4 | 5 | @njit(parallel=True, fastmath=True) 6 | def _lnorm_multidimensional(x, y, weight, p=2): 7 | """ 8 | 9 | Parameters 10 | ---------- 11 | x : nd-array 12 | Time series x. 13 | y : nd-array 14 | Time series y. 15 | weight: nd-array (Default: None) 16 | query weight values. 17 | p: int 18 | Lp norm distance degree. 19 | 20 | Returns 21 | ------- 22 | The Lp norm distance. 23 | """ 24 | l1 = x.shape[0] 25 | l3 = x.shape[1] 26 | 27 | distance = np.zeros_like(x, dtype=float) 28 | for i in prange(l1): 29 | dist = 0.0 30 | for di in range(l3): 31 | diff = x[i, di] - y[i, di] 32 | dist += weight[i, di] * (diff ** p) 33 | distance[i] = dist ** (1 / p) 34 | 35 | return distance 36 | 37 | 38 | def _lnorm_unidimensional(x, y, weight, p=2): 39 | """ 40 | 41 | Parameters 42 | ---------- 43 | x : nd-array 44 | Time series x. 45 | y : nd-array 46 | Time series y. 47 | weight: nd-array (Default: None) 48 | query weight values. 49 | p: int 50 | Lp norm distance degree. 51 | 52 | Returns 53 | ------- 54 | The Lp norm distance. 55 | """ 56 | distance = weight * np.power(np.power(np.abs(x - y), p), (1 / p)) 57 | 58 | return distance 59 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # Unfortunately, flake8 does not support pyproject.toml configuration. 
2 | # https://github.com/PyCQA/flake8/issues/234
3 | [flake8]
4 | per-file-ignores =
5 |     __init__.py:F401
6 | show-source = True
7 | count = True
8 | statistics = True
9 | # https://www.flake8rules.com
10 | # E203 = Whitespace before ':'
11 | # E265 = comment blocks like @{ section, which it can't handle
12 | # E266 = too many leading '#' for block comment
13 | # E731 = do not assign a lambda expression, use a def
14 | # W293 = Blank line contains whitespace
15 | # W503 = Line break before binary operator
16 | # E704 = multiple statements in one line - used for @override
17 | # TC002 = move third party import to TYPE_CHECKING
18 | # ANN = flake8-annotations
19 | # TC, TC2 = flake8-type-checking
20 | # B = flake8-bugbear
21 | # S = flake8-bandit
22 | # D = flake8-docstrings
23 | # S = flake8-bandit
24 | # F are errors reported by pyflakes
25 | # E and W are warnings and errors reported by pycodestyle
26 | # C are violations reported by mccabe
27 | # BLK = flake8-black
28 | # DAR = darglint
29 | # SC = flake8-spellcheck
30 | ignore = E203, E211, E265, E501, E999, F401, F821, W503, W505, SC100, SC200, C400, C401, C402, B008, E800, E741, F403, F405, C901, B028, E226
31 | max-line-length = 120
32 | max-doc-length = 120
33 | import-order-style = google
34 | docstring-convention = google
35 | inline-quotes = "
36 | strictness=short
37 | dictionaries=en_US,python,technical,pandas
38 | min-python-version = 3.8.0
39 | exclude = .git,.tox,.nox,venv,.venv,.venv-docs,.venv-dev,.venv-note,.venv-dempy,docs,test
40 | max-complexity = 10
41 | #spellcheck-targets=comments
42 |
--------------------------------------------------------------------------------
/tssearch/distances/time_distances.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from tssearch.distances.elastic_distances import dtw
4 | from tssearch.distances.elastic_utils import traceback
5 |
6 |
7 | def tam(x, y):
8 |     """Calculates the Time Alignment Measurement (TAM) based on an optimal warping path
9 |     between two time series.
10 |
11 |     Reference: Folgado et al., Time Alignment Measurement for Time Series, 2016.
12 |
13 |     Parameters
14 |     ----------
15 |     x : nd-array
16 |         Time series x.
17 |     y : nd-array
18 |         Time series y.
19 |
20 |     Returns
21 |     -------
22 |     float
23 |         The TAM between ``x`` and ``y``, combining the ratios of advance, delay
24 |         and phase along the optimal warping path. It ranges from 0 (series
25 |         completely in phase) to 3 (series completely out of phase).
26 |
27 |     """
28 |     ac = dtw(x, y, report="cost_matrix")
29 |
30 |     path = traceback(ac)
31 |
32 |     # Delay and advance counting
33 |     delay = len(np.where(np.diff(path[0]) == 0)[0])
34 |     advance = len(np.where(np.diff(path[1]) == 0)[0])
35 |
36 |     # Phase counting
37 |     incumbent = np.where((np.diff(path[0]) == 1) * (np.diff(path[1]) == 1))[0]
38 |     phase = len(incumbent)
39 |
40 |     # Estimated and reference time series duration.
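  |     # The last path coordinates give the aligned durations of each series,
  |     # which normalize the advance, delay and phase counts into the ratios
  |     # combined below (TAM = p_advance + p_delay + (1 - p_phase)).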
41 | len_estimation = path[1][-1] 42 | len_ref = path[0][-1] 43 | 44 | p_advance = advance * 1.0 / len_ref 45 | p_delay = delay * 1.0 / len_estimation 46 | p_phase = phase * 1.0 / np.min([len_ref, len_estimation]) 47 | 48 | distance = p_advance + p_delay + (1 - p_phase) 49 | return distance 50 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Associação Fraunhofer Portugal Research 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import setuptools
4 |
5 | ROOT = Path(__file__).parent
6 |
7 | with open("README.md") as fh:
8 |     long_description = fh.read()
9 |
10 |
11 | def find_requirements(filename):
12 |     with (ROOT / "requirements" / filename).open() as f:
13 |         return [s for s in [line.strip(" \n") for line in f] if not s.startswith("#") and s != ""]
14 |
15 |
16 | install_requirements = find_requirements("requirements.txt")
17 | docs_requirements = find_requirements("requirements-docs.txt")
18 |
19 | setuptools.setup(
20 |     name="tssearch",
21 |     version="0.1.3",
22 |     author="Fraunhofer Portugal",
23 |     description="Library for time series subsequence search",
24 |     long_description=long_description,
25 |     long_description_content_type="text/markdown",
26 |     download_url="https://github.com/fraunhoferportugal/tssearch/archive/refs/tags/v0.1.3.tar.gz",
27 |     package_data={"tssearch": ["distances/distances.json", "examples/ecg.pickle"]},
28 |     packages=setuptools.find_packages(),
29 |     classifiers=[
30 |         "License :: OSI Approved :: BSD License",
31 |         "Operating System :: Microsoft :: Windows",
32 |         "Operating System :: POSIX",
33 |         "Operating System :: Unix",
34 |         "Operating System :: MacOS",
35 |         "Programming Language :: Python :: 3",
36 |         "Programming Language :: Python :: 3.8",
37 |         "Programming Language :: Python :: 3.9",
38 |         "Programming Language :: Python :: 3.10",
39 |         "Programming Language :: Python :: 3.11",
40 |         "Programming Language :: Python :: 3.12",
41 |     ],
42 |     install_requires=install_requirements,
43 |     extras_require={
44 |         "docs": docs_requirements,
45 |     },
46 | )
47 |
--------------------------------------------------------------------------------
/tssearch/search/segmentation.py:
--------------------------------------------------------------------------------
1 | from scipy.signal import find_peaks
2 | from tssearch.search.search_utils import lockstep_search, elastic_search
3 |
4 |
5 | def time_series_segmentation(dict_distances, query, sequence, tq=None, ts=None, weight=None):
6 |     """
7 |     Time series segmentation locates the time instants between consecutive query repetitions on a longer,
8 |     repetitive sequence.
9 |
10 |     Parameters
11 |     ----------
12 |     dict_distances: dict
13 |         Configuration file with distances.
14 |     query: nd-array
15 |         Query time series.
16 |     sequence: nd-array
17 |         Sequence time series.
18 |     tq: nd-array
19 |         Time stamps of the query time series.
20 |     ts: nd-array
21 |         Time stamps of the sequence time series.
22 |     weight: nd-array (Default: None)
23 |         Query weight values.
24 |     Returns
25 |     -------
26 |     segment_results: dict
27 |         Segmented time instants for each given distance.
28 |     """
29 |
30 |     l_query = len(query)
31 |     segment_results = {}
32 |
33 |     for d_type in dict_distances:
34 |         for dist in dict_distances[d_type]:
35 |
36 |             if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes":
37 |                 segment_results[dist] = {}
38 |                 if d_type == "lockstep":
39 |                     distance = lockstep_search(dict_distances[d_type][dist], query, sequence, weight)
40 |                 elif d_type == "elastic":
41 |                     distance, ac = elastic_search(dict_distances[d_type][dist], query, sequence, tq, ts, weight)
42 |                 else:
43 |                     print("WARNING")
44 |                     continue
45 |
46 |                 pks, _ = find_peaks(-distance, distance=l_query / 2)
47 |                 segment_results[dist] = pks
48 |
49 |     return segment_results
50 |
--------------------------------------------------------------------------------
/tssearch/utils/preprocessing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def standardization(signal, fit=False, param=None):
5 |     """Normalizes a given signal by subtracting the mean and dividing by the standard deviation.
6 |
7 |     Parameters
8 |     ----------
9 |     signal : nd-array
10 |         input signal
  |     fit : bool
  |         If True, the statistics of the signal and of its first difference are
  |         also returned so they can be reused on other signals.
  |     param : nd-array
  |         Previously computed [mean, std] statistics used to normalize the
  |         signal instead of computing them from ``signal`` itself.
11 |
12 |     Returns
13 |     -------
14 |     nd-array
15 |         standardized signal
  |         (if ``fit`` is True, a tuple with the standardized signal and the
  |         computed statistics is returned instead)
16 |
17 |     """
18 |     if param is not None:
19 |         s_mean = param[0]
20 |         s_std = param[1]
21 |     else:
22 |         s_mean = np.mean(signal, axis=0)
23 |         s_std = np.std(signal, axis=0)
24 |
25 |     if fit:
26 |         d_mean = np.mean(np.diff(signal, axis=0), axis=0)
27 |         d_std = np.std(np.diff(signal, axis=0), axis=0)
28 |         return (signal - s_mean) / s_std, np.array([s_mean, s_std, d_mean, d_std])
29 |     else:
30 |         return (signal - s_mean) / s_std
31 |
32 |
33 | def interpolation(x, y):
34 |     """Computes the interpolation given two time series of different lengths.
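  |     The shorter of the two series is linearly interpolated up to the length
  |     of the longer one, so that lockstep distances can afterwards be computed
  |     point by point.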
35 | 36 | 37 | Parameters 38 | ---------- 39 | x : nd-array 40 | Time series x 41 | y : nd-array 42 | Time series y 43 | 44 | Returns 45 | ------- 46 | interp_signal (nd-array) 47 | Interpolated signal 48 | nd-array 49 | Time series 50 | 51 | """ 52 | 53 | lx = len(x) 54 | ly = len(y) 55 | if lx > ly: 56 | t_old = np.linspace(0, lx, ly) 57 | t_new = np.linspace(0, lx, lx) 58 | if len(np.shape(x)) == 1: 59 | y_new = np.interp(t_new, t_old, y) 60 | else: 61 | y_new = np.array([np.interp(t_new, t_old, y[:, ax]) for ax in range(np.shape(x)[1])]).T 62 | return x, y_new 63 | else: 64 | t_old = np.linspace(0, ly, lx) 65 | t_new = np.linspace(0, ly, ly) 66 | 67 | if len(np.shape(x)) == 1: 68 | x_new = np.interp(t_new, t_old, x) 69 | else: 70 | x_new = np.array([np.interp(t_new, t_old, x[:, ax]) for ax in range(np.shape(x)[1])]).T 71 | return x_new, y 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | Makefile 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | .idea/ 118 | 119 | *.pdf 120 | 121 | # documentation 122 | docs/_twed.py 123 | docs/_lcss.py 124 | docs/_dtw.py 125 | -------------------------------------------------------------------------------- /tssearch/utils/distances_settings.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tssearch 3 | 4 | 5 | def load_json(json_path): 6 | """Loads the json file given by filename. 
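  |     The file is expected to follow the structure of TSSEARCH's ``distances.json``,
  |     i.e., distance settings grouped under the top-level domains "elastic",
  |     "lockstep" and "time".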
7 |     Parameters
8 |     ----------
9 |     json_path : string
10 |         Path to the json file.
11 |     Returns
12 |     -------
13 |     Dict
14 |         Dictionary with the json file content.
15 |     """
16 |
17 |     return json.load(open(json_path))
18 |
19 |
20 | def get_distances_by_type(domain=None, json_path=None):
21 |     """Creates a dictionary with the distances settings by domain.
22 |     Parameters
23 |     ----------
24 |     domain : string
25 |         Available domains: "lockstep"; "elastic"; "time"
26 |         If domain equals None, then the distances settings from all domains are returned.
27 |     json_path : string
28 |         Directory of json file. Default: package distances.json directory
29 |     Returns
30 |     -------
31 |     Dict
32 |         Dictionary with the distances settings
33 |     """
34 |
35 |     if json_path is None:
36 |         json_path = tssearch.__path__[0] + "/distances/distances.json"
37 |
38 |     if domain not in ["elastic", "lockstep", "time", None]:
39 |         raise SystemExit("No valid domain. Choose: lockstep, elastic, time or None (for all distances settings).")
40 |
41 |     dict_features = load_json(json_path)
42 |     if domain is None:
43 |         return dict_features
44 |     else:
45 |         return {domain: dict_features[domain]}
46 |
47 |
48 | def get_distance_dict(dist_list):
  |     """Creates a dictionary with the settings of the given distances.
  |
  |     Parameters
  |     ----------
  |     dist_list : list
  |         Names of the distances to select, e.g., ["Dynamic Time Warping"].
  |
  |     Returns
  |     -------
  |     Dict
  |         Dictionary with the selected distances settings, grouped by domain.
  |     """
49 |
50 |     json_path = tssearch.__path__[0] + "/distances/distances.json"
51 |
52 |     dict_features = load_json(json_path)
53 |
54 |     select_distances = {}
55 |     for d in dist_list:
56 |         if d in dict_features["elastic"]:
57 |             d_type = "elastic"
58 |         elif d in dict_features["lockstep"]:
59 |             d_type = "lockstep"
60 |         elif d in dict_features["time"]:
61 |             d_type = "time"
62 |         else:
63 |             continue
64 |
65 |         if d_type not in select_distances:
66 |             select_distances[d_type] = {}
67 |         select_distances[d_type][d] = dict_features[d_type][d]
68 |
69 |     return select_distances
70 |
--------------------------------------------------------------------------------
/tests/main_example.py:
--------------------------------------------------------------------------------
1 | from tssearch import *
2 |
3 | import matplotlib.pyplot as plt
4 |
5 |
6 | if __name__ == "__main__":
7 |     # time
8 |     t = np.arange(0, 20 * np.pi, 0.1)
9 |
10 |     # 1. 1D, sample, euclidean distance
11 |     sequence = np.sin(t)
12 |     query = np.sin(t[:70])
13 |
14 |     dict_distances = {
15 |         "lockstep": {"Euclidean Distance": {"function": "euclidean_distance", "parameters": "", "use": "yes"}}
16 |     }
17 |
18 |     result1 = time_series_search(dict_distances, query, sequence, output=("number", 1))
19 |
20 |     plt.figure(1)
21 |     start = result1["Euclidean Distance"]["start"][0]
22 |     end = result1["Euclidean Distance"]["end"][0]
23 |     path = [np.arange(len(query)), np.arange(start, end)]
24 |     plot_alignment(query, sequence, path, hoffset=start)
25 |
26 |     # 2. 3-axis, reference, sdtw, equal weight 3 axis fw = [1,1,1]
27 |     sequence = np.array([np.sin(t), np.sin(2 * t), np.cos(t)]).T
28 |     query = sequence[70:140]
29 |
30 |     dict_distances = {
31 |         "elastic": {"Dynamic Time Warping": {"function": "dtw", "parameters": {"dtw_type": "sub-dtw"}, "use": "yes"}}
32 |     }
33 |
34 |     result2 = time_series_search(dict_distances, query, sequence, output=("number", 1))
35 |
36 |     path = result2["Dynamic Time Warping"]["path"][0]
37 |     plt.figure(2)
38 |     plot_alignment(query[:, 1], sequence[:, 1], path, hoffset=path[1][0])
39 |
40 |     # 3. 3-axis, reference, sdtw, different axes weights
41 |     # derivative and abs with different weight fw = [.7,.7,.7,.3,.3,.3]
42 |     sequence = np.array([np.sin(t), np.sin(2 * t), np.cos(t)]).T
43 |     query = np.array([np.sin(t[:70]), np.sin(2 * t[10:80]), np.cos(t[30:100])]).T
44 |     weight = np.ones_like(query)
45 |     weight[:, 2] = 0.5
46 |     weight[:, 1] = 0.8
47 |
48 |     dict_distances = {
49 |         "elastic": {"Dynamic Time Warping": {"function": "dtw", "parameters": {"dtw_type": "sub-dtw"}, "use": "yes"}}
50 |     }
51 |
52 |     result3 = time_series_search(dict_distances, query, sequence, weight=weight, output=("number", 1))
53 |
54 |     path = result3["Dynamic Time Warping"]["path"][0]
55 |     plt.figure(3)
56 |     plot_alignment(query[:, 0], sequence[:, 0], path, hoffset=path[1][0])
57 |
58 |     # 4. with 4 points to be forced in time and amplitude qw = [10000010000...0011]
59 |
60 |     # 5. Emulate gaussian process
61 |
62 |     plt.show()
63 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. image:: imgs/logo.jpg
2 |     :width: 70 %
3 |     :alt: Logo
4 |     :align: center
5 |
6 | |
7 | |
8 |
9 | Welcome to TSSEARCH documentation!
10 | ==================================
11 |
12 | The Time Series Subsequence Search package (TSSEARCH for short) is a Python package that assists researchers in exploratory analysis for query search and time series segmentation without requiring significant programming effort. It contains curated routines for query and subsequence search. TSSEARCH installation is straightforward and goes along with startup code examples. Our goal is to provide the tools to get faster insights for your time series.
13 |
14 | Highlights
15 | ==========
16 |
17 | - **Search**: we provide methods for time series query search and segmentation
18 | - **Weights**: the relative contribution of each point of the query to the overall distance can be expressed using a user-defined weight vector
19 | - **Visualization**: we provide visualizations to present the results of the segmentation and query search
20 | - **Unit tested**: we provide unit tests for each distance
21 | - **Easily extended**: adding new distances is easy, and we encourage you to contribute with your custom distances or search methods
22 |
23 | Contents
24 | ========
25 |
26 | In development
27 |
28 | .. toctree::
29 |     :maxdepth: 2
30 |
31 |     Lockstep Distances
32 |     Elastic Distances
33 |     Segmentation and Search
34 |     Module Reference
35 |     Authors
36 |     Changelog
37 |     License
38 |
39 | Installation
40 | ============
41 |
42 | This package is available on PyPI:
43 |
44 | .. code:: bash
45 |
46 |     $ pip install tssearch
47 |
48 | Get started
49 | ===========
50 |
51 | The code below segments a 10 s electrocardiography record:
52 |
53 | .. code:: python
54 |
55 |     import tssearch
56 |
57 |     # Load the query, (optional) weight vector and sequence
58 |     data = tssearch.load_ecg_example()
59 |
60 |     # Selects the Dynamic Time Warping (DTW) as the distance for the segmentation
61 |     cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
62 |
63 |     # Performs the segmentation
64 |     out = tssearch.time_series_segmentation(cfg, data['query'], data['sequence'], weight=data['weight'])
65 |
66 | Indices and tables
67 | ==================
68 |
69 | * :ref:`genindex`
70 | * :ref:`modindex`
71 | * :ref:`search`
72 |
--------------------------------------------------------------------------------
/tssearch/distances/distances.json:
--------------------------------------------------------------------------------
1 | {
2 |   "elastic": {
3 |     "Time Warp Edit Distance": {
4 |       "multivariate": "no",
5 |       "description": "",
6 |       "function": "twed",
7 |       "parameters": {
8 |         "nu": 1e-3,
9 |         "lmbda": 0,
10 |         "p": 2,
11 |         "time": "true"
12 |       },
13 |       "use": "yes"
14 |     },
15 |     "Dynamic Time Warping": {
16 |       "multivariate": "yes",
17 |       "description": "",
18 |       "function": "dtw",
19 |       "parameters": {
20 |         "dtw_type": "dtw",
21 |         "alpha": 1
22 |       },
23 |       "use": "yes"
24 |     },
25 |     "Longest Common Subsequence": {
26 |       "multivariate": "yes",
27 |       "description": "",
28 |       "function": "lcss",
29 |       "parameters": {
30 |         "eps": 1,
31 |         "report": "distance"
32 |       },
33 |       "use": "yes"
34 |     }
35 |   },
36 |   "time": {
37 |     "Time Alignment Measurement": {
38 |       "multivariate": "yes",
39 |       "description": "",
40 |       "function": "tam",
41 |       "parameters": "",
42 |       "use": "yes"
43 |     }
44 |   },
45 |   "lockstep": {
46 |     "Euclidean Distance": {
47 |       "multivariate": "yes",
48 |       "description": "",
49 |       "function": "euclidean_distance",
50 |       "parameters": "",
51 |       "use": "yes"
52 |     },
53 |     "Minkowski Distance": {
54 |       "multivariate": "yes",
55 |       "description": "",
56 |       "function": "minkowski_distance",
57 |       "parameters":
58 |         {"p": 3},
59 |       "use": "yes"
60 |     },
61 |     "Manhattan Distance": {
62 |       "multivariate": "yes",
63 |       "description": "",
64 |       "function": "manhattan_distance",
65 |       "parameters": "",
66 |       "use": "yes"
67 |     },
68 |     "Chebyshev Distance": {
69 |       "multivariate": "yes",
70 |       "description": "",
71 |       "function": "chebyshev_distance",
72 |       "parameters": "",
73 |       "use": "yes"
74 |     },
75 |     "Cross Correlation Distance": {
76 |       "multivariate": "no",
77 |       "description": "",
78 |       "function": "correlation_distance",
79 |       "parameters": "",
80 |       "use": "yes"
81 |     },
82 |     "Pearson Correlation Distance": {
83 |       "multivariate": "no",
84 |       "description": "",
85 |       "function": "pearson_correlation",
86 |       "parameters": "",
87 |       "use": "yes"
88 |     },
89 |     "Short Time Series Distance": {
90 |       "multivariate": "no",
91 |       "description": "",
92 |       "function": "short_time_series_distance",
93 |       "parameters": "",
94 |       "use": "yes"
95 |     }
96 |   }
97 | }
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 120
3 | color = true
4 | target-version = ['py37', 'py38']
5 | include = '\.pyi?$'
6 | exclude = '''
7 | (
8 |   \.egg
9 |   | \.eggs
10 |   | \.git
11 |   | \.hg
12 |   | \.dvc
13 |   | \.mypy_cache
14 |   | \.pytest_cache
15 |   | \.nox
16 |   | \.tox
17 |   | \.venv
18 |   | \.venv-docs
19 |   | \.venv-dev
20 |   | \.venv-note
21 |   | \.venv-dempy
22 |   | _build
23 |   | build
24 |   | dist
25 |   | setup.py
26 | )
27 | '''
28 |
29 | [tool.isort]
30 | # https://github.com/timothycrosley/isort
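  | # The "black" profile below keeps isort's import formatting compatible with
  | # black, so the two formatters do not disagree about import layout.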
31 | py_version = 38 32 | profile = "black" 33 | multi_line_output = 3 34 | include_trailing_comma = true 35 | force_grid_wrap = 0 36 | use_parentheses = true 37 | line_length = 120 38 | skip_gitignore = true 39 | color_output = true 40 | #known_typing = ["typing", "types", "typing_extensions", "mypy", "mypy_extensions"] 41 | 42 | [tool.coverage.report] 43 | exclude_lines = [ 44 | "pragma: nocover", 45 | "raise NotImplementedError", 46 | "if __name__ == .__main__.:", 47 | "if TYPE_CHECKING:", 48 | "raise AssertionError", 49 | ] 50 | show_missing = true 51 | ignore_errors = true 52 | skip_covered = true 53 | #fail_under = 100 54 | #precision = 1 55 | omit = [ 56 | "test/*", 57 | ".venv*", 58 | ] 59 | 60 | # `pytest` configurations 61 | [tool.pytest.ini_options] 62 | minversion = "6.0" 63 | addopts = ["-vv", "--doctest-modules"] 64 | doctest_optionflags = "NORMALIZE_WHITESPACE" 65 | testpaths = ["test"] 66 | filterwarnings = ["ignore::DeprecationWarning"] 67 | 68 | [tool.mypy] 69 | # https://mypy.readthedocs.io/en/latest/config_file.html 70 | python_version = 3.8 71 | pretty = true 72 | show_traceback = true 73 | color_output = true 74 | warn_return_any = true 75 | warn_no_return = true 76 | warn_unused_configs = true 77 | warn_unused_ignores = true 78 | warn_redundant_casts = true 79 | warn_unreachable = true 80 | 81 | [tool.vulture] 82 | paths = ["src"] 83 | min_confidence = 65 84 | 85 | [tool.pydocstyle] 86 | convention = "google" 87 | #ignore = "D205,D415" 88 | 89 | [tool.interrogate] 90 | # https://github.com/econchick/interrogate#configuration 91 | ignore-init-method = true 92 | fail-under = 95 93 | color = true 94 | # possible values: 0 (minimal output), 1 (-v), 2 (-vv) 95 | verbose = 0 96 | quiet = false 97 | exclude = ["setup.py", "docs", "build"] 98 | 99 | [tool.nbqa.config] 100 | black = "pyproject.toml" 101 | isort = "pyproject.toml" 102 | 103 | [tool.nbqa.mutate] 104 | isort = 1 105 | black = 1 106 | pyupgrade = 1 107 | 108 | [tool.nbqa.addopts] 109 | pyupgrade = ["--py36-plus"] 110 | 111 | [tool.nbqa.files] 112 | isort = "^notebooks/" 113 | black = "^notebooks/" 114 | flake8 = "^notebooks/" 115 | mypy = "^notebooks/" 116 | pydocstyle = "^notebooks/" 117 | pyupgrade = "^notebooks/" 118 | 119 | [tool.bandit] 120 | targets = ["src"] 121 | # (optional) list included test IDs here, eg '[B101, B406]': 122 | tests = ["B201", "B301"] 123 | # (optional) list skipped test IDs here, eg '[B101, B406]': 124 | skips = ["B101", "B601"] 125 | 126 | [tool.bandit.assert_used] 127 | exclude = ["*_test.py", "test_*.py"] 128 | 129 | [tool.cruft] 130 | skip = [".git"] 131 | -------------------------------------------------------------------------------- /docs/descriptions/segmentation_search.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | Segmentation and Search 3 | ======================= 4 | 5 | ************ 6 | Segmentation 7 | ************ 8 | 9 | 10 | The :class:`~tssearch.search.segmentation.time_series_segmentation` locates the time instants between consecutive query repetitions on a longer and repetitive sequence. 11 | You will need to define the distance used for segmentation and provide a query and a sequence as inputs to :class:`~tssearch.search.segmentation.time_series_segmentation`, as follows: 12 | 13 | .. 
code:: python
14 |
15 |     import tssearch
16 |
17 |     data = tssearch.load_ecg_example()
18 |     cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
19 |
20 |     out = tssearch.time_series_segmentation(cfg, data["query"], data["sequence"], weight=data["weight"])
21 |
22 | In the code above, a ten-second segment from an electrocardiography record is used to define the query and the sequence, and the DTW is defined as the distance for the segmentation. Then, the segmentation is calculated and the output is assigned to a variable. The method receives as inputs the configuration file, the query, and the sequence. Additionally, an optional vector input that assigns weights to each time instance of the query is also given as input.
23 |
24 | .. image:: https://i.postimg.cc/4yfGJJVB/Fig-4-1.png
25 |     :alt: Example ECG segmentation output
26 |
27 | In this example, the specified weights vector assigned less contribution to the second local maximum of the ECG (T wave).
28 |
29 | If you are interested in further characterizing each subsequence, this could be accomplished using the distance values calculated for each segment and/or using `TSFEL
30 | `_ to extract temporal, statistical, and spectral features as data representations for classification algorithms.
31 |
32 | ******
33 | Search
34 | ******
35 |
36 | The :class:`~tssearch.search.query_search.time_series_search` method locates the k-best occurrences of a given query on a longer sequence based on a distance measurement. By default, k is set to retrieve the maximum number of matches. The user can also explicitly define the value of k to retrieve the k-best occurrences.
37 |
38 | An illustrative example is provided below:
39 |
40 | .. code:: python
41 |
42 |     import tssearch
43 |     import numpy as np
44 |
45 |     query = np.loadtxt("query.txt")
46 |     sequence = np.loadtxt("sequence.txt")
47 |
48 |     cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
49 |     cfg['elastic']['Dynamic Time Warping']['parameters']['alpha'] = 0.5
50 |
51 |     out = tssearch.time_series_search(cfg, query, sequence)
52 |
53 | In the above code, the DTW is configured with an additional parameter :math:`{\alpha}` that weights the contribution of the cost in amplitude against the cost in its first derivative. Then, the query search is calculated, and the output is assigned to a variable. The method receives as inputs the configuration file, the query, and the sequence. Since the number of matches is not defined, the method retrieves the maximum number of matches.
54 |
55 | To illustrate this example, a wearable sensor-based human activity dataset with multidimensional data was used and the following visualization was obtained:
56 |
57 | .. image:: https://i.postimg.cc/rmrp3Fcb/Fig-6-1.png
58 |     :alt: Example of query search in stride segmentation
--------------------------------------------------------------------------------
/tssearch/utils/add_personal_distance.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | import json
4 | import os
5 | import sys
6 | import warnings
7 | from inspect import getmembers, isfunction
8 |
9 | from tssearch.utils.distances_settings import load_json
10 |
11 |
12 | def add_distance_json(distances_path, json_path):
13 |     """Adds new distances to a personal distances .json file.
14 |     Parameters
15 |     ----------
16 |     distances_path: string
17 |         Personal Python module directory containing new distances implementation.
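  |         Each distance function in this module should be decorated with
  |         ``@set_domain`` to declare its domain; functions without it are
  |         skipped (a warning is raised if no decorated function is found).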
18 |     json_path: string
19 |         Personal .json file directory containing existing distances from TSSEARCH.
20 |         New customised distances will be added to the file in this directory.
21 |     """
22 |
23 |     sys.path.append(distances_path[: -len(distances_path.split(os.sep)[-1]) - 1])
24 |     exec("import " + distances_path.split(os.sep)[-1][:-3])
25 |
26 |     # Reload module containing the new distances
27 |     importlib.reload(sys.modules[distances_path.split(os.sep)[-1][:-3]])
28 |     exec("import " + distances_path.split(os.sep)[-1][:-3] + " as pymodule")
29 |
30 |     # Functions from module containing the new distances
31 |     functions_list = [o for o in getmembers(locals()["pymodule"]) if isfunction(o[1])]
32 |     function_names = [fname[0] for fname in functions_list]
33 |
34 |     # Check if @set_domain was declared on the distances module
35 |     vset_domain = False
36 |
37 |     for fname, f in list(locals()["pymodule"].__dict__.items()):
38 |
39 |         if getattr(f, "domain", None) is not None:
40 |
41 |             vset_domain = True
42 |
43 |             # Access to the personal distances .json file
44 |             feat_json = load_json(json_path)
45 |
46 |             # Assign domain and tag
47 |             domain = getattr(f, "domain", None)
48 |
49 |             # Distance specifications
50 |             # Description
51 |             if f.__doc__ is not None:
52 |                 descrip = f.__doc__.split("\n")[0]
53 |             else:
54 |                 descrip = ""
55 |             # Distance usage
56 |             use = "yes"
57 |             # Distance function arguments
58 |             args_name = inspect.getfullargspec(f)[0]
59 |
60 |             # Access distance parameters
61 |             if args_name != "":
62 |                 # Retrieve default values of arguments
63 |                 spec = inspect.getfullargspec(f)
64 |                 defaults = dict(zip(spec.args[::-1], (spec.defaults or ())[::-1]))
65 |                 defaults.update(spec.kwonlydefaults or {})
66 |
67 |                 for p in args_name[1:]:
68 |                     if p not in list(defaults.keys()):
69 |                         defaults[p] = None
70 |                 if len(defaults) == 0:
71 |                     defaults = ""
72 |             else:
73 |                 defaults = ""
74 |
75 |             # Settings of the new distance
76 |             new_feature = {"description": descrip, "parameters": defaults, "function": fname, "use": use}
77 |
78 |             # Check if domain exists
79 |             try:
80 |                 feat_json[domain][fname] = new_feature
81 |             except KeyError:
82 |                 feat_json[domain] = {fname: new_feature}
83 |
84 |             # Write the new distance to the json file
85 |             with open(json_path, "w") as fout:
86 |                 json.dump(feat_json, fout, indent=" ")
87 |
88 |             print("Distance " + str(fname) + " was added.")
89 |
90 |     if vset_domain is False:
91 |         warnings.warn("No distances were added. Please declare @set_domain.", stacklevel=2)
92 |
--------------------------------------------------------------------------------
/tssearch/search/query_search.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tssearch.distances.elastic_utils import traceback_adj, lcss_path, lcss_score
3 | from tssearch.search.search_utils import lockstep_search, elastic_search, start_sequences_index
4 |
5 |
6 | def time_series_search(dict_distances, query, sequence, tq=None, ts=None, weight=None, output=("number", 1)):
7 |     """
8 |     The time series search method locates the k-best occurrences of a given query on a longer sequence based on a
9 |     distance measurement.
10 |
11 |     Parameters
12 |     ----------
13 |     dict_distances: dict
14 |         Configuration file with distances.
15 |     query: nd-array
16 |         Query time series.
17 |     sequence: nd-array
18 |         Sequence time series.
19 |     tq: nd-array
20 |         Time stamps of the query time series.
21 |     ts: nd-array
22 |         Time stamps of the sequence time series.
23 |     weight: nd-array (Default: None)
24 |         Query weight values.
25 | output: tuple 26 | number of occurrences. 27 | 28 | Returns 29 | ------- 30 | distance_results: dict 31 | time instants, optimal alignment path and distance for each occurrence per distance. 32 | """ 33 | 34 | l_query = len(query) 35 | distance_results = {} 36 | 37 | for d_type in dict_distances: 38 | for dist in dict_distances[d_type]: 39 | 40 | if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes": 41 | distance_results[dist] = {} 42 | if d_type == "lockstep": 43 | distance = lockstep_search(dict_distances[d_type][dist], query, sequence, weight) 44 | 45 | start_index = start_sequences_index(distance, output=output, overlap=l_query) 46 | end_index, path = [], [] 47 | for start in start_index: 48 | end_index += [start + l_query] 49 | path += [(np.arange(l_query), np.arange(start, end_index[-1]))] 50 | distance_results[dist]["path_dist"] = distance[start_index] 51 | elif d_type == "elastic": 52 | distance, ac = elastic_search(dict_distances[d_type][dist], query, sequence, tq, ts, weight) 53 | 54 | if dist == "Longest Common Subsequence": 55 | eps = dict_distances[d_type][dist]["parameters"]["eps"] 56 | if len(np.shape(query)) == 1: 57 | query_copy = query.reshape(-1, 1) 58 | sequence_copy = sequence.reshape(-1, 1) 59 | path = [lcss_path(query_copy, sequence_copy, ac, eps)] 60 | else: 61 | path = [lcss_path(query, sequence, ac, eps)] 62 | distance_results[dist]["path_dist"] = [lcss_score(ac)] 63 | end_index = [path_i[1][-1] for path_i in path] 64 | else: 65 | end_index = start_sequences_index(distance, output=output, overlap=l_query / 2) 66 | # check if traceback_adj is equal to other elastic measures 67 | path = [traceback_adj(ac[:, : int(pk) + 1]) for pk in end_index] 68 | distance_results[dist]["path_dist"] = distance[end_index] 69 | start_index = [path_i[1][0] for path_i in path] 70 | 71 | else: 72 | print("WARNING") 73 | continue 74 | 75 | distance_results[dist]["distance"] = distance 76 | distance_results[dist]["start"] = start_index 77 | distance_results[dist]["end"] = end_index 78 | distance_results[dist]["path"] = path 79 | 80 | return distance_results 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 | -----------------
6 |
7 | [![license](https://img.shields.io/badge/License-BSD%203-brightgreen)](https://github.com/fraunhoferportugal/tssearch/blob/master/LICENSE.txt)
8 | [![Documentation Status](https://readthedocs.org/projects/tssearch/badge/?version=latest)](https://tssearch.readthedocs.io/en/latest/?badge=latest)
9 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tssearch)
10 | ![PyPI](https://img.shields.io/pypi/v/tssearch?logo=pypi&color=blue)
11 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
12 | [![Downloads](https://pepy.tech/badge/tssearch)](https://pepy.tech/project/tssearch)
13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/fraunhoferportugal/tssearch/blob/master/notebooks/Query_search_unidimensional.ipynb)
14 |
15 | # Time Series Subsequence Search Library
16 |
17 | ## Intuitive time series subsequence search
18 | This repository hosts the **TSSEARCH - Time Series Subsequence Search** Python package. TSSEARCH assists researchers in exploratory analysis for query search and time series segmentation without requiring significant programming effort.
19 |
20 | ## Functionalities
21 |
22 | * **Search**: We provide methods for time series query search and segmentation
23 | * **Weights**: The relative contribution of each point of the query to the overall distance can be expressed using a user-defined weight vector.
24 | * **Visualization**: We provide visualizations to present the results of the
25 | segmentation and query search
26 | * **Unit tested**: We provide unit tests for each distance
27 | * **Easily extended**: Adding new distances is easy, and we encourage you to contribute with your custom distances or search methods
28 |
29 | ## Get started
30 |
31 | ### ⚙️ Installation
32 | TSSEARCH supports Python 3.8 or greater. You can easily install via PyPI:
33 |
34 | ```bash
35 | pip install tssearch
36 | ```
37 |
38 | ### Example
39 | The code below segments a 10 s electrocardiography record:
40 |
41 | ```python
42 | import tssearch
43 |
44 | # Load the query, (optional) weight vector and sequence
45 | data = tssearch.load_ecg_example()
46 |
47 | # Selects the Dynamic Time Warping (DTW) as the distance for the segmentation
48 | cfg = tssearch.get_distance_dict(["Dynamic Time Warping"])
49 |
50 | # Performs the segmentation
51 | out = tssearch.time_series_segmentation(cfg, data['query'], data['sequence'], weight=data['weight'])
52 | ```
53 |
54 | ### Documentation
55 | The documentation is available [here](https://tssearch.readthedocs.io/en/latest/).
56 |
57 | ## Available distances
58 |
59 | | Lockstep                             |
60 | |--------------------------------------|
61 | | Lp Distances                         |
62 | | Pearson Correlation Distance         |
63 | | Short Time Series Distance (STS)     |
64 |
65 | | Elastic                              |
66 | |--------------------------------------|
67 | | Dynamic Time Warping (DTW)           |
68 | | Longest Common Subsequence (LCSS)    |
69 | | Time Warp Edit Distance (TWED)       |
70 |
71 | | Time                                 |
72 | |--------------------------------------|
73 | | Time Alignment Measurement (TAM)     |
74 |
75 | ## Citing
76 | When using TSSEARCH please cite the following publication:
77 |
78 | Folgado, Duarte and Barandas, Marília, et al. "*TSSEARCH: Time Series Subsequence Search Library*" SoftwareX 11 (2022).
[https://doi.org/10.1016/j.softx.2022.101049](https://doi.org/10.1016/j.softx.2022.101049) 79 | 80 | 81 | ## Acknowledgements 82 | This work is a result of the project ConnectedHealth (n.º 46858), supported by Competitiveness and Internationalisation Operational Programme (POCI) and Lisbon Regional Operational Programme (LISBOA 2020), under the PORTUGAL 2020 Partnership Agreement, through the European Regional Development Fund (ERDF) 83 | -------------------------------------------------------------------------------- /tssearch/search/search_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.signal import find_peaks 3 | 4 | 5 | def elastic_search(dict_distances, query, sequence, tq=None, ts=None, weight=None): 6 | """ 7 | Query search for elastic measures 8 | 9 | Parameters 10 | ---------- 11 | dict_distances: dict 12 | Configuration file with distances 13 | query: nd-array 14 | Query time series. 15 | sequence: nd-array 16 | Sequence time series. 17 | tq: nd-array 18 | Time stamp time series query. 19 | ts: nd-array 20 | Time stamp time series sequence. 21 | weight: nd-array (Default: None) 22 | query weight values 23 | 24 | Returns 25 | ------- 26 | distance: nd-array 27 | distance value between query and sequence 28 | ac: nd-array 29 | accumulated cost matrix 30 | """ 31 | 32 | exec("from tssearch import *") 33 | 34 | # distance function 35 | func_total = dict_distances["function"] 36 | 37 | # Check for parameters 38 | parameters_total = {} 39 | if dict_distances["parameters"] != "": 40 | parameters_total = dict_distances["parameters"] 41 | parameters_total["report"] = "search" 42 | 43 | if "dtw_type" in parameters_total: 44 | if parameters_total["dtw_type"] == "dtw": 45 | parameters_total["dtw_type"] = "sub-dtw" 46 | 47 | if "time" in parameters_total: 48 | parameters_total_copy = parameters_total.copy() 49 | del parameters_total_copy["time"] 50 | distances, ac = locals()[func_total](query, sequence, tq, ts, **parameters_total_copy) 51 | else: 52 | distances, ac = locals()[func_total](query, sequence, **parameters_total) 53 | 54 | return distances, ac 55 | 56 | 57 | def lockstep_search(dict_distances, query, sequence, weight): 58 | """ 59 | Query search for lockstep measures 60 | 61 | Parameters 62 | ---------- 63 | dict_distances: dict 64 | Configuration file with distances 65 | query: nd-array 66 | Query time series. 67 | sequence: nd-array 68 | Sequence time series. 
69 | weight: nd-array (Default: None) 70 | query weight values 71 | 72 | Returns 73 | ------- 74 | res: nd-array 75 | distance value between query and sequence 76 | """ 77 | 78 | exec("from tssearch import *") 79 | 80 | # distance function 81 | func_total = dict_distances["function"] 82 | 83 | # Check for parameters 84 | parameters_total = {} 85 | if dict_distances["parameters"] != "": 86 | parameters_total = dict_distances["parameters"] 87 | 88 | lw = len(query) 89 | res = np.zeros(len(sequence) - lw, "d") 90 | for i in range(len(sequence) - lw): 91 | seq_window = sequence[i : i + lw] 92 | 93 | eval_result = locals()[func_total](seq_window, query, weight, **parameters_total) 94 | 95 | res[i] = eval_result / lw # default normalization 96 | 97 | return res 98 | 99 | 100 | def start_sequences_index(distance, output=("number", 1), overlap=1.0): 101 | """ 102 | Method to retrieve the k-best occurrences from a given vector distance 103 | 104 | Parameters 105 | ---------- 106 | distance: nd-array 107 | distance values 108 | output: tuple 109 | number of occurrences 110 | overlap: float 111 | minimum distance between occurrences 112 | 113 | Returns 114 | ------- 115 | id_s: nd-array 116 | indexes of k-best occurrences 117 | """ 118 | 119 | # pks - min 120 | pks, _ = find_peaks(-distance, distance=overlap) # TODO if necessary add first and last sequence 121 | pks_val = distance[pks] 122 | 123 | if output[0] == "number": 124 | num_events = output[1] 125 | pks_val_sort = np.argsort(pks_val) 126 | id_s = pks[pks_val_sort[:num_events]] 127 | elif output[0] == "percentile": 128 | perct = output[1] 129 | perct_val = np.percentile(distance, 100 - perct) 130 | pks_perct = np.where(pks_val < perct_val)[0] 131 | id_s = pks[pks_perct] 132 | elif output[0] == "threshold": 133 | thres = output[1] 134 | pks_thres = np.where(pks_val < thres)[0] 135 | id_s = pks[pks_thres] 136 | else: 137 | id_s = pks[np.argmin(pks_val)] 138 | 139 | return id_s 140 | -------------------------------------------------------------------------------- /tssearch/distances/compute_distance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from tssearch.search.segmentation import time_series_segmentation 4 | 5 | 6 | def time_series_distance(dict_distances, x, y, tx=None, ty=None): 7 | """ 8 | 9 | Parameters 10 | ---------- 11 | dict_distances: dict 12 | Dictionary of distances parameters. 13 | x: nd-array 14 | Time series x (query). 15 | y: nd-array 16 | Time series y. 17 | tx: nd-array 18 | Time stamp time series x. 19 | ty: nd-array 20 | Time stamp time series y. 21 | 22 | Returns 23 | ------- 24 | distances: pandas DataFrame 25 | Distances values. 
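    Example
    -------
    A minimal usage sketch (illustrative only: it assumes a configuration dict in the
    format produced by ``tssearch.get_distance_dict`` and that "Euclidean Distance" is
    one of the available distance names)::

        import numpy as np
        import tssearch

        cfg = tssearch.get_distance_dict(["Euclidean Distance"])
        dists = time_series_distance(cfg, np.sin(np.arange(10)), np.cos(np.arange(10)))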
26 | 27 | """ 28 | 29 | exec("from tssearch import *") 30 | 31 | distance_results = [] 32 | distance_names = [] 33 | 34 | multivariate = True if len(np.shape(x)) > 1 else False 35 | 36 | for d_type in dict_distances: 37 | for dist in dict_distances[d_type]: 38 | 39 | # Only returns used functions 40 | if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes": 41 | # remove unidimensional distances 42 | if multivariate and dict_distances[d_type][dist]["multivariate"] == "no": 43 | continue 44 | 45 | func_total = dict_distances[d_type][dist]["function"] 46 | 47 | # Check for parameters 48 | parameters_total = {} 49 | if dict_distances[d_type][dist]["parameters"] != "": 50 | parameters_total = dict_distances[d_type][dist]["parameters"] 51 | 52 | if "time" in parameters_total: 53 | parameters_total_copy = parameters_total.copy() 54 | del parameters_total_copy["time"] 55 | eval_result = locals()[func_total](x, y, tx, ty, **parameters_total_copy) 56 | else: 57 | eval_result = locals()[func_total](x, y, **parameters_total) 58 | 59 | distance_results += [eval_result] 60 | distance_names += [dist] 61 | 62 | distances = pd.DataFrame(data=np.array(distance_results), index=np.array(distance_names), columns=["Distance"]) 63 | 64 | return distances 65 | 66 | 67 | def time_series_distance_windows(dict_distances, x, y, tx=None, ty=None, segmentation=None): 68 | """ 69 | 70 | Parameters 71 | ---------- 72 | dict_distances: dict 73 | Dictionary of distances parameters. 74 | x: nd-array 75 | Time series x (query). 76 | y: nd-array 77 | Time series y (windows). 78 | tx: nd-array 79 | Time stamp time series x. 80 | ty: nd-array 81 | Time stamp time series y (windows). 82 | segmentation: dict 83 | Dictionary of distances parameters. 84 | 85 | Returns 86 | ------- 87 | dist_windows: pandas DataFrame 88 | Distances values per window. 
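    Example
    -------
    A minimal usage sketch (illustrative only: the distance name "Euclidean Distance" is
    an assumption, and ``y`` is passed directly as a list of pre-segmented windows, so no
    ``segmentation`` dict is required)::

        import numpy as np
        import tssearch

        cfg = tssearch.get_distance_dict(["Euclidean Distance"])
        query = np.sin(np.arange(10))
        windows = [np.cos(np.arange(10)), np.sin(np.arange(10)) + 0.1]
        df = time_series_distance_windows(cfg, query, windows)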
89 |
90 |     """
91 |
92 |     if segmentation is not None:
93 |         results = time_series_segmentation(segmentation, x, y, tx, ty)
94 |         func_name = list(segmentation[list(dict_distances.keys())[0]].keys())[0]
95 |
96 |         ts_w = None if ty is None else []
97 |         windows = []
98 |         for i in range(len(results[func_name]) - 1):
99 |             if ty is not None:
100 |                 ts_w += [ty[results[func_name][i] : results[func_name][i + 1]]]
101 |             windows += [y[results[func_name][i] : results[func_name][i + 1]]]
102 |     else:
103 |         windows = y
104 |         ts_w = ty
105 |
106 |     multivariate = True if len(np.shape(x)) > 1 else False
107 |
108 |     exec("from tssearch import *")
109 |
110 |     dist_windows = pd.DataFrame()
111 |     for d_type in dict_distances:
112 |         for dist in dict_distances[d_type]:
113 |
114 |             # Only returns used functions
115 |             if "use" not in dict_distances[d_type][dist] or dict_distances[d_type][dist]["use"] == "yes":
116 |
117 |                 if multivariate and dict_distances[d_type][dist]["multivariate"] == "no":
118 |                     continue
119 |
120 |                 func_total = dict_distances[d_type][dist]["function"]
121 |
122 |                 # Check for parameters
123 |                 parameters_total = {}
124 |                 if dict_distances[d_type][dist]["parameters"] != "":
125 |                     parameters_total = dict_distances[d_type][dist]["parameters"]
126 |
127 |                 distance_results = []
128 |                 if "time" in parameters_total:
129 |                     parameters_total_copy = parameters_total.copy()
130 |                     del parameters_total_copy["time"]
131 |                     for ty_window, window in zip(ts_w, windows):
132 |                         eval_result = locals()[func_total](x, window, tx, ty_window, **parameters_total_copy)
133 |                         distance_results += [eval_result]
134 |                 else:
135 |                     for window in windows:
136 |                         eval_result = locals()[func_total](x, window, **parameters_total)
137 |                         distance_results += [eval_result]
138 |
139 |                 dist_windows[dist] = distance_results
140 |
141 |     return dist_windows
142 |
--------------------------------------------------------------------------------
/tssearch/utils/visualisation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 |
5 | from matplotlib.collections import LineCollection
6 |
7 |
8 | # Visualisation
9 | def plot_alignment(ref_signal, estimated_signal, path, **kwargs):
10 |     """
11 |     This function plots the resulting alignment of two sequences given the path
12 |     calculated by the Dynamic Time Warping algorithm.
13 |
14 |     :param ref_signal: (array-like)
15 |         The reference sequence.
16 |     :param estimated_signal: (array-like)
17 |         The estimated sequence.
18 |     :param path: (array-like)
19 |         A 2D array containing the path resulting from the algorithm
20 |     :param \**kwargs:
21 |         See below:
22 |
23 |         * *offset* (``double``) --
24 |             The offset used to move the reference signal to an upper position for
25 |             visualization purposes.
26 |             (default: ``2``)
27 |
28 |         * *linewidths* (``list``) --
29 |             A list containing the linewidth for the reference, estimated and connection
30 |             plots, respectively.
31 |             (default: ``[3, 3, 0.5]``)
32 |
33 |         * *step* (``int``) --
34 |             The subsampling step used when drawing the connection lines between the signals.
35 |             (default: ``2``)
36 |
37 |         * *colors* (``list``) --
38 |             A list containing the colors for the reference, estimated and connection
39 |             plots, respectively.
40 |             (default: ``[sns.color_palette()[0], sns.color_palette()[1], 'k']``)
41 |
42 |         * *label* (``list``) --
43 |             A list containing the labels for the reference and estimated signals.
44 |             (default: ``['Reference', 'Estimated']``)
45 |     """
46 |
47 |     step = kwargs.get("step", 2)
48 |     hoffset = kwargs.get("hoffset", 0)
49 |     voffset = kwargs.get("offset", 2) * np.max(ref_signal)
50 |     linewidths = kwargs.get("linewidths", [3, 3, 0.5])
51 |     colors = kwargs.get("colors", [sns.color_palette()[0], sns.color_palette()[1], "k"])
52 |     label = kwargs.get("label", ["Reference", "Estimated"])
53 |
54 |     # Copy to prevent unexpected changes in the reference signal, then
55 |     # set a vertical offset for visualization
56 |     copy_ref = np.copy(ref_signal) + voffset
57 |     xref = np.arange(len(copy_ref)) + hoffset
58 |
59 |     # Actual plot occurs here
60 |     # Get current axis
61 |     ax = plt.gca()
62 |
63 |     # Create secondary axis to the right, that counteracts the offset
64 |     ax2 = ax.secondary_yaxis('right', functions=(lambda x: x - voffset, lambda x: x + voffset))
65 |
66 |     # plot offset/reference signal and adjust tick colors
67 |     ax.plot(xref, copy_ref, color=sns.color_palette()[0], lw=linewidths[0], label=label[0])
68 |     plt.setp(ax2.get_yticklabels(), color=sns.color_palette()[0])
69 |
70 |     # plot non-offset/estimated signal and adjust tick colors
71 |     ax.plot(estimated_signal, color=sns.color_palette()[1], lw=linewidths[1], label=label[1])
72 |     plt.setp(ax.get_yticklabels(), color=sns.color_palette()[1])
73 |
74 |     plt.legend(fontsize=17)
75 |
76 |     [
77 |         plt.plot(
78 |             [[path[0][i] + hoffset], [path[1][i]]],
79 |             [copy_ref[path[0][i]], estimated_signal[path[1][i]]],
80 |             color=colors[2],
81 |             lw=linewidths[2],
82 |         )
83 |         for i in range(len(path[0]))[::step]
84 |     ]
85 |
86 |
87 | def plot_costmatrix(matrix, path):
88 |     """
89 |     This function overlays the optimal warping path on the cost matrix
90 |     :param matrix: (ndarray-like)
91 |         The cost matrix (local cost or accumulated)
92 |     :param path: (ndarray-like)
93 |         The optimal warping path
94 |     :return: (void)
95 |         Plots the optimal warping path with an overlay of the cost matrix.
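    Usage sketch (illustrative; it assumes the ``dtw`` function from
    ``tssearch.distances.elastic_distances``, whose ``report`` keyword can return
    the accumulated cost matrix and the optimal path):

        ac = dtw(query, sequence, report="cost_matrix")
        path = dtw(query, sequence, report="path")
        plot_costmatrix(ac, path)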
96 | """ 97 | plt.imshow(matrix.T, cmap="viridis", origin="lower", interpolation="None") 98 | plt.colorbar() 99 | plt.plot(path[0], path[1], "w.-") 100 | plt.xlim((-0.5, matrix.shape[0] - 0.5)) 101 | plt.ylim((-0.5, matrix.shape[1] - 0.5)) 102 | 103 | 104 | def plot_search_distance_result(res, sequence, ts=None, cmap_name="viridis"): 105 | 106 | if ts is None: 107 | ts = np.arange(len(sequence)) 108 | # set distance scale 109 | cmap = plt.cm.get_cmap(cmap_name) 110 | colors = cmap(np.arange(cmap.N)) 111 | 112 | if len(np.shape(sequence)) > 1: 113 | sequence_shape = np.shape(sequence)[1] 114 | else: 115 | sequence_shape = 1 116 | 117 | all_axs = [] 118 | for k in res.keys(): 119 | max_dist = np.max(res[k]["path_dist"]) 120 | min_dist = np.min(res[k]["path_dist"]) 121 | if max_dist == min_dist: 122 | max_dist += 1 123 | min_dist -= 1 124 | delta_dist = max_dist - min_dist 125 | 126 | fig, axs = plt.subplots(sequence_shape + 1, 1, figsize=(15, 5)) 127 | axs[0].set_title(k) 128 | for i in range(sequence_shape): 129 | plot_seq = sequence if sequence_shape == 1 else sequence[:, i] 130 | axs[i].plot(ts, plot_seq, "lightgray") 131 | for s, e, d in zip(res[k]["start"], res[k]["end"], res[k]["path_dist"]): 132 | d_idx = int((d - min_dist) * cmap.N / delta_dist) - 1 133 | axs[i].plot(ts[np.arange(s, e)], plot_seq[s:e], c=colors[d_idx]) 134 | if i < sequence_shape - 2: 135 | axs[i].sharex(axs[i + 1]) 136 | axs[i].set_xticks([]) 137 | # [axs[i-1].sharex(axs[i]) for i in range(1, sequence_shape)] 138 | axs[sequence_shape].set_xlabel("Distance") 139 | axs[sequence_shape].imshow([colors], extent=[min_dist, max_dist, 0, 0.02 * delta_dist]) 140 | axs[sequence_shape].set_yticks([]) 141 | 142 | all_axs += [axs] 143 | 144 | return all_axs 145 | 146 | 147 | def plot_weight_query(x, query, weight, cmap="viridis", axs=None, fig=None): 148 | 149 | points = np.array([x, query]).T.reshape(-1, 1, 2) 150 | segments = np.concatenate([points[:-1], points[1:]], axis=1) 151 | 152 | if axs is None: 153 | fig = plt.figure() 154 | axs = plt.subplot() 155 | 156 | lc = LineCollection(segments, cmap=cmap, norm=plt.Normalize(0, weight.max())) 157 | lc.set_array(weight) 158 | lc.set_linewidth(2) 159 | line = axs.add_collection(lc) 160 | 161 | cbar = fig.colorbar(line, ax=axs) 162 | cbar.set_label("weight") 163 | 164 | axs.set_xlim(x.min(), x.max()) 165 | axs.set_ylim(query.min() - 1, query.max() + 1) 166 | plt.show() 167 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # TSSEARCH documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Dez 28 18:37:41 2021. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
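# For example (illustrative snippet; the relative path is hypothetical):
#
# import sys
# sys.path.insert(0, os.path.abspath('..'))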
18 | 19 | import os 20 | 21 | if os.environ.get('READTHEDOCS', None) == 'True': 22 | import inspect 23 | from sphinx import apidoc 24 | 25 | __location__ = os.path.join(os.getcwd(), os.path.dirname(inspect.getfile(inspect.currentframe()))) 26 | 27 | output_dir = os.path.join(__location__, "../docs/descriptions/modules") 28 | module_dir = os.path.join(__location__, "../tssearch") 29 | cmd_line_template = "sphinx-apidoc -f -o {outputdir} {moduledir}" 30 | cmd_line = cmd_line_template.format(outputdir=output_dir, moduledir=module_dir) 31 | apidoc.main(cmd_line.split(" ")) 32 | 33 | # -- General configuration ------------------------------------------------ 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | # 37 | # needs_sphinx = '1.0' 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 42 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 43 | 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', 'sphinx.ext.coverage', 44 | 'sphinx.ext.doctest', 'sphinx.ext.ifconfig', 'sphinx.ext.imgmath', 45 | 'sphinx.ext.napoleon' 46 | ] 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = '.rst' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # General information about the project. 61 | import datetime 62 | now = datetime.datetime.today() 63 | project = u'TSSEARCH' 64 | copyright = u'2022, Fraunhofer AICOS' 65 | author = u'Fraunhofer AICOS' 66 | 67 | # The version info for the project you're documenting, acts as replacement for 68 | # |version| and |release|, also used in various other places throughout the 69 | # built documents. 70 | # 71 | # The short X.Y version. 72 | version = u'0.1.3' 73 | # The full version, including alpha/beta/rc tags. 74 | release = u'0.1.3' 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = None 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | # This patterns also effect to html_static_path and html_extra_path 86 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 87 | 88 | # If true, keep warnings as "system message" paragraphs in the built documents. 89 | keep_warnings = True 90 | 91 | # Boolean indicating whether to scan all found documents for autosummary 92 | # directives, and to generate stub pages for each 93 | autosummary_generate = True 94 | 95 | # The name of the Pygments (syntax highlighting) style to use. 96 | pygments_style = 'default' 97 | 98 | # -- Options for HTML output ---------------------------------------------- 99 | 100 | # The theme to use for HTML and HTML Help pages. See the documentation for 101 | # a list of builtin themes. 102 | # 103 | html_theme = 'sphinx_rtd_theme' 104 | 105 | # Theme options are theme-specific and customize the look and feel of a theme 106 | # further. For a list of options available for each theme, see the 107 | # documentation. 
108 | # 109 | # html_theme_options = {} 110 | 111 | # Add any paths that contain custom static files (such as style sheets) here, 112 | # relative to this directory. They are copied after the builtin static files, 113 | # so a file named "default.css" will overwrite the builtin "default.css". 114 | html_static_path = [] 115 | 116 | # -- Options for HTMLHelp output ------------------------------------------ 117 | 118 | # Output file base name for HTML help builder. 119 | htmlhelp_basename = 'TSSEARCHdoc' 120 | 121 | 122 | # -- Options for LaTeX output --------------------------------------------- 123 | 124 | latex_elements = { 125 | # The paper size ('letterpaper' or 'a4paper'). 126 | # 127 | # 'papersize': 'letterpaper', 128 | 129 | # The font size ('10pt', '11pt' or '12pt'). 130 | # 131 | # 'pointsize': '10pt', 132 | 133 | # Additional stuff for the LaTeX preamble. 134 | # 135 | # 'preamble': '', 136 | 137 | # Latex figure (float) alignment 138 | # 139 | # 'figure_align': 'htbp', 140 | } 141 | 142 | # Grouping the document tree into LaTeX files. List of tuples 143 | # (source start file, target name, title, 144 | # author, documentclass [howto, manual, or own class]). 145 | latex_documents = [ 146 | (master_doc, 'TSSEARCH.tex', u'TSSEARCH Documentation', 147 | u'Fraunhofer AICOS', 'manual'), 148 | ] 149 | 150 | 151 | # -- Options for manual page output --------------------------------------- 152 | 153 | # One entry per manual page. List of tuples 154 | # (source start file, name, description, authors, manual section). 155 | man_pages = [ 156 | (master_doc, 'tssearch', u'TSSEARCH Documentation', 157 | [author], 1) 158 | ] 159 | 160 | 161 | # -- Options for Texinfo output ------------------------------------------- 162 | 163 | # Grouping the document tree into Texinfo files. List of tuples 164 | # (source start file, target name, title, author, 165 | # dir menu entry, description, category) 166 | texinfo_documents = [ 167 | (master_doc, 'TSSEARCH', u'TSSEARCH Documentation', 168 | author, 'TSSEARCH', 'One line description of project.', 169 | 'Miscellaneous'), 170 | ] 171 | 172 | 173 | # -- Options for Epub output ---------------------------------------------- 174 | 175 | # Bibliographic Dublin Core info. 176 | epub_title = project 177 | epub_author = author 178 | epub_publisher = author 179 | epub_copyright = copyright 180 | 181 | # The unique identifier of the text. This can be a ISBN number 182 | # or the project homepage. 183 | # 184 | # epub_identifier = '' 185 | 186 | # A unique identification for the text. 187 | # 188 | # epub_uid = '' 189 | 190 | # A list of files that should not be packed into the epub file. 191 | epub_exclude_files = ['search.html'] 192 | 193 | 194 | # Example configuration for intersphinx: refer to the Python standard library. 195 | intersphinx_mapping = {'https://docs.python.org/': None} 196 | -------------------------------------------------------------------------------- /docs/descriptions/elastic_distances.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Elastic Distances 3 | ================= 4 | 5 | Distance measures that perform a non-linear mapping to align the time series and allow flexible comparison of one-to-many or one-to-none points (e.g., Dynamic Time Warping, Longest Common Subsequence). These measures produce elastic adjustment to compensate for potential localized misalignment. 
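As a quick illustration of this flexibility, the sketch below compares a lockstep and an elastic measure on two identical pulses shifted in time (illustrative only: it assumes that "Euclidean Distance" and "Dynamic Time Warping" are available distance names in the configuration returned by ``tssearch.get_distance_dict``).

.. code:: python

    import numpy as np
    import tssearch
    from tssearch.distances.compute_distance import time_series_distance

    # two identical pulses, one of them shifted in time
    ts1 = np.concatenate([np.zeros(20), np.hanning(40), np.zeros(20)])
    ts2 = np.concatenate([np.zeros(30), np.hanning(40), np.zeros(10)])

    cfg = tssearch.get_distance_dict(["Euclidean Distance", "Dynamic Time Warping"])

    # the lockstep distance is inflated by the shift, while the elastic
    # alignment of DTW largely compensates for it
    print(time_series_distance(cfg, ts1, ts2))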
6 |
7 | **************************
8 | Dynamic Time Warping (DTW)
9 | **************************
10 |
11 | The DTW algorithm computes the stretch of the time axis that optimally maps one time series onto another. It measures the remaining cumulative distance after the alignment and the pairwise correspondence between each sample.
12 |
13 | .. code:: python
14 |
15 |     import numpy as np
16 |     import matplotlib.pyplot as plt
17 |     from tssearch.search.query_search import time_series_search
18 |     from tssearch.utils.visualisation import plot_alignment
19 |
20 |     # generates signals
21 |     freq = 2
22 |     amp = 2
23 |     time = np.linspace(0, 2, 100)
24 |     ts1 = np.concatenate([amp * np.sin(np.pi * time), np.zeros(100), amp * np.sin(np.pi * time), np.zeros(10)])
25 |     ts2 = np.concatenate([np.zeros(10), amp * np.sin(np.pi * time), np.zeros(150), amp * np.sin(np.pi * time), np.zeros(5)])
26 |
27 |     dict_distances = {
28 |         "elastic": {"Dynamic Time Warping": {
29 |             "multivariate": "yes",
30 |             "description": "",
31 |             "function": "dtw",
32 |             "parameters": {"dtw_type": "dtw", "alpha": 1},
33 |             "use": "yes"}
34 |         }
35 |     }
36 |
37 |     result = time_series_search(dict_distances, ts1, ts2, output=("number", 1))
38 |
39 |     plt.figure()
40 |     plt.title("Dynamic Time Warping")
41 |     plot_alignment(ts1, ts2, result["Dynamic Time Warping"]["path"][0])
42 |     plt.legend(fontsize=17, loc="lower right")
43 |
44 |
45 | .. image:: https://i.postimg.cc/sgQKCBfj/dtw-search.png
46 |     :alt: An example of DTW.
47 |
48 | *********************************
49 | Longest Common Subsequence (LCSS)
50 | *********************************
51 |
52 | The Longest Common Subsequence (LCSS) measures the similarity between two time series whose lengths might be different. Since it is formulated based on edit distances, gaps or unmatched regions are permitted, and they are penalized with a value proportional to their length. It can be useful to identify similarities between time series whose lengths differ greatly or that contain noise [1]_.
53 |
54 | In the example below, we compute the LCSS alignment between two time series, one of them with added noise.
55 |
56 | .. code:: python
57 |
58 |     import numpy as np
59 |     import matplotlib.pyplot as plt
60 |     from tssearch.search.query_search import time_series_search
61 |     from tssearch.utils.visualisation import plot_alignment
62 |
63 |     ts1 = np.sin(np.arange(0, 4*np.pi, 0.1))
64 |     noise = np.random.normal(0, 0.1, ts1.shape)
65 |     ts2 = 1 + np.sin(np.arange(0, 4*np.pi, 0.1) + 2) + noise
66 |
67 |     ts1 = ts1.reshape(-1, 1)
68 |     ts2 = ts2.reshape(-1, 1)
69 |
70 |     dict_distances = {
71 |         "elastic": {"Longest Common Subsequence": {
72 |             "multivariate": "yes",
73 |             "description": "",
74 |             "function": "lcss",
75 |             "parameters": {"eps": 1, "report": "distance"},
76 |             "use": "yes"}
77 |         }
78 |     }
79 |
80 |     result = time_series_search(dict_distances, ts1, ts2, output=("number", 1))
81 |
82 |     plt.figure()
83 |     plt.title("Longest Common Subsequence")
84 |     plot_alignment(ts1, ts2, result["Longest Common Subsequence"]["path"][0])
85 |
86 |
87 | .. image:: https://i.postimg.cc/43Rx3ZBV/lcss-search.png
88 |     :alt: An example of LCSS.
89 |
90 |
91 | ******************************
92 | Time Warp Edit Distance (TWED)
93 | ******************************
94 |
95 | Time Warp Edit Distance (TWED) uses the difference between sample indexes/timestamps to linearly penalize matches between samples whose indexes/timestamps are far apart, and to favor matches between samples whose indexes/timestamps are close.
In contrast to other elastic measures, TWED provides a time-shift tolerance controlled by the stiffness parameter of the measure. Moreover, it involves a second parameter defining a constant penalty for insert or delete operations. If stiffness > 0, TWED is a distance (i.e., verifies the triangle inequality) in both space and time [2]_.
96 |
97 | TWED has been used in time series classification, with classification performance assessed while varying its input parameters [2]_, [3]_. In the example below, we calculate TWED between two time series while varying its parameters.
98 |
99 | .. code:: python
100 |
101 |     import numpy as np
102 |     import pandas as pd
103 |     import seaborn as sns
104 |     import matplotlib.pyplot as plt
105 |     from tssearch.distances.compute_distance import time_series_distance
106 |
107 |     # generates signals
108 |     freq = 2
109 |     amp = 2
110 |     time = np.linspace(0, 2, 1000)
111 |     ts1 = amp * np.sin(2 * np.pi * freq * time)
112 |     ts2 = amp * np.sin(6 * np.pi * freq * time)[::50]
113 |
114 |     # visualize original and downsampled sequence
115 |     plt.figure()
116 |     plt.plot(time, ts1, color=sns.color_palette("Greens")[2], label="Time series 1", lw=3.)
117 |     plt.plot(time[::50], ts2, color=sns.color_palette("Greens")[5], label="Time series 2", lw=3.)
118 |     plt.ylabel('Space')
119 |     plt.xlabel('Time')
120 |     plt.legend(fontsize=17, loc="lower right")
121 |
122 |     stiffness = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]
123 |     penalty = [0, .25, .5, .75, 1.0]
124 |
125 |     distance = list()
126 |     for s in stiffness:
127 |         for p in penalty:
128 |             # calculate distances
129 |             dict_distances = {
130 |                 "elastic": {"Time Warp Edit Distance": {"multivariate": "no",
131 |                                                         "description": "",
132 |                                                         "function": "twed",
133 |                                                         "parameters": {"nu": s, "lmbda": p, "p": 2, "time": "true"},
134 |                                                         "use": "yes"}}}
135 |
136 |             distance.append({'stiffness': s,
137 |                              'penalty': p,
138 |                              'distance': time_series_distance(dict_distances,
139 |                                                               ts1, ts2,
140 |                                                               time, time[::50]).values[0][0]})
141 |
142 |     df = pd.DataFrame(distance)
143 |     df_pivot = df.pivot(index="stiffness", columns="penalty", values="distance")
144 |
145 |     plt.figure()
146 |     sns.heatmap(df_pivot, annot=True, cbar_kws={'label': "TWED"}, cmap="viridis")
147 |
148 |
149 | .. image:: https://i.postimg.cc/tJp6nWkd/twed-time-series-originals.png
150 |     :alt: Two example series
151 |
152 | .. image:: https://i.postimg.cc/bryGw8Y3/twed-heatmap.png
153 |     :alt: Resulting TWED distances
154 |
155 |
156 | .. [1] M. Vlachos, G. Kollios and D. Gunopulos, "Discovering similar multidimensional trajectories," Proceedings 18th International Conference on Data Engineering, 2002, pp. 673-684, doi: 10.1109/ICDE.2002.994784.
157 |
158 | .. [2] P. Marteau, "Time Warp Edit Distance with Stiffness Adjustment for Time Series Matching," in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 31, no. 2, pp. 306-318, Feb. 2009, doi: 10.1109/TPAMI.2008.76.
159 |
160 | .. [3] Joan Serrà, Josep Ll. Arcos, An empirical evaluation of similarity measures for time series classification, Knowledge-Based Systems, Volume 67, 2014, Pages 305-314, ISSN 0950-7051, https://doi.org/10.1016/j.knosys.2014.04.035.
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/tssearch/distances/elastic_distances.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from tssearch.distances.elastic_utils import (
4 |     cost_matrix,
5 |     accumulated_cost_matrix,
6 |     acc_initialization,
7 |     lcss_accumulated_matrix,
8 |     lcss_path,
9 |     lcss_score,
10 |     traceback_adj,
11 |     backtracking,
12 | )
13 |
14 |
15 | def dtw(x, y, weight=None, **kwargs):
16 |     """Computes Dynamic Time Warping (DTW) of two time series.
17 |
18 |     Parameters
19 |     ----------
20 |     x: nd-array
21 |         Time series x (query).
22 |     y: nd-array
23 |         Time series y.
24 |     weight: nd-array (Default: None)
25 |         Query weight values used in the local cost computation.
26 |     \**kwargs:
27 |         See below:
28 |
29 |         * *alpha* (``float``) --
30 |             Weighting factor passed to the local cost matrix computation.
31 |             (default: ``1``)
32 |
33 |         * *dist_norm* (``bool``) --
34 |             If ``True`` the DTW distance is normalized by the query length.
35 |             (default: ``True``)
36 |
37 |         * *window* (``String``) --
38 |             Selects the global window constraints. Available options are ``None`` and ``sakoe-chiba``.
39 |             (default: ``None``)
40 |
41 |         * *factor* (``Float``) --
42 |             Selects the global constraint factor.
43 |             (default: ``min(xl, yl) * .50``)
44 |
45 |
46 |     Returns
47 |     -------
48 |     d: float
49 |         The DTW distance (the value returned depends on the ``report`` keyword).
50 |     ac: nd-array
51 |         The accumulated cost matrix.
52 |     path: nd-array
53 |         The optimal warping path between the two sequences.
54 |     """
55 |
56 |     xl, yl = len(x), len(y)
57 |
58 |     alpha = kwargs.get("alpha", 1)
59 |     do_dist_norm = kwargs.get("dist_norm", True)
60 |     window = kwargs.get("window", None)
61 |     factor = kwargs.get("factor", np.min((xl, yl)) * 0.50)
62 |     dtw_type = kwargs.get("dtw_type", "dtw")
63 |     tolerance = kwargs.get("tolerance", 0)
64 |     report = kwargs.get("report", "distance")
65 |
66 |     # cost matrix
67 |     c = cost_matrix(x, y, alpha, weight=weight)
68 |     # Acc cost matrix
69 |     ac = accumulated_cost_matrix(c, window=window, factor=factor, dtw_type=dtw_type, tolerance=tolerance)
70 |
71 |     # Distance
72 |     if report == "cost_matrix":
73 |         return ac
74 |     elif report == "search":
75 |         d = ac[-1, :]
76 |         return d, ac
77 |     elif report == "path":
78 |         path = traceback_adj(ac)
79 |         return path
80 |     else:  # report = "distance" default
81 |         d = ac[-1, -1] / xl if do_dist_norm else ac[-1, -1]
82 |         return d
83 |
84 |
85 | def lcss(x, y, eps=1, **kwargs):
86 |     """Computes the Longest Common Subsequence (LCSS) distance between two numeric time series.
87 |
88 |     Parameters
89 |     ----------
90 |     x: nd-array
91 |         Time series x (query).
92 |     y: nd-array
93 |         Time series y.
94 |     eps : float
95 |         Amplitude matching threshold.
96 |     \**kwargs:
97 |         See below:
98 |
99 |         * *window* (``String``) --
100 |             Selects the global window constraints. Available options are ``None`` and ``sakoe-chiba``.
101 |             (default: ``None``)
102 |
103 |     Returns
104 |     -------
105 |     d: float
106 |         The LCSS similarity score.
107 |     ac: nd-array
108 |         The similarity matrix.
109 |     path: nd-array
110 |         The optimal path between the two sequences.
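    Example
    -------
    Minimal illustrative sketch (two phase-shifted sine waves)::

        import numpy as np

        x = np.sin(np.arange(0, 6, 0.1))
        y = np.sin(np.arange(0, 6, 0.1) + 0.3)
        sim = lcss(x, y, eps=0.5)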
111 |     """
112 |
113 |     window = kwargs.get("window", None)
114 |     report = kwargs.get("report", "distance")
115 |
116 |     dim = len(np.shape(x))  # should raise an error if two time series with different dimensionality are given
117 |     if dim == 1:
118 |         x = x.reshape(-1, 1)
119 |         y = y.reshape(-1, 1)
120 |
121 |     ac = lcss_accumulated_matrix(x, y, eps=eps)
122 |     path = lcss_path(x, y, ac, eps=eps)
123 |     sim_score = lcss_score(ac)
124 |
125 |     if report == "cost_matrix":
126 |         return ac
127 |     elif report == "search":
128 |         return sim_score, ac
129 |     elif report == "path":
130 |         return path
131 |     else:
132 |         return sim_score
133 |
134 |
135 | def dlp(x, y, p=2):
136 |     """Computes Lp norm distance between two time series.
137 |
138 |     Parameters
139 |     ----------
140 |     x: nd-array
141 |         Time series x (query).
142 |     y: nd-array
143 |         Time series y.
144 |     p: int
145 |         Lp norm distance degree for local cost computation.
146 |
147 |     Returns
148 |     -------
149 |     The Lp distance.
150 |     """
151 |
152 |     cost = np.sum(np.power(np.abs(x - y), p))
153 |     return np.power(cost, 1 / p)
154 |
155 |
156 | def twed(x, y, tx, ty, nu=0.001, lmbda=1.0, p=2, report="distance"):
157 |     """Computes Time Warp Edit Distance (TWED) of two time series.
158 |
159 |     Reference :
160 |         Marteau, P.; F. (2009). "Time Warp Edit Distance with Stiffness Adjustment for Time Series Matching".
161 |         IEEE Transactions on Pattern Analysis and Machine Intelligence. 31 (2): 306–318. arXiv:cs/0703033
162 |         http://people.irisa.fr/Pierre-Francois.Marteau/
163 |
164 |     Parameters
165 |     ----------
166 |     x: nd-array
167 |         Time series x (query).
168 |     y: nd-array
169 |         Time series y.
170 |     tx: nd-array
171 |         Time stamp time series x.
172 |     ty: nd-array
173 |         Time stamp time series y.
174 |     nu: float
175 |         Stiffness parameter (nu >= 0).
176 |         nu = 0, TWED distance measure on amplitude.
177 |         nu > 0, TWED distance measure on amplitude x time.
178 |     lmbda: float
179 |         Penalty for deletion operation (lmbda >= 0).
180 |     p: int
181 |         Lp norm distance degree for local cost computation.
182 |     report: str
183 |         One of "distance", "cost_matrix", "search" or "path".
184 |
185 |     Returns
186 |     -------
187 |     d: float
188 |         The TWED distance.
189 |     ac: nd-array
190 |         The accumulated cost matrix.
191 |     path: nd-array
192 |         The optimal warping path between the two sequences.
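    Example
    -------
    Minimal illustrative sketch (both series share the same time stamps)::

        import numpy as np

        t = np.arange(100.0)
        d = twed(np.sin(t / 10), np.cos(t / 10), t, t, nu=0.001, lmbda=1.0)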
193 |     """
194 |
195 |     # Validate input arguments
196 |     if len(x) != len(tx):
197 |         print("The length of x is not equal to the length of tx")
198 |         return None, None
199 |
200 |     if len(y) != len(ty):
201 |         print("The length of y is not equal to the length of ty")
202 |         return None, None
203 |
204 |     if nu < 0:
205 |         print("nu is negative; it must be >= 0")
206 |         return None, None
207 |
208 |     # Dynamical programming
209 |     ac = acc_initialization(len(x), len(y), report)
210 |
211 |     # Add padding
212 |     query = np.array([0] + list(x))
213 |     tq = np.array([0] + list(tx))
214 |     sequence = np.array([0] + list(y))
215 |     ts = np.array([0] + list(ty))
216 |
217 |     n = len(query)
218 |     m = len(sequence)
219 |
220 |     # Compute minimal cost
221 |     for i in range(1, n):
222 |         for j in range(1, m):
223 |             # Calculate and save cost of various operations
224 |             C = np.ones((3, 1)) * np.inf
225 |             # Deletion in A (the query)
226 |             C[0] = ac[i - 1, j] + dlp(query[i - 1], query[i], p) + nu * (tq[i] - tq[i - 1]) + lmbda
227 |             # Deletion in B (the sequence)
228 |             C[1] = ac[i, j - 1] + dlp(sequence[j - 1], sequence[j], p) + nu * (ts[j] - ts[j - 1]) + lmbda
229 |             # Keep data points in both time series
230 |             C[2] = (
231 |                 ac[i - 1, j - 1]
232 |                 + dlp(query[i], sequence[j], p)
233 |                 + dlp(query[i - 1], sequence[j - 1], p)
234 |                 + nu * (abs(tq[i] - ts[j]) + abs(tq[i - 1] - ts[j - 1]))
235 |             )
236 |             # Choose the operation with the minimal cost and update the cost matrix
237 |             ac[i, j] = np.min(C)
238 |
239 |     if report == "cost_matrix":
240 |         return ac
241 |     elif report == "search":
242 |         d = ac[n - 1, :]
243 |         return d, ac
244 |     elif report == "path":
245 |         path = backtracking(ac)
246 |         return path
247 |     else:  # report == "distance" (default)
248 |         return ac[n - 1, m - 1]
--------------------------------------------------------------------------------
/tssearch/distances/lockstep_distances.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import stats
3 | from scipy.spatial import distance
4 | from tssearch.utils.preprocessing import interpolation
5 | from tssearch.distances.lockstep_utils import _lnorm_multidimensional, _lnorm_unidimensional
6 |
7 |
8 | def euclidean_distance(x, y, weight=None):
9 |     """Computes the Euclidean distance between two time series.
10 |
11 |     If the time series do not have the same length, an interpolation is performed.
12 |
13 |     Parameters
14 |     ----------
15 |     x : nd-array
16 |         Time series x.
17 |     y : nd-array
18 |         Time series y.
19 |     weight: nd-array (Default: None)
20 |         query weight values.
21 |
22 |     Returns
23 |     -------
24 |     float
25 |         Euclidean distance value.
26 |
27 |     """
28 |     p = 2
29 |
30 |     if len(x) != len(y):
31 |         x, y = interpolation(x, y)
32 |
33 |     if weight is None:
34 |         ed = np.linalg.norm(x - y, p)
35 |     else:
36 |         if len(np.shape(x)) > 1:
37 |             distance = _lnorm_multidimensional(x, y, weight, p=p)
38 |         else:
39 |             distance = _lnorm_unidimensional(x, y, weight, p=p)
40 |         ed = np.sum(distance)
41 |     return ed
42 |
43 |
44 | def minkowski_distance(x, y, weight=None, p=3):
45 |     """Computes the Minkowski distance between two time series.
46 |
47 |     If the time series do not have the same length, an interpolation is performed.
48 |
49 |     Parameters
50 |     ----------
51 |     x : nd-array
52 |         Time series x.
53 |     y : nd-array
54 |         Time series y.
55 |     weight: nd-array (Default: None)
56 |         query weight values.
57 |     p: int
58 |         Lp norm distance degree.
59 |
60 |     Returns
61 |     -------
62 |     float
63 |         Minkowski distance value.
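    Example
    -------
    Minimal illustrative sketch::

        import numpy as np

        d = minkowski_distance(np.ones(5), np.zeros(5), p=3)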
64 | 65 | """ 66 | if len(x) != len(y): 67 | x, y = interpolation(x, y) 68 | 69 | if weight is None and (p < 3 or p == np.inf): 70 | distance = np.linalg.norm(x - y, p) 71 | else: 72 | if weight is None: 73 | weight = np.ones_like(x) 74 | if len(np.shape(x)) > 1: 75 | distance = _lnorm_multidimensional(x, y, weight, p=p) 76 | else: 77 | distance = _lnorm_unidimensional(x, y, weight, p=p) 78 | distance = np.sum(distance) 79 | 80 | return distance 81 | 82 | 83 | def manhattan_distance(x, y, weight=None): 84 | """Computes the Manhattan distance between two time series. 85 | 86 | If the time series do not have the same length, an interpolation is performed. 87 | 88 | Parameters 89 | ---------- 90 | x : nd-array 91 | Time series x. 92 | y : nd-array 93 | Time series y. 94 | weight: nd-array (Default: None) 95 | query weight values. 96 | 97 | Returns 98 | ------- 99 | float 100 | Manhattan distance value. 101 | 102 | """ 103 | p = 1 104 | 105 | if len(x) != len(y): 106 | x, y = interpolation(x, y) 107 | 108 | if weight is None: 109 | distance = np.linalg.norm(x - y, p) 110 | else: 111 | if len(np.shape(x)) > 1: 112 | distance = _lnorm_multidimensional(x, y, weight, p=p) 113 | else: 114 | distance = _lnorm_unidimensional(x, y, weight, p=p) 115 | distance = np.sum(distance) 116 | 117 | return distance 118 | 119 | 120 | def chebyshev_distance(x, y, weight=None): 121 | """Computes the Chebyshev distance between two time series. 122 | 123 | If the time series do not have the same length, an interpolation is performed. 124 | 125 | Parameters 126 | ---------- 127 | x : nd-array 128 | Time series x. 129 | y : nd-array 130 | Time series y. 131 | weight: nd-array (Default: None) 132 | query weight values. 133 | 134 | Returns 135 | ------- 136 | float 137 | Chebyshev distance value. 138 | 139 | """ 140 | p = np.inf 141 | 142 | if len(x) != len(y): 143 | x, y = interpolation(x, y) 144 | 145 | if weight is None: 146 | d = np.linalg.norm(x - y, p) 147 | else: 148 | if len(np.shape(x)) > 1: 149 | distance = _lnorm_multidimensional(x, y, weight, p=p) 150 | else: 151 | distance = _lnorm_unidimensional(x, y, weight, p=p) 152 | d = np.sum(distance) 153 | return d 154 | 155 | 156 | def correlation_distance(x, y, weight=None): 157 | """Computes the correlation distance between two time series. 158 | 159 | If the time series do not have the same length, an interpolation is performed. 160 | 161 | Parameters 162 | ---------- 163 | x : nd-array 164 | Time series x. 165 | y : nd-array 166 | Time series y. 167 | weight: nd-array (Default: None) 168 | query weight values. 169 | 170 | Returns 171 | ------- 172 | float 173 | Correlation distance value. 174 | 175 | """ 176 | if len(x) != len(y): 177 | x, y = interpolation(x, y) 178 | 179 | correlation_d = distance.correlation(x, y, weight) 180 | 181 | return correlation_d 182 | 183 | 184 | def pearson_correlation(x, y, beta=None): 185 | """Computes the Pearson correlation between two time series. 186 | 187 | If the time series do not have the same length, an interpolation is performed. 188 | 189 | Parameters 190 | ---------- 191 | x : nd-array 192 | Time series x. 193 | y : nd-array 194 | Time series y. 195 | beta: float 196 | Beta coefficient. 197 | 198 | Returns 199 | ------- 200 | float 201 | Pearson correlation value. 
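    Note that the value returned is a correlation-based distance rather than the raw
    Pearson coefficient r: it equals 2 * (1 - r) when beta is None, and
    ((1 - r) / (1 + r)) ** beta otherwise.

    Example
    -------
    Minimal illustrative sketch::

        import numpy as np

        d = pearson_correlation(np.arange(10.0), 2 * np.arange(10.0))  # r = 1, so d = 0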
202 |
203 |     """
204 |     if len(x) != len(y):
205 |         x, y = interpolation(x, y)
206 |
207 |     r, p = stats.pearsonr(x, y)
208 |
209 |     if beta is None:
210 |         d = 2 * (1 - r)
211 |     else:
212 |         d = ((1 - r) / (1 + r)) ** beta
213 |     return d
214 |
215 |
216 | def short_time_series_distance(x, y, tx=None, ty=None):
217 |     """Computes the short time series distance (STS) between two time series.
218 |
219 |     Reference: Möller-Levet, C. S., Klawonn, F., Cho, K., and Wolkenhauer, O. (2003). "Fuzzy Clustering of Short Time-Series and Unevenly Distributed Sampling Points."
220 |
221 |     Parameters
222 |     ----------
223 |     x : nd-array
224 |         Time series x.
225 |     y : nd-array
226 |         Time series y.
227 |     tx : nd-array
228 |         Sampling index of time series x.
229 |     ty : nd-array
230 |         Sampling index of time series y.
231 |
232 |     Returns
233 |     -------
234 |     float
235 |         Short time series distance value.
236 |
237 |     """
238 |     if len(x) != len(y):
239 |         x, y = interpolation(x, y)
240 |
241 |     if tx is None:
242 |         tx = np.arange(len(x))
243 |     if ty is None:
244 |         ty = np.arange(len(y))
245 |     # the slope of each series is computed with respect to its own sampling index
246 |     sts = np.sqrt(np.sum((np.diff(y) / np.diff(ty) - np.diff(x) / np.diff(tx)) ** 2))
247 |
248 |     return sts
249 |
250 |
251 | def braycurtis_distance(x, y, weight=None):
252 |     """Computes the Bray-Curtis distance between two time series.
253 |
254 |     If the time series do not have the same length, an interpolation is performed.
255 |
256 |     Parameters
257 |     ----------
258 |     x : nd-array
259 |         Time series x.
260 |     y : nd-array
261 |         Time series y.
262 |     weight: nd-array (Default: None)
263 |         query weight values.
264 |
265 |     Returns
266 |     -------
267 |     float
268 |         Bray-Curtis distance value.
269 |
270 |     """
271 |     if len(x) != len(y):
272 |         x, y = interpolation(x, y)
273 |
274 |     braycurtis_d = distance.braycurtis(x, y, weight)
275 |
276 |     return braycurtis_d
277 |
278 |
279 | def canberra_distance(x, y, weight=None):
280 |     """Computes the Canberra distance between two time series.
281 |
282 |     If the time series do not have the same length, an interpolation is performed.
283 |
284 |     Parameters
285 |     ----------
286 |     x : nd-array
287 |         Time series x.
288 |     y : nd-array
289 |         Time series y.
290 |     weight: nd-array (Default: None)
291 |         query weight values.
292 |
293 |     Returns
294 |     -------
295 |     float
296 |         Canberra distance value.
297 |
298 |     """
299 |     if len(x) != len(y):
300 |         x, y = interpolation(x, y)
301 |
302 |     canberra_d = distance.canberra(x, y, weight)
303 |
304 |     return canberra_d
305 |
306 |
307 | def cosine_distance(x, y, weight=None):
308 |     """Computes the cosine distance between two time series.
309 |
310 |     If the time series do not have the same length, an interpolation is performed.
311 |
312 |     Parameters
313 |     ----------
314 |     x : nd-array
315 |         Time series x.
316 |     y : nd-array
317 |         Time series y.
318 |     weight: nd-array (Default: None)
319 |         query weight values.
320 |
321 |     Returns
322 |     -------
323 |     float
324 |         Cosine distance value.
325 |
326 |     """
327 |     if len(x) != len(y):
328 |         x, y = interpolation(x, y)
329 |
330 |     cosine_d = distance.cosine(x, y, weight)
331 |
332 |     return cosine_d
333 |
334 |
335 | def mahalanobis_distance(x, y, weight=None):
336 |     """Computes the Mahalanobis distance between two time series.
337 |
338 |     If the time series do not have the same length, an interpolation is performed.
339 |
340 |     Parameters
341 |     ----------
342 |     x : nd-array
343 |         Time series x.
344 |     y : nd-array
345 |         Time series y.
346 |     weight: nd-array
347 |         The inverse of the covariance matrix, passed to ``scipy.spatial.distance.mahalanobis`` (required; ``None`` will raise an error).
348 |
349 |     Returns
350 |     -------
351 |     float
352 |         Mahalanobis distance value.
353 |
354 |     """
355 |     if len(x) != len(y):
356 |         x, y = interpolation(x, y)
357 |
358 |     mahalanobis_d = distance.mahalanobis(x, y, weight)
359 |
360 |     return mahalanobis_d
361 |
362 |
363 | def sqeuclidean_distance(x, y, weight=None):
364 |     """Computes the squared Euclidean distance between two time series.
365 |
366 |     If the time series do not have the same length, an interpolation is performed.
367 |
368 |     Parameters
369 |     ----------
370 |     x : nd-array
371 |         Time series x.
372 |     y : nd-array
373 |         Time series y.
374 |     weight: nd-array (Default: None)
375 |         query weight values.
376 |
377 |     Returns
378 |     -------
379 |     float
380 |         Squared Euclidean distance value.
381 |
382 |     """
383 |     if len(x) != len(y):
384 |         x, y = interpolation(x, y)
385 |
386 |     sqeuclidean_d = distance.sqeuclidean(x, y, weight)
387 |
388 |     return sqeuclidean_d
389 |
390 |
391 | def hamming_distance(x, y, weight=None):
392 |     """Computes the Hamming distance between two time series.
393 |
394 |     If the time series do not have the same length, an interpolation is performed.
395 |
396 |     Parameters
397 |     ----------
398 |     x : nd-array
399 |         Time series x.
400 |     y : nd-array
401 |         Time series y.
402 |     weight: nd-array (Default: None)
403 |         query weight values.
404 |
405 |     Returns
406 |     -------
407 |     float
408 |         Hamming distance value.
409 |
410 |     """
411 |     if len(x) != len(y):
412 |         x, y = interpolation(x, y)
413 |
414 |     hamming_d = distance.hamming(x, y, weight)
415 |
416 |     return hamming_d
417 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | ##########################################################################################
3 | #                                                                                        #
4 | #                           Pre-commit configuration file                               #
5 | #                                                                                        #
6 | #                                                                                        #
7 | #              See https://pre-commit.com for more information                          #
8 | #              See https://pre-commit.com/hooks.html for more hooks                     #
9 | #                                                                                        #
10 | #              To install the git pre-commit hook run:                                  #
11 | #                  pre-commit install                                                   #
12 | #                  pre-commit autoupdate                                                #
13 | #              To update the pre-commit hooks run:                                      #
14 | #                  pre-commit install --install-hooks -t pre-commit -t commit-msg       #
15 | #              To run all hooks against current changes in your repository              #
16 | #                  pre-commit run --all-files                                           #
17 | #              If you wish to execute an individual hook use pre-commit run <hook_id>.
Example: # 18 | # pre-commit run black # 19 | # # 20 | ########################################################################################## 21 | default_language_version: 22 | python: python3 23 | default_stages: [commit, push] 24 | fail_fast: false 25 | repos: 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v4.5.0 28 | hooks: 29 | - id: fix-byte-order-marker 30 | name: fix-byte-order-marker 31 | description: removes UTF-8 byte order marker 32 | - id: trailing-whitespace 33 | name: trailing-whitespace 34 | description: Trims trailing whitespace 35 | args: [--markdown-linebreak-ext=md] 36 | - id: end-of-file-fixer 37 | name: end-of-file-fixer 38 | description: Makes sure files end in a newline and only a newline 39 | - id: check-json 40 | name: check-json 41 | description: Attempts to load all json files to verify syntax 42 | - id: check-toml 43 | name: check-toml 44 | description: Attempts to load all TOML files to verify syntax 45 | - id: check-symlinks 46 | name: check-symlinks 47 | description: Checks for symlinks which do not point to anything 48 | - id: check-added-large-files 49 | name: check-added-large-files 50 | description: Prevent files larger than 1 MB from being committed 51 | args: [ "--maxkb=1024", '--enforce-all' ] 52 | - id: check-case-conflict 53 | name: check-case-conflict 54 | description: Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT 55 | - id: end-of-file-fixer 56 | name: end-of-file-fixer 57 | description: Makes sure files end in a newline and only a newline 58 | - id: mixed-line-ending 59 | name: mixed-line-ending 60 | description: Replaces or checks mixed line ending 61 | - id: check-ast 62 | name: check-ast 63 | description: Simply check whether files parse as valid python 64 | - id: debug-statements 65 | name: debug-statements 66 | description: Check for debugger imports and py37+ breakpoint() calls in python source 67 | - id: detect-aws-credentials 68 | name: detect-aws-credentials 69 | description: Checks for the existence of AWS/Minio secrets that you have set up 70 | args: [--allow-missing-credentials] 71 | - id: detect-private-key 72 | name: detect-private-key 73 | description: Checks for the existence of private keys. 74 | - id: requirements-txt-fixer 75 | name: requirements-txt-fixer 76 | description: Sorts entries in requirements.txt and removes incorrect entries 77 | #- id: no-commit-to-branch 78 | # name: no-commit-to-master-branch 79 | # description: Prevent commits to master/main branch 80 | # language: python 81 | # args: ["-b", master, "-b", main] 82 | # pass_filenames: false 83 | - id: check-merge-conflict 84 | name: check-merge-conflict 85 | description: Check for files that contain merge conflict strings 86 | - repo: https://github.com/pre-commit/pygrep-hooks 87 | rev: v1.10.0 88 | hooks: 89 | - id: python-check-mock-methods 90 | name: check-mock-methods 91 | description: Prevent common mistakes of assert mck.not_called(), assert mck.called_once_with(...) and mck.assert_called. 92 | - id: python-use-type-annotations 93 | name: python-use-type-annotations 94 | description: Enforce that python3.6+ type annotations are used instead of type comments 95 | - id: python-check-blanket-noqa 96 | name: python-check-blanket-noqa 97 | description: Enforce that noqa annotations always occur with specific codes. 
98 | # - id: python-no-eval 99 | # name: python-no-eval 100 | # description: A quick check for the eval() built-in function 101 | # - repo: https://github.com/pre-commit/mirrors-mypy 102 | # rev: v1.5.1 103 | # hooks: 104 | # - id: mypy 105 | # name: mypy - static type checker for Python 106 | # description: Static type checker for Python 107 | # files: ^src/ 108 | # exclude: ^tests/ 109 | # args: [--ignore-missing-imports] 110 | # additional_dependencies: [types-all] 111 | # not working really well 112 | # - repo: https://github.com/asottile/yesqa 113 | # rev: v1.4.0 114 | # hooks: 115 | # - id: yesqa 116 | # name: yesqa - remove unnecessary `# noqa` comments 117 | # description: Automatically remove unnecessary `# noqa` comments 118 | - repo: https://github.com/asottile/add-trailing-comma 119 | rev: v3.1.0 120 | hooks: 121 | - id: add-trailing-comma 122 | name: add-trailing-comma 123 | description: Automatically add trailing commas to calls and literals. 124 | - repo: https://github.com/PyCQA/flake8 125 | rev: 7.0.0 126 | hooks: 127 | - id: flake8 128 | name: flake8 - check the style and quality of some python code 129 | description: Python tool that glues together pycodestyle, pyflakes, mccabe, and third-party plugins to check the style and quality of some python code 130 | additional_dependencies: 131 | - flake8-bugbear 132 | # - flake8-variables-names 133 | # - pep8-naming 134 | # - flake8-print 135 | - flake8-quotes 136 | - flake8-broken-line 137 | - flake8-comprehensions 138 | - flake8-spellcheck # ignored by now 139 | - flake8-eradicate 140 | #- flake8-walrus==1.1.0 141 | - flake8-typing-imports==1.12.0 142 | #- flake8-match==1.0.0 143 | - repo: https://github.com/psf/black 144 | rev: 24.1.1 145 | hooks: 146 | - id: black 147 | name: black - consistent Python code formatting 148 | description: The uncompromising Python code formatter 149 | - repo: https://github.com/PyCQA/isort 150 | rev: 5.13.2 151 | hooks: 152 | - id: isort 153 | name: isort - sort Python imports 154 | description: Library to sort imports 155 | - repo: https://github.com/asottile/pyupgrade 156 | rev: v3.15.0 157 | hooks: 158 | - id: pyupgrade 159 | name: pyupgrade - upgrade syntax for newer versions of the language 160 | description: Automatically upgrade syntax for newer versions of the language 161 | args: [--py36-plus] 162 | # - repo: https://github.com/jendrikseipp/vulture 163 | # rev: v2.9.1 164 | # hooks: 165 | # - id: vulture 166 | # name: vulture - finds unused code in Python programs 167 | # description: Finds unused code in Python programs 168 | ########################################################################################## 169 | # Notebooks 170 | ########################################################################################## 171 | - repo: https://github.com/nbQA-dev/nbQA 172 | rev: 1.7.1 173 | hooks: 174 | # - id: nbqa-flake8 175 | # name: nbqa-flake8 - Python linting (notebooks) 176 | # additional_dependencies: [flake8] 177 | #- id: nbqa-mypy 178 | # name: nbqa-mypy - Static type checker for Python (notebooks) 179 | # additional_dependencies: [mypy] 180 | # args: [--ignore-missing-imports] 181 | - id: nbqa-isort 182 | name: nbqa-isort - Sort Python imports (notebooks) 183 | additional_dependencies: [isort] 184 | - id: nbqa-pyupgrade 185 | name: nbqa-pyupgrade - Upgrade syntax for newer versions of Python (notebooks) 186 | additional_dependencies: [pyupgrade] 187 | args: [--py36-plus] 188 | - id: nbqa-black 189 | name: nbqa-black - consistent Python code formatting (notebooks) 190 
| additional_dependencies: [black] 191 | # - id: nbqa-pydocstyle 192 | # additional_dependencies: [pydocstyle, toml==0.10.2] 193 | - repo: https://github.com/kynan/nbstripout 194 | rev: 0.6.1 195 | hooks: 196 | - id: nbstripout 197 | name: nbstripout - strip outputs from notebooks 198 | description: Strip output from Jupyter and IPython notebooks 199 | args: 200 | - --extra-keys 201 | - "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId" 202 | files: .ipynb 203 | ########################################################################################## 204 | # Shell Scripting 205 | ########################################################################################## 206 | - repo: local 207 | hooks: 208 | - id: shellcheck 209 | name: shellcheck - static analysis tool for shell scripts 210 | description: A static analysis tool for shell scripts 211 | language: script 212 | entry: scripts/shellcheck.sh 213 | types: [shell] 214 | args: [-e, SC1091] 215 | - repo: https://github.com/lovesegfault/beautysh 216 | rev: v6.2.1 217 | hooks: 218 | - id: beautysh 219 | name: beautysh - Autoformat shell scripts 220 | description: Autoformat shell scripts 221 | ########################################################################################## 222 | # Tests 223 | ########################################################################################## 224 | - repo: local 225 | hooks: 226 | - id: pytest 227 | name: pytest 228 | description: Run pytest 229 | entry: pytest -sv test 230 | language: system 231 | always_run: true 232 | types: [python] 233 | stages: [push] 234 | pass_filenames: false 235 | ########################################################################################## 236 | # Security 237 | ########################################################################################## 238 | # - repo: local 239 | # hooks: 240 | # - id: safety 241 | # name: safety 242 | # description: Analyze your Python requirements for known security vulnerabilities 243 | # entry: safety check --short-report -r 244 | # language: system 245 | # files: requirements/*.txt 246 | - repo: https://github.com/PyCQA/bandit 247 | rev: 1.7.7 248 | hooks: 249 | - id: bandit 250 | args: ["-c", "pyproject.toml"] 251 | additional_dependencies: [".[toml]"] 252 | # - repo: https://github.com/PyCQA/bandit 253 | # rev: 1.7.5 254 | # hooks: 255 | # - id: bandit 256 | # name: bandit - find common security issues in Python code. 
257 | # description: Tool designed to find common security issues in Python code 258 | # args: ["-c", "pyproject.toml"] 259 | # additional_dependencies: [toml==0.10.2] 260 | ########################################################################################## 261 | # Git 262 | ########################################################################################## 263 | # - repo: https://github.com/commitizen-tools/commitizen 264 | # rev: 3.6.0 265 | # hooks: 266 | # - id: commitizen 267 | # stages: [commit-msg] 268 | # additional_dependencies: [git+https://bitbucket.fraunhofer.pt/scm/is2020/mlops-commit-drafter.git] 269 | ########################################################################################## 270 | # Documentation 271 | ########################################################################################## 272 | - repo: https://github.com/executablebooks/mdformat 273 | rev: 0.7.17 274 | hooks: 275 | - id: mdformat 276 | name: mdformat - Markdown formatter that can be used to enforce a consistent style in Markdown files 277 | description: Markdown formatter that can be used to enforce a consistent style in Markdown files 278 | additional_dependencies: 279 | - mdformat-black 280 | - mdformat-beautysh 281 | exclude: CHANGELOG.md 282 | - repo: https://github.com/myint/docformatter 283 | rev: v1.7.5 284 | hooks: 285 | - id: docformatter 286 | name: docformatter - formats docstrings to follow PEP 257 287 | description: Formats docstrings to follow PEP 257 288 | args: [--in-place] 289 | - repo: https://github.com/terrencepreilly/darglint 290 | rev: v1.8.1 291 | hooks: 292 | - id: darglint 293 | name: darglint - Python documentation linter 294 | description: A python documentation linter which checks that the docstring description matches the definition. 
295 | args: ["-z", long] 296 | # - repo: https://github.com/econchick/interrogate 297 | # rev: 1.5.0 298 | # hooks: 299 | # - id: interrogate 300 | # name: interrogate - interrogate a codebase for docstring coverage 301 | # description: Interrogate a codebase for docstring coverage 302 | # WIP 303 | # - repo: https://github.com/PyCQA/prospector 304 | # rev: 1.5.3.1 305 | # hooks: 306 | # - id: prospector 307 | ########################################################################################## 308 | # DVC 309 | ########################################################################################## 310 | # https://dvc.org/doc/command-reference/install#--use-pre-commit-tool 311 | # - repo: https://github.com/iterative/dvc 312 | # hooks: 313 | # - id: dvc-pre-commit 314 | # language_version: python3 315 | # stages: 316 | # - commit 317 | # - id: dvc-pre-push 318 | # # use s3/gs/etc instead of all to only install specific cloud support 319 | # additional_dependencies: ['.[all]'] 320 | # language_version: python3 321 | # stages: 322 | # - push 323 | # - always_run: true 324 | # - id: dvc-post-checkout 325 | # language_version: python3 326 | # stages: 327 | # - post-checkout 328 | ########################################################################################## 329 | # Docker 330 | ########################################################################################## 331 | - repo: local 332 | hooks: 333 | - id: hadolint 334 | name: hadolint - Lint Dockerfile for errors and enforce best practices 335 | description: Lint Dockerfile for errors and enforce best practices 336 | language: script 337 | entry: scripts/hadolint.sh 338 | files: Dockerfile 339 | -------------------------------------------------------------------------------- /tssearch/distances/elastic_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit, prange 3 | from tssearch.utils.preprocessing import standardization 4 | 5 | 6 | @njit(parallel=True, fastmath=True) 7 | def _cost_matrix(x, y): 8 | """ 9 | 10 | Parameters 11 | ---------- 12 | x: nd-array 13 | Time series x (query). 14 | y: nd-array 15 | Time series y. 16 | 17 | Returns 18 | ------- 19 | c: nd-array 20 | The cost matrix. 21 | """ 22 | l1 = x.shape[0] 23 | l2 = y.shape[0] 24 | c = np.zeros((l1, l2), dtype=np.float32) 25 | 26 | for i in prange(l1): 27 | for j in prange(l2): 28 | c[i, j] = (x[i] - y[j]) ** 2 29 | 30 | return c 31 | 32 | 33 | @njit(parallel=True, fastmath=True) 34 | def _multidimensional_cost_matrix(x, y, weight): 35 | """Helper function for fast computation of cost matrix in cost_matrix_diff_vec. 36 | Defined outside to prevent recompilation from numba 37 | 38 | Parameters 39 | ---------- 40 | x: nd-array 41 | Time series x (query). 42 | y: nd-array 43 | Time series y. 44 | 45 | Returns 46 | ------- 47 | c: nd-array 48 | The cost matrix. 49 | """ 50 | l1 = x.shape[0] 51 | l2 = y.shape[0] 52 | l3 = x.shape[1] 53 | c = np.zeros((l1, l2), dtype=np.float32) 54 | 55 | for i in prange(l1): 56 | for j in prange(l2): 57 | dist = 0.0 58 | for di in range(l3): 59 | diff = x[i, di] - y[j, di] 60 | dist += weight[i, di] * (diff * diff) 61 | c[i, j] = dist ** 0.5 62 | 63 | return c 64 | 65 | 66 | @njit(nogil=True, fastmath=True) 67 | def _accumulated_cost_matrix(ac): 68 | """Fast computation of accumulated cost matrix using cost matrix. 69 | 70 | Parameters 71 | ---------- 72 | ac: nd-array 73 | Given cost matrix c, ac = acc_initialization(...), ac[1:, 1:] = c. 
74 | 75 | Returns 76 | ------- 77 | The accumulated cost matrix. 78 | """ 79 | for i in range(ac.shape[0] - 1): 80 | for j in range(ac.shape[1] - 1): 81 | ac[i + 1, j + 1] += min(ac[i, j + 1], ac[i + 1, j], ac[i, j]) 82 | return ac 83 | 84 | 85 | def acc_initialization(x, y, _type, tolerance=0): 86 | """Initializes the accumulated cost matrix according to the DTW variant. 87 | 88 | Parameters 89 | ---------- 90 | x: int 91 | Length of time series x (query). 92 | y: int 93 | Length of time series y. 94 | _type: string 95 | Name of the DTW variant. 96 | tolerance: int 97 | Tolerance value (used by "psi-dtw"). 98 | 99 | Returns 100 | ------- 101 | ac: nd-array 102 | The initialized accumulated cost matrix. 103 | """ 104 | ac = np.zeros((x + 1, y + 1)) 105 | if _type == "dtw": 106 | ac[0, 1:] = np.inf 107 | ac[1:, 0] = np.inf 108 | elif _type == "oe-dtw": 109 | ac[0, 1:] = np.inf 110 | ac[1:, 0] = np.inf 111 | elif _type == "obe-dtw" or _type == "sub-dtw" or _type == "search": 112 | ac[1:, 0] = np.inf 113 | elif _type == "psi-dtw": 114 | ac[0, tolerance + 1 :] = np.inf 115 | ac[tolerance + 1 :, 0] = np.inf 116 | else: 117 | ac[0, 1:] = np.inf 118 | ac[1:, 0] = np.inf 119 | 120 | return ac 121 | 122 | 123 | def cost_matrix(x, y, alpha=1, weight=None): 124 | """Computes the cost matrix between two time series, optionally blending amplitude and first-derivative costs. 125 | 126 | Parameters 127 | ---------- 128 | x: nd-array 129 | Time series x (query). 130 | y: nd-array 131 | Time series y. 132 | alpha: float 133 | A factor between 0 and 1 which weights the amplitude and derivative contributions. 134 | With ``alpha == 1`` (default) only the amplitude cost is used; lower values add a first-derivative cost term. 135 | weight: nd-array, optional 136 | Query-aligned local weights applied to the cost. Defaults to ones. 137 | 138 | Returns 139 | ------- 140 | c: nd-array 141 | The cost matrix.
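Examples
--------
A minimal sketch (values are illustrative, not from the package's test suite):

>>> import numpy as np
>>> x = np.array([0.0, 1.0, 2.0])
>>> y = np.array([0.0, 1.0, 2.0, 3.0])
>>> cost_matrix(x, y).shape
(3, 4)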
158 | """ 159 | if weight is None: 160 | weight = np.ones_like(x) 161 | 162 | if len(np.shape(weight)) == 1: 163 | weight = weight.reshape(-1, 1) 164 | 165 | if alpha == 1: 166 | C_d = 1 167 | if len(np.shape(x)) == 1: 168 | C_n = (_cost_matrix(x, y) * weight) / np.max(weight) 169 | else: 170 | C_n = _multidimensional_cost_matrix(x, y, weight) 171 | else: 172 | # standardization parameters 173 | abs_norm = np.mean(x, axis=0), np.std(x, axis=0) 174 | diff_norm = np.mean(np.diff(x, axis=0), axis=0), np.std(np.diff(x, axis=0), axis=0) 175 | 176 | # Derivative calculation and standardization 177 | _x = standardization(np.diff(x, axis=0), param=diff_norm) 178 | _y = standardization(np.diff(y, axis=0), param=diff_norm) 179 | # same length of derivative 180 | x = standardization(x[:-1], param=abs_norm) 181 | y = standardization(y[:-1], param=abs_norm) 182 | 183 | weight = weight[:-1] 184 | 185 | if len(np.shape(x)) == 1: 186 | C_d = _cost_matrix(_x, _y) * weight 187 | C_n = _cost_matrix(x, y) * weight 188 | else: 189 | C_d = _multidimensional_cost_matrix(_x, _y, weight) 190 | C_n = _multidimensional_cost_matrix(x, y, weight) 191 | 192 | c = alpha * C_n + (1 - alpha) * C_d 193 | 194 | return c 195 | 196 | 197 | def accumulated_cost_matrix(c, **kwargs): 198 | """ 199 | 200 | Parameters 201 | ---------- 202 | c: nd-array 203 | The cost matrix. 204 | 205 | \**kwargs: 206 | 207 | Returns 208 | ------- 209 | ac: nd-array 210 | The accumulated cost matrix. 211 | """ 212 | xl, yl = np.shape(c) 213 | 214 | window = kwargs.get("window", None) 215 | factor = kwargs.get("factor", np.min((xl, yl)) * 0.50) 216 | dtw_type = kwargs.get("dtw_type", "dtw") 217 | tolerance = kwargs.get("tolerance", 0) 218 | 219 | if window == "sakoe-chiba": 220 | c[np.abs(np.diff(np.indices(c.shape), axis=0))[0] > factor] = np.inf 221 | 222 | ac = acc_initialization(xl, yl, dtw_type, tolerance) 223 | ac[1:, 1:] = c.copy() 224 | ac = _accumulated_cost_matrix(ac)[1:, 1:] 225 | 226 | return ac 227 | 228 | 229 | @njit(nogil=True, fastmath=True) 230 | def traceback(ac): 231 | """Computes the traceback path of the matrix c. 232 | 233 | Parameters 234 | ---------- 235 | ac: nd-array 236 | The accumulated cost matrix. 237 | 238 | Returns 239 | ------- 240 | Coordinates p and q of the minimum path. 241 | 242 | """ 243 | 244 | i, j = np.array(ac.shape) - 2 245 | p, q = [i], [j] 246 | while (i > 0) and (j > 0): 247 | tb = 0 248 | if ac[i, j + 1] < ac[i, j]: 249 | tb = 1 250 | if ac[i + 1, j] < ac[i, j + tb]: 251 | tb = 2 252 | if tb == 0: 253 | i -= 1 254 | j -= 1 255 | elif tb == 1: 256 | i -= 1 257 | else: 258 | j -= 1 259 | p.insert(0, i) 260 | q.insert(0, j) 261 | while j > 0: 262 | j -= 1 263 | p.insert(0, i) 264 | q.insert(0, j) 265 | while i > 0: 266 | i -= 1 267 | p.insert(0, i) 268 | q.insert(0, j) 269 | 270 | return np.array(p), np.array(q) 271 | 272 | 273 | @njit(nogil=True, fastmath=True) 274 | def traceback_adj(ac): 275 | """Computes the adjusted traceback path of the matrix c. 276 | 277 | Parameters 278 | ---------- 279 | ac: nd-array 280 | The accumulated cost matrix. 281 | 282 | Returns 283 | ------- 284 | Coordinates p and q of the minimum path adjusted. 
285 | 286 | """ 287 | i, j = np.array(ac.shape) - 2 288 | p, q = [i], [j] 289 | while (i > 0) and (j > 0): 290 | tb = 0 291 | if ac[i, j + 1] < ac[i, j]: 292 | tb = 1 293 | if ac[i + 1, j] < ac[i, j + tb]: 294 | tb = 2 295 | if tb == 0: 296 | i -= 1 297 | j -= 1 298 | elif tb == 1: 299 | i -= 1 300 | else: # tb == 2 301 | j -= 1 302 | p.insert(0, i) 303 | q.insert(0, j) 304 | while i > 0: 305 | i -= 1 306 | p.insert(0, i) 307 | q.insert(0, j) 308 | return np.array(p), np.array(q) 309 | 310 | 311 | def backtracking(ac): 312 | """Compute the most cost-efficient path. 313 | 314 | Parameters 315 | ---------- 316 | ac: nd-array 317 | The accumulated cost matrix. 318 | 319 | Returns 320 | ------- 321 | Coordinates of the most cost-efficient path. 322 | """ 323 | x = np.shape(ac) 324 | i = x[0] - 1 325 | j = x[1] - 1 326 | 327 | # The path indices are collected in reverse order 328 | # path = np.ones((i + j, 2)) * np.inf 329 | best_path = [] 330 | 331 | steps = 0 332 | while i != 0 or j != 0: 333 | 334 | best_path.append((i - 1, j - 1)) 335 | 336 | C = np.ones((3, 1)) * np.inf 337 | 338 | # Keep data points in both time series 339 | C[0] = ac[i - 1, j - 1] 340 | # Deletion in A 341 | C[1] = ac[i - 1, j] 342 | # Deletion in B 343 | C[2] = ac[i, j - 1] 344 | 345 | # Find the index for the lowest cost 346 | idx = np.argmin(C) 347 | 348 | if idx == 0: 349 | # Keep data points in both time series 350 | i = i - 1 351 | j = j - 1 352 | elif idx == 1: 353 | # Deletion in A 354 | i = i - 1 355 | j = j 356 | else: 357 | # Deletion in B 358 | i = i 359 | j = j - 1 360 | steps = steps + 1 361 | 362 | best_path.append((i - 1, j - 1)) 363 | 364 | best_path.reverse() 365 | best_path = np.array(best_path[1:]) 366 | 367 | return best_path[:, 0], best_path[:, 1] 368 | 369 | 370 | # DTW SW 371 | def dtw_sw(x, y, winlen, alpha=0.5, **kwargs): 372 | """Computes Dynamic Time Warping (DTW) of two time series using a sliding window. 373 | TODO: Check whether this needs to be sped up. 374 | 375 | Parameters 376 | ---------- 377 | x: nd-array 378 | Time series x (query). 379 | y: nd-array 380 | Time series y. 381 | winlen: int 382 | The sliding window length. 383 | alpha: float 384 | A factor between 0 and 1 which weights the amplitude and derivative contributions. 385 | A higher value favors amplitude and a lower value favors the first derivative. 386 | 387 | \**kwargs: 388 | See below: 389 | 390 | * *normalize* (``bool``) -- 391 | If ``True`` the signals will be standardized before computing the DTW. 392 | (default: ``False``) 393 | 394 | * *dist_norm* (``bool``) -- 395 | If ``True`` the DTW distance will be normalized by dividing by the summation of the path dimensions. 396 | (default: ``True``) 397 | 398 | * *window* (``String``) -- 399 | Selects the global window constraint. Available options are ``None`` and ``sakoe-chiba``. 400 | (default: ``None``) 401 | 402 | * *factor* (``Float``) -- 403 | Selects the global constraint factor. 404 | (default: ``min(xl, yl) * .50``) 405 | 406 | 407 | Returns 408 | ------- 409 | d: float 410 | The SW-DTW distance. 411 | c: nd-array 412 | The local cost matrix. 413 | ac: nd-array 414 | The accumulated cost matrix. 415 | path: nd-array 416 | The optimal warping path between the two sequences.
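Examples
--------
A minimal sketch (signals are illustrative; ``winlen=5`` is an arbitrary choice):

>>> import numpy as np
>>> t = np.linspace(0, 2 * np.pi, 64)
>>> d, c, ac, path = dtw_sw(np.sin(t), np.sin(t + 0.3), winlen=5)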
417 | 418 | """ 419 | xl, yl = len(x), len(y) 420 | 421 | do_sign_norm = kwargs.get("normalize", False) 422 | do_dist_norm = kwargs.get("dist_norm", True) 423 | window = kwargs.get("window", None) 424 | factor = kwargs.get("factor", np.min((xl, yl)) * 0.50) 425 | 426 | if do_sign_norm: 427 | x, y = standardization(x), standardization(y) 428 | 429 | ac = np.zeros((xl + 1, yl + 1)) 430 | ac[0, 1:] = np.inf 431 | ac[1:, 0] = np.inf 432 | tmp_ac = ac[1:, 1:] 433 | 434 | nx = get_mirror(x, winlen) 435 | ny = get_mirror(y, winlen) 436 | 437 | dnx = np.diff(nx) 438 | dny = np.diff(ny) 439 | 440 | nx = nx[:-1] 441 | ny = ny[:-1] 442 | 443 | # Workaround to deal with even window sizes 444 | if winlen % 2 == 0: 445 | winlen -= 1 446 | 447 | swindow = np.hamming(winlen) 448 | swindow = swindow / np.sum(swindow) 449 | 450 | for i in range(xl): 451 | for j in range(yl): 452 | pad_i, pad_j = i + winlen, j + winlen 453 | # No window selected 454 | if window is None: 455 | tmp_ac[i, j] = sliding_dist( 456 | nx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 457 | ny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 458 | dnx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 459 | dny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 460 | alpha, 461 | swindow, 462 | ) 463 | 464 | # Sakoe-Chiba band 465 | elif window == "sakoe-chiba": 466 | if abs(i - j) < factor: 467 | tmp_ac[i, j] = sliding_dist( 468 | nx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 469 | ny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 470 | dnx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 471 | dny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 472 | alpha, 473 | swindow, 474 | ) 475 | else: 476 | tmp_ac[i, j] = np.inf 477 | 478 | # As a last resort, any other window option falls back to the full computation 479 | else: 480 | tmp_ac[i, j] = sliding_dist( 481 | nx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 482 | ny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 483 | dnx[pad_i - (winlen // 2) : pad_i + (winlen // 2) + 1], 484 | dny[pad_j - (winlen // 2) : pad_j + (winlen // 2) + 1], 485 | alpha, 486 | swindow, 487 | ) 488 | 489 | c = tmp_ac.copy() 490 | 491 | for i in range(xl): 492 | for j in range(yl): 493 | tmp_ac[i, j] += min([ac[i, j], ac[i, j + 1], ac[i + 1, j]]) 494 | 495 | path = traceback(ac) 496 | 497 | if do_dist_norm: 498 | d = ac[-1, -1] / np.sum(np.shape(path)) 499 | else: 500 | d = ac[-1, -1] 501 | 502 | return d, c, ac, path 503 | 504 | 505 | def sliding_dist(xw, yw, dxw, dyw, alpha, win): 506 | """Computes the sliding distance. 507 | 508 | Parameters 509 | ---------- 510 | xw: nd-array 511 | x coords window. 512 | yw: nd-array 513 | y coords window. 514 | dxw: nd-array 515 | x coords diff window. 516 | dyw: nd-array 517 | y coords diff window. 518 | alpha: float 519 | Weighting between amplitude and derivative contributions (1 - amplitude only, 0 - derivative only). 520 | win: nd-array 521 | Signal window used for the sliding distance. 522 | 523 | Returns 524 | ------- 525 | The sliding distance. 526 | """ 527 | return (1 - alpha) * np.sqrt(np.sum((((dxw - dyw) * win) ** 2.0))) + alpha * np.sqrt( 528 | np.sum((((xw - yw) * win) ** 2.0)) 529 | ) 530 | 531 | 532 | def get_mirror(s, ws): 533 | """Extends a signal by mirroring its start and end segments around the first and last samples. 534 | 535 | Parameters 536 | ---------- 537 | s: nd-array 538 | The input signal. 539 | ws: int 540 | The window size.
541 | 542 | Returns 543 | ------- 544 | The mirrored signal. 545 | """ 546 | 547 | return np.r_[2 * s[0] - s[ws:0:-1], s, 2 * s[-1] - s[-2 : -ws - 2 : -1]] 548 | 549 | 550 | @njit() 551 | def _lcss_point_dist(x, y): 552 | """Computes the pointwise Euclidean distance between two samples. 553 | 554 | Parameters 555 | ---------- 556 | x: nd-array 557 | A sample of time series x (query). 558 | y: nd-array 559 | A sample of time series y. 560 | 561 | Returns 562 | ------- 563 | The Euclidean distance between the two samples. 564 | """ 565 | dist = 0.0 566 | for di in range(x.shape[0]): 567 | diff = x[di] - y[di] 568 | dist += diff * diff 569 | 570 | return dist ** 0.5 571 | 572 | 573 | def lcss_accumulated_matrix(x, y, eps): 574 | """Computes the LCSS accumulated matrix using the Euclidean point distance between two time series. 575 | 576 | Parameters 577 | ---------- 578 | x: nd-array 579 | Time series x (query). 580 | y: nd-array 581 | Time series y. 582 | eps : float 583 | Amplitude matching threshold. 584 | 585 | Returns 586 | ------- 587 | ac : nd-array 588 | The accumulated cost matrix. 589 | """ 590 | 591 | xl, yl = len(x), len(y) 592 | 593 | ac = np.zeros((xl + 1, yl + 1)) 594 | 595 | for i in range(1, xl + 1): 596 | for j in range(1, yl + 1): 597 | if _lcss_point_dist(x[i - 1, :], y[j - 1, :]) <= eps: 598 | ac[i, j] = 1 + ac[i - 1, j - 1] 599 | else: 600 | ac[i, j] = max(ac[i, j - 1], ac[i - 1, j]) 601 | 602 | return ac 603 | 604 | 605 | def lcss_path(x, y, c, eps): 606 | """Computes the LCSS path between two time series. 607 | 608 | Parameters 609 | ---------- 610 | x: nd-array 611 | Time series x (query). 612 | y: nd-array 613 | Time series y. 614 | c : nd-array 615 | The accumulated cost matrix (as returned by lcss_accumulated_matrix). 616 | eps : float 617 | Amplitude matching threshold. 618 | 619 | Returns 620 | ------- 621 | Coordinates of the minimum LCSS path. 622 | """ 623 | i, j = len(x), len(y) 624 | path = [] 625 | 626 | while i > 0 and j > 0: 627 | if _lcss_point_dist(x[i - 1, :], y[j - 1, :]) <= eps: 628 | path.append((i - 1, j - 1)) 629 | i -= 1 630 | j -= 1 631 | elif c[i - 1, j] > c[i, j - 1]: 632 | i -= 1 633 | else: 634 | j -= 1 635 | 636 | path = np.array(path[::-1]) 637 | return path[1:, 0], path[1:, 1] 638 | 639 | 640 | def lcss_score(c): 641 | """Computes the LCSS similarity score between two time series. 642 | 643 | Parameters 644 | ---------- 645 | c : nd-array 646 | The accumulated cost matrix. 647 | 648 | Returns 649 | ------- 650 | The LCSS score, between 0 and 1. 651 | """ 652 | 653 | xl = c.shape[0] - 1 654 | yl = c.shape[1] - 1 655 | 656 | return float(c[-1, -1]) / min([xl, yl]) 657 | --------------------------------------------------------------------------------
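A minimal end-to-end sketch of the LCSS helpers above (the array values and the eps threshold are illustrative, not taken from the package's examples):

    import numpy as np

    from tssearch.distances.elastic_utils import lcss_accumulated_matrix, lcss_path, lcss_score

    # The LCSS helpers index samples as x[i, :], so inputs are 2-D (samples x dimensions).
    x = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([[0.1], [1.1], [2.1], [3.1], [4.0]])

    ac = lcss_accumulated_matrix(x, y, eps=0.5)  # (len(x)+1, len(y)+1) accumulated matrix
    px, py = lcss_path(x, y, ac, eps=0.5)        # indices of the matched samples
    score = lcss_score(ac)                       # LCSS length normalized by min(len(x), len(y))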