├── mprod ├── tests │ ├── __init__.py │ ├── test_base.py │ ├── _base_tests.py │ └── _utils.py ├── decompositions │ ├── tests │ │ ├── __init__.py │ │ ├── test__qr.py │ │ └── test_decompositions.py │ ├── __init__.py │ ├── _qr.py │ └── _tsvdm.py ├── dimensionality_reduction │ ├── tests │ │ ├── __init__.py │ │ └── test_TCAM.py │ ├── __init__.py │ └── _tcam.py ├── __init__.py ├── _misc.py ├── _pytester.py ├── _pytesttester.py ├── _base.py └── _ml_helpers.py ├── docs ├── _static │ └── img │ │ ├── mprod_logo.png │ │ ├── ._mprod_logo.png │ │ ├── mprod_logo_fav.png │ │ ├── ._mprod_logo_fav.png │ │ ├── mprod_logo_small.png │ │ ├── ._mprod_logo_small.png │ │ ├── mprod_tcam_cartoon.png │ │ └── ._mprod_tcam_cartoon.png ├── modules │ ├── stubs │ │ ├── mprod.x_m3.rst │ │ ├── mprod.m_prod.rst │ │ ├── mprod.table2tensor.rst │ │ ├── mprod.tensor_mtranspose.rst │ │ ├── mprod.decompositions.svdm.rst │ │ ├── mprod.decompositions.tqrm.rst │ │ ├── mprod.MeanDeviationForm.rst │ │ └── mprod.dimensionality_reduction.TCAM.rst │ ├── classes.rst │ ├── mprod.decompositions.rst │ ├── mprod.rst │ └── mprod.dimensionality_reduction.rst ├── examples │ ├── data │ │ ├── readme_graphs.png │ │ ├── single_curve_demo.pdf │ │ ├── single_curve_demo.png │ │ ├── multiple_curves_demo.png │ │ ├── mermaid-diagram-pipeline.png │ │ └── ._mermaid-diagram-pipeline.png │ ├── intro.ipynb │ ├── examples.rst │ └── mprod_primer.ipynb ├── _templates │ ├── class.rst │ └── function.rst ├── requirements.txt ├── environment.yml ├── Makefile ├── run_livereload.py ├── make.bat ├── conf.py └── index.rst ├── requirements.txt ├── .gitignore ├── .readthedocs.yaml ├── setup.cfg ├── .github └── workflows │ └── build.yaml ├── LICENSE ├── setup.py ├── azure-pipelines.yml ├── README.md └── runtests.py /mprod/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mprod/tests/test_base.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mprod/decompositions/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mprod/decompositions/tests/test__qr.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mprod/dimensionality_reduction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/img/mprod_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo.png -------------------------------------------------------------------------------- /docs/_static/img/._mprod_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo.png -------------------------------------------------------------------------------- /docs/_static/img/mprod_logo_fav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo_fav.png -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.x_m3.rst: -------------------------------------------------------------------------------- 1 | mprod.x\_m3 2 | =========== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. 
autofunction:: x_m3 -------------------------------------------------------------------------------- /docs/_static/img/._mprod_logo_fav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo_fav.png -------------------------------------------------------------------------------- /docs/_static/img/mprod_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo_small.png -------------------------------------------------------------------------------- /docs/examples/data/readme_graphs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/readme_graphs.png -------------------------------------------------------------------------------- /docs/_static/img/._mprod_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo_small.png -------------------------------------------------------------------------------- /docs/_static/img/mprod_tcam_cartoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_tcam_cartoon.png -------------------------------------------------------------------------------- /docs/examples/data/single_curve_demo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/single_curve_demo.pdf -------------------------------------------------------------------------------- /docs/examples/data/single_curve_demo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/single_curve_demo.png -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.m_prod.rst: -------------------------------------------------------------------------------- 1 | mprod.m\_prod 2 | ============= 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autofunction:: m_prod -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | scikit-learn>=0.24.1 3 | scipy>=1.5.3 4 | dataclasses>=0.7; python_version < '3.7' 5 | pandas>=1.1.5 -------------------------------------------------------------------------------- /docs/_static/img/._mprod_tcam_cartoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_tcam_cartoon.png -------------------------------------------------------------------------------- /docs/examples/data/multiple_curves_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/multiple_curves_demo.png -------------------------------------------------------------------------------- /docs/examples/data/mermaid-diagram-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/mermaid-diagram-pipeline.png -------------------------------------------------------------------------------- /docs/examples/data/._mermaid-diagram-pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/._mermaid-diagram-pipeline.png -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.table2tensor.rst: -------------------------------------------------------------------------------- 1 | mprod.table2tensor 2 | ================== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autofunction:: table2tensor -------------------------------------------------------------------------------- /mprod/dimensionality_reduction/__init__.py: -------------------------------------------------------------------------------- 1 | """.. mprod.dimensionality_reduction 2 | """ 3 | 4 | from ._tcam import TCAM 5 | 6 | __all__ = [ 7 | "TCAM" 8 | ] -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.tensor_mtranspose.rst: -------------------------------------------------------------------------------- 1 | mprod.tensor\_mtranspose 2 | ======================== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autofunction:: tensor_mtranspose -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.decompositions.svdm.rst: -------------------------------------------------------------------------------- 1 | mprod.decompositions.svdm 2 | ========================= 3 | 4 | .. currentmodule:: mprod.decompositions 5 | 6 | .. autofunction:: svdm -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.decompositions.tqrm.rst: -------------------------------------------------------------------------------- 1 | mprod.decompositions.tqrm 2 | ========================= 3 | 4 | .. currentmodule:: mprod.decompositions 5 | 6 | .. 
autofunction:: tqrm -------------------------------------------------------------------------------- /mprod/decompositions/__init__.py: -------------------------------------------------------------------------------- 1 | """.. mprod.decompositions 2 | """ 3 | 4 | from ._tsvdm import svdm 5 | from ._qr import tqrm 6 | 7 | __all__ = [ 8 | "svdm", 9 | "tqrm" 10 | ] 11 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}.{{objname}}` 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. :function:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.MeanDeviationForm.rst: -------------------------------------------------------------------------------- 1 | :mod:`mprod.MeanDeviationForm` 2 | ===================================== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autoclass:: MeanDeviationForm 7 | 8 | .. include:: mprod.MeanDeviationForm.examples 9 | 10 | .. raw:: html 11 | 12 |
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-gallery==0.9.0 2 | numpydoc==1.1.0 3 | sphinxcontrib-bibtex==2.3.0 4 | sphinx-prompt==1.4.0 5 | nbsphinx==0.8.6 6 | sphinx_rtd_theme==0.5.2 7 | ipykernel==5.4.3 8 | m2r2==0.3.1 9 | ipykernel==5.4.3 10 | seaborn==0.11.1 11 | jupyter==1.0.0 12 | myst-parser==0.15.2 13 | livereload==2.6.3 14 | pandoc==2.0.1 -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.dimensionality_reduction.TCAM.rst: -------------------------------------------------------------------------------- 1 | :mod:`mprod.dimensionality_reduction.TCAM` 2 | ================================================= 3 | 4 | .. currentmodule:: mprod.dimensionality_reduction 5 | 6 | .. autoclass:: TCAM 7 | 8 | .. include:: mprod.dimensionality_reduction.TCAM.examples 9 | 10 | .. raw:: html 11 | 12 |
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.pytest_cache/ 2 | 3 | **/__pycache__/ 4 | build.log 5 | **/.DS_Store 6 | **/._.DS_Store 7 | **/.ipynb_checkpoints/ 8 | .coveragerc 9 | _doc/ 10 | coding_stuff/ 11 | test_notebooks/ 12 | docs/trashed_docs/ 13 | docs/_build/ 14 | docs/_conf.py 15 | dist/ 16 | build/ 17 | mprod_package.egg-info/ 18 | pytest.xml 19 | 20 | docs/examples/data/._*.png 21 | doc_exp/ 22 | docs_exp/ 23 | docs/run_livereload.py 24 | trashed_docs/ -------------------------------------------------------------------------------- /docs/modules/classes.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | This is the class and function reference of M product framework. Please refer to 6 | the full user guide for further details, as the class and 7 | function raw specifications may not be enough to give full guidelines on their 8 | uses. 9 | For reference on concepts repeated across the API, see 10 | 11 | .. toctree:: 12 | :maxdepth: 4 13 | 14 | mprod 15 | mprod.dimensionality_reduction 16 | mprod.decompositions 17 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.6" 7 | 8 | 9 | python: 10 | install: 11 | - requirements: docs/requirements.txt 12 | - method: pip 13 | path: . 14 | extra_requirements: 15 | - docs 16 | # # version: "3.6" 17 | # install: 18 | # - method: pip 19 | # path: . 
20 | # extra_requirements: 21 | # - docs 22 | # system_packages: true 23 | 24 | 25 | # Build documentation in the docs/ directory with Sphinx 26 | sphinx: 27 | configuration: docs/conf.py 28 | -------------------------------------------------------------------------------- /docs/modules/mprod.decompositions.rst: -------------------------------------------------------------------------------- 1 | Tensor decompositions 2 | ===================== 3 | 4 | The `mprod.decomposition` module includes tensor decomposition 5 | algorithms. Currently, the tsvdm and tqrm decompositions is implemented. 6 | Future plans are to implement additional factorizations such as non negative tensor factorization (NTF) . 7 | 8 | .. currentmodule:: mprod.decompositions 9 | 10 | 11 | .. autosummary:: 12 | :toctree: stubs 13 | 14 | svdm 15 | tqrm 16 | 17 | 18 | .. automodule:: mprod.decompositions 19 | :members: 20 | :toctree: stubs 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/modules/mprod.rst: -------------------------------------------------------------------------------- 1 | :mod:`mprod` base module 2 | ========================= 3 | 4 | 5 | The :mod:`mprod` module includes tensor decomposition utilities 6 | and algorithms, such as TCAM, MeanDeviationForm. 7 | 8 | .. currentmodule:: mprod 9 | 10 | 11 | 12 | .. autosummary:: 13 | :toctree: stubs 14 | :template: class.rst 15 | 16 | MeanDeviationForm 17 | 18 | 19 | .. autosummary:: 20 | :toctree: stubs 21 | 22 | m_prod 23 | tensor_mtranspose 24 | x_m3 25 | table2tensor 26 | 27 | 28 | 29 | .. 
automodule:: mprod 30 | :members:
automodule:: mprod.dimensionality_reduction 21 | :members: 22 | :undoc-members:
/docs/run_livereload.py: -------------------------------------------------------------------------------- 1 | from livereload import Server, shell 2 | 3 | if __name__ == '__main__': 4 | server = Server() 5 | server.watch('*.rst', shell('make html'), delay=1) 6 | server.watch('modules/*.rst', shell('make html'), delay=1) 7 | server.watch('modules/*/*.rst', shell('make html'), delay=1) 8 | server.watch('*.md', shell('make html'), delay=1) 9 | server.watch('*.py', shell('make html'), delay=1) 10 | server.watch('*.ipynb', shell('make html'), delay=.1) 11 | server.watch('examples/*.ipynb', shell('make html'), delay=.1) 12 | server.watch('_static/*', shell('make html'), delay=1) 13 | server.watch('_templates/*', shell('make html'), delay=1) 14 | server.serve(root='_build/html', host="cn240.wexac.weizmann.ac.il", port=8888) 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = mprod-package-uriamorP 3 | version = 0.0.5a1 4 | author = 5 | Uria Mor 6 | Rafael Valdes Mas 7 | Yotam Cohen 8 | Haim Avron 9 | project_url = https://github.com/UriaMorP/mprod_package 10 | author_email = uriamo@gmail.com, 11 | description = Software implementation for tensor-tensor m-product framework 12 | long_description_content_type = text/markdown 13 | long_description = file: README.md 14 | license = BSD 15 | classifiers = 16 | Development Status :: 3 - Alpha 17 | Intended Audience :: Scientists 18 | Programming Language :: Python :: 3.6 19 | Programming Language :: Python :: 3.7 20 | Programming Language :: Python :: 3.8 21 | Programming Language :: Python :: 3.9 22 | Programming Language :: Python :: 3.10 23 | Programming Language :: Python :: 3 :: Only 24 | 25 | 26 | python_requires = >=3.6.8 27 | -------------------------------------------------------------------------------- /docs/make.bat: 
-------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | python-version: [3.6.8, 3.7, 3.8, 3.9, 3.10.0, 3.11.0, 3.12.0] 14 | os: [macos-latest, ubuntu-20.04] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Build using Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: pip install dependencies [pip] 24 | run: | 25 | python -m pip install pip --upgrade pip; 26 | pip install pytest; 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | if [ ${{ matrix.python-version }} == 3.6.8 ]; then pip install dataclasses>=0.7; fi 29 | pip install -e . 
30 | 31 | - name: unit tests [pytest] 32 | run: | 33 | pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml 34 | -------------------------------------------------------------------------------- /mprod/_misc.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Tuple 2 | from numpy import ndarray 3 | 4 | 5 | def _assert_order(tensor: ndarray, tensor_varname: str, order: int): 6 | got_order = len(tensor.shape) 7 | assert got_order == order, f"{tensor_varname} must be a order {order} tensor, found order {got_order}" 8 | 9 | 10 | def _assert_size(tensor: ndarray, tensor_varname: str, axis: int, dim: int): 11 | got_dim = tensor.shape[axis] 12 | assert got_dim == dim, f"Dimension {axis} of {tensor_varname} must equal {dim}, found {got_dim}" 13 | 14 | 15 | def _assert_order_and_mdim(tensor: ndarray, 16 | tensor_varname: str, 17 | order: int, 18 | dim_inspection_list: Iterable[Tuple[int, int]]): 19 | """ 20 | 21 | Parameters 22 | ---------- 23 | tensor: np.ndarray 24 | The tensor for inpection 25 | tensor_varname: str 26 | The variable name of the tensor as it appears in the code 27 | order: int 28 | The intended order of `tensor` 29 | dim_inspection_list 30 | 31 | 32 | """ 33 | _assert_order(tensor, tensor_varname, order) 34 | for ax, dim in dim_inspection_list: 35 | assert ax < order, f"Trying to assert the dimension of mode {ax} of a {order} order tensor {tensor_varname}" 36 | _assert_size(tensor, tensor_varname, ax, dim) 37 | 38 | -------------------------------------------------------------------------------- /mprod/tests/_base_tests.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from mprod import m_prod, x_m3, tensor_mtranspose 5 | from numpy.testing import ( 6 | assert_, assert_equal, assert_raises, assert_array_equal, 7 | assert_almost_equal, assert_allclose, suppress_warnings, 8 | 
def gen_m_transpose(mpair):
    """Return a closure computing the M-transpose under the (mfun, minv) pair."""
    mfun, minv = mpair

    def _do(a):
        return tensor_mtranspose(a, mfun, minv)

    return _do


def gen_m_product(mpair):
    """Return a closure computing the m-product under the (mfun, minv) pair."""
    mfun, minv = mpair

    def _do(a, b):
        return m_prod(a, b, mfun, minv)

    return _do


def assert_identity(J, tensor, mproduct):
    """Assert that `J` acts as an identity on `tensor` under `mproduct`."""
    tensor2 = mproduct(J, tensor)
    assert_almost_equal(tensor, tensor2)


def assert_m_orth(tensor, mfun, minv):
    """
    Assert that `tensor` is M-orthogonal with respect to the (mfun, minv)
    transform pair: the m-product of the tensor with its M-transpose (taken
    on the smaller mode) must act as an identity on random test tensors.

    NOTE(review): the previous version wrapped the check in a nested,
    pytest-parametrized function that was never invoked, so no identity
    assertion was ever executed; additionally ``range(1, 10, 100)`` yields
    only ``1``.  The cases are now checked directly in a loop.  The size
    range was presumably meant to sweep several mode-2 sizes -- confirm
    the intended sizes with the original author.
    """
    m, p, n = tensor.shape

    _t = gen_m_transpose((mfun, minv))
    _m = gen_m_product((mfun, minv))

    # Form the (expected) identity tensor on the smaller of the two modes.
    if m <= p:
        J = _m(tensor, _t(tensor))
    else:
        J = _m(_t(tensor), tensor)

    for mode2_size in range(1, 100, 10):
        for i in range(10):
            rng = np.random.default_rng(seed=i + int(np.log10(mode2_size)))
            tens = rng.random((J.shape[1], mode2_size, n))
            assert_identity(J, tens, _m)
_primer:\n", 10 | "\n", 11 | "Brief Intro\n", 12 | "-----------" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 5, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# TODO" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | } 31 | ], 32 | "metadata": { 33 | "celltoolbar": "Edit Metadata", 34 | "kernelspec": { 35 | "display_name": "mprod", 36 | "language": "python", 37 | "name": "mprod" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.6.8" 50 | }, 51 | "toc": { 52 | "base_numbering": 1, 53 | "nav_menu": {}, 54 | "number_sections": true, 55 | "sideBar": true, 56 | "skip_h1_title": false, 57 | "title_cell": "Table of Contents", 58 | "title_sidebar": "Contents", 59 | "toc_cell": false, 60 | "toc_position": {}, 61 | "toc_section_display": true, 62 | "toc_window_display": false 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 4 67 | } 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, UriaMorP 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /mprod/decompositions/tests/test_decompositions.py: -------------------------------------------------------------------------------- 1 | """ Test functions for decompositions module 2 | 3 | """ 4 | 5 | import pytest 6 | import numpy as np 7 | 8 | from itertools import product 9 | 10 | from mprod.decompositions import svdm, tqrm 11 | from mprod.tests._utils import (_make_mprod_op_cases, _make_tensor_cases, gen_m_product, gen_m_transpose, assert_m_orth) 12 | 13 | from numpy.testing import ( 14 | assert_, assert_equal, assert_raises, assert_array_equal, 15 | assert_almost_equal, assert_allclose, suppress_warnings, 16 | assert_raises_regex, HAS_LAPACK64, 17 | ) 18 | 19 | M_FUN_CASES = _make_mprod_op_cases() 20 | TENSOR_CASES = _make_tensor_cases() 21 | 22 | 23 | @pytest.mark.parametrize('tensor, m_pair', product(TENSOR_CASES, M_FUN_CASES)) 24 | def test_tsvdm(tensor, m_pair): 25 | mfun, minv = m_pair 26 | # _m = gen_m_product(m_pair) 27 | # _t = gen_m_transpose(m_pair) 28 | 29 | u, s, v = svdm(tensor, mfun, minv) 30 | m, p, n = tensor.shape 31 | rk = min(m, p) 32 | 33 | assert s.shape[0] == rk, f"expected shape[0] of s to be {rk}, got {s.shape[0]}" 34 | assert s.shape[1] == tensor.shape[-1], f"expected shape[1] of s to be {tensor.shape[-1]}, got {s.shape[1]}" 35 | 36 | 37 | # tensor2 = _m(_m(u, s), _t(v)) 38 | shat = mfun(s) 39 | us = mfun(u).transpose(2, 0, 1) * shat.T.reshape(n, 1, m) 40 | usv = np.matmul(us, mfun(v).transpose(2, 1, 0)) 41 | usv = usv.transpose(1, 2, 0) 42 | tensor2 = minv(usv) 43 | assert_almost_equal(tensor, tensor2) 44 | 45 | assert_m_orth(u, *m_pair) 46 | assert_m_orth(v, *m_pair) 47 | 48 | 49 | @pytest.mark.parametrize('tensor, m_pair', product(TENSOR_CASES, M_FUN_CASES)) 50 | def test_tqrm(tensor, m_pair): 51 | mfun, minv = m_pair 52 | 53 | _m = gen_m_product(m_pair) 54 | _t = gen_m_transpose(m_pair) 55 | 56 | Q, R = tqrm(tensor, mfun, minv) 57 | 58 | tensor2 
def tqrm(tens_a: np.ndarray, fun_m: "MatrixTensorProduct", inv_m: "MatrixTensorProduct", hats: bool = False) \
        -> "Tuple[NumpynDArray, NumpynDArray]":
    """
    The ``tqrm`` implements tensor-QR decomposition:
    ``Q,R = tqrm(tensor_a, m, inv_m)`` where ``Q`` is an M-orthogonal tensor of shape ``(m, min(m,p), n)``
    and ``R`` is an f-upper triangular tensor of shape ``(min(m,p), p, n)``.

    Parameters
    ----------
    tens_a: np.ndarray
        Tensor of shape ``(m,p,n)``
    fun_m: MatrixTensorProduct
        Invertible mat-vec operation for transforming ``tens_a`` tube fibers
    inv_m: MatrixTensorProduct
        Invertible mat-vec operation for transforming ``tens_a`` tube fibers. This operation is the inverse of ``fun_m``
    hats: bool
        Setting this to ``True`` will cause the function to return the tqrm factors in the tensor domain transform
        (i.e. without applying ``inv_m``).

    Returns
    -------
    tens_q: np.ndarray
        M-orthogonal tensor of shape ``(m, min(m,p), n)``
    tens_r: np.ndarray
        f-upper triangular tensor of shape ``(min(m,p), p, n)``

    """

    m, p, n = tens_a.shape
    # Reduced QR of an (m, p) face always yields factors (m, k) and (k, p)
    # with k = min(m, p), so the result size is known up front -- no need
    # for the per-face bookkeeping / post-hoc truncation of the old code.
    k = min(m, p)

    a_hat = fun_m(tens_a)

    # Allocate in the dtype produced by the transform: the previous float64
    # zeros would break (or silently degrade) complex or non-double fun_m.
    q_hat = np.zeros((m, k, n), dtype=a_hat.dtype)
    r_hat = np.zeros((k, p, n), dtype=a_hat.dtype)

    # Face-wise QR in the transform domain.
    for i in range(n):
        q_hat[:, :, i], r_hat[:, :, i] = np.linalg.qr(a_hat[:, :, i])

    if hats:
        return q_hat, r_hat

    tens_q = inv_m(q_hat)
    tens_r = inv_m(r_hat)

    return tens_q, tens_r
"tSVD", "tSVDM", "tensor decomposition"], 28 | "name": 'mprod-package', 29 | "version": '0.0.5a1', 30 | "packages": find_packages(), 31 | "long_description": readme, 32 | "url": "https://github.com/UriaMorP/mprod_package", 33 | "python_requires": '>=3.6.8', 34 | "install_requires": [ 35 | "numpy >= 1.19.2", 36 | "scikit-learn >= 0.24.1", 37 | "scipy >= 1.5.3", 38 | "dataclasses >= 0.7; python_version < '3.7'", 39 | "pandas >= 1.1.5" 40 | ], 41 | "extras_require": { 42 | "dev": ["pytest==6.2.2", ], 43 | "docs": [ 44 | "sphinx-gallery == 0.9.0", 45 | "numpydoc == 1.1.0", 46 | "sphinxcontrib-bibtex == 2.3.0", 47 | "sphinx-prompt == 1.4.0", 48 | "nbsphinx == 0.8.6", 49 | "ipykernel == 5.4.3", 50 | "seaborn == 0.11.1", 51 | "jupyter == 1.0.0", 52 | "myst-parser == 0.15.2", 53 | "m2r2 == 0.3.1", 54 | "livereload == 2.6.3", 55 | "pandoc == 2.0.1", 56 | ] 57 | } 58 | } 59 | 60 | setup(**configuration) 61 | -------------------------------------------------------------------------------- /mprod/tests/_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from mprod import m_prod, x_m3, tensor_mtranspose, generate_dct 5 | from numpy.testing import ( 6 | assert_, assert_equal, assert_raises, assert_array_equal, 7 | assert_almost_equal, assert_allclose, suppress_warnings, 8 | assert_raises_regex, HAS_LAPACK64, 9 | ) 10 | 11 | 12 | m, p, n = 100, 300, 10 13 | 14 | 15 | def _make_mprod_op_cases(): 16 | mprod_cases = [] 17 | for i in range(2): 18 | rng = np.random.default_rng(seed=i) 19 | mat = rng.random((n, n)) 20 | mat_m = np.linalg.qr(mat)[0] 21 | mfun = x_m3(mat_m) 22 | minv = x_m3(mat_m.T) 23 | mprod_cases.append((mfun,minv)) 24 | 25 | # add dct based transforms 26 | mfun, minv = generate_dct(n) 27 | mprod_cases.append((mfun,minv)) 28 | return mprod_cases 29 | 30 | 31 | def _make_tensor_cases(): 32 | tensor_cases = [] 33 | 34 | for i in range(2): 35 | rng = np.random.default_rng(seed=i) 36 
| X = rng.random((m, p, n)) 37 | tensor_cases.append(X) 38 | return tensor_cases 39 | 40 | 41 | def gen_m_transpose(mpair): 42 | mfun, minv = mpair 43 | 44 | def _do(a): 45 | return tensor_mtranspose(a, mfun, minv) 46 | 47 | return _do 48 | 49 | 50 | def gen_m_product(mpair): 51 | mfun, minv = mpair 52 | 53 | def _do(a, b): 54 | return m_prod(a, b, mfun, minv) 55 | 56 | return _do 57 | 58 | 59 | def assert_identity(J, tensor, mproduct): 60 | tensor2 = mproduct(J, tensor) 61 | assert_almost_equal(tensor, tensor2) 62 | 63 | 64 | def assert_m_orth(tensor, mfun, minv): 65 | m, p, n = tensor.shape 66 | 67 | _t = gen_m_transpose((mfun, minv)) 68 | _m = gen_m_product((mfun, minv)) 69 | 70 | if m <= p: 71 | J = _m(tensor, _t(tensor)) 72 | else: 73 | J = _m(_t(tensor), tensor) 74 | 75 | TENSOR_CASES = [] 76 | for mode2_size in range(1, 10, 100): 77 | for i in range(2): 78 | rng = np.random.default_rng(seed=i + int(np.log10(mode2_size))) 79 | TENSOR_CASES.append(rng.random((J.shape[1], mode2_size, n))) 80 | 81 | @pytest.mark.parametrize('tens', TENSOR_CASES) 82 | def _assert_id(tens): 83 | assert_identity(J, tens, _m) 84 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Python package 2 | # Create and test a Python package on multiple Python versions. 
3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: 4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python 5 | 6 | trigger: 7 | - main 8 | 9 | #jobs: 10 | # - job: Linux 11 | pool: 12 | vmImage: 'ubuntu-latest' 13 | strategy: 14 | matrix: 15 | Python36: 16 | python.version: '3.6' 17 | Python37: 18 | python.version: '3.7' 19 | Python38: 20 | python.version: '3.8' 21 | Python39: 22 | python.version: '3.9' 23 | Python310: 24 | python.version: '3.10' 25 | steps: 26 | - task: UsePythonVersion@0 27 | inputs: 28 | versionSpec: '$(python.version)' 29 | displayName: 'Use Python $(python.version)' 30 | 31 | - script: | 32 | python -m pip install --upgrade pip 33 | pip install -r requirements.txt 34 | displayName: 'Install dependencies' 35 | 36 | - script: | 37 | pip install -e . 38 | displayName: 'Install package' 39 | 40 | - script: | 41 | pip install pytest pytest-azurepipelines 42 | pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml 43 | displayName: 'pytest' 44 | 45 | - task: PublishTestResults@2 46 | inputs: 47 | testResultsFiles: 'pytest.xml' 48 | testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' 49 | condition: succeededOrFailed() 50 | 51 | 52 | # - job: MacOS 53 | # pool: 54 | # vmImage: 'macOS-latest' 55 | # strategy: 56 | # matrix: 57 | # Python36: 58 | # python.version: '3.6' 59 | # Python37: 60 | # python.version: '3.7' 61 | # Python38: 62 | # python.version: '3.8' 63 | # Python39: 64 | # python.version: '3.9' 65 | # Python310: 66 | # python.version: '3.10' 67 | # steps: 68 | # - task: UsePythonVersion@0 69 | # inputs: 70 | # versionSpec: '$(python.version)' 71 | # displayName: 'Use Python $(python.version)' 72 | 73 | # - script: | 74 | # python -m pip install --upgrade pip 75 | # pip install -r requirements.txt 76 | # displayName: 'Install dependencies' 77 | # - script: | 78 | # pip install -e . 
79 | # displayName: 'Install package' 80 | # - script: | 81 | # pip install pytest pytest-azurepipelines 82 | # pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml 83 | # displayName: 'pytest' 84 | # - task: PublishTestResults@2 85 | # inputs: 86 | # testResultsFiles: 'pytest.xml' 87 | # testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' 88 | # condition: succeededOrFailed() 89 | -------------------------------------------------------------------------------- /docs/examples/examples.rst: -------------------------------------------------------------------------------- 1 | .. _tutorials: 2 | 3 | ========= 4 | Tutorials 5 | ========= 6 | 7 | .. rubric:: Scope and intention 8 | 9 | This page presents a collection of tutorials written by the authors of the mprod package 10 | and intended to help newcomers in incorporating the machinery offered by the library 11 | in their analysis workflows. 12 | 13 | The main (and only) data-scientific tool currently implemented is the TCAM 14 | dimensionality reduction algorithm :footcite:p:`mor2021`. We intend to keep expanding the 15 | package content by adding :math:`\star_{\mathbf{M}}`-product based tools 16 | (such as tensor-PLS, tensor-CCA), and we encourage any form of collaboration, 17 | hoping to get good responses, feedback and help from the data-science community. 18 | 19 | .. rubric:: Target audience 20 | 21 | We do not expect expertise in Machine Learning, or data science, in order to use this package. 22 | In fact, it is aimed at non-experts. 23 | 24 | That said, the library is not - by any means - meant to serve as a **black magic tensor package for dummies**. 25 | Just like with almost everything in machine-learning, using this library for ML related tasks requires **some** general 26 | mathematical understanding of ML concepts.
27 | The implementation of dimensionality reduction methods (currently TCAM) is made consistent with 28 | `scikit-learn `_ library to the maximum possible extent, in order to enable smooth 29 | integration within the pythonic ML ecosystem. 30 | For this reason, the users are assumed to know the `scikit-learn `_ library. 31 | Scikit-learn package offers fantastic documentation, tutorials and examples that are more than enough in order to get 32 | started with machine learning in no time. 33 | 34 | .. note:: 35 | 36 | We acknowledge that many potential users might find R more familiar. 37 | However, we urge them to take the time and try the alternative. 38 | 39 | In addition, deep understanding of the mathematical theory underlying mprod based tensor algorithms is always a good 40 | idea. Below, you can find a short :ref:`Primer` section about the idea behind tensor-tensor algebra via the 41 | :math:`\star_{\bf{M}}` -product framework (For a thorough introduction, we refer the interested readers to 42 | :footcite:p:`Kilmer`) 43 | 44 | The :ref:`TCAM` section contains tutorials for working with :class:`mprod.dimensionality_reduction.TCAM`. 45 | For construction and showcase of TCAM refer to :footcite:p:`mor2021` 46 | 47 | 48 | -------------------------------- 49 | 50 | 51 | .. _TCAM: 52 | 53 | ---- 54 | TCAM 55 | ---- 56 | .. toctree:: 57 | :maxdepth: 8 58 | 59 | basic_example 60 | supervised_learning 61 | 62 | .. Schirmer2018 63 | 64 | 65 | 66 | 67 | .. _Primer: 68 | 69 | ------------ 70 | ⚙ Background 71 | ------------ 72 | .. toctree:: 73 | :maxdepth: 4 74 | 75 | mprod_primer 76 | 77 | 78 | ..
footbibliography:: -------------------------------------------------------------------------------- /mprod/dimensionality_reduction/tests/test_TCAM.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from itertools import product 5 | 6 | from mprod.dimensionality_reduction import TCAM 7 | from mprod import MeanDeviationForm 8 | from mprod.tests._utils import (_make_mprod_op_cases, _make_tensor_cases, gen_m_product, gen_m_transpose, assert_m_orth, 9 | m, n, p) 10 | 11 | from numpy.testing import ( 12 | assert_, assert_equal, assert_raises, assert_array_equal, 13 | assert_almost_equal, assert_allclose, suppress_warnings, 14 | assert_raises_regex, HAS_LAPACK64, 15 | ) 16 | 17 | M_FUN_CASES = _make_mprod_op_cases()[:1] 18 | TENSOR_CASES = _make_tensor_cases()[:1] 19 | 20 | @pytest.mark.parametrize('X', TENSOR_CASES) 21 | @pytest.mark.parametrize('n_components', np.linspace(1, min(m, p) * n - 1, 3, dtype=int)) 22 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None]) 23 | def test_tcam_fit_transform(X, n_components, mpair): 24 | print(min(m, p) * n - 1) 25 | if mpair is None: 26 | tca = TCAM(n_components=n_components) 27 | else: 28 | mfun, minv = mpair 29 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components) 30 | 31 | X_r = tca.fit(X).transform(X) 32 | assert X_r.shape[1] == n_components 33 | 34 | # check the equivalence of fit.transform and fit_transform 35 | X_r2 = tca.fit_transform(X) 36 | assert_allclose(X_r, X_r2) 37 | # X_r = tca.transform(X) 38 | assert_allclose(X_r, X_r2) 39 | 40 | 41 | @pytest.mark.parametrize('X', TENSOR_CASES) 42 | @pytest.mark.parametrize('n_components', np.linspace(.1, 1., 3, dtype=float)) 43 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None]) 44 | def test_tcam_reconstruction_err(X, n_components, mpair): 45 | print(min(m, p) * n - 1) 46 | if mpair is None: 47 | tca = TCAM(n_components=n_components) 48 | 49 | else: 50 | mfun, minv = mpair 51 
| tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components) 52 | # check the shape of fit.transform 53 | Y = tca.fit(X).transform(X) 54 | X2 = tca.inverse_transform(Y) 55 | 56 | assert np.round(1 - ((X2 - X) ** 2).sum() / (X ** 2).sum(), 20) >= n_components 57 | 58 | 59 | @pytest.mark.parametrize('X', TENSOR_CASES) 60 | @pytest.mark.parametrize('n_components', range(1, min(m, p) * n - 1, 200)) 61 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None]) 62 | def test_tcam_residue_m_orth(X, n_components, mpair): 63 | print(min(m, p) * n - 1) 64 | if mpair is None: 65 | tca = TCAM(n_components=n_components) 66 | 67 | else: 68 | mfun, minv = mpair 69 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components) 70 | 71 | Y = tca.fit(X).transform(X) 72 | X2 = tca.inverse_transform(Y) 73 | _t = gen_m_transpose((tca.fun_m, tca.inv_m)) 74 | 75 | res_prod_norm = (tca._mprod(_t(X - X2), X2) ** 2).sum() 76 | assert_almost_equal(res_prod_norm, 0, err_msg=f"got {res_prod_norm} instead of 0", verbose=True, ) 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | import sphinx_gallery 16 | sys.path.insert(0, os.path.abspath('.')) 17 | sys.path.insert(0, os.path.abspath('../')) 18 | import sphinx_rtd_theme 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'mprod' 23 | copyright = '2021, Elinav&Avron groups' 24 | author = 'Uria Mor' 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | # extensions = [ 32 | # 'sphinx.ext.autodoc', 33 | # 'sphinx.ext.viewcode', 34 | # 'sphinx.ext.todo', 35 | # 'sphinx.ext.autodoc', 36 | # 'sphinx.ext.imgmath', 37 | # 'sphinx.ext.napoleon' 38 | # ] 39 | 40 | extensions = [ 41 | "sphinx.ext.autodoc", 42 | "sphinx.ext.autosummary", 43 | "numpydoc", 44 | 'sphinx.ext.viewcode', 45 | # "sphinx.ext.linkcode", 46 | "sphinx.ext.doctest", 47 | "sphinx.ext.intersphinx", 48 | "sphinx.ext.mathjax", 49 | "sphinxcontrib.bibtex", 50 | # 'sphinx.ext.imgmath', 51 | # "sphinx.ext.imgconverter", 52 | # "sphinx_gallery.gen_gallery", 53 | "sphinx-prompt", 54 | 'sphinx.ext.napoleon', 55 | "nbsphinx", 56 | # "myst_parser", 57 | 'm2r2', 58 | ] 59 | 60 | bibtex_bibfiles = ['refs.bib'] 61 | 62 | # The suffix(es) of source filenames. 
63 | # You can specify multiple suffix as a list of string: 64 | # 65 | # source_suffix = ['.rst', '.md'] 66 | source_suffix = [".rst", ".ipynb", ".md"] 67 | 68 | # do not execute cells 69 | nbsphinx_execute = "always" 70 | nbsphinx_kernel_name = 'python3' 71 | # nbsphinx_execute = "never" 72 | 73 | 74 | # allow errors because not all tutorials build 75 | nbsphinx_allow_errors = True 76 | 77 | # napoleon related 78 | 79 | napoleon_google_docstring = False 80 | napoleon_use_param = False 81 | napoleon_use_ivar = True 82 | 83 | # Add any paths that contain templates here, relative to this directory. 84 | templates_path = ['_templates'] 85 | 86 | # generate autosummary even if no references 87 | autosummary_generate = False 88 | 89 | # The language for content autogenerated by Sphinx. Refer to documentation 90 | # for a list of supported languages. 91 | # 92 | # This is also used if you do content translation via gettext catalogs. 93 | # Usually you set "language" from the command line for these cases. 94 | language = 'en' 95 | 96 | # List of patterns, relative to source directory, that match files and 97 | # directories to ignore when looking for source files. 98 | # This pattern also affects html_static_path and html_extra_path. 99 | exclude_patterns = ['_build', 100 | 'Thumbs.db', 101 | '.DS_Store', 102 | 'trashed_docs', 103 | '.ipynb_checkpoints', 104 | "examples/.ipynb_checkpoints"] 105 | 106 | autodoc_default_options = {"members": True, "inherited-members": False, "methods": True} 107 | 108 | # -- Options for HTML output ------------------------------------------------- 109 | 110 | # The theme to use for HTML and HTML Help pages. See the documentation for 111 | # a list of builtin themes. 
112 | # 113 | # html_theme = 'alabaster' 114 | html_theme = 'sphinx_rtd_theme' 115 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 116 | # html_theme_options = {"logo_only": True} 117 | # html_logo = "_static/img/mprod_logo_small.png" 118 | # html_favicon = "_static/img/mprod_logo_fav.png" 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # -- Extension configuration ------------------------------------------------- 126 | 127 | # -- Options for todo extension ---------------------------------------------- 128 | 129 | # If true, `todo` and `todoList` produce output, else they produce nothing. 130 | todo_include_todos = False 131 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. mprod documentation master file, created by 2 | sphinx-quickstart on Sun Aug 1 10:11:11 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. 7 | _.. figure:: _static/img/mprod_logo_fav.png 8 | 9 | 10 | =================================================== 11 | :code:`mprod`\: Tensor - Tensor algebraic framework 12 | =================================================== 13 | 14 | mprod is a software implementation for tensor-tensor algebraic framework derived from the 15 | :math:`\star_{\bf{M}}`-product :footcite:p:`Kilmer`. 16 | The package builds on NumPy\ :footcite:p:`Harris2020` and Scipy\ :footcite:p:`Virtanen2020` libraries to realize 17 | core operations and components required for the algebraic framework. 
18 | 19 | 20 | :mod:`mprod-package` implements the fundamental components required for the :math:`\star_{\mathbf{M}}`-product algebraic 21 | framework; tensor-transpose, tensor-matrix multiplication (domain transforms), face-wise tensor multiplication, and, of 22 | course, the :math:`\star_{\mathbf{M}}` tensor-tensor product (See intro) 23 | 24 | In addition, the library offers several basic tensor factorizations such as :mod:`mprod.decompostions.tsvdm` 25 | :footcite:p:`Kilmer` , and :math:`\star_{\mathbf{M}}`-product based dimensionality reduction methods like the 26 | :mod:`mprod.dimensionality_reduction.TCAM` :footcite:p:`mor2021` 27 | 28 | 29 | .. figure:: _static/img/mprod_tcam_cartoon.png 30 | :alt: TCAM cartoon 31 | :class: with-shadow 32 | :width: 90% 33 | :align: center 34 | 35 | An introductory cartoon for the TCAM :footcite:p:`mor2021` - an :math:`\star_{\mathbf{M}}`-product based 36 | dimensionality reduction method for multi-way data. 37 | 38 | You can find the software `on github `_. 39 | 40 | 41 | 42 | --------------------------------------------------------- 43 | 44 | 45 | **Installation** 46 | ================ 47 | 48 | Conda install, with the great help of the conda-forge team: 49 | 50 | .. code:: bash 51 | 52 | conda install -c conda-forge mprod-package 53 | 54 | The conda-forge packages are available for Linux, OS X, and Windows 64 bit. Local testing was done only on Linux. 55 | 56 | PyPI install, presuming you have requirements installed (numpy, scipy, pandas, scikit-learn) installed: 57 | 58 | .. code:: bash 59 | 60 | pip install mprod-package 61 | 62 | ------------------------------------------------------------- 63 | 64 | Scientific context 65 | ------------------ 66 | 67 | *We live in a multi-dimensional world, immersed in huge volumes of data. This data often involves complex interlinked 68 | structures that span across multiple dimensions. 
Processes and phenomena also exhibit multi-dimensional behavior, 69 | requiring their models to operate in high dimensional settings*\ . 70 | 71 | *Typically, we use matrix algebra to manipulate data, in so-called vector embedded spaces. But such representations 72 | usually don’t take into account the underlying integrity of an object’s dimension, either missing out on high-order 73 | links that go beyond pairwise relations or requiring an overhead in encoding such relations. This is where tensor 74 | algebra comes into play, addressing multiple dimensions*\ . 75 | 76 | *But there is a problem. Despite a broad consensus, distilled over centuries of mathematical research, for matrix 77 | algebra, there is no such standard for its multidimensional counterpart, tensor algebra. There have been several 78 | propositions for tensor algebra frameworks over the years* :footcite:p:`Kolda2009`. *Existing techniques that decompose 79 | tensor constructs into simpler tangible entities have limitations and inconsistencies compared to matrix algebra* 80 | :footcite:p:`Hitchcock1927,DeLathauwer2000,Oseledets2011,Tuck1963a`. *These issues have been hindering broad 81 | adoption of tensor algebra into mainstream use*\ . 82 | 83 | **The tensor-tensor** :math:`\star_{\bf{M}}`\ **-product framework aims to change that**\ . 84 | 85 | *The paper* “**Tensor-Tensor Algebra for Optimal Representation and Compression of Multiway Data**” 86 | :footcite:p:`Kilmer` *describes a way to bridge the gap between matrix and tensor algebra, resulting in new algebraic 87 | constructs that natively represent and manipulate high-dimensional entities, while preserving their multi-order 88 | integrity*\ . 89 | 90 | -- \ **Lior Horesh, IBM research** :footcite:p:`LHoresh` 91 | 92 | ------------------------- 93 | 94 | 95 | ..
toctree:: 96 | :caption: Contents 97 | 98 | examples/examples 99 | modules/classes 100 | 101 | ------------------------- 102 | 103 | Indices and tables 104 | ================== 105 | 106 | * :ref:`genindex` 107 | * :ref:`modindex` 108 | * :ref:`search` 109 | 110 | ---------------------- 111 | 112 | .. footbibliography:: 113 | -------------------------------------------------------------------------------- /mprod/decompositions/_tsvdm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple, Dict 3 | 4 | from mprod._base import NumpynDArray, MatrixTensorProduct 5 | 6 | 7 | def svdm(tens_a: np.ndarray, fun_m: MatrixTensorProduct, inv_m: MatrixTensorProduct 8 | , hats: bool = False) \ 9 | -> Tuple[NumpynDArray, NumpynDArray, NumpynDArray]: 10 | """ 11 | The svdm function is a helper function for computing the tsvdmII. 12 | This function does the **THIN** tsvdm: 13 | ``u,s,b = tsvdm(tensor_a, m, inv_m)`` where ``u,v`` are 14 | ``(m,k,n)`` and ``(p,k,n)`` M-orthogonal tensors and ``s`` 15 | is an f-diagonal tensor of shape ``(k,k,n)`` and ``k=min(p,m)`` 16 | 17 | Parameters 18 | ---------- 19 | tens_a: np.ndarray 20 | Tensor of shape ``(m,p,n)`` 21 | fun_m: MatrixTensorProduct 22 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers 23 | inv_m: MatrixTensorProduct 24 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers. 25 | This operation is the inverse of ``fun_m`` 26 | hats: bool 27 | Setting this to ``True`` will cause the function to return the tsvdm 28 | factors in the tensor domain transform. 
29 | 30 | Returns 31 | ------- 32 | tens_u: np.ndarray 33 | M-orthogonal tensor of shape ``(m,k,n)`` 34 | tens_s: np.ndarray 35 | A ``(k,n)`` matrix representation of the f-diagonal tensor of 36 | shape ``(k,k,n)`` 37 | tens_v: np.ndarray 38 | M-orthogonal Tensor of shape ``(p,k,n)`` 39 | 40 | """ 41 | m, p, n = tens_a.shape 42 | a_hat = fun_m(tens_a) 43 | 44 | # The code bellow is a super efficient numpy trick for performing the following 45 | # 46 | # u_hat = np.zeros((m, m, n)) 47 | # s_hat = np.zeros((m, p, n)) 48 | # v_hat = np.zeros((p, p, n)) 49 | # 50 | # for i in range(n): 51 | # uu, ss, vt = np.linalg.svd(a_hat[:, :, i], full_matrices=False) 52 | # 53 | # us1, us2 = uu.shape 54 | # vs1, vs2 = vt.shape 55 | # 56 | # ssize = ss.size 57 | # s_hat[:ssize, :ssize, i] = np.diag(ss) 58 | # u_hat[:us1, :us2, i] = uu.copy() 59 | # v_hat[:vs2, :vs1, i] = vt.T.copy() 60 | 61 | u_hat, s_hat, v_hat = np.linalg.svd(a_hat.transpose(2, 0, 1), full_matrices=False) 62 | u_hat, s_hat, v_hat = u_hat.transpose(1, 2, 0), s_hat.transpose(), v_hat.transpose(2, 1, 0) 63 | 64 | # sreshape = s_hat.copy().reshape(1, m, n) 65 | # sreshape = sreshape.transpose(1, 0, 2) 66 | # idreshape = np.eye(m, p).reshape(m, p, 1) 67 | 68 | # s_hat = idreshape @ sreshape 69 | 70 | if hats: 71 | return u_hat, s_hat, v_hat 72 | 73 | u = inv_m(u_hat) 74 | v = inv_m(v_hat) 75 | s = inv_m(s_hat) 76 | 77 | return u, s, v 78 | 79 | 80 | def tsvdmii(tens_a: NumpynDArray, 81 | fun_m: MatrixTensorProduct, 82 | inv_m: MatrixTensorProduct, 83 | gamma: float = 1, 84 | n_components: int = None) -> \ 85 | Tuple[Dict[int, NumpynDArray], Dict[int, NumpynDArray], Dict[int, NumpynDArray], float, Dict[int, int], int]: 86 | assert not ((gamma is not None) and ( 87 | n_components is not None)), "Arguments gamma and n_components are mutually exclusive" 88 | assert (gamma is not None) or ( 89 | n_components is not None), "Exactely one of arguments gamma, n_components must be defined" 90 | 91 | m, p, n = tens_a.shape 92 | 
93 | # execute full decomposition 94 | u_hat, s_hat, v_hat = svdm(tens_a, fun_m, inv_m, hats=True) 95 | 96 | # compute variation in the decomposition 97 | # var is the sorted (hat) squared singular values 98 | # cumm_var is scre 99 | # w_idx is an array of indices for `cumm_var` and `var` 100 | # total_var is the (float) sum of squared singular values `var` 101 | var = np.concatenate([np.diagonal(s_hat[:, :, i]) for i in range(n)]) ** 2 102 | var = np.sort(var.reshape(-1))[::-1] 103 | cumm_var = var.cumsum(axis=0) 104 | w_idx = np.arange(0, cumm_var.size, dtype=int) 105 | total_variance = var.sum() 106 | 107 | # Find truncation threshold according to 108 | if gamma is not None: 109 | reduced_ind = w_idx[(cumm_var / total_variance) > gamma] 110 | if reduced_ind.size == 0: 111 | j = 0 112 | else: 113 | j = reduced_ind.min() 114 | else: 115 | j = n_components 116 | 117 | tau = np.sqrt(var[j - 1]) 118 | rho = {} 119 | 120 | u_hat_rho_dict = {} 121 | s_hat_rho_dict = {} 122 | v_hat_rho_dict = {} 123 | 124 | max_rho = 0 125 | r = 0 126 | for i in range(n): 127 | diag_shat_i = np.diagonal(s_hat[:, :, i]) 128 | tau_mask = (diag_shat_i >= tau) 129 | rho_i = tau_mask.sum() 130 | if rho_i > 0: 131 | u_hat_rho_dict[i] = u_hat[:, :rho_i, i].copy() 132 | s_hat_rho_dict[i] = s_hat[:rho_i, :rho_i, i].copy() 133 | v_hat_rho_dict[i] = v_hat[:, :rho_i, i].copy() 134 | rho[i] = rho_i 135 | 136 | if rho_i > max_rho: 137 | max_rho = rho_i 138 | r += rho_i 139 | 140 | if n_components is not None: 141 | assert r == n_components, f"expected multirank {n_components} got {r}" 142 | 143 | return u_hat_rho_dict, s_hat_rho_dict, v_hat_rho_dict, total_variance, rho, r 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mprod_package 2 | 3 | [![Build and 
test](https://github.com/UriaMorP/mprod_package/actions/workflows/build.yaml/badge.svg)](https://github.com/UriaMorP/mprod_package/actions/workflows/build.yaml) 4 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mprod-package) 5 | [![Documentation Status](https://readthedocs.org/projects/mprod-package/badge/?version=latest)](https://mprod-package.readthedocs.io/en/latest/?badge=latest) 6 | ![Conda](https://img.shields.io/conda/dn/conda-forge/mprod-package?label=downloads%28conda-forge%29) 7 | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/mprod-package.svg)](https://anaconda.org/conda-forge/mprod-package) 8 | [![Pypi Downloads](https://img.shields.io/pypi/dm/mprod-package.svg?label=Pypi%20downloads)]( 9 | https://pypi.org/project/mprod-package/) 10 | 11 | 12 | Software implementation for tensor-tensor m-product framework [[1]](#1). 13 | The library currently contains tubal QR and tSVDM decompositions, and the TCAM method for dimensionality reduction. 14 | 15 | 16 |

17 | 18 |

19 | 20 | ## Installation 21 | 22 | ### Conda 23 | The `mprod-package` is hosted in [conda-forge](https://conda-forge.org/) channel. 24 | 25 | ``` 26 | conda install -c conda-forge mprod-package 27 | ``` 28 | 29 | ### pip 30 | ``` 31 | pip install mprod-package 32 | ``` 33 | See `mprod-package`s [pypi entry](https://pypi.org/project/mprod-package/) 34 | 35 | ### From source 36 | 37 | * Make sure that all dependencies listed in `requirements.txt` file are installed . 38 | * Clone the repository, then from the package directory, run 39 | ``` 40 | pip install -e . 41 | ``` 42 | 43 | The dependencies in `requirements.txt` are stated with exact versions used for locally test `mprod-package`, these packages were obtained from conda-forge channel. 44 | 45 | ```python 46 | import pandas as pd 47 | 48 | file_path = "https://raw.githubusercontent.com/UriaMorP/" \ 49 | "tcam_analysis_notebooks/main/Schirmer2018/Schirmer2018.tsv" 50 | 51 | data_table = pd.read_csv(file_path, index_col=[0,1], sep="\t" 52 | , dtype={'Week':int}) 53 | data_table = data_table.loc[:,data_table.median() > 1e-7] 54 | data_table.rename(columns= {k:f"Fature_{e+1}" for e,k in enumerate(data_table.columns)}, inplace=True) 55 | data_table.shape 56 | 57 | %matplotlib inline 58 | ``` 59 | 60 | ## How to use `TCAM` 61 | 62 | Given with a `pandas.DataFrame` of the data as below, with 2-level index, where the first level as subject identifier (mouse, human, image) and the second level of the index denotes sample repetition identity, in this case - the week during experiment, in which the sample was collected. 63 | 64 | 65 | ```python 66 | display(data_table.iloc[:2,:2].round(3)) 67 | 68 | ``` 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
Fature_1Fature_2
SubjectIDWeek
P_1034300.0010.023
40.0200.000
100 | 101 | 102 | ### Shape the data into tensor 103 | 104 | We use the `table2tensor` helper function to transform a 2-level (multi)-indexed `pandas.DataFrame` into a 3rd order tensor. 105 | 106 | 107 | ```python 108 | from mprod import table2tensor 109 | data_tensor, map1, map3 = table2tensor(data_table) 110 | ``` 111 | 112 | To inspect `table2tensor` operation, we use the resulting *\"mode mappings\"*; `map1` and `map3` associating each line in the input table to it's coordinates in the resulting tensor. 113 | In the following example, we use the mappings to extract the tensor coordinates corresponding to subject P\_7218's sample from week 52 114 | 115 | 116 | ```python 117 | (data_tensor[map1['P_7218'],:, map3[52]] == data_table.loc[('P_7218',52)].values).all() # True 118 | ``` 119 | 120 | ### Applying `TCAM` 121 | 122 | ```python 123 | from mprod.dimensionality_reduction import TCAM 124 | 125 | tca = TCAM() 126 | tca_trans = tca.fit_transform(data_tensor) 127 | ``` 128 | 129 | And that's all there is to it... Really! 130 | 131 | Note how similar the code above to what we would have written if we were to apply scikit-lean's `PCA` to the initial tabular data: 132 | 133 | 134 | ```python 135 | from sklearn.decomposition import PCA 136 | 137 | pca = PCA() 138 | pca_trans = pca.fit_transform(data_table) 139 | ``` 140 | 141 | The similarity between `TCAM`s interface to that of scikit-learn's `PCA` is not coincidental. 142 | We did our best in order to make `TCAM` as familiar as possible, and allow for high compatibility of `TCAM` with the existing Python ML framework. 
143 | 144 | ### Accessing properties of the transformation 145 | 146 | 147 | ```python 148 | tca_loadings = tca.mode2_loadings # Obtain TCAM loadings 149 | pca_loadings = pca.components_ # Obtain PCA loadings 150 | 151 | tca_var = tca.explained_variance_ratio_*100 # % explained variation per TCA factor 152 | pca_var = pca.explained_variance_ratio_*100 # % explained variation per PCA factor 153 | 154 | tca_df = pd.DataFrame(tca_trans) # Cast TCA scores to dataframe 155 | tca_df.rename(index = dict(map(reversed, map1.items())) 156 | , inplace = True) # use the inverse of map1 to denote each row 157 | # of the TCAM scores with its subject ID 158 | 159 | pca_df = pd.DataFrame(pca_trans) # Cast PCA scores to dataframe 160 | pca_df.index = data_table.index # annotate PC scores with sample names 161 | ``` 162 | 163 | 164 | 165 | 166 | 167 | ## References 168 | [1] 169 | Misha E. Kilmer, Lior Horesh, Haim Avron, and Elizabeth Newman. Tensor-tensor algebra for optimal representation and compression of multiway data. Proceedings of the National Academy of Sciences, 118(28):e2015851118, jul 2021. 170 | -------------------------------------------------------------------------------- /mprod/_pytester.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pytest test running. 3 | 4 | This module implements the ``test()`` function for modules. The usual 5 | boiler plate for doing that is to put the following in the module 6 | ``__init__.py`` file:: 7 | 8 | from mprod._pytesttester import PytestTester 9 | test = PytestTester(__name__) 10 | del PytestTester 11 | 12 | 13 | Warnings filtering and other runtime settings should be dealt with in the 14 | ``pytest.ini`` file in the numpy repo root. The behavior of the test depends on 15 | whether or not that file is found as follows: 16 | 17 | * ``pytest.ini`` is present (develop mode) 18 | All warnings except those explicitly filtered out are raised as error.
class PytestTester:
    """
    Pytest test runner, exposed as ``test`` in a package's namespace.

    A test function is typically added to a package's ``__init__.py`` like so::

        from mprod._pytesttester import PytestTester
        test = PytestTester(__name__).test
        del PytestTester

    Calling this test function finds and runs all tests associated with the
    package and all its sub-packages.

    Attributes
    ----------
    module_name : str
        Dotted import path of the package to test.

    Parameters
    ----------
    module_name : str
        The name of the module to test.

    Notes
    -----
    This class is vendored from ``numpy._pytesttester``; it is not publicly
    exposed because it performs some numpy-specific warning suppression.
    """

    def __init__(self, module_name: str) -> None:
        # Resolved through sys.modules when the runner is invoked, so the
        # package must already be imported by then.
        self.module_name = module_name

    def __call__(self, label='fast', verbose=1, extra_argv=None,
                 doctests=False, coverage=False, durations=-1, tests=None) -> bool:
        """
        Run tests for the module using pytest.

        Parameters
        ----------
        label : {'fast', 'full'}, optional
            Identifies the tests to run. When set to 'fast', tests decorated
            with ``pytest.mark.slow`` are skipped; when 'full', the slow
            marker is ignored.
        verbose : int, optional
            Verbosity value for test outputs, in the range 1-3. Default is 1.
        extra_argv : list, optional
            List with any extra arguments to pass to pytest.
        doctests : bool, optional
            Not supported; raises ``ValueError`` when True.
        coverage : bool, optional
            If True, report coverage of the tested package. Default is False.
            Requires installation of (pip) pytest-cov.
        durations : int, optional
            If < 0, do nothing; if 0, report time of all tests; if > 0,
            report the time of that many slowest tests. Default is -1.
        tests : test or list of tests
            Tests to be executed with pytest '--pyargs'.

        Returns
        -------
        result : bool
            True on success, False otherwise.

        Examples
        --------
        >>> result = mprod.lib.test() #doctest: +SKIP
        ...
        1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds
        >>> result
        True
        """
        import pytest
        import warnings

        module = sys.modules[self.module_name]
        module_path = os.path.abspath(module.__path__[0])

        # setup the pytest arguments; "-l" shows locals in tracebacks
        pytest_args = ["-l"]

        # offset verbosity. The "-q" cancels a "-v".
        pytest_args += ["-q"]

        # Import numpy.distutils.cpuinfo here with all warnings forced on, so
        # its import-time warnings fire now and the cpuinfo filter added below
        # can suppress them during the test run itself.
        # NOTE(review): numpy.distutils is deprecated and removed in recent
        # Python/NumPy combinations -- confirm this import still resolves.
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            from numpy.distutils import cpuinfo

        # Filter out annoying import messages. Want these in both develop and
        # release mode.
        pytest_args += [
            "-W ignore:Not importing directory",
            "-W ignore:numpy.dtype size changed",
            "-W ignore:numpy.ufunc size changed",
            "-W ignore::UserWarning:cpuinfo",
        ]

        # When testing matrices, ignore their PendingDeprecationWarnings
        pytest_args += [
            "-W ignore:the matrix subclass is not",
            "-W ignore:Importing from numpy.matlib is",
        ]

        if doctests:
            raise ValueError("Doctests not supported")

        if extra_argv:
            pytest_args += list(extra_argv)

        if verbose > 1:
            # pytest's "-v" flags stack; one level was already offset by "-q"
            pytest_args += ["-" + "v" * (verbose - 1)]

        if coverage:
            pytest_args += ["--cov=" + module_path]

        if label == "fast":
            # not importing at the top level to avoid circular import of module
            from numpy.testing import IS_PYPY
            if IS_PYPY:
                pytest_args += ["-m", "not slow and not slow_pypy"]
            else:
                pytest_args += ["-m", "not slow"]

        elif label != "full":
            # any other label is forwarded as a pytest marker expression
            pytest_args += ["-m", label]

        if durations >= 0:
            pytest_args += ["--durations=%s" % durations]

        if tests is None:
            tests = [self.module_name]

        pytest_args += ["--pyargs"] + list(tests)

        try:
            # pytest.main returns an exit code; 0 means all tests passed
            code = pytest.main(pytest_args)
        except SystemExit as exc:
            # some pytest versions exit via SystemExit instead of returning
            code = exc.code

        return code == 0
class PytestTester:
    """
    Callable pytest runner attached to a package namespace.

    Instances are typically published as ``test`` from a package's
    ``__init__.py``::

        from numpy._pytesttester import PytestTester
        test = PytestTester(__name__).test
        del PytestTester

    Invoking the instance collects and runs every test belonging to the
    package and its sub-packages.

    Attributes
    ----------
    module_name : str
        Dotted import path of the package under test.

    Parameters
    ----------
    module_name : module name
        The name of the module to test.

    Notes
    -----
    Not publicly exposed: it performs some numpy-specific warning
    suppression, unlike the previous ``nose``-based implementation.
    """

    def __init__(self, module_name):
        self.module_name = module_name

    def __call__(self, label='fast', verbose=3, extra_argv=None,
                 doctests=False, coverage=False, durations=-1, tests=None):
        """
        Run the package's test suite with pytest.

        Parameters
        ----------
        label : {'fast', 'full'}, optional
            'fast' skips tests marked ``pytest.mark.slow``; 'full' runs them.
        verbose : int, optional
            Output verbosity, in the range 1-3. Default is 3.
        extra_argv : list, optional
            Extra command-line arguments forwarded to pytest.
        doctests : bool, optional
            Not supported; raises ``ValueError`` when True.
        coverage : bool, optional
            When True, report coverage of the tested package (requires
            pytest-cov). Default is False.
        durations : int, optional
            Negative: no timing report; 0: report all test durations;
            positive: report that many slowest tests. Default is -1.
        tests : test or list of tests
            Explicit test targets handed to pytest via '--pyargs'.

        Returns
        -------
        result : bool
            True when the pytest run succeeded, False otherwise.

        Examples
        --------
        >>> result = mprod.lib.test() #doctest: +SKIP
        ...
        1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds
        >>> result
        True
        """
        import pytest
        import warnings

        target_module = sys.modules[self.module_name]
        target_path = os.path.abspath(target_module.__path__[0])

        # "-l" shows locals in tracebacks; "-q" offsets one "-v" of verbosity.
        cli_args = ["-l", "-q"]

        # Import numpy.distutils.cpuinfo up front (all warnings enabled) so
        # its noisy import-time warnings fire here, where the cpuinfo filter
        # below can silence them for the actual run.
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            from numpy.distutils import cpuinfo

        # Silence known-noisy import warnings in develop and release mode.
        cli_args.extend([
            "-W ignore:Not importing directory",
            "-W ignore:numpy.dtype size changed",
            "-W ignore:numpy.ufunc size changed",
            "-W ignore::UserWarning:cpuinfo",
        ])

        # np.matrix usage in tests raises PendingDeprecationWarnings; ignore.
        cli_args.extend([
            "-W ignore:the matrix subclass is not",
            "-W ignore:Importing from numpy.matlib is",
        ])

        if doctests:
            raise ValueError("Doctests not supported")

        if extra_argv:
            cli_args.extend(extra_argv)

        if verbose > 1:
            cli_args.append("-" + "v" * (verbose - 1))

        if coverage:
            cli_args.append("--cov=" + target_path)

        if label == "fast":
            # imported lazily to avoid a circular import at module load
            from numpy.testing import IS_PYPY
            marker = "not slow and not slow_pypy" if IS_PYPY else "not slow"
            cli_args.extend(["-m", marker])
        elif label != "full":
            cli_args.extend(["-m", label])

        if durations >= 0:
            cli_args.append("--durations=%s" % durations)

        selected = [self.module_name] if tests is None else list(tests)
        cli_args.append("--pyargs")
        cli_args.extend(selected)

        try:
            exit_status = pytest.main(cli_args)
        except SystemExit as exc:
            exit_status = exc.code

        return exit_status == 0
def x_m3(M: NumpynDArray) -> MatrixTensorProduct:
    """
    Build a picklable callable that multiplies tensors along mode 3 by ``M``,
    as required by the M-product definition.

    Parameters
    ----------
    M: np.ndarray
        A square matrix of shape `(n, n)`.

    Returns
    -------
    fun: Callable[[NumpynDArray], NumpynDArray]
        Picklable mapping acting on the (n dimensional) tube fibers of a
        tensor. A 2d input is treated as the matrix representation of an
        f-diagonal tensor (rows are tube fibers).
    """
    assert len(M.shape) == 2, "M must be a 2 dimensional matrix"
    assert M.shape[0] == M.shape[1], "M must be a square matrix"

    tube_size = M.shape[0]

    def fun(A: NumpynDArray) -> NumpynDArray:
        assert A.shape[-1] == tube_size, "The last dimension of A must be the same as the tube size "
        rank = len(A.shape)
        if rank == 2:
            # f-diagonal tensor stored as a matrix: each row is a tube fiber.
            return A @ M.T
        if rank == 3:
            m, p, n = A.shape
            # Flatten the first two modes so the mode-3 product becomes a
            # single matrix multiplication, then restore the layout.
            tubes_as_columns = A.transpose((2, 1, 0)).reshape(n, m * p)
            return (M @ tubes_as_columns).reshape((n, p, m)).transpose((2, 1, 0))
        raise NotImplementedError("We only work with 3d tensors for now!")

    return fun
def m_prod(tens_a: NumpynDArray,
           tens_b: NumpynDArray,
           fun_m: MatrixTensorProduct,
           inv_m: MatrixTensorProduct) -> NumpynDArray:
    """
    Compute the :math:`\\star_{\\mathbf{M}}` product of tensors `A` and `B`,
    where ``A.shape == (m,p,n)`` and ``B.shape == (p,r,n)``.

    Parameters
    ----------
    tens_a: array-like
        3'rd order tensor with shape `m x p x n`

    tens_b: array-like
        3'rd order tensor with shape `p x r x n`

    fun_m: MatrixTensorProduct, Callable[[NumpynDArray], NumpynDArray]
        Invertible linear mapping from `R^n` to `R^n`

    inv_m: MatrixTensorProduct, Callable[[NumpynDArray], NumpynDArray]
        Inverse of `fun_m` ( `fun_m(inv_m(x)) = inv_m(fun_m(x)) = x` )

    Returns
    -------
    tensor: array-like
        3'rd order tensor of shape `m x r x n`: the :math:`\\star_{\\mathbf{M}}`
        product of `A` and `B`
    """
    assert tens_a.shape[1] == tens_b.shape[0]
    assert tens_a.shape[-1] == tens_b.shape[-1]

    # Move to the transform (hat) domain, multiply facewise, and map back.
    hat_a, hat_b = fun_m(tens_a), fun_m(tens_b)
    facewise = np.einsum('mpi,pli->mli', hat_a, hat_b)
    return inv_m(facewise)
A_hat = funM(A) 184 | # B_hat = funM(B) 185 | # 186 | # calE_hat = np.einsum('mpi,pli->mli', A_hat, B_hat) 187 | # return invM(calE_hat) 188 | 189 | def tensor_mtranspose(tensor, mfun, minv): 190 | tensor_hat = mfun(tensor) 191 | tensor_hat_t = tensor_hat.transpose((1, 0, 2)) 192 | tensor_t = minv(tensor_hat_t) 193 | return tensor_t 194 | 195 | 196 | def _t_pinv_fdiag(F, Mfun, Minv) -> NumpynDArray: 197 | m, p, n = F.shape 198 | hat_f = Mfun(F) 199 | 200 | pinv_hat_f = np.zeros_like(hat_f) 201 | for i in range(n): 202 | fi_diag = np.diagonal(hat_f[:, :, i]).copy() 203 | fi_diag[(fi_diag ** 2) > 1e-6] = 1 / fi_diag[(fi_diag ** 2) > 1e-6] 204 | 205 | pinv_hat_f[:fi_diag.size, :fi_diag.size, i] = np.diag(fi_diag) 206 | 207 | pinv_f = Minv(pinv_hat_f) 208 | 209 | return tensor_mtranspose(pinv_f, Mfun, Minv) 210 | 211 | # # TODO: Is TensorArray needed ? 212 | # # noinspection PyPep8Naming 213 | # class TensorArray(np.ndarray): 214 | # def __new__(cls, input_array): 215 | # # Input array is an already formed ndarray instance 216 | # # We first cast to be our class type 217 | # obj = np.asarray(input_array).view(cls) 218 | # # add the new attribute to the created instance 219 | # # Finally, we must return the newly created object: 220 | # return obj 221 | # 222 | # @property 223 | # def TT(self): 224 | # return self.transpose((1, 0, 2)) 225 | # 226 | # def __array_finalize__(self, obj): 227 | # # see InfoArray.__array_finalize__ for comments 228 | # if obj is None: return 229 | # self.info = getattr(obj, 'info', None) 230 | -------------------------------------------------------------------------------- /mprod/_ml_helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List, Tuple, Dict, Mapping 3 | from ._base import NumpynDArray 4 | from sklearn.base import TransformerMixin, BaseEstimator 5 | import pandas as pd 6 | from itertools import product 7 | 8 | 9 | def table2tensor(table: 
def table2tensor(table: pd.DataFrame, missing_flag: bool = False) -> Tuple[np.ma.core.MaskedArray, Mapping, Mapping]:
    """
    Reshape an ``nm x p`` (``(samples x reps) x features``) 2-level
    multi-indexed dataframe into an ``m x p x n`` tensor
    ``(subjects, features, reps)``.

    Parameters
    ----------
    table: pd.DataFrame
        An ``nm x p`` table of samples x features. The index must be a
        2-level MultiIndex: level 0 is the subject id, level 1 the
        repetition id.

    missing_flag: `bool`, default = False
        When set to `False` (default), the function raises an error if some
        (subject, rep) combination is missing from the table. Setting it to
        `True` returns a tensor with the missing entries masked.

    Returns
    -------
    tensor: ndarray, np.ma.array
        3'rd order tensor `m x p x n` (subjects, features, reps). A masked
        array (fill value 0) when ``missing_flag`` is True.

    mode1_mapping : dict
        Maps each subject name to its mode1 (frontal) slice index.

    mode3_mapping : dict
        Maps each rep id to its mode3 (lateral) slice index.

    Raises
    ------
    KeyError
        If ``missing_flag`` is False and a (subject, rep) pair has no row.

    Examples
    --------
    >>> from mprod import table2tensor
    >>> tensor, mode1_mapping, mode3_mapping = table2tensor(table_data)  # doctest: +SKIP
    """
    samples_map, usamples = table.index.get_level_values(0).factorize()
    reps_map, ureps = table.index.get_level_values(1).factorize()

    m, p, n = usamples.size, table.shape[1], ureps.size

    # factorize assigns codes in order of appearance, so the i'th unique
    # label corresponds to slice i of the tensor.
    samples_map_dict = {name: i for i, name in enumerate(usamples)}
    reps_map_dict = {rep: i for i, rep in enumerate(ureps)}

    if missing_flag:
        # Start fully masked; each assignment below unmasks its entries.
        tensor = np.ma.array(np.zeros((m, p, n)), mask=np.ones((m, p, n)), fill_value=0)
        index_iterator = table.iterrows()
    else:
        # Iterate over the full (subject, rep) grid so a missing combination
        # surfaces as a KeyError from .loc; rows are only read, no copy needed.
        tensor = np.zeros((m, p, n))
        index_iterator = (((i, j), table.loc[(i, j)]) for i, j in product(usamples, ureps))

    try:
        for (m1, m3), val in index_iterator:
            tensor[samples_map_dict[m1], :, reps_map_dict[m3]] = val.values
    except KeyError as ke:
        # Chain the original lookup error so the offending key stays visible.
        raise KeyError("Discovered missing data in the table, which is not allowed by default. "
                       "To work with missing data and have a masked array returned, set missing_flag to True") from ke

    return tensor, samples_map_dict, reps_map_dict
# noinspection PyPep8Naming
# noinspection PyUnusedLocal
class MeanDeviationForm(TransformerMixin, BaseEstimator):
    """Standardize a data tensor by subtracting its mean sample.

    The mean deviation form of a tensor :math:`X \\in \\mathbb{R}^{m \\times p \\times n}`
    is calculated as:

    .. math::
        Z = X - U

    where `U` is the mean sample of `X`:

    .. math::
        U = \\frac{1}{m} \\sum_{i=1}^{m} X[i,:,:]

    Attributes
    ----------
    _mean_sample : ndarray of shape (1, p_features, n_repeats), or `None`
        The mean sample of the fitted dataset (`None` before ``fit``).

    Methods
    -------
    fit:
        Compute the mean sample of a training dataset.
    transform:
        Shift a dataset by the fitted mean sample.
    fit_transform:
        Compute the mean sample of a dataset and transform it to its mean
        deviation form.
    inverse_transform:
        Add the precomputed mean sample back to a dataset.
    """

    def __init__(self):
        # Mean sample of shape (1, p, n); populated by fit().
        self._mean_sample = None

    def _fit(self, X):
        # NOTE(review): np.nansum treats NaNs as 0 while the denominator is
        # still the full sample count m -- confirm this is intended rather
        # than a NaN-aware mean (np.nanmean).
        denum = X.shape[0]
        self._mean_sample = np.nansum(X, axis=0, keepdims=True) / denum

    def fit(self, X, y=None, **fit_param):
        """Compute the mean sample of a tensor.

        Parameters
        ----------
        X : {array-like} of shape (m_samples, p_features, n_repeats)
            The data used to compute the mean sample,
            used for later centering along the features-repeats axes.
        y : None
            Ignored.

        Returns
        -------
        self : object
            Fitted MeanDeviationForm object

        Examples
        --------
        >>> from mprod import MeanDeviationForm
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> mdf = MeanDeviationForm()
        >>> mdf = mdf.fit(X)
        """
        self._fit(X)
        return self

    def transform(self, X, y=None):
        """Perform standardization by centering.

        Parameters
        ----------
        X : array-like of shape (k_samples, p_features, n_repeats)
            The data to center along the features-repeats axes.
        y : None
            Ignored.

        Returns
        -------
        X_tr : ndarray of shape (k_samples, p_features, n_repeats)
            Transformed tensor.

        Examples
        --------
        >>> from mprod import MeanDeviationForm
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> y = np.random.randn(50,20,4)
        >>> mdf = MeanDeviationForm()
        >>> mdf_fit = mdf.fit(X)
        >>> yt = mdf.transform(y)
        """
        X_transform = X - self._mean_sample
        if isinstance(X_transform, np.ma.MaskedArray):
            # Fill masked (missing) entries with the array's fill value and
            # return a plain ndarray. The previous ``.filled().data``
            # returned a memoryview (ndarray.data), not an ndarray.
            return X_transform.filled()
        return X_transform

    def fit_transform(self, X, y=None, **fit_params):
        """Compute the mean sample of ``X`` and return its mean deviation form."""
        self.fit(X, y, **fit_params)
        return self.transform(X)

    def inverse_transform(self, Y):
        """Undo the centering of ``Y`` according to the fitted mean sample.

        Parameters
        ----------
        Y : array-like of shape (m_samples, p_features, n_repeats)
            Input data that will be shifted back.

        Returns
        -------
        Yt : ndarray of shape (m_samples, p_features, n_repeats)
            Transformed data.

        Examples
        --------
        >>> from mprod import MeanDeviationForm
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> mdf = MeanDeviationForm()
        >>> Xt = mdf.fit_transform(X)
        >>> mdf.inverse_transform(Xt) - X

        """
        Y_transform = Y + self._mean_sample
        if isinstance(Y_transform, np.ma.MaskedArray):
            # Same memoryview fix as in ``transform``.
            return Y_transform.filled()
        return Y_transform
# noinspection PyPep8Naming
class TCAM(TransformerMixin, BaseEstimator):
    """tsvdm based tensor component analysis (TCAM).

    Linear dimensionality reduction using tensor Singular Value Decomposition of the
    data to project it to a lower dimensional space. The input data is centered
    but not scaled for each feature before applying the tSVDM (using :mod:`mprod.MeanDeviationForm` ) .
    It uses the :mod:`mprod.decompositions.svdm` function as basis for the ``TSVDMII`` algorithm from Kilmer et. al.
    (https://doi.org/10.1073/pnas.2015851118) then offers a CP like transformations of the data accordingly.
    See https://arxiv.org/abs/2111.14159 for theoretical results and case studies, and the :ref:`Tutorials `
    for elaborated examples

    Parameters
    ----------
    n_components : int, float, default=None
        Number of components to keep.
        if n_components is not set all components are kept::

            n_components == min(m_samples, p_features) * n_reps - 1

        If ``0 < n_components < 1`` , select the number of components such that the
        amount of variance that needs to be explained is greater than the percentage specified
        by n_components. In case ``n_components >= 1`` is an integer then the estimated number
        of components will be::

            n_components_ == min(n_components, min(m_samples, p_features) * n_reps - 1)


    Attributes
    ----------
    n_components_ : int
        The estimated number of components. When n_components is set
        to a number between 0 and 1. this number is estimated from input data.
        Otherwise it equals the parameter n_components,
        or `min(m_samples, p_features) * n_reps -1` if n_components is None.

    explained_variance_ratio_ : ndarray of shape (`n_components_`,)
        The amount of variance explained by each of the selected components.

    mode2_loadings : ndarray (float) of shape (`p_features`, `n_components_`)
        A matrix representing the contribution (coefficient) of each feature in the original
        features space (2'nd mode of the tensor) to each of the TCAM factors.
        NOTE(review): the indexing in the property yields features on axis 0 and
        factors on axis 1 (i.e. ``(p_features, n_components_)``), not the transposed
        shape previously documented here.


    Methods
    -------
    fit:
        Compute the TCAM transformation for a given dataset
    transform:
        Transform a given dataset using a fitted TCAM
    fit_transform:
        Fit a TCAM to a dataset then return its TCAM transformation
    inverse_transform:
        Given points in the reduced TCAM space, compute the points pre-image in the original features space.


    """

    def __init__(self, fun_m: MatrixTensorProduct = None,
                 inv_m: MatrixTensorProduct = None,
                 n_components=None):
        # n_components may be: None (keep everything), a positive int (hard cap),
        # or a float in (0, 1] (explained-variance threshold).
        assert (type(n_components) in _int_types and (n_components >= 1)) or \
               ((type(n_components) in _float_types) and (0 < n_components <= 1)) \
               or (n_components is None), f"`n_components` must be positive integer or a float between 0 and 1" \
                                          f" or `None`, got {n_components} of type {type(n_components)}"

        # The transform and its inverse come as a pair; supplying only one of
        # them is rejected here (if both are None, `fit` picks a default pair).
        assert (fun_m is None) == (inv_m is None), "Only one of fun_m,inv_m is None. " \
                                                   "Both must be defined (or both None)"

        self.n_components = n_components

        self.fun_m = fun_m
        self.inv_m = inv_m
        # Centering helper; fitted in `fit` and reused by transform/inverse_transform.
        self._mdf = MeanDeviationForm()

    def _mprod(self, a, b) -> NumpynDArray:
        # M-product of two tensors under this estimator's (fun_m, inv_m) pair.
        return m_prod(a, b, self.fun_m, self.inv_m)

    def _fit(self, X: np.ndarray):
        """Core fitting routine; expects X already centered by `_mdf`.

        Computes the hat-domain tsvdm, orders all singular values globally,
        resolves ``n_components_`` and derives the per-slice multi-rank
        truncation of the decomposition.
        """
        # Largest number of usable factors (see class docstring).
        max_rank = self._n * min(self._m, self._p) - 1

        # hats=True: factors are requested in the transform ("hat") domain —
        # presumably the domain in which slices decouple; TODO confirm against svdm docs.
        self._hat_svdm = TensorSVDResults(*svdm(X, self.fun_m, self.inv_m, hats=True))

        # get factors order
        # `s` is stored lean (one column of singular values per frontal slice);
        # after the transpose, rows index slices and columns index the rank
        # position within a slice — confirmed below by how `_rrho` is built.
        diagonals = self._hat_svdm.s.transpose().copy()
        # Pair of index arrays (slice_idx, within_slice_idx) enumerating ALL
        # singular values in decreasing order of squared magnitude.
        self._factors_order = np.unravel_index(np.argsort(- (diagonals ** 2), axis=None), diagonals.shape)
        self._sorted_singular_vals = diagonals[self._factors_order]
        self._total_variation = (self._sorted_singular_vals ** 2).sum()
        self.explained_variance_ratio_ = ((self._sorted_singular_vals ** 2) / self._total_variation)

        # populate n_components if not given
        if self.n_components is None:
            self.n_components_ = max_rank
        elif type(self.n_components) in _int_types and self.n_components > 0:
            self.n_components_ = min(max_rank, self.n_components)
        elif type(self.n_components) in _float_types and self.n_components == 1.:
            # float 1.0 means "all the variance", i.e. keep everything.
            self.n_components_ = max_rank
        elif 0 < self.n_components < 1 and type(self.n_components) in _float_types:
            # Smallest prefix of the sorted factors whose cumulative share of
            # variance exceeds the requested fraction.
            var_cumsum = (self._sorted_singular_vals ** 2).cumsum()  # w in the paper
            w_idx = np.arange(0, var_cumsum.size, dtype=int)  # w index
            self.n_components_ = min(max_rank,
                                     w_idx[(var_cumsum / self._total_variation) > self.n_components].min() + 1)
        else:
            raise ValueError("Unexpected edge case for the value of `n_components`")

        # Always retain at least one factor.
        self.n_components_ = max(1, self.n_components_)

        # Truncated (slice_idx, within_slice_idx) ordering of the kept factors.
        self._n_factors_order = tuple([self._factors_order[0][:self.n_components_].copy(),
                                       self._factors_order[1][:self.n_components_].copy()])

        self.explained_variance_ratio_ = self.explained_variance_ratio_[:self.n_components_]
        # Multi-rank vector: for each slice, the deepest rank position among the
        # kept factors (+1 turns a 0-based index into a count).
        self._rrho = np.array([0 for _ in range(self._n)])
        for nn, rr in zip(*self._n_factors_order):
            self._rrho[nn] = max(self._rrho[nn], rr + 1)
        # self._rrho += 1
        # populate truncations
        # _tau = self._sorted_singular_vals[self.n_components_ + 1]
        # self._rrho = (diagonals > _tau).sum(axis=1)
        # Work on copies so the full decomposition in `_hat_svdm` stays intact.
        self._truncated_hat_svdm = TensorSVDResults(*self._hat_svdm.astuple())

        # Crop every factor to the largest per-slice rank...
        self._truncated_hat_svdm.u = self._truncated_hat_svdm.u[:, :self._rrho.max(), :]
        self._truncated_hat_svdm.s = self._truncated_hat_svdm.s[:self._rrho.max(), :]
        self._truncated_hat_svdm.v = self._truncated_hat_svdm.v[:, :self._rrho.max(), :]

        # ...then zero out, slice by slice, everything past that slice's own rank.
        for i, rho_i in enumerate(self._rrho):
            self._truncated_hat_svdm.u[:, rho_i:, i] = 0
            self._truncated_hat_svdm.s[rho_i:, i] = 0
            self._truncated_hat_svdm.v[:, rho_i:, i] = 0

        # Spatial-domain counterparts of the truncated factors.
        self._truncated_svdm = TensorSVDResults(self.inv_m(self._truncated_hat_svdm.u),
                                                self.inv_m(self._truncated_hat_svdm.s),
                                                self.inv_m(self._truncated_hat_svdm.v))

        # Entrywise pseudo-inverse of the truncated singular values (same 1e-6
        # tolerance as `_pinv_diag`); currently only consumed by commented-out
        # code paths below.
        self._truncS_pinv = self._truncated_svdm.s.copy()
        self._truncS_pinv[(self._truncS_pinv ** 2) <= 1e-6] = 0
        self._truncS_pinv[(self._truncS_pinv ** 2) > 1e-6] = 1 / self._truncS_pinv[(self._truncS_pinv ** 2) > 1e-6]

        return self

    # noinspection PyUnusedLocal
    def fit(self, X, y=None, **fit_params):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (m_samples, p_features, n_modes)
            Training data, where m_samples is the number of samples,
            p_features is the number of features and n_modes is the
            number of modes (timepoints/locations etc...)

        y : Ignored
            Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.


        Examples
        --------
        >>> from mprod.dimensionality_reduction import TCAM
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> tca = TCAM()
        >>> mdf = tca.fit(X)


        """

        assert len(X.shape) == 3, "X must be a 3'rd order tensor"
        self._m, self._p, self._n = X.shape

        # No transform pair supplied at construction: fall back to the package
        # default for tensors with n frontal slices.
        if self.fun_m is None:
            self.fun_m, self.inv_m = _default_transform(self._n)
        # Center the data (per feature) before decomposing.
        _X = self._mdf.fit_transform(X)

        return self._fit(_X)

    def _mode0_reduce(self, tU):
        # Scale each kept factor column by its singular value and stack the
        # columns in global factor order. NOTE(review): identical in body to
        # `_mode1_reduce`; neither is called by the active code paths visible here.
        return np.concatenate(
            [self._sorted_singular_vals[e] * tU[:, [fj], [fi]] for e, (fi, fj) in
             enumerate(zip(*self._n_factors_order))],
            axis=1)

    def _mode1_reduce(self, tV):
        # Same reduction as `_mode0_reduce`, kept for the (commented-out)
        # mode-1 projector below.
        return np.concatenate(
            [self._sorted_singular_vals[e] * tV[:, [fj], [fi]] for e, (fi, fj) in
             enumerate(zip(*self._n_factors_order))],
            axis=1)

    def _mode0_projector(self, X):
        """Project a (centered) tensor X onto the kept factors.

        Computes the facewise hat-domain product ``X_hat @ V_hat`` and gathers
        the scores at the kept (rank, slice) positions. Note the singular
        values are NOT divided out — the pinv(S) variants are commented out.
        """

        trunc_U, trunc_S, trunc_V = self._truncated_hat_svdm.astuple()
        # trunc_Spinv = _t_pinv_fdiag(trunc_S, self.fun_m, self.inv_m)
        # XV = self._mprod(X, trunc_V)
        # XVS = self._mprod(XV, trunc_Spinv)
        # XVS_hat = self.fun_m(XVS)

        # Move the slice axis to the front so `matmul` batches over slices,
        # then restore the (samples, rank, slice) layout.
        XV_hat = np.matmul(self.fun_m(X).transpose(2, 0, 1), trunc_V.transpose(2, 0, 1)).transpose(1, 2, 0)
        # Fancy-index the kept factors: axis 1 by rank position, axis 2 by slice.
        Y = XV_hat[:, self._n_factors_order[1], self._n_factors_order[0]].copy()

        # XV_hat = np.matmul(self.fun_m(X).transpose(2, 0, 1), trunc_V.transpose(2, 0, 1))
        # XVS_hat = XV_hat * _pinv_diag(trunc_S).transpose().reshape(self._n, 1, self._rrho.max())
        # XVS_hat = XVS_hat.transpose(1, 2, 0)
        # Y = XVS_hat[:, self._n_factors_order[1], self._n_factors_order[0]].copy()

        # X_transformed_0 = self._mprod(X, self._truncated_svdm.v)
        # X_transformed_0 = self._mprod(X_transformed_0, self._truncS_pinv)
        # X_transformed = self.fun_m(X_transformed_0)
        return Y

    # def _mode1_projector(self, X):
    #     truncU_mtranspose = tensor_mtranspose(self._truncated_svdm.u, self.fun_m, self.inv_m)
    #     X_transformed_0 = self._mprod(truncU_mtranspose, X)
    #     X_transformed_0 = tensor_mtranspose(self._mprod(self._truncS_pinv, X_transformed_0), self.fun_m, self.inv_m)
    #     X_transformed = self.fun_m(X_transformed_0)
    #     return self._mode1_reduce(X_transformed)

    def transform(self, X):
        """Apply mode-1 dimensionality reduction to X.

        X is projected on the first mode-1 tensor components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like of shape (m_samples, p_features, n_modes)
            Training data, where m_samples is the number of samples,
            p_features is the number of features and n_modes is the
            number of modes (timepoints/locations etc...)

        Returns
        -------
        X_new : array-like of shape (m_samples, `n_components_`)
            Projection of X in the first principal components, where m_samples
            is the number of samples and n_components is the number of the components.

        """
        # Validate order and the feature/mode dimensions against the fitted shape,
        # center with the training-set statistics, then project.
        # (Naming note: the docstring's "mode-1" is 1-based; `_mode0_projector` is 0-based.)
        _assert_order_and_mdim(X, 'X', 3, [(1, self._p), (2, self._n)])
        return self._mode0_projector(self._mdf.transform(X))

    @property
    def mode2_loadings(self):
        """The weights driving the variation in each of the obtained factors with respect to
        each feature.

        Returns an ndarray of shape (`p_features`, `n_components_`): column j holds
        the hat-domain right-factor column of the j'th kept factor.
        """

        return self._truncated_hat_svdm.v[:, self._n_factors_order[1], self._n_factors_order[0]].copy()

    def fit_transform(self, X: np.ndarray, y=None, **fit_params):

        """Fit the model with X and apply the dimensionality reduction on X.

        Parameters
        ----------
        X : array-like of shape (m_samples, p_features, n_modes)
            Training data, where m_samples is the number of samples,
            p_features is the number of features and n_modes is the
            number of modes (timepoints/locations etc...)

        y : Ignored
            Ignored.

        Returns
        -------
        X_new : ndarray of shape (m_samples, `n_components_`)
            Transformed values.

        """

        self.fit(X)
        return self.transform(X)

    # noinspection PyPep8Naming
    def inverse_transform(self, Y: NumpynDArray):
        """
        Inverts TCAM scores back to the original features space

        Parameters
        ----------
        Y: np.ndarray
            2d array with shape (k, `n_components_`)

        Returns
        -------
        Y_inv: NumpynDArray
            3rd order tensor that is the inverse transform of Y to the original features space

        """

        trunc_U, trunc_S, trunc_V = self._truncated_hat_svdm.astuple()

        # Suppose YY = X * V * pinv(S)
        # and the matrix Y is an ordering of YYs columns according to the factors order

        # Scatter the scores back to their hat-domain (rank, slice) positions;
        # entries of factors that were truncated away stay zero.
        YY_hat = np.zeros((Y.shape[0], self._rrho.max(), self._n))
        YY_hat[:, self._n_factors_order[1], self._n_factors_order[0]] = Y.copy()
        # YYS_hat = YY_hat.transpose(2, 0, 1) * trunc_S.transpose().reshape(self._n, 1, self._rrho.max())
        # Facewise product with V_hat^T (note transpose(2, 1, 0) swaps the two
        # matrix axes per slice), then return to the spatial domain.
        X_hat = np.matmul(YY_hat.transpose(2, 0, 1), trunc_V.transpose(2, 1, 0)).transpose(1, 2, 0)
        XX = self.inv_m(X_hat)

        # Note that
        # YY*S*V' = X * V * pinv(S) * S * V'
        #         = X * V * (JJ) * V'
        #         = X * (V * JJ) * V'
        #         = X * (VV) * V'
        #         = X * (JJ) \approx X
        #
        # where JJ is "almost" the identity tensor


        # #################################### OLD CODE #################################################
        # YY_hat = np.zeros((trunc_U.shape[0], trunc_U.shape[1], trunc_U.shape[-1]))                    #
        # YY_hat[:, self._n_factors_order[1], self._n_factors_order[0]] = Y.copy()                      #
        # YY = self.inv_m(YY_hat)  # get YY from YY_hat                                                 #
        # YYs = self._mprod(YY, trunc_S)  # YY*S                                                        #
        # Yinv = self._mprod(YYs, tensor_mtranspose(trunc_V, self.fun_m, self.inv_m))  # YY*S*V'        #
        # # return self._mdf.inverse_transform(Yinv)                                                    #
        # ###############################################################################################

        # Finally undo the mean-deviation centering applied in `fit`.
        return self._mdf.inverse_transform(XX)
"$\\newcommand{\\tQt}{\\tQ^{\\T}}$\n", 42 | "$\\newcommand{\\thQ}{\\widehat{\\tQ}}$\n", 43 | "$\\newcommand{\\thQt}{\\thQ^{\\T}}$\n", 44 | "$\\newcommand{\\tV}{\\tens{V}}$\n", 45 | "$\\newcommand{\\tVt}{\\tV^{\\T}}$\n", 46 | "$\\newcommand{\\thV}{\\widehat{\\tV}}$\n", 47 | "$\\newcommand{\\thVt}{\\thV^{\\T}}$\n", 48 | "$\\newcommand{\\tU}{\\tens{U}}$\n", 49 | "$\\newcommand{\\tUt}{\\tU^{\\T}}$\n", 50 | "$\\newcommand{\\thU}{\\widehat{\\tU}}$\n", 51 | "$\\newcommand{\\thUt}{\\thU^{\\T}}$\n", 52 | "$\\newcommand{\\tS}{\\tens{S}}$\n", 53 | "$\\newcommand{\\tSt}{\\tS^{\\T}}$\n", 54 | "$\\newcommand{\\thS}{\\widehat{\\tS}}$\n", 55 | "$\\newcommand{\\thSt}{\\thS^{\\T}}$\n", 56 | "$\\newcommand{\\hsigma}{\\hat{\\sigma}}$\n", 57 | "$\\newcommand{\\rnk}{\\operatorname{rank}}$\n", 58 | "$\\newcommand{\\rrho}{\\boldsymbol{\\rho}}$\n", 59 | "$\\newcommand{\\TNorm}[1]{\\|#1\\|_{2}}$\n", 60 | "$\\newcommand{\\FNorm}[1]{\\|#1\\|_{F}}$\n", 61 | "$\\newcommand{\\NNorm}[1]{\\|#1\\|_{*}}$\n", 62 | "$\\newcommand{\\FNormS}[1]{\\FNorm{#1}^2}$\n", 63 | "$\\newcommand{\\TNormS}[1]{\\TNorm{#1}^2}$" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "The main functionality of ``mprod-package`` is factorization of tensors, that is, expressing a tensor $\\tA \\in \\RR^{d_1 \\xx ... \\xx d_N}$ as a product of other, \"simpler\" tensors. \n", 71 | "For this aim, one must first obtain some notion of tensor-tensor multiplication.\n", 72 | "The \"M-product\" (denoted by $\\mm$ ), defined in Kilmer et al., refers to a \"family\" of tensor-tensor products, and provides the notion of multiplication which enables the factorization of tensors. \n", 73 | "Here, we briefly walk through the steps of $\\mm$-product's formal construction. " 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# The M-product\n", 81 | "\n", 82 | "We begin with some definitions.
\n", 83 | "Let $\\matM$ be an $n\\xx n$ unitary matrix ($\\matM \\matMt = \\mat{I}_n = \\matMt \\matM$), and a tensor $\\tA \\in \\RR^{\\mpn}$. \n", 84 | "We define the **domain transform** specified by $\\matM$ as $\\thA := \\tA \\tsM$, where $\\tsM$ denotes the tensor-matrix multiplication of applying $\\matM$ to each of the tensor $n$ dimensional tube fibers ($\\tA_{i,j,:}$).\n", 85 | "\n", 86 | "A practical demonstration using `scipy` and `numpy` libraries:" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "import numpy as np\n", 96 | "from scipy.stats import ortho_group # used for sampling random unitary matrices \n", 97 | " # from the Haar distribution\n", 98 | "\n", 99 | "m, p, n = 10, 5, 8\n", 100 | "\n", 101 | "A = np.random.randn(m, p, n) # generate a random tensor\n", 102 | "M = ortho_group.rvs(n) # random sample unitary M\n", 103 | "\n", 104 | "A_hat = np.zeros_like(A)\n", 105 | "for i in range(m):\n", 106 | " for j in range(p):\n", 107 | " A_hat[i,j,:] = M @ A[i,j,:]" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | ".. 
attention::\n", 115 | " The tensor-matrix product implementation is much more efficient than the above for loop\n", 116 | "\n", 117 | "\n", 118 | "\n", 119 | "The **transpose** of a real $\\mpn$ tensor $\\tA$ with respect to $\\matM$, denoted by $\\tA^{\\T}$, is a $\\pmn$ tensor for which \n", 120 | "$$[\\widehat{\\tA^{\\T}}]_{:,:,i} = [\\thA^{\\T}]_{:,:,i} = {[\\thA]_{:,:,i}}^{\\T}$$\n", 121 | "\n", 122 | "Given two tensors $\\tA \\in \\RR^{\\mpn}$ and $\\tB \\in \\RR^{p \\xx r \\xx n}$ , the facewise tensor-tensor product of $\\tA$ and $\\tB$, denoted by $\\tA \\vartriangle \\tB$ , is the $m \\xx r \\xx n$ tensor for which \n", 123 | "$$[\\tA \\vartriangle \\tB]_{:,:,i} = \\tA_{:,:,i} \\tB_{:,:,i}$$ \n", 124 | "\n", 125 | "The $\\mm$ **-product** of $\\tA \\in \\RR^{\\mpn}$ and $\\tB \\in \\RR^{p \\xx r \\xx n}$ is defined by \n", 126 | "$$\\tA \\mm \\tB := (\\thA \\vartriangle \\thB) \\tsMinv \\in \\RR^{m \\xx r \\xx n}$$ \n", 127 | "\n", 128 | "\n", 129 | "The `mprod-package` offers utility functions like `m_prod` implementing $\\mm$ as well as random and spectral analysis based generators of unitary transforms" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 3, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "129.30020497750468\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "from mprod import m_prod\n", 147 | "from mprod import generate_haar, generate_dct\n", 148 | "\n", 149 | "funm_haar, invm_haar = generate_haar(n) # Utility wrapper arround \n", 150 | " # scipy.stats.ortho_group \n", 151 | "funm_dct, invm_dct = generate_dct(n) # Generates dct and idct transforms using scipy's\n", 152 | " # fft module. 
the default dct type is 2\n", 153 | "\n", 154 | "# generate random tensor B \n", 155 | "r = 15\n", 156 | "B = np.random.randn(p,r,n)\n", 157 | "\n", 158 | "# Multiply A and B with respect to a randomly sampled M\n", 159 | "C_haar = m_prod(A,B,funm_haar, invm_haar)\n", 160 | "\n", 161 | "# Multiply A and B with respect to M = dct\n", 162 | "C_dct = m_prod(A,B,funm_dct, invm_dct)\n", 163 | "\n", 164 | "print(np.linalg.norm(C_haar - C_dct))" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "As shown above, given two distinct transforms ${\\matM}_1, {\\matM}_2$ , we have that $\\tA \\star_{\\scriptstyle \\matM_1} \\tB$ and $\\tA \\star_{\\scriptstyle \\matM_2} \\tB$ are not equal in general.\n", 172 | "This fact, as we will see, provides high flexibility when applying $\\mm$ based dimensionality reduction schemes.\n", 173 | "\n", 174 | "Two tensors $\\tA, \\tB \\in \\RR^{1 \\xx m \\xx n}$ are called $\\mm$ **-orthogonal slices** if $\\tA^{\\T} \\mm \\tB = \\mathbf{0}$, where $\\mathbf{0} \\in \\RR^{1\\xx 1 \\xx n}$ is the zero tube fiber, while $\\tQ \\in \\RR^{m \\xx m \\xx n}$ is called $\\mm$ **-unitary** if $\\tQ^{\\T} \\mm \\tQ = \\tI = \\tQ \\mm \\tQ^{\\T}$ .\n", 175 | "
\n", 176 | "A tensor $\\tB \\in \\RR^{p \\xx k \\xx n}$ is said to be a pseudo $\\mm$ -unitary tensor (or pseudo $\\mm$-orthogonal) if $\\tB^{\\T} \\mm \\tB$ is f-diagonal (i.e., all frontal slices are diagonal), and all frontal slices of $(\\tB^{\\T} \\mm \\tB) \\tsM$ are diagonal matrices with entries that are either ones or zeros.\n", 177 | "\n", 178 | "\n", 179 | "# TSVDM\n", 180 | "\n", 181 | "Let $\\tA \\in \\RR^{\\mpn}$ be a real tensor, then is possible to write the full **tubal singular value decomposition** of $\\tA$ as \n", 182 | "$$\\tA = \\tU \\mm \\tS \\mm \\tV^{\\T}$$ \n", 183 | "\n", 184 | "where $\\tU, \\tV$ are $(m \\xx m \\xx n)$ and $(p \\xx p \\xx n)$ , $\\mm$-unitary tensors respectively, and $\\tS \\in \\RR^{\\mpn}$ is an **f-diagonal** tensor, that is, a tensor whose frontal slices ( $\\tS_{:,:,i}$ ) are matrices with zeros outside their main diagonal.
\n", 185 | "\n", 186 | "We use the notation $\\hsigma_{j}^{(i)}$ do denote the $j^{th}$ largest singular value on the $i^{th}$ lateral face of $\\thS$: \n", 187 | "$$\\hsigma_{j}^{(i)} := \\thS_{j,j,i}$$\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 4, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "U: 10x5x8\n", 200 | "S: 5x8\n", 201 | "V: 5x5x8 \n", 202 | "\n", 203 | "||A - USV'||^2 = 5.159366909775574e-27\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "from mprod.decompositions import svdm\n", 209 | "from mprod import tensor_mtranspose\n", 210 | "\n", 211 | "U,S,V = svdm(A, funm_haar, invm_haar)\n", 212 | "\n", 213 | "print(\"U:\", \"x\".join(map(str, U.shape)))\n", 214 | "print(\"S:\", \"x\".join(map(str, S.shape)))\n", 215 | "print(\"V:\", \"x\".join(map(str, V.shape)),\"\\n\")\n", 216 | "\n", 217 | "# Note that for practical reasons, S is stored in a lean datastructure\n", 218 | "# To obtain the \"tensorial\" representation of S, we do as follows\n", 219 | "tens_S = np.zeros((p,p,n))\n", 220 | "for i in range(n):\n", 221 | " tens_S[:S.shape[0],:S.shape[0],i] = np.diag(S[:,i])\n", 222 | "\n", 223 | "\n", 224 | "# reconstruct the tensor\n", 225 | "Vt = tensor_mtranspose(V,funm_haar, invm_haar)\n", 226 | "US = m_prod(U, tens_S, funm_haar, invm_haar)\n", 227 | "USVt = m_prod(US, Vt, funm_haar, invm_haar)\n", 228 | "\n", 229 | "print(\"||A - USV'||^2 =\",np.linalg.norm(A - USVt)**2) # practically 0" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "# Tensor ranks and truncations\n", 237 | "\n", 238 | "* The **t-rank** of $\\tA$ is the number of nonezero tubes of $\\tS$: \n", 239 | "$$\n", 240 | "r = | \\left\\{ i = 1, \\dots, n ~;~ \\FNormS{\\tS_{i,i,:}} > 0 \\right\\} |\n", 241 | "$$\n", 242 | "\n", 243 | "$\\tA^{(q)} = \\tU_{:,1:q, :} \\mm \\tS_{1:q,1:q,:} \\mm {\\tV_{:,1:q,:}}^{\\T}$ denotes 
"* The **t-rank** of $\tA$ is the number of nonzero tubes of $\tS$: \n",
$i_1,\\dots, i_{np}$ to denote the indexes of the non-zeros of $\\thS$ ordered in decreasing order. That is \n", 294 | "$$\\hsigma_{\\ell} := \\hsigma_{j_{\\ell}}^{(i_{\\ell})}$$\n", 295 | "\n", 296 | "where $\\hsigma_1 \\geq \\hsigma_2 \\geq \\dots \\geq \\hsigma_{np}$ .\n", 297 | "\n", 298 | "For $q = 1 , \\dots , p n$ , the **explicit rank-** $q$ **truncation** under $\\mm$ of a tensor $\\tA$, denoted by $\\tA_q = \\tA_{\\rrho}$ , where $\\tA_{\\rrho}$ is the tensor of multi-rank $\\rrho$ under $\\mm$ such that \n", 299 | "$$\\rrho_i = \\max \\{ j = 1, \\dots ,p ~|~ (j,i) \\in \\{(j_1, j_1), \\dots, (j_q, i_q)\\} \\} .$$ \n", 300 | "\n", 301 | "In words, we keep the $q$ top singular values of any frontal slice of $\\thS$, and zero out the rest. \n", 302 | "\n", 303 | "\n", 304 | "\n" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | ".. note::\n", 312 | " We have that $\\tA^{(q)}, \\tA_{\\rrho}$ and $\\tA_{q}$ are the best t-rank $q$, multi-rank $\\rrho$ and explicit-rank $q$ (under $\\mm$) approximations of $\\tA$, respectively.\n", 313 | "\n", 314 | "\n", 315 | "\n", 316 | "\n", 317 | "# The effect of choosing different transforms \n", 318 | "\n", 319 | "To demonstrate how might the choice of $\\matM$ influence the resulting decomposition, we use the real-world time-series dataset obtained from a study on Pediatric Ulcerative Colitis (PUC) by Schirmer et al..\n", 320 | "\n", 321 | "First, we obtain the data table from our analysis GitHub repo, construct a tensor from the data and apply TSVDM with respect to both randomly sampled $\\matM$ and the DCT.\n", 322 | "\n", 323 | "Note that in `generate_haar` function call, we set the `random_state` parameter to an integer (123) just so that the results are reproducible." 
324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 57, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "shape of S, by randomly sampled transform: (87, 4)\n", 336 | "shape of S, by DCT: (4, 4)\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "import pandas as pd\n", 342 | "from mprod import table2tensor\n", 343 | "\n", 344 | "file_path = \"https://raw.githubusercontent.com/UriaMorP/\" \\\n", 345 | " \"tcam_analysis_notebooks/main/Schirmer2018/Schirmer2018.tsv\"\n", 346 | "\n", 347 | "data_raw = pd.read_csv(file_path, index_col=[0,1], sep=\"\\t\"\n", 348 | " , dtype={'Week':int})\n", 349 | "\n", 350 | "data_tensor, map1, map3 = table2tensor(data_raw)\n", 351 | "\n", 352 | "m,p,n = data_tensor.shape\n", 353 | "\n", 354 | "# Generate transforms according to the \n", 355 | "# relevant dimensions\n", 356 | "funm_haar, invm_haar = generate_haar(n,random_state=123)\n", 357 | "funm_dct, invm_dct = generate_dct(n)\n", 358 | "\n", 359 | "\n", 360 | "# Haar\n", 361 | "Uhaar, Shaar, Vhaar = svdm(data_tensor, funm_haar, invm_haar)\n", 362 | "print(\"shape of S, by randomly sampled transform:\", Shaar.shape)\n", 363 | "# DCT\n", 364 | "Udct, Sdct, Vdct = svdm(data_tensor, funm_dct, invm_dct)\n", 365 | "print(\"shape of S, by DCT:\", Sdct.shape)\n" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "In this case, we have that the t-rank of our data under the DCT domain transform is 4, and 87 under $\\mm$ where $\\matM$ is obtained from randomly sampling the Haar distribution. \n", 373 | "\n", 374 | "Even though it is not generally true that choosing $\\matM$ as DCT (the t-product) results in better compression, the fact that it does so for time-series data makes perfect sense; Since we assume that time-series data are samples of continuous functions, which, are easy to approximate well using very few DCT basis elements." 
#!/usr/bin/env python3
# NOTE: the module docstring below is consumed at runtime via
# `__doc__.format(**globals())` to build the usage text, so its `{...}`
# placeholders and wording are behavior and must not be edited casually.
"""
runtests.py [OPTIONS] [-- ARGS]

Run tests, building the project first.

Examples::

    $ python runtests.py
    $ python runtests.py -s {SAMPLE_SUBMODULE}
    $ # Run a standalone test function:
    $ python runtests.py -t {SAMPLE_TEST}
    $ # Run a test defined as a method of a TestXXX class:
    $ python runtests.py --ipython
    $ python runtests.py --python somescript.py
    $ python runtests.py --bench
    $ python runtests.py --durations 20

Run a debugger:

    $ gdb --args python runtests.py [...other args...]

Disable pytest capturing of output by using its '-s' option:

    $ python runtests.py -- -s

Generate C code coverage listing under build/lcov/:
(requires http://ltp.sourceforge.net/coverage/lcov.php)

    $ python runtests.py --gcov [...other args...]
    $ python runtests.py --lcov-html

Run lint checks.
Provide target branch name or `uncommitted` to check before committing:

    $ python runtests.py --lint main
    $ python runtests.py --lint uncommitted

"""
#
# This is a generic test runner script for projects using NumPy's test
# framework. Change the following values to adapt to your project:
#
# NOTE(review): `numpy.lib` does not appear to be used in this prelude —
# possibly imported only to fail fast when numpy is missing; confirm.
import numpy.lib

# Project-specific knobs for the generic runner template.
PROJECT_MODULE = "mprod"
PROJECT_ROOT_FILES = ['mprod', 'setup.py']
SAMPLE_TEST = "mprod/decompositions/tests/test_decompositions.py::test_tqrm"
SAMPLE_SUBMODULE = "decompositions"

# EXTRA_PATH = ['/home/labs/elinav/uria/lib/ccache', '/home/labs/elinav/uria/lib/f90cache',
#               '/home/labs/elinav/uria/local/lib/ccache', '/home/labs/elinav/uria/local/lib/f90cache']

# Extra PATH entries for compiler caches, searched when building.
EXTRA_PATH = ['~/lib/ccache', '~/lib/f90cache',
              '~/local/lib/ccache', '~/local/lib/f90cache']


# ---------------------------------------------------------------------


# Under `python -OO` docstrings are stripped and __doc__ is None; otherwise
# expand the {SAMPLE_*} placeholders into the usage text.
if __doc__ is None:
    __doc__ = "Run without -OO if you want usage info"
else:
    __doc__ = __doc__.format(**globals())

import sys
import os, glob

# In case we are run from the source directory, we don't want to import the
# project from there:
sys.path.pop(0)

import shutil
import subprocess
import time
from argparse import ArgumentParser, REMAINDER

ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
one verbosity level to pytest. Default is 0") 85 | parser.add_argument("--debug-info", action="store_true", 86 | help=("Add --verbose-cfg to build_src to show " 87 | "compiler configuration output while creating " 88 | "_numpyconfig.h and config.h")) 89 | parser.add_argument("--no-build", "-n", action="store_true", default=False, 90 | help="Do not build the project (use system installed " 91 | "version)") 92 | parser.add_argument("--build-only", "-b", action="store_true", 93 | default=False, help="Just build, do not run any tests") 94 | parser.add_argument("--doctests", action="store_true", default=False, 95 | help="Run doctests in module") 96 | parser.add_argument("--refguide-check", action="store_true", default=False, 97 | help="Run refguide (doctest) check (do not run " 98 | "regular tests.)") 99 | parser.add_argument("--coverage", action="store_true", default=False, 100 | help=("Report coverage of project code. HTML output " 101 | "goes under build/coverage")) 102 | parser.add_argument("--lint", default=None, 103 | help="'' or 'uncommitted', passed to " 104 | "tools/linter.py [--branch BRANCH] " 105 | "[--uncommitted]") 106 | parser.add_argument("--durations", action="store", default=-1, type=int, 107 | help=("Time N slowest tests, time all if 0, time none " 108 | "if < 0")) 109 | parser.add_argument("--gcov", action="store_true", default=False, 110 | help=("Enable C code coverage via gcov (requires " 111 | "GCC). gcov output goes to build/**/*.gc*")) 112 | parser.add_argument("--lcov-html", action="store_true", default=False, 113 | help=("Produce HTML for C code coverage information " 114 | "from a previous run with --gcov. 
" 115 | "HTML output goes to build/lcov/")) 116 | parser.add_argument("--mode", "-m", default="fast", 117 | help="'fast', 'full', or something that could be " 118 | "passed to nosetests -A [default: fast]") 119 | parser.add_argument("--submodule", "-s", default=None, 120 | help="Submodule whose tests to run (cluster, " 121 | "constants, ...)") 122 | parser.add_argument("--pythonpath", "-p", default=None, 123 | help="Paths to prepend to PYTHONPATH") 124 | parser.add_argument("--tests", "-t", action='append', 125 | help="Specify tests to run") 126 | parser.add_argument("--python", action="store_true", 127 | help="Start a Python shell with PYTHONPATH set") 128 | parser.add_argument("--ipython", "-i", action="store_true", 129 | help="Start IPython shell with PYTHONPATH set") 130 | parser.add_argument("--shell", action="store_true", 131 | help="Start Unix shell with PYTHONPATH set") 132 | parser.add_argument("--mypy", action="store_true", 133 | help="Run mypy on files with NumPy on the MYPYPATH") 134 | parser.add_argument("--debug", "-g", action="store_true", 135 | help="Debug build") 136 | parser.add_argument("--parallel", "-j", type=int, default=0, 137 | help="Number of parallel jobs during build") 138 | parser.add_argument("--warn-error", action="store_true", 139 | help="Set -Werror to convert all compiler warnings to " 140 | "errors") 141 | parser.add_argument("--cpu-baseline", default=None, 142 | help="Specify a list of enabled baseline CPU " 143 | "optimizations"), 144 | parser.add_argument("--cpu-dispatch", default=None, 145 | help="Specify a list of dispatched CPU optimizations"), 146 | parser.add_argument("--disable-optimization", action="store_true", 147 | help="Disable CPU optimized code (dispatch, simd, " 148 | "fast, ...)"), 149 | parser.add_argument("--simd-test", default=None, 150 | help="Specify a list of CPU optimizations to be " 151 | "tested against NumPy SIMD interface"), 152 | parser.add_argument("--show-build-log", action="store_true", 153 | 
help="Show build output rather than using a log file") 154 | parser.add_argument("--bench", action="store_true", 155 | help="Run benchmark suite instead of test suite") 156 | parser.add_argument("--bench-compare", action="store", metavar="COMMIT", 157 | help=("Compare benchmark results of current HEAD to " 158 | "BEFORE. Use an additional " 159 | "--bench-compare=COMMIT to override HEAD with " 160 | "COMMIT. Note that you need to commit your " 161 | "changes first!")) 162 | parser.add_argument("args", metavar="ARGS", default=[], nargs=REMAINDER, 163 | help="Arguments to pass to pytest, asv, mypy, Python " 164 | "or shell") 165 | args = parser.parse_args(argv) 166 | 167 | if args.durations < 0: 168 | args.durations = -1 169 | 170 | if args.bench_compare: 171 | args.bench = True 172 | args.no_build = True # ASV does the building 173 | 174 | if args.lcov_html: 175 | # generate C code coverage output 176 | lcov_generate() 177 | sys.exit(0) 178 | 179 | if args.pythonpath: 180 | for p in reversed(args.pythonpath.split(os.pathsep)): 181 | sys.path.insert(0, p) 182 | 183 | if args.gcov: 184 | gcov_reset_counters() 185 | 186 | if args.debug and args.bench: 187 | print("*** Benchmarks should not be run against debug " 188 | "version; remove -g flag ***") 189 | 190 | if args.lint: 191 | check_lint(args.lint) 192 | 193 | if not args.no_build: 194 | # we need the noarch path in case the package is pure python. 
195 | site_dir, site_dir_noarch = build_project(args) 196 | sys.path.insert(0, site_dir) 197 | sys.path.insert(0, site_dir_noarch) 198 | os.environ['PYTHONPATH'] = site_dir + os.pathsep + site_dir_noarch 199 | else: 200 | _temp = __import__(PROJECT_MODULE) 201 | site_dir = os.path.sep.join(_temp.__file__.split(os.path.sep)[:-2]) 202 | 203 | extra_argv = args.args[:] 204 | if not args.bench: 205 | # extra_argv may also lists selected benchmarks 206 | if extra_argv and extra_argv[0] == '--': 207 | extra_argv = extra_argv[1:] 208 | 209 | if args.python: 210 | # Debugging issues with warnings is much easier if you can see them 211 | print("Enabling display of all warnings") 212 | import warnings 213 | import types 214 | 215 | warnings.filterwarnings("always") 216 | if extra_argv: 217 | # Don't use subprocess, since we don't want to include the 218 | # current path in PYTHONPATH. 219 | sys.argv = extra_argv 220 | with open(extra_argv[0], 'r') as f: 221 | script = f.read() 222 | sys.modules['__main__'] = types.ModuleType('__main__') 223 | ns = dict(__name__='__main__', 224 | __file__=extra_argv[0]) 225 | exec(script, ns) 226 | sys.exit(0) 227 | else: 228 | import code 229 | code.interact() 230 | sys.exit(0) 231 | 232 | if args.ipython: 233 | # Debugging issues with warnings is much easier if you can see them 234 | print("Enabling display of all warnings and pre-importing numpy as np") 235 | import warnings; 236 | warnings.filterwarnings("always") 237 | import IPython 238 | import numpy as np 239 | IPython.embed(colors='neutral', user_ns={"np": np}) 240 | sys.exit(0) 241 | 242 | if args.shell: 243 | shell = os.environ.get('SHELL', 'cmd' if os.name == 'nt' else 'sh') 244 | print("Spawning a shell ({})...".format(shell)) 245 | subprocess.call([shell] + extra_argv) 246 | sys.exit(0) 247 | 248 | if args.mypy: 249 | try: 250 | import mypy.api 251 | except ImportError: 252 | raise RuntimeError( 253 | "Mypy not found. 
Please install it by running " 254 | "pip install -r test_requirements.txt from the repo root" 255 | ) 256 | 257 | os.environ['MYPYPATH'] = site_dir 258 | # By default mypy won't color the output since it isn't being 259 | # invoked from a tty. 260 | os.environ['MYPY_FORCE_COLOR'] = '1' 261 | 262 | config = os.path.join( 263 | site_dir, 264 | "numpy", 265 | "typing", 266 | "tests", 267 | "data", 268 | "mypy.ini", 269 | ) 270 | 271 | report, errors, status = mypy.api.run( 272 | ['--config-file', config] + args.args 273 | ) 274 | print(report, end='') 275 | print(errors, end='', file=sys.stderr) 276 | sys.exit(status) 277 | 278 | if args.coverage: 279 | dst_dir = os.path.join(ROOT_DIR, 'build', 'coverage') 280 | fn = os.path.join(dst_dir, 'coverage_html.js') 281 | if os.path.isdir(dst_dir) and os.path.isfile(fn): 282 | shutil.rmtree(dst_dir) 283 | extra_argv += ['--cov-report=html:' + dst_dir] 284 | 285 | if args.refguide_check: 286 | cmd = [os.path.join(ROOT_DIR, 'tools', 'refguide_check.py'), 287 | '--doctests'] 288 | if args.submodule: 289 | cmd += [args.submodule] 290 | os.execv(sys.executable, [sys.executable] + cmd) 291 | sys.exit(0) 292 | 293 | if args.bench: 294 | # Run ASV 295 | for i, v in enumerate(extra_argv): 296 | if v.startswith("--"): 297 | items = extra_argv[:i] 298 | if v == "--": 299 | i += 1 # skip '--' indicating further are passed on. 
300 | bench_args = extra_argv[i:] 301 | break 302 | else: 303 | items = extra_argv 304 | bench_args = [] 305 | 306 | if args.tests: 307 | items += args.tests 308 | if args.submodule: 309 | items += [args.submodule] 310 | for a in items: 311 | bench_args.extend(['--bench', a]) 312 | 313 | if not args.bench_compare: 314 | cmd = ['asv', 'run', '-n', '-e', '--python=same'] + bench_args 315 | ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks')) 316 | sys.exit(ret) 317 | else: 318 | commits = [x.strip() for x in args.bench_compare.split(',')] 319 | if len(commits) == 1: 320 | commit_a = commits[0] 321 | commit_b = 'HEAD' 322 | elif len(commits) == 2: 323 | commit_a, commit_b = commits 324 | else: 325 | p.error("Too many commits to compare benchmarks for") 326 | 327 | # Check for uncommitted files 328 | if commit_b == 'HEAD': 329 | r1 = subprocess.call(['git', 'diff-index', '--quiet', 330 | '--cached', 'HEAD']) 331 | r2 = subprocess.call(['git', 'diff-files', '--quiet']) 332 | if r1 != 0 or r2 != 0: 333 | print("*" * 80) 334 | print("WARNING: you have uncommitted changes --- " 335 | "these will NOT be benchmarked!") 336 | print("*" * 80) 337 | 338 | # Fix commit ids (HEAD is local to current repo) 339 | out = subprocess.check_output(['git', 'rev-parse', commit_b]) 340 | commit_b = out.strip().decode('ascii') 341 | 342 | out = subprocess.check_output(['git', 'rev-parse', commit_a]) 343 | commit_a = out.strip().decode('ascii') 344 | 345 | # generate config file with the required build options 346 | asv_cfpath = [ 347 | '--config', asv_compare_config( 348 | os.path.join(ROOT_DIR, 'benchmarks'), args, 349 | # to clear the cache if the user changed build options 350 | (commit_a, commit_b) 351 | ) 352 | ] 353 | cmd = ['asv', 'continuous', '-e', '-f', '1.05', 354 | commit_a, commit_b] + asv_cfpath + bench_args 355 | ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks')) 356 | sys.exit(ret) 357 | 358 | if args.build_only: 359 | sys.exit(0) 360 | 
else: 361 | __import__(PROJECT_MODULE) 362 | # from numpy.testing._private.nosetester import NoseTester 363 | # test = NoseTester.test 364 | test = sys.modules[PROJECT_MODULE].test 365 | 366 | if args.submodule: 367 | tests = [PROJECT_MODULE + "." + args.submodule] 368 | elif args.tests: 369 | tests = args.tests 370 | else: 371 | tests = None 372 | 373 | # Run the tests under build/test 374 | 375 | if not args.no_build: 376 | test_dir = site_dir 377 | else: 378 | test_dir = os.path.join(ROOT_DIR, 'build', 'test') 379 | if not os.path.isdir(test_dir): 380 | os.makedirs(test_dir) 381 | 382 | shutil.copyfile(os.path.join(ROOT_DIR, '.coveragerc'), 383 | os.path.join(test_dir, '.coveragerc')) 384 | 385 | cwd = os.getcwd() 386 | try: 387 | os.chdir(test_dir) 388 | 389 | result = test(args.mode, 390 | verbose=args.verbose, 391 | extra_argv=extra_argv, 392 | doctests=args.doctests, 393 | coverage=args.coverage, 394 | durations=args.durations, 395 | tests=tests) 396 | 397 | finally: 398 | os.chdir(cwd) 399 | 400 | if isinstance(result, bool): 401 | sys.exit(0 if result else 1) 402 | elif result.wasSuccessful(): 403 | sys.exit(0) 404 | else: 405 | sys.exit(1) 406 | 407 | 408 | def build_project(args): 409 | """ 410 | Build a dev version of the project. 
411 | 412 | Returns 413 | ------- 414 | site_dir 415 | site-packages directory where it was installed 416 | 417 | """ 418 | 419 | import sysconfig 420 | 421 | root_ok = [os.path.exists(os.path.join(ROOT_DIR, fn)) 422 | for fn in PROJECT_ROOT_FILES] 423 | if not all(root_ok): 424 | print("To build the project, run runtests.py in " 425 | "git checkout or unpacked source") 426 | sys.exit(1) 427 | 428 | dst_dir = os.path.join(ROOT_DIR, 'build', 'testenv') 429 | 430 | env = dict(os.environ) 431 | cmd = [sys.executable, 'setup.py'] 432 | 433 | # Always use ccache, if installed 434 | env['PATH'] = os.pathsep.join(EXTRA_PATH + env.get('PATH', '').split(os.pathsep)) 435 | cvars = sysconfig.get_config_vars() 436 | compiler = env.get('CC') or cvars.get('CC', '') 437 | if 'gcc' in compiler: 438 | # Check that this isn't clang masquerading as gcc. 439 | if sys.platform != 'darwin' or 'gnu-gcc' in compiler: 440 | # add flags used as werrors 441 | warnings_as_errors = ' '.join([ 442 | # from tools/travis-test.sh 443 | '-Werror=vla', 444 | '-Werror=nonnull', 445 | '-Werror=pointer-arith', 446 | '-Wlogical-op', 447 | # from sysconfig 448 | '-Werror=unused-function', 449 | ]) 450 | env['CFLAGS'] = warnings_as_errors + ' ' + env.get('CFLAGS', '') 451 | if args.debug or args.gcov: 452 | # assume everyone uses gcc/gfortran 453 | env['OPT'] = '-O0 -ggdb' 454 | env['FOPT'] = '-O0 -ggdb' 455 | if args.gcov: 456 | env['OPT'] = '-O0 -ggdb' 457 | env['FOPT'] = '-O0 -ggdb' 458 | env['CC'] = cvars['CC'] + ' --coverage' 459 | env['CXX'] = cvars['CXX'] + ' --coverage' 460 | env['F77'] = 'gfortran --coverage ' 461 | env['F90'] = 'gfortran --coverage ' 462 | env['LDSHARED'] = cvars['LDSHARED'] + ' --coverage' 463 | env['LDFLAGS'] = " ".join(cvars['LDSHARED'].split()[1:]) + ' --coverage' 464 | 465 | cmd += ["build"] 466 | if args.parallel > 1: 467 | cmd += ["-j", str(args.parallel)] 468 | if args.warn_error: 469 | cmd += ["--warn-error"] 470 | if args.cpu_baseline: 471 | cmd += ["--cpu-baseline", 
args.cpu_baseline] 472 | if args.cpu_dispatch: 473 | cmd += ["--cpu-dispatch", args.cpu_dispatch] 474 | if args.disable_optimization: 475 | cmd += ["--disable-optimization"] 476 | if args.simd_test is not None: 477 | cmd += ["--simd-test", args.simd_test] 478 | if args.debug_info: 479 | cmd += ["build_src", "--verbose-cfg"] 480 | # Install; avoid producing eggs so numpy can be imported from dst_dir. 481 | cmd += ['install', '--prefix=' + dst_dir, 482 | '--single-version-externally-managed', 483 | '--record=' + dst_dir + 'tmp_install_log.txt'] 484 | 485 | from distutils.sysconfig import get_python_lib 486 | site_dir = get_python_lib(prefix=dst_dir, plat_specific=True) 487 | site_dir_noarch = get_python_lib(prefix=dst_dir, plat_specific=False) 488 | # easy_install won't install to a path that Python by default cannot see 489 | # and isn't on the PYTHONPATH. Plus, it has to exist. 490 | if not os.path.exists(site_dir): 491 | os.makedirs(site_dir) 492 | if not os.path.exists(site_dir_noarch): 493 | os.makedirs(site_dir_noarch) 494 | env['PYTHONPATH'] = site_dir + os.pathsep + site_dir_noarch 495 | 496 | log_filename = os.path.join(ROOT_DIR, 'build.log') 497 | 498 | if args.show_build_log: 499 | ret = subprocess.call(cmd, env=env, cwd=ROOT_DIR) 500 | else: 501 | log_filename = os.path.join(ROOT_DIR, 'build.log') 502 | print("Building, see build.log...") 503 | with open(log_filename, 'w') as log: 504 | p = subprocess.Popen(cmd, env=env, stdout=log, stderr=log, 505 | cwd=ROOT_DIR) 506 | try: 507 | # Wait for it to finish, and print something to indicate the 508 | # process is alive, but only if the log file has grown (to 509 | # allow continuous integration environments kill a hanging 510 | # process accurately if it produces no output) 511 | last_blip = time.time() 512 | last_log_size = os.stat(log_filename).st_size 513 | while p.poll() is None: 514 | time.sleep(0.5) 515 | if time.time() - last_blip > 60: 516 | log_size = os.stat(log_filename).st_size 517 | if log_size > 
last_log_size: 518 | print(" ... build in progress") 519 | last_blip = time.time() 520 | last_log_size = log_size 521 | 522 | ret = p.wait() 523 | except: 524 | p.kill() 525 | p.wait() 526 | raise 527 | 528 | if ret == 0: 529 | print("Build OK") 530 | else: 531 | if not args.show_build_log: 532 | with open(log_filename, 'r') as f: 533 | print(f.read()) 534 | print("Build failed!") 535 | sys.exit(1) 536 | 537 | return site_dir, site_dir_noarch 538 | 539 | 540 | def asv_compare_config(bench_path, args, h_commits): 541 | """ 542 | Fill the required build options through custom variable 543 | 'numpy_build_options' and return the generated config path. 544 | """ 545 | conf_path = os.path.join(bench_path, "asv_compare.conf.json.tpl") 546 | nconf_path = os.path.join(bench_path, "_asv_compare.conf.json") 547 | 548 | # add custom build 549 | build = [] 550 | if args.parallel > 1: 551 | build += ["-j", str(args.parallel)] 552 | if args.cpu_baseline: 553 | build += ["--cpu-baseline", args.cpu_baseline] 554 | if args.cpu_dispatch: 555 | build += ["--cpu-dispatch", args.cpu_dispatch] 556 | if args.disable_optimization: 557 | build += ["--disable-optimization"] 558 | 559 | is_cached = asv_substitute_config(conf_path, nconf_path, 560 | numpy_build_options=' '.join([f'\\"{v}\\"' for v in build]), 561 | numpy_global_options=' '.join( 562 | [f'--global-option=\\"{v}\\"' for v in ["build"] + build]) 563 | ) 564 | if not is_cached: 565 | asv_clear_cache(bench_path, h_commits) 566 | return nconf_path 567 | 568 | 569 | def asv_clear_cache(bench_path, h_commits, env_dir="env"): 570 | """ 571 | Force ASV to clear the cache according to specified commit hashes. 
572 | """ 573 | # FIXME: only clear the cache from the current environment dir 574 | asv_build_pattern = os.path.join(bench_path, env_dir, "*", "asv-build-cache") 575 | for asv_build_cache in glob.glob(asv_build_pattern, recursive=True): 576 | for c in h_commits: 577 | try: 578 | shutil.rmtree(os.path.join(asv_build_cache, c)) 579 | except OSError: 580 | pass 581 | 582 | 583 | def asv_substitute_config(in_config, out_config, **custom_vars): 584 | """ 585 | A workaround to allow substituting custom tokens within 586 | ASV configuration file since there's no official way to add custom 587 | variables(e.g. env vars). 588 | 589 | Parameters 590 | ---------- 591 | in_config : str 592 | The path of ASV configuration file, e.g. '/path/to/asv.conf.json' 593 | out_config : str 594 | The path of generated configuration file, 595 | e.g. '/path/to/asv_substituted.conf.json'. 596 | 597 | The other keyword arguments represent the custom variables. 598 | 599 | Returns 600 | ------- 601 | True(is cached) if 'out_config' is already generated with 602 | the same '**custom_vars' and updated with latest 'in_config', 603 | False otherwise. 604 | 605 | Examples 606 | -------- 607 | See asv_compare_config(). 
608 | """ 609 | assert in_config != out_config 610 | assert len(custom_vars) > 0 611 | 612 | def sdbm_hash(*factors): 613 | chash = 0 614 | for f in factors: 615 | for char in str(f): 616 | chash = ord(char) + (chash << 6) + (chash << 16) - chash 617 | chash &= 0xFFFFFFFF 618 | return chash 619 | 620 | vars_hash = sdbm_hash(custom_vars, os.path.getmtime(in_config)) 621 | try: 622 | with open(out_config, "r") as wfd: 623 | hash_line = wfd.readline().split('hash:') 624 | if len(hash_line) > 1 and int(hash_line[1]) == vars_hash: 625 | return True 626 | except IOError: 627 | pass 628 | 629 | custom_vars = {f'{{{k}}}': v for k, v in custom_vars.items()} 630 | with open(in_config, "r") as rfd, open(out_config, "w") as wfd: 631 | wfd.write(f"// hash:{vars_hash}\n") 632 | wfd.write("// This file is automatically generated by runtests.py\n") 633 | for line in rfd: 634 | for key, val in custom_vars.items(): 635 | line = line.replace(key, val) 636 | wfd.write(line) 637 | return False 638 | 639 | 640 | # 641 | # GCOV support 642 | # 643 | def gcov_reset_counters(): 644 | print("Removing previous GCOV .gcda files...") 645 | build_dir = os.path.join(ROOT_DIR, 'build') 646 | for dirpath, dirnames, filenames in os.walk(build_dir): 647 | for fn in filenames: 648 | if fn.endswith('.gcda') or fn.endswith('.da'): 649 | pth = os.path.join(dirpath, fn) 650 | os.unlink(pth) 651 | 652 | 653 | # 654 | # LCOV support 655 | # 656 | 657 | LCOV_OUTPUT_FILE = os.path.join(ROOT_DIR, 'build', 'lcov.out') 658 | LCOV_HTML_DIR = os.path.join(ROOT_DIR, 'build', 'lcov') 659 | 660 | 661 | def lcov_generate(): 662 | try: 663 | os.unlink(LCOV_OUTPUT_FILE) 664 | except OSError: 665 | pass 666 | try: 667 | shutil.rmtree(LCOV_HTML_DIR) 668 | except OSError: 669 | pass 670 | 671 | print("Capturing lcov info...") 672 | subprocess.call(['lcov', '-q', '-c', 673 | '-d', os.path.join(ROOT_DIR, 'build'), 674 | '-b', ROOT_DIR, 675 | '--output-file', LCOV_OUTPUT_FILE]) 676 | 677 | print("Generating lcov HTML 
output...") 678 | ret = subprocess.call(['genhtml', '-q', LCOV_OUTPUT_FILE, 679 | '--output-directory', LCOV_HTML_DIR, 680 | '--legend', '--highlight']) 681 | if ret != 0: 682 | print("genhtml failed!") 683 | else: 684 | print("HTML output generated under build/lcov/") 685 | 686 | 687 | def check_lint(lint_args): 688 | """ 689 | Adds ROOT_DIR to path and performs lint checks. 690 | This functions exits the program with status code of lint check. 691 | """ 692 | sys.path.append(ROOT_DIR) 693 | try: 694 | from tools.linter import DiffLinter 695 | except ModuleNotFoundError as e: 696 | print(f"Error: {e.msg}. " 697 | "Install using linter_requirements.txt.") 698 | sys.exit(1) 699 | 700 | uncommitted = lint_args == "uncommitted" 701 | branch = "main" if uncommitted else lint_args 702 | 703 | DiffLinter(branch).run_lint(uncommitted) 704 | 705 | 706 | if __name__ == "__main__": 707 | main(argv=sys.argv[1:]) 708 | --------------------------------------------------------------------------------