├── mprod
├── tests
│ ├── __init__.py
│ ├── test_base.py
│ ├── _base_tests.py
│ └── _utils.py
├── decompositions
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test__qr.py
│ │ └── test_decompositions.py
│ ├── __init__.py
│ ├── _qr.py
│ └── _tsvdm.py
├── dimensionality_reduction
│ ├── tests
│ │ ├── __init__.py
│ │ └── test_TCAM.py
│ ├── __init__.py
│ └── _tcam.py
├── __init__.py
├── _misc.py
├── _pytester.py
├── _pytesttester.py
├── _base.py
└── _ml_helpers.py
├── docs
├── _static
│ └── img
│ │ ├── mprod_logo.png
│ │ ├── ._mprod_logo.png
│ │ ├── mprod_logo_fav.png
│ │ ├── ._mprod_logo_fav.png
│ │ ├── mprod_logo_small.png
│ │ ├── ._mprod_logo_small.png
│ │ ├── mprod_tcam_cartoon.png
│ │ └── ._mprod_tcam_cartoon.png
├── modules
│ ├── stubs
│ │ ├── mprod.x_m3.rst
│ │ ├── mprod.m_prod.rst
│ │ ├── mprod.table2tensor.rst
│ │ ├── mprod.tensor_mtranspose.rst
│ │ ├── mprod.decompositions.svdm.rst
│ │ ├── mprod.decompositions.tqrm.rst
│ │ ├── mprod.MeanDeviationForm.rst
│ │ └── mprod.dimensionality_reduction.TCAM.rst
│ ├── classes.rst
│ ├── mprod.decompositions.rst
│ ├── mprod.rst
│ └── mprod.dimensionality_reduction.rst
├── examples
│ ├── data
│ │ ├── readme_graphs.png
│ │ ├── single_curve_demo.pdf
│ │ ├── single_curve_demo.png
│ │ ├── multiple_curves_demo.png
│ │ ├── mermaid-diagram-pipeline.png
│ │ └── ._mermaid-diagram-pipeline.png
│ ├── intro.ipynb
│ ├── examples.rst
│ └── mprod_primer.ipynb
├── _templates
│ ├── class.rst
│ └── function.rst
├── requirements.txt
├── environment.yml
├── Makefile
├── run_livereload.py
├── make.bat
├── conf.py
└── index.rst
├── requirements.txt
├── .gitignore
├── .readthedocs.yaml
├── setup.cfg
├── .github
└── workflows
│ └── build.yaml
├── LICENSE
├── setup.py
├── azure-pipelines.yml
├── README.md
└── runtests.py
/mprod/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/mprod/tests/test_base.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/mprod/decompositions/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/mprod/decompositions/tests/test__qr.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/mprod/dimensionality_reduction/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/_static/img/mprod_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo.png
--------------------------------------------------------------------------------
/docs/_static/img/._mprod_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo.png
--------------------------------------------------------------------------------
/docs/_static/img/mprod_logo_fav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo_fav.png
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.x_m3.rst:
--------------------------------------------------------------------------------
1 | mprod.x\_m3
2 | ===========
3 |
4 | .. currentmodule:: mprod
5 |
6 | .. autofunction:: x_m3
--------------------------------------------------------------------------------
/docs/_static/img/._mprod_logo_fav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo_fav.png
--------------------------------------------------------------------------------
/docs/_static/img/mprod_logo_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo_small.png
--------------------------------------------------------------------------------
/docs/examples/data/readme_graphs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/readme_graphs.png
--------------------------------------------------------------------------------
/docs/_static/img/._mprod_logo_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo_small.png
--------------------------------------------------------------------------------
/docs/_static/img/mprod_tcam_cartoon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_tcam_cartoon.png
--------------------------------------------------------------------------------
/docs/examples/data/single_curve_demo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/single_curve_demo.pdf
--------------------------------------------------------------------------------
/docs/examples/data/single_curve_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/single_curve_demo.png
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.m_prod.rst:
--------------------------------------------------------------------------------
1 | mprod.m\_prod
2 | =============
3 |
4 | .. currentmodule:: mprod
5 |
6 | .. autofunction:: m_prod
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.2
2 | scikit-learn>=0.24.1
3 | scipy>=1.5.3
4 | dataclasses>=0.7; python_version < '3.7'
5 | pandas>=1.1.5
--------------------------------------------------------------------------------
/docs/_static/img/._mprod_tcam_cartoon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_tcam_cartoon.png
--------------------------------------------------------------------------------
/docs/examples/data/multiple_curves_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/multiple_curves_demo.png
--------------------------------------------------------------------------------
/docs/examples/data/mermaid-diagram-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/mermaid-diagram-pipeline.png
--------------------------------------------------------------------------------
/docs/examples/data/._mermaid-diagram-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/._mermaid-diagram-pipeline.png
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.table2tensor.rst:
--------------------------------------------------------------------------------
1 | mprod.table2tensor
2 | ==================
3 |
4 | .. currentmodule:: mprod
5 |
6 | .. autofunction:: table2tensor
--------------------------------------------------------------------------------
/mprod/dimensionality_reduction/__init__.py:
--------------------------------------------------------------------------------
1 | """.. mprod.dimensionality_reduction
2 | """
3 |
4 | from ._tcam import TCAM
5 |
6 | __all__ = [
7 | "TCAM"
8 | ]
--------------------------------------------------------------------------------
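A minimal usage sketch for ``TCAM``, following the call pattern exercised in
/mprod/dimensionality_reduction/tests/test_TCAM.py; the tensor shape and the
``n_components`` value below are illustrative only:

    import numpy as np
    from mprod.dimensionality_reduction import TCAM

    rng = np.random.default_rng(0)
    X = rng.random((20, 10, 5))          # (samples, features, repeated measures)

    tca = TCAM(n_components=3)           # a default transform pair is used when
                                         # fun_m / inv_m are not supplied
    X_r = tca.fit(X).transform(X)        # equivalent to tca.fit_transform(X)
    X_hat = tca.inverse_transform(X_r)   # low-rank reconstruction of X
    print(X_r.shape)                     # one row per sample, n_components columns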
/docs/modules/stubs/mprod.tensor_mtranspose.rst:
--------------------------------------------------------------------------------
1 | mprod.tensor\_mtranspose
2 | ========================
3 |
4 | .. currentmodule:: mprod
5 |
6 | .. autofunction:: tensor_mtranspose
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.decompositions.svdm.rst:
--------------------------------------------------------------------------------
1 | mprod.decompositions.svdm
2 | =========================
3 |
4 | .. currentmodule:: mprod.decompositions
5 |
6 | .. autofunction:: svdm
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.decompositions.tqrm.rst:
--------------------------------------------------------------------------------
1 | mprod.decompositions.tqrm
2 | =========================
3 |
4 | .. currentmodule:: mprod.decompositions
5 |
6 | .. autofunction:: tqrm
--------------------------------------------------------------------------------
/mprod/decompositions/__init__.py:
--------------------------------------------------------------------------------
1 | """.. mprod.decompositions
2 | """
3 |
4 | from ._tsvdm import svdm
5 | from ._qr import tqrm
6 |
7 | __all__ = [
8 | "svdm",
9 | "tqrm"
10 | ]
11 |
--------------------------------------------------------------------------------
/docs/_templates/class.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}.{{objname}}`
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
13 |
--------------------------------------------------------------------------------
/docs/_templates/function.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
13 |
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.MeanDeviationForm.rst:
--------------------------------------------------------------------------------
1 | :mod:`mprod.MeanDeviationForm`
2 | =====================================
3 |
4 | .. currentmodule:: mprod
5 |
6 | .. autoclass:: MeanDeviationForm
7 |
8 | .. include:: mprod.MeanDeviationForm.examples
9 |
10 | .. raw:: html
11 |
12 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-gallery==0.9.0
2 | numpydoc==1.1.0
3 | sphinxcontrib-bibtex==2.3.0
4 | sphinx-prompt==1.4.0
5 | nbsphinx==0.8.6
6 | sphinx_rtd_theme==0.5.2
7 | ipykernel==5.4.3
8 | m2r2==0.3.1
9 | seaborn==0.11.1
10 | jupyter==1.0.0
11 | myst-parser==0.15.2
12 | livereload==2.6.3
13 | pandoc==2.0.1
--------------------------------------------------------------------------------
/docs/modules/stubs/mprod.dimensionality_reduction.TCAM.rst:
--------------------------------------------------------------------------------
1 | :mod:`mprod.dimensionality_reduction.TCAM`
2 | =================================================
3 |
4 | .. currentmodule:: mprod.dimensionality_reduction
5 |
6 | .. autoclass:: TCAM
7 |
8 | .. include:: mprod.dimensionality_reduction.TCAM.examples
9 |
10 | .. raw:: html
11 |
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.pytest_cache/
2 |
3 | **/__pycache__/
4 | build.log
5 | **/.DS_Store
6 | **/._.DS_Store
7 | **/.ipynb_checkpoints/
8 | .coveragerc
9 | _doc/
10 | coding_stuff/
11 | test_notebooks/
12 | docs/trashed_docs/
13 | docs/_build/
14 | docs/_conf.py
15 | dist/
16 | build/
17 | mprod_package.egg-info/
18 | pytest.xml
19 |
20 | docs/examples/data/._*.png
21 | doc_exp/
22 | docs_exp/
23 | docs/run_livereload.py
24 | trashed_docs/
--------------------------------------------------------------------------------
/docs/modules/classes.rst:
--------------------------------------------------------------------------------
1 | =============
2 | API Reference
3 | =============
4 |
5 | This is the class and function reference of the M product framework. Please refer
6 | to the full user guide for further details, as the raw class and function
7 | specifications may not be enough to give full guidelines on their use.
8 | For reference on concepts repeated across the API, see the module pages listed
9 | below.
10 |
11 | .. toctree::
12 | :maxdepth: 4
13 |
14 | mprod
15 | mprod.dimensionality_reduction
16 | mprod.decompositions
17 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: "3.6"
7 |
8 |
9 | python:
10 | install:
11 | - requirements: docs/requirements.txt
12 | - method: pip
13 | path: .
14 | extra_requirements:
15 | - docs
16 | # # version: "3.6"
17 | # install:
18 | # - method: pip
19 | # path: .
20 | # extra_requirements:
21 | # - docs
22 | # system_packages: true
23 |
24 |
25 | # Build documentation in the docs/ directory with Sphinx
26 | sphinx:
27 | configuration: docs/conf.py
28 |
--------------------------------------------------------------------------------
/docs/modules/mprod.decompositions.rst:
--------------------------------------------------------------------------------
1 | Tensor decompositions
2 | =====================
3 |
4 | The `mprod.decompositions` module includes tensor decomposition
5 | algorithms. Currently, the tsvdm and tqrm decompositions are implemented.
6 | Future plans are to implement additional factorizations such as non-negative tensor factorization (NTF).
7 |
8 | .. currentmodule:: mprod.decompositions
9 |
10 |
11 | .. autosummary::
12 | :toctree: stubs
13 |
14 | svdm
15 | tqrm
16 |
17 |
18 | .. automodule:: mprod.decompositions
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
23 |
--------------------------------------------------------------------------------
/docs/modules/mprod.rst:
--------------------------------------------------------------------------------
1 | :mod:`mprod` base module
2 | =========================
3 |
4 |
5 | The :mod:`mprod` base module includes core utilities of the m-product
6 | framework, such as :class:`MeanDeviationForm` and the :func:`m_prod` operation.
7 |
8 | .. currentmodule:: mprod
9 |
10 |
11 |
12 | .. autosummary::
13 | :toctree: stubs
14 | :template: class.rst
15 |
16 | MeanDeviationForm
17 |
18 |
19 | .. autosummary::
20 | :toctree: stubs
21 |
22 | m_prod
23 | tensor_mtranspose
24 | x_m3
25 | table2tensor
26 |
27 |
28 |
29 | .. automodule:: mprod
30 | :members:
31 | :undoc-members:
32 | :show-inheritance:
34 |
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - pip=21.0.1
5 | - python=3.6
6 | - numpy=1.19.2
7 | - scikit-learn=0.24.1
8 | - scipy=1.5.3
9 | - dataclasses=0.7
10 | - pandas=1.1.5
11 | - sphinx-gallery=0.9.0
12 | - numpydoc=1.1.0
13 | - sphinxcontrib-bibtex=2.3.0
14 | - sphinx-prompt=1.4.0
15 | - nbsphinx=0.8.6
16 | - sphinx_rtd_theme=0.5.2
17 | - ipykernel=5.4.3
18 | - seaborn=0.11.1
19 | - jupyter=1.0.0
20 | - pip:
21 | - mprod-package
22 |
--------------------------------------------------------------------------------
/docs/modules/mprod.dimensionality_reduction.rst:
--------------------------------------------------------------------------------
1 | Dimensionality reductions
2 | =========================
3 |
4 | This module includes tensor dimensionality reduction (tensor to matrix) algorithms.
5 | Currently, the TCAM algorithm is implemented.
6 | Future plans are to implement additional methods such as tensor-CCA and tensor-PLS.
7 |
8 |
9 | .. currentmodule:: mprod.dimensionality_reduction
10 |
11 |
12 | .. autosummary::
13 | :toctree: stubs
14 | :template: class.rst
15 |
16 | TCAM
17 |
18 |
19 |
20 | .. automodule:: mprod.dimensionality_reduction
21 | :members:
22 | :undoc-members:
23 | :show-inheritance:
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/mprod/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | """
4 | # import dimensionality_reduction
5 | # from .dimensionality_reduction._tcam import TCAM
6 | # import decompositions as decompositions
7 | # import dimensionality_reduction
8 | from ._ml_helpers import MeanDeviationForm, table2tensor
9 | from ._base import m_prod, tensor_mtranspose, x_m3, generate_haar, generate_dct
10 |
11 | from mprod._pytesttester import PytestTester
12 | test = PytestTester(__name__)
13 | del PytestTester
14 | #
15 | __all__ = [
16 | "m_prod",
17 | "tensor_mtranspose",
18 | "x_m3",
19 | "MeanDeviationForm",
20 | "generate_haar",
21 | "generate_dct",
22 | "table2tensor",
23 | "dimensionality_reduction",
24 | "decompositions"
25 | ]
26 |
27 |
--------------------------------------------------------------------------------
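A minimal sketch of the core m-product API exported above, assuming the call
signatures used in the package's own tests (/mprod/tests/_utils.py); the
shapes are illustrative:

    import numpy as np
    from mprod import m_prod, tensor_mtranspose, generate_dct

    m, p, n = 4, 3, 5
    rng = np.random.default_rng(0)
    a = rng.random((m, p, n))
    b = rng.random((p, 2, n))

    # generate_dct(n) returns a (fun_m, inv_m) pair of mutually inverse
    # tube-fiber transforms, as used in /mprod/tests/_utils.py
    fun_m, inv_m = generate_dct(n)

    c = m_prod(a, b, fun_m, inv_m)            # star_M product, shape (m, 2, n)
    a_t = tensor_mtranspose(a, fun_m, inv_m)  # m-transpose, shape (p, m, n)
    print(c.shape, a_t.shape)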
/docs/run_livereload.py:
--------------------------------------------------------------------------------
1 | from livereload import Server, shell
2 |
3 | if __name__ == '__main__':
4 | server = Server()
5 | server.watch('*.rst', shell('make html'), delay=1)
6 | server.watch('modules/*.rst', shell('make html'), delay=1)
7 | server.watch('modules/*/*.rst', shell('make html'), delay=1)
8 | server.watch('*.md', shell('make html'), delay=1)
9 | server.watch('*.py', shell('make html'), delay=1)
10 | server.watch('*.ipynb', shell('make html'), delay=.1)
11 | server.watch('examples/*.ipynb', shell('make html'), delay=.1)
12 | server.watch('_static/*', shell('make html'), delay=1)
13 | server.watch('_templates/*', shell('make html'), delay=1)
14 | server.serve(root='_build/html', host="cn240.wexac.weizmann.ac.il", port=8888)
15 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = mprod-package-uriamorP
3 | version = 0.0.5a1
4 | author =
5 | Uria Mor
6 | Rafael Valdes Mas
7 | Yotam Cohen
8 | Haim Avron
9 | project_url = https://github.com/UriaMorP/mprod_package
10 | author_email = uriamo@gmail.com
11 | description = Software implementation for tensor-tensor m-product framework
12 | long_description_content_type = text/markdown
13 | long_description = file: README.md
14 | license = BSD
15 | classifiers =
16 | Development Status :: 3 - Alpha
17 | Intended Audience :: Science/Research
18 | Programming Language :: Python :: 3.6
19 | Programming Language :: Python :: 3.7
20 | Programming Language :: Python :: 3.8
21 | Programming Language :: Python :: 3.9
22 | Programming Language :: Python :: 3.10
23 | Programming Language :: Python :: 3 :: Only
24 |
25 |
26 | python_requires = >=3.6.8
27 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | build:
10 | runs-on: ${{ matrix.os }}
11 | strategy:
12 | matrix:
13 | python-version: [3.6.8, 3.7, 3.8, 3.9, 3.10.0, 3.11.0, 3.12.0]
14 | os: [macos-latest, ubuntu-20.04]
15 |
16 | steps:
17 | - uses: actions/checkout@v2
18 | - name: Build using Python ${{ matrix.python-version }}
19 | uses: actions/setup-python@v2
20 | with:
21 | python-version: ${{ matrix.python-version }}
22 |
23 | - name: pip install dependencies [pip]
24 | run: |
25 | python -m pip install --upgrade pip;
26 | pip install pytest;
27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28 | if [ ${{ matrix.python-version }} == 3.6.8 ]; then pip install "dataclasses>=0.7"; fi
29 | pip install -e .
30 |
31 | - name: unit tests [pytest]
32 | run: |
33 | pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml
34 |
--------------------------------------------------------------------------------
/mprod/_misc.py:
--------------------------------------------------------------------------------
1 | from typing import Iterable, Tuple
2 | from numpy import ndarray
3 |
4 |
5 | def _assert_order(tensor: ndarray, tensor_varname: str, order: int):
6 | got_order = len(tensor.shape)
7 | assert got_order == order, f"{tensor_varname} must be an order {order} tensor, found order {got_order}"
8 |
9 |
10 | def _assert_size(tensor: ndarray, tensor_varname: str, axis: int, dim: int):
11 | got_dim = tensor.shape[axis]
12 | assert got_dim == dim, f"Dimension {axis} of {tensor_varname} must equal {dim}, found {got_dim}"
13 |
14 |
15 | def _assert_order_and_mdim(tensor: ndarray,
16 | tensor_varname: str,
17 | order: int,
18 | dim_inspection_list: Iterable[Tuple[int, int]]):
19 | """
20 |
21 | Parameters
22 | ----------
23 | tensor: np.ndarray
24 | The tensor for inspection
25 | tensor_varname: str
26 | The variable name of the tensor as it appears in the code
27 | order: int
28 | The intended order of `tensor`
29 | dim_inspection_list: Iterable[Tuple[int, int]]
30 | Iterable of ``(axis, dim)`` pairs; each pair asserts that
31 | ``tensor.shape[axis] == dim``
32 |
33 | """
34 | _assert_order(tensor, tensor_varname, order)
35 | for ax, dim in dim_inspection_list:
36 | assert ax < order, f"Cannot inspect the dimension of mode {ax} of {tensor_varname}, an order {order} tensor"
37 | _assert_size(tensor, tensor_varname, ax, dim)
38 |
--------------------------------------------------------------------------------
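A short sketch of how the private validators above compose; the tensor and
variable name are hypothetical, for illustration only:

    import numpy as np
    from mprod._misc import _assert_order_and_mdim

    t = np.zeros((4, 3, 5))
    # assert that t is an order-3 tensor with t.shape[0] == 4 and t.shape[2] == 5;
    # a violation raises AssertionError naming the offending mode and sizes
    _assert_order_and_mdim(t, "t", 3, [(0, 4), (2, 5)])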
/mprod/tests/_base_tests.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from mprod import m_prod, x_m3, tensor_mtranspose
5 | from numpy.testing import (
6 | assert_, assert_equal, assert_raises, assert_array_equal,
7 | assert_almost_equal, assert_allclose, suppress_warnings,
8 | assert_raises_regex, HAS_LAPACK64,
9 | )
10 |
11 |
12 | def gen_m_transpose(mpair):
13 | mfun, minv = mpair
14 |
15 | def _do(a):
16 | return tensor_mtranspose(a, mfun, minv)
17 |
18 | return _do
19 |
20 |
21 | def gen_m_product(mpair):
22 | mfun, minv = mpair
23 |
24 | def _do(a, b):
25 | return m_prod(a, b, mfun, minv)
26 |
27 | return _do
28 |
29 |
30 | def assert_identity(J, tensor, mproduct):
31 | tensor2 = mproduct(J, tensor)
32 | assert_almost_equal(tensor, tensor2)
33 |
34 |
35 | def assert_m_orth(tensor, mfun, minv):
36 | m, p, n = tensor.shape
37 |
38 | _t = gen_m_transpose((mfun, minv))
39 | _m = gen_m_product((mfun, minv))
40 |
41 | if m <= p:
42 | J = _m(tensor, _t(tensor))
43 | else:
44 | J = _m(_t(tensor), tensor)
45 |
46 | TENSOR_CASES = []
47 | for mode2_size in range(1, 10, 100):
48 | for i in range(10):
49 | rng = np.random.default_rng(seed=i + int(np.log10(mode2_size)))
50 | TENSOR_CASES.append(rng.random((J.shape[1], mode2_size, n)))
51 |
52 | # pytest does not collect locally defined parametrized functions, so the
53 | # identity check runs directly for each generated tensor
54 | for tens in TENSOR_CASES:
55 | assert_identity(J, tens, _m)
56 |
57 |
--------------------------------------------------------------------------------
/docs/examples/intro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "raw",
5 | "metadata": {
6 | "raw_mimetype": "text/restructuredtext"
7 | },
8 | "source": [
9 | ".. _primer:\n",
10 | "\n",
11 | "Brief Intro\n",
12 | "-----------"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 5,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "# TODO"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | }
31 | ],
32 | "metadata": {
33 | "celltoolbar": "Edit Metadata",
34 | "kernelspec": {
35 | "display_name": "mprod",
36 | "language": "python",
37 | "name": "mprod"
38 | },
39 | "language_info": {
40 | "codemirror_mode": {
41 | "name": "ipython",
42 | "version": 3
43 | },
44 | "file_extension": ".py",
45 | "mimetype": "text/x-python",
46 | "name": "python",
47 | "nbconvert_exporter": "python",
48 | "pygments_lexer": "ipython3",
49 | "version": "3.6.8"
50 | },
51 | "toc": {
52 | "base_numbering": 1,
53 | "nav_menu": {},
54 | "number_sections": true,
55 | "sideBar": true,
56 | "skip_h1_title": false,
57 | "title_cell": "Table of Contents",
58 | "title_sidebar": "Contents",
59 | "toc_cell": false,
60 | "toc_position": {},
61 | "toc_section_display": true,
62 | "toc_window_display": false
63 | }
64 | },
65 | "nbformat": 4,
66 | "nbformat_minor": 4
67 | }
68 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2021, UriaMorP
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/mprod/decompositions/tests/test_decompositions.py:
--------------------------------------------------------------------------------
1 | """ Test functions for decompositions module
2 |
3 | """
4 |
5 | import pytest
6 | import numpy as np
7 |
8 | from itertools import product
9 |
10 | from mprod.decompositions import svdm, tqrm
11 | from mprod.tests._utils import (_make_mprod_op_cases, _make_tensor_cases, gen_m_product, gen_m_transpose, assert_m_orth)
12 |
13 | from numpy.testing import (
14 | assert_, assert_equal, assert_raises, assert_array_equal,
15 | assert_almost_equal, assert_allclose, suppress_warnings,
16 | assert_raises_regex, HAS_LAPACK64,
17 | )
18 |
19 | M_FUN_CASES = _make_mprod_op_cases()
20 | TENSOR_CASES = _make_tensor_cases()
21 |
22 |
23 | @pytest.mark.parametrize('tensor, m_pair', product(TENSOR_CASES, M_FUN_CASES))
24 | def test_tsvdm(tensor, m_pair):
25 | mfun, minv = m_pair
26 | # _m = gen_m_product(m_pair)
27 | # _t = gen_m_transpose(m_pair)
28 |
29 | u, s, v = svdm(tensor, mfun, minv)
30 | m, p, n = tensor.shape
31 | rk = min(m, p)
32 |
33 | assert s.shape[0] == rk, f"expected shape[0] of s to be {rk}, got {s.shape[0]}"
34 | assert s.shape[1] == tensor.shape[-1], f"expected shape[1] of s to be {tensor.shape[-1]}, got {s.shape[1]}"
35 |
36 |
37 | # tensor2 = _m(_m(u, s), _t(v))
38 | shat = mfun(s)
39 | us = mfun(u).transpose(2, 0, 1) * shat.T.reshape(n, 1, m)
40 | usv = np.matmul(us, mfun(v).transpose(2, 1, 0))
41 | usv = usv.transpose(1, 2, 0)
42 | tensor2 = minv(usv)
43 | assert_almost_equal(tensor, tensor2)
44 |
45 | assert_m_orth(u, *m_pair)
46 | assert_m_orth(v, *m_pair)
47 |
48 |
49 | @pytest.mark.parametrize('tensor, m_pair', product(TENSOR_CASES, M_FUN_CASES))
50 | def test_tqrm(tensor, m_pair):
51 | mfun, minv = m_pair
52 |
53 | _m = gen_m_product(m_pair)
54 | _t = gen_m_transpose(m_pair)
55 |
56 | Q, R = tqrm(tensor, mfun, minv)
57 |
58 | tensor2 = _m(Q, R)
59 | assert_almost_equal(tensor, tensor2)
60 |
61 | assert_m_orth(Q, *m_pair)
62 |
--------------------------------------------------------------------------------
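The reconstruction identity checked in test_tsvdm, condensed into a standalone
sketch that uses only the public API; shapes are illustrative and the
hat-domain algebra mirrors the test above:

    import numpy as np
    from mprod import generate_dct
    from mprod.decompositions import svdm

    m, p, n = 6, 4, 5
    rng = np.random.default_rng(0)
    a = rng.random((m, p, n))
    fun_m, inv_m = generate_dct(n)

    u, s, v = svdm(a, fun_m, inv_m)  # u: (m,k,n), s: (k,n), v: (p,k,n), k = min(m,p)

    # rebuild `a` from the factors face-wise in the transform (hat) domain
    s_hat = fun_m(s)                                              # (k, n)
    us = fun_m(u).transpose(2, 0, 1) * s_hat.T.reshape(n, 1, -1)  # scale columns of u
    a2 = inv_m(np.matmul(us, fun_m(v).transpose(2, 1, 0)).transpose(1, 2, 0))
    np.testing.assert_almost_equal(a, a2)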
/mprod/decompositions/_qr.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import Tuple, Dict
3 |
4 | from mprod._base import NumpynDArray, MatrixTensorProduct
5 |
6 |
7 | def tqrm(tens_a: np.ndarray, fun_m: MatrixTensorProduct, inv_m: MatrixTensorProduct, hats: bool = False) \
8 | -> Tuple[NumpynDArray, NumpynDArray]:
9 | """
10 | The ``tqrm`` function implements the tensor-QR decomposition:
11 | ``Q,R = tqrm(tensor_a, m, inv_m)`` where ``Q`` is an M-orthogonal tensor of shape ``(m,k,n)`` and ``R`` is an
12 | f-upper triangular tensor of shape ``(k,p,n)``, with ``k = min(m,p)``
13 |
14 | Parameters
15 | ----------
16 | tens_a: np.ndarray
17 | Tensor of shape ``(m,p,n)``
18 | fun_m: MatrixTensorProduct
19 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers
20 | inv_m: MatrixTensorProduct
21 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers. This operation is the inverse of ``fun_m``
22 | hats: bool
23 | Setting this to ``True`` will cause the function to return the tqrm factors in the transform (hat) domain.
24 |
25 | Returns
26 | -------
27 | tens_q: np.ndarray
28 | M-orthogonal tensor of shape ``(m,k,n)``
29 | tens_r: np.ndarray
30 | f-upper triangular tensor of shape ``(k,p,n)``
31 |
32 | """
33 |
34 | m, p, n = tens_a.shape
35 | a_hat = fun_m(tens_a)
36 |
37 | q_hat = np.zeros((m, m, n))
38 | r_hat = np.zeros((m, p, n))
39 | k = 0
40 |
41 | for i in range(n):
42 | qq, rr = np.linalg.qr(a_hat[:, :, i])
43 |
44 | qs1, qs2 = qq.shape
45 | rs1, rs2 = rr.shape
46 |
47 | q_hat[:qs1, :qs2, i] = np.copy(qq)
48 | r_hat[:rs1, :rs2, i] = np.copy(rr)
49 |
50 | k = max(k, max(qs2, rs1))
51 |
52 | # truncate sizes
53 | q_hat = q_hat[:, :k, :]
54 | r_hat = r_hat[:k, :, :]
55 |
56 | if hats:
57 | return q_hat, r_hat
58 |
59 | tens_q = inv_m(q_hat)
60 | tens_r = inv_m(r_hat)
61 |
62 | return tens_q, tens_r
63 |
--------------------------------------------------------------------------------
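A minimal usage sketch for ``tqrm``, mirroring the round-trip check in
/mprod/decompositions/tests/test_decompositions.py; shapes are illustrative:

    import numpy as np
    from mprod import m_prod, generate_dct
    from mprod.decompositions import tqrm

    m, p, n = 5, 3, 4
    rng = np.random.default_rng(0)
    a = rng.random((m, p, n))
    fun_m, inv_m = generate_dct(n)

    q, r = tqrm(a, fun_m, inv_m)           # q: (m,k,n), r: (k,p,n), k = min(m,p)
    a2 = m_prod(q, r, fun_m, inv_m)        # Q *_M R reconstructs a
    np.testing.assert_almost_equal(a, a2)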
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | with open('README.md') as readme_file:
4 | readme = readme_file.read()
5 |
6 | configuration = {
7 | "author": 'Uria Mor, Rafael Valdes Mas, Yotam Cohen, Haim Avron',
8 | "author_email": "uriamo@gmail.com",
9 | "description": "Software implementation for tensor-tensor m-product framework",
10 | "long_description_content_type": 'text/markdown',
11 | "license": "BSD",
12 | "classifiers": [ # Optional
13 | 'Development Status :: 3 - Alpha',
14 | 'Intended Audience :: Science/Research',
15 | 'Programming Language :: Python :: 3.6',
16 | 'Programming Language :: Python :: 3.7',
17 | 'Programming Language :: Python :: 3.8',
18 | 'Programming Language :: Python :: 3.9',
19 | "Programming Language :: Python :: 3.10",
20 | 'Programming Language :: Python :: 3 :: Only',
21 | ],
22 | "keywords": ["Tensor", "multi way"
23 | , "omics", "longitudinal"
24 | , "factorization", "analysis"
25 | , "TCA", "TCAM", "PCA", "M product"
26 | , "tensor tensor product"
27 | , "tSVD", "tSVDM", "tensor decomposition"],
28 | "name": 'mprod-package',
29 | "version": '0.0.5a1',
30 | "packages": find_packages(),
31 | "long_description": readme,
32 | "url": "https://github.com/UriaMorP/mprod_package",
33 | "python_requires": '>=3.6.8',
34 | "install_requires": [
35 | "numpy >= 1.19.2",
36 | "scikit-learn >= 0.24.1",
37 | "scipy >= 1.5.3",
38 | "dataclasses >= 0.7; python_version < '3.7'",
39 | "pandas >= 1.1.5"
40 | ],
41 | "extras_require": {
42 | "dev": ["pytest==6.2.2", ],
43 | "docs": [
44 | "sphinx-gallery == 0.9.0",
45 | "numpydoc == 1.1.0",
46 | "sphinxcontrib-bibtex == 2.3.0",
47 | "sphinx-prompt == 1.4.0",
48 | "nbsphinx == 0.8.6",
49 | "ipykernel == 5.4.3",
50 | "seaborn == 0.11.1",
51 | "jupyter == 1.0.0",
52 | "myst-parser == 0.15.2",
53 | "m2r2 == 0.3.1",
54 | "livereload == 2.6.3",
55 | "pandoc == 2.0.1",
56 | ]
57 | }
58 | }
59 |
60 | setup(**configuration)
61 |
--------------------------------------------------------------------------------
/mprod/tests/_utils.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from mprod import m_prod, x_m3, tensor_mtranspose, generate_dct
5 | from numpy.testing import (
6 | assert_, assert_equal, assert_raises, assert_array_equal,
7 | assert_almost_equal, assert_allclose, suppress_warnings,
8 | assert_raises_regex, HAS_LAPACK64,
9 | )
10 |
11 |
12 | m, p, n = 100, 300, 10
13 |
14 |
15 | def _make_mprod_op_cases():
16 | mprod_cases = []
17 | for i in range(2):
18 | rng = np.random.default_rng(seed=i)
19 | mat = rng.random((n, n))
20 | mat_m = np.linalg.qr(mat)[0]
21 | mfun = x_m3(mat_m)
22 | minv = x_m3(mat_m.T)
23 | mprod_cases.append((mfun,minv))
24 |
25 | # add dct based transforms
26 | mfun, minv = generate_dct(n)
27 | mprod_cases.append((mfun,minv))
28 | return mprod_cases
29 |
30 |
31 | def _make_tensor_cases():
32 | tensor_cases = []
33 |
34 | for i in range(2):
35 | rng = np.random.default_rng(seed=i)
36 | X = rng.random((m, p, n))
37 | tensor_cases.append(X)
38 | return tensor_cases
39 |
40 |
41 | def gen_m_transpose(mpair):
42 | mfun, minv = mpair
43 |
44 | def _do(a):
45 | return tensor_mtranspose(a, mfun, minv)
46 |
47 | return _do
48 |
49 |
50 | def gen_m_product(mpair):
51 | mfun, minv = mpair
52 |
53 | def _do(a, b):
54 | return m_prod(a, b, mfun, minv)
55 |
56 | return _do
57 |
58 |
59 | def assert_identity(J, tensor, mproduct):
60 | tensor2 = mproduct(J, tensor)
61 | assert_almost_equal(tensor, tensor2)
62 |
63 |
64 | def assert_m_orth(tensor, mfun, minv):
65 | m, p, n = tensor.shape
66 |
67 | _t = gen_m_transpose((mfun, minv))
68 | _m = gen_m_product((mfun, minv))
69 |
70 | if m <= p:
71 | J = _m(tensor, _t(tensor))
72 | else:
73 | J = _m(_t(tensor), tensor)
74 |
75 | TENSOR_CASES = []
76 | for mode2_size in range(1, 10, 100):
77 | for i in range(2):
78 | rng = np.random.default_rng(seed=i + int(np.log10(mode2_size)))
79 | TENSOR_CASES.append(rng.random((J.shape[1], mode2_size, n)))
80 |
81 | # pytest does not collect locally defined parametrized functions, so the
82 | # identity check runs directly for each generated tensor
83 | for tens in TENSOR_CASES:
84 | assert_identity(J, tens, _m)
85 |
--------------------------------------------------------------------------------
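A sketch of the M-orthogonality property that ``assert_m_orth`` checks: for an
M-orthogonal tensor ``q`` (taken here from ``tqrm``), the m-product of ``q``'s
m-transpose with ``q`` acts as an identity under the m-product; shapes are
illustrative:

    import numpy as np
    from mprod import m_prod, tensor_mtranspose, generate_dct
    from mprod.decompositions import tqrm

    m, p, n = 5, 3, 4
    rng = np.random.default_rng(0)
    fun_m, inv_m = generate_dct(n)
    q, _ = tqrm(rng.random((m, p, n)), fun_m, inv_m)

    j = m_prod(tensor_mtranspose(q, fun_m, inv_m), q, fun_m, inv_m)  # (k, k, n)
    t = rng.random((j.shape[1], 2, n))
    np.testing.assert_almost_equal(t, m_prod(j, t, fun_m, inv_m))    # j acts as identity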
/azure-pipelines.yml:
--------------------------------------------------------------------------------
1 | # Python package
2 | # Create and test a Python package on multiple Python versions.
3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python
5 |
6 | trigger:
7 | - main
8 |
9 | #jobs:
10 | # - job: Linux
11 | pool:
12 | vmImage: 'ubuntu-latest'
13 | strategy:
14 | matrix:
15 | Python36:
16 | python.version: '3.6'
17 | Python37:
18 | python.version: '3.7'
19 | Python38:
20 | python.version: '3.8'
21 | Python39:
22 | python.version: '3.9'
23 | Python310:
24 | python.version: '3.10'
25 | steps:
26 | - task: UsePythonVersion@0
27 | inputs:
28 | versionSpec: '$(python.version)'
29 | displayName: 'Use Python $(python.version)'
30 |
31 | - script: |
32 | python -m pip install --upgrade pip
33 | pip install -r requirements.txt
34 | displayName: 'Install dependencies'
35 |
36 | - script: |
37 | pip install -e .
38 | displayName: 'Install package'
39 |
40 | - script: |
41 | pip install pytest pytest-azurepipelines
42 | pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml
43 | displayName: 'pytest'
44 |
45 | - task: PublishTestResults@2
46 | inputs:
47 | testResultsFiles: 'pytest.xml'
48 | testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
49 | condition: succeededOrFailed()
50 |
51 |
52 | # - job: MacOS
53 | # pool:
54 | # vmImage: 'macOS-latest'
55 | # strategy:
56 | # matrix:
57 | # Python36:
58 | # python.version: '3.6'
59 | # Python37:
60 | # python.version: '3.7'
61 | # Python38:
62 | # python.version: '3.8'
63 | # Python39:
64 | # python.version: '3.9'
65 | # Python310:
66 | # python.version: '3.10'
67 | # steps:
68 | # - task: UsePythonVersion@0
69 | # inputs:
70 | # versionSpec: '$(python.version)'
71 | # displayName: 'Use Python $(python.version)'
72 |
73 | # - script: |
74 | # python -m pip install --upgrade pip
75 | # pip install -r requirements.txt
76 | # displayName: 'Install dependencies'
77 | # - script: |
78 | # pip install -e .
79 | # displayName: 'Install package'
80 | # - script: |
81 | # pip install pytest pytest-azurepipelines
82 | # pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml
83 | # displayName: 'pytest'
84 | # - task: PublishTestResults@2
85 | # inputs:
86 | # testResultsFiles: 'pytest.xml'
87 | # testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
88 | # condition: succeededOrFailed()
89 |
--------------------------------------------------------------------------------
/docs/examples/examples.rst:
--------------------------------------------------------------------------------
1 | .. _tutorials:
2 |
3 | =========
4 | Tutorials
5 | =========
6 |
7 | .. rubric:: Scope and intention
8 |
9 | This page presents a collection of tutorials written by the authors of the mprod
10 | package, intended to help newcomers incorporate the machinery offered by the
11 | library into their analysis workflows.
12 |
13 | The main (and only) data-scientific tool currently implemented is the TCAM
14 | dimensionality reduction algorithm :footcite:p:`mor2021`. We intend to keep expanding the
15 | package content by adding :math:`\star_{\mathbf{M}}`-product based tools
16 | (such as tensor-PLS, tensor-CCA), and we encourage any form of collaboration,
17 | hoping to get good responses, feedback and help from the data-science community.
18 |
19 | .. rubric:: Target audience
20 |
21 | We do not expect expertise in machine learning or data science in order to use this package.
22 | In fact, it is aimed at non-experts.
23 |
24 | That said, the library is not - by any means - meant to serve as a **black magic tensor package for dummies**.
25 | Just like with almost everything in machine learning, using this library for ML related tasks requires **some**
26 | general mathematical understanding of ML concepts.
27 | The implementation of dimensionality reduction methods (currently TCAM) is made consistent with the
28 | `scikit-learn <https://scikit-learn.org/stable/>`_ library to the maximum possible extent, in order to enable smooth
29 | integration within the pythonic ML ecosystem.
30 | For this reason, users are assumed to know the `scikit-learn <https://scikit-learn.org/stable/>`_ library.
31 | The scikit-learn package offers fantastic documentation, tutorials and examples that are more than enough to get
32 | started with machine learning in no time.
33 |
34 | .. note::
35 |
36 | We acknowledge that many potential users might find R more familiar.
37 | However, we urge them to take the time and try the alternative.
38 |
39 | In addition, a deep understanding of the mathematical theory underlying mprod based tensor algorithms is always a
40 | good idea. Below, you can find a short :ref:`Primer` section about the idea behind tensor-tensor algebra via the
41 | :math:`\star_{\bf{M}}` -product framework (for a thorough introduction, we refer the interested readers to
42 | :footcite:p:`Kilmer`).
43 |
44 | The :ref:`TCAM` section contains tutorials for working with :class:`mprod.dimensionality_reduction.TCAM`.
45 | For the construction and a showcase of TCAM, refer to :footcite:p:`mor2021`.
46 |
47 |
48 | --------------------------------
49 |
50 |
51 | .. _TCAM:
52 |
53 | ----
54 | TCAM
55 | ----
56 | .. toctree::
57 | :maxdepth: 8
58 |
59 | basic_example
60 | supervised_learning
61 |
62 | .. Schirmer2018
63 |
64 |
65 |
66 |
67 | .. _Primer:
68 |
69 | ------------
70 | ⚙ Background
71 | ------------
72 | .. toctree::
73 | :maxdepth: 4
74 |
75 | mprod_primer
76 |
77 |
78 | .. footbibliography::
--------------------------------------------------------------------------------
/mprod/dimensionality_reduction/tests/test_TCAM.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from itertools import product
5 |
6 | from mprod.dimensionality_reduction import TCAM
7 | from mprod import MeanDeviationForm
8 | from mprod.tests._utils import (_make_mprod_op_cases, _make_tensor_cases, gen_m_product, gen_m_transpose, assert_m_orth,
9 | m, n, p)
10 |
11 | from numpy.testing import (
12 | assert_, assert_equal, assert_raises, assert_array_equal,
13 | assert_almost_equal, assert_allclose, suppress_warnings,
14 | assert_raises_regex, HAS_LAPACK64,
15 | )
16 |
17 | M_FUN_CASES = _make_mprod_op_cases()[:1]
18 | TENSOR_CASES = _make_tensor_cases()[:1]
19 |
20 | @pytest.mark.parametrize('X', TENSOR_CASES)
21 | @pytest.mark.parametrize('n_components', np.linspace(1, min(m, p) * n - 1, 3, dtype=int))
22 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None])
23 | def test_tcam_fit_transform(X, n_components, mpair):
24 | print(min(m, p) * n - 1)
25 | if mpair is None:
26 | tca = TCAM(n_components=n_components)
27 | else:
28 | mfun, minv = mpair
29 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components)
30 |
31 | X_r = tca.fit(X).transform(X)
32 | assert X_r.shape[1] == n_components
33 |
34 | # check the equivalence of fit.transform and fit_transform
35 | X_r2 = tca.fit_transform(X)
36 | assert_allclose(X_r, X_r2)
37 | # X_r = tca.transform(X)
38 | assert_allclose(X_r, X_r2)
39 |
40 |
41 | @pytest.mark.parametrize('X', TENSOR_CASES)
42 | @pytest.mark.parametrize('n_components', np.linspace(.1, 1., 3, dtype=float))
43 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None])
44 | def test_tcam_reconstruction_err(X, n_components, mpair):
45 | print(min(m, p) * n - 1)
46 | if mpair is None:
47 | tca = TCAM(n_components=n_components)
48 |
49 | else:
50 | mfun, minv = mpair
51 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components)
52 | # check the shape of fit.transform
53 | Y = tca.fit(X).transform(X)
54 | X2 = tca.inverse_transform(Y)
55 |
56 | assert np.round(1 - ((X2 - X) ** 2).sum() / (X ** 2).sum(), 20) >= n_components
57 |
58 |
59 | @pytest.mark.parametrize('X', TENSOR_CASES)
60 | @pytest.mark.parametrize('n_components', range(1, min(m, p) * n - 1, 200))
61 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None])
62 | def test_tcam_residue_m_orth(X, n_components, mpair):
63 | print(min(m, p) * n - 1)
64 | if mpair is None:
65 | tca = TCAM(n_components=n_components)
66 |
67 | else:
68 | mfun, minv = mpair
69 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components)
70 |
71 | Y = tca.fit(X).transform(X)
72 | X2 = tca.inverse_transform(Y)
73 | _t = gen_m_transpose((tca.fun_m, tca.inv_m))
74 |
75 | res_prod_norm = (tca._mprod(_t(X - X2), X2) ** 2).sum()
76 | assert_almost_equal(res_prod_norm, 0, err_msg=f"got {res_prod_norm} instead of 0", verbose=True, )
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | import sphinx_gallery
16 | sys.path.insert(0, os.path.abspath('.'))
17 | sys.path.insert(0, os.path.abspath('../'))
18 | import sphinx_rtd_theme
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = 'mprod'
23 | copyright = '2021, Elinav&Avron groups'
24 | author = 'Uria Mor'
25 |
26 | # -- General configuration ---------------------------------------------------
27 |
28 | # Add any Sphinx extension module names here, as strings. They can be
29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
30 | # ones.
31 | # extensions = [
32 | # 'sphinx.ext.autodoc',
33 | # 'sphinx.ext.viewcode',
34 | # 'sphinx.ext.todo',
35 | # 'sphinx.ext.autodoc',
36 | # 'sphinx.ext.imgmath',
37 | # 'sphinx.ext.napoleon'
38 | # ]
39 |
40 | extensions = [
41 | "sphinx.ext.autodoc",
42 | "sphinx.ext.autosummary",
43 | "numpydoc",
44 | 'sphinx.ext.viewcode',
45 | # "sphinx.ext.linkcode",
46 | "sphinx.ext.doctest",
47 | "sphinx.ext.intersphinx",
48 | "sphinx.ext.mathjax",
49 | "sphinxcontrib.bibtex",
50 | # 'sphinx.ext.imgmath',
51 | # "sphinx.ext.imgconverter",
52 | # "sphinx_gallery.gen_gallery",
53 | "sphinx-prompt",
54 | 'sphinx.ext.napoleon',
55 | "nbsphinx",
56 | # "myst_parser",
57 | 'm2r2',
58 | ]
59 |
60 | bibtex_bibfiles = ['refs.bib']
61 |
62 | # The suffix(es) of source filenames.
63 | # You can specify multiple suffix as a list of string:
64 | #
65 | # source_suffix = ['.rst', '.md']
66 | source_suffix = [".rst", ".ipynb", ".md"]
67 |
68 | # execute notebook cells when building the docs
69 | nbsphinx_execute = "always"
70 | nbsphinx_kernel_name = 'python3'
71 | # nbsphinx_execute = "never"
72 |
73 |
74 | # allow errors because not all tutorials build
75 | nbsphinx_allow_errors = True
76 |
77 | # napoleon related
78 |
79 | napoleon_google_docstring = False
80 | napoleon_use_param = False
81 | napoleon_use_ivar = True
82 |
83 | # Add any paths that contain templates here, relative to this directory.
84 | templates_path = ['_templates']
85 |
86 | # generate autosummary even if no references
87 | autosummary_generate = False
88 |
89 | # The language for content autogenerated by Sphinx. Refer to documentation
90 | # for a list of supported languages.
91 | #
92 | # This is also used if you do content translation via gettext catalogs.
93 | # Usually you set "language" from the command line for these cases.
94 | language = 'en'
95 |
96 | # List of patterns, relative to source directory, that match files and
97 | # directories to ignore when looking for source files.
98 | # This pattern also affects html_static_path and html_extra_path.
99 | exclude_patterns = ['_build',
100 | 'Thumbs.db',
101 | '.DS_Store',
102 | 'trashed_docs',
103 | '.ipynb_checkpoints',
104 | "examples/.ipynb_checkpoints"]
105 |
106 | autodoc_default_options = {"members": True, "inherited-members": False, "methods": True}
107 |
108 | # -- Options for HTML output -------------------------------------------------
109 |
110 | # The theme to use for HTML and HTML Help pages. See the documentation for
111 | # a list of builtin themes.
112 | #
113 | # html_theme = 'alabaster'
114 | html_theme = 'sphinx_rtd_theme'
115 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
116 | # html_theme_options = {"logo_only": True}
117 | # html_logo = "_static/img/mprod_logo_small.png"
118 | # html_favicon = "_static/img/mprod_logo_fav.png"
119 |
120 | # Add any paths that contain custom static files (such as style sheets) here,
121 | # relative to this directory. They are copied after the builtin static files,
122 | # so a file named "default.css" will overwrite the builtin "default.css".
123 | html_static_path = ['_static']
124 |
125 | # -- Extension configuration -------------------------------------------------
126 |
127 | # -- Options for todo extension ----------------------------------------------
128 |
129 | # If true, `todo` and `todoList` produce output, else they produce nothing.
130 | todo_include_todos = False
131 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. mprod documentation master file, created by
2 | sphinx-quickstart on Sun Aug 1 10:11:11 2021.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | ..
7 | _.. figure:: _static/img/mprod_logo_fav.png
8 |
9 |
10 | ===================================================
11 | :code:`mprod`\: Tensor - Tensor algebraic framework
12 | ===================================================
13 |
14 | mprod is a software implementation of the tensor-tensor algebraic framework derived from the
15 | :math:`\star_{\bf{M}}`-product :footcite:p:`Kilmer`.
16 | The package builds on the NumPy\ :footcite:p:`Harris2020` and SciPy\ :footcite:p:`Virtanen2020` libraries to realize
17 | core operations and components required for the algebraic framework.
18 |
19 |
20 | :mod:`mprod-package` implements the fundamental components required for the :math:`\star_{\mathbf{M}}`-product algebraic
21 | framework: tensor-transpose, tensor-matrix multiplication (domain transforms), face-wise tensor multiplication, and, of
22 | course, the :math:`\star_{\mathbf{M}}` tensor-tensor product (see intro).
23 |
24 | In addition, the library offers several basic tensor factorizations such as :mod:`mprod.decompositions.tsvdm`
25 | :footcite:p:`Kilmer`, and :math:`\star_{\mathbf{M}}`-product based dimensionality reduction methods like
26 | :mod:`mprod.dimensionality_reduction.TCAM` :footcite:p:`mor2021`.
27 |
28 |
29 | .. figure:: _static/img/mprod_tcam_cartoon.png
30 | :alt: TCAM cartoon
31 | :class: with-shadow
32 | :width: 90%
33 | :align: center
34 |
35 | An introductory cartoon for the TCAM :footcite:p:`mor2021` - an :math:`\star_{\mathbf{M}}`-product based
36 | dimensionality reduction method for multi-way data.
37 |
38 | You can find the software `on GitHub <https://github.com/UriaMorP/mprod_package>`_.
39 |
40 |
41 |
42 | ---------------------------------------------------------
43 |
44 |
45 | **Installation**
46 | ================
47 |
48 | Conda install, with the great help of the conda-forge team:
49 |
50 | .. code:: bash
51 |
52 | conda install -c conda-forge mprod-package
53 |
54 | The conda-forge packages are available for Linux, OS X, and Windows 64 bit. Local testing was done only on Linux.
55 |
56 | PyPI install, presuming you have the requirements (numpy, scipy, pandas, scikit-learn) installed:
57 |
58 | .. code:: bash
59 |
60 | pip install mprod-package
61 |
62 | -------------------------------------------------------------
63 |
64 | Scientific context
65 | ------------------
66 |
67 | *We live in a multi-dimensional world, immersed in huge volumes of data. This data often involves complex interlinked
68 | structures that span across multiple dimensions. Processes and phenomena also exhibit multi-dimensional behavior,
69 | requiring their models to operate in high dimensional settings*\ .
70 |
71 | *Typically, we use matrix algebra to manipulate data, in so-called vector embedded spaces. But such representations
72 | usually don’t take into account the underlying integrity of an object’s dimension, either missing out on high-order
73 | links that go beyond pairwise relations or requiring an overhead in encoding such relations. This is where tensor
74 | algebra comes into play, addressing multiple dimensions*\ .
75 |
76 | *But there is a problem. Despite a broad consensus, distilled over centuries of mathematical research, for matrix
77 | algebra, there is no such standard for its multidimensional counterpart, tensor algebra. There have been several
78 | propositions for tensor algebra frameworks over the years* :footcite:p:`Kolda2009`. *Existing techniques that decompose
79 | tensor constructs into simpler tangible entities have limitations and inconsistencies compared to matrix algebra*
80 | :footcite:p:`Hitchcock1927,DeLathauwer2000,Oseledets2011,Tuck1963a`. *These issues have been hindering broad
81 | adoption of tensor algebra into mainstream use*\ .
82 |
83 | **The tensor-tensor** :math:`\star_{\bf{M}}`\ **-product framework aims to change that**\ .
84 |
85 | *The paper* “**Tensor-Tensor Algebra for Optimal Representation and Compression of Multiway Data**”
86 | :footcite:p:`Kilmer` *describes a way to bridge the gap between matrix and tensor algebra, resulting in new algebraic
87 | constructs that natively represent and manipulate high-dimensional entities, while preserving their multi-order
88 | integrity*\ .
89 |
90 | -- \ **Lior Horesh, IBM research** :footcite:p:`LHoresh`
91 |
92 | -------------------------
93 |
94 |
95 | .. toctree::
96 | :caption: Contents
97 |
98 | examples/examples
99 | modules/classes
100 |
101 | -------------------------
102 |
103 | Indices and tables
104 | ==================
105 |
106 | * :ref:`genindex`
107 | * :ref:`modindex`
108 | * :ref:`search`
109 |
110 | ----------------------
111 |
112 | .. footbibliography::
113 |
--------------------------------------------------------------------------------
/mprod/decompositions/_tsvdm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import Tuple, Dict
3 |
4 | from mprod._base import NumpynDArray, MatrixTensorProduct
5 |
6 |
7 | def svdm(tens_a: np.ndarray, fun_m: MatrixTensorProduct, inv_m: MatrixTensorProduct
8 | , hats: bool = False) \
9 | -> Tuple[NumpynDArray, NumpynDArray, NumpynDArray]:
10 | """
11 |     The svdm function is a helper function for computing the tsvdmII.
12 |     This function computes the **THIN** tsvdm:
13 |     ``u, s, v = svdm(tens_a, fun_m, inv_m)`` where ``u, v`` are
14 |     ``(m,k,n)`` and ``(p,k,n)`` M-orthogonal tensors and ``s``
15 |     is an f-diagonal tensor of shape ``(k,k,n)`` with ``k = min(p, m)``
16 |
17 | Parameters
18 | ----------
19 | tens_a: np.ndarray
20 | Tensor of shape ``(m,p,n)``
21 | fun_m: MatrixTensorProduct
22 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers
23 | inv_m: MatrixTensorProduct
24 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers.
25 | This operation is the inverse of ``fun_m``
26 | hats: bool
27 | Setting this to ``True`` will cause the function to return the tsvdm
28 |         factors in the transform domain (the "hat" domain).
29 |
30 | Returns
31 | -------
32 | tens_u: np.ndarray
33 | M-orthogonal tensor of shape ``(m,k,n)``
34 | tens_s: np.ndarray
35 | A ``(k,n)`` matrix representation of the f-diagonal tensor of
36 | shape ``(k,k,n)``
37 | tens_v: np.ndarray
38 | M-orthogonal Tensor of shape ``(p,k,n)``
39 |
40 | """
41 | m, p, n = tens_a.shape
42 | a_hat = fun_m(tens_a)
43 |
44 |     # The code below is an efficient, vectorized equivalent of the following loop:
45 | #
46 | # u_hat = np.zeros((m, m, n))
47 | # s_hat = np.zeros((m, p, n))
48 | # v_hat = np.zeros((p, p, n))
49 | #
50 | # for i in range(n):
51 | # uu, ss, vt = np.linalg.svd(a_hat[:, :, i], full_matrices=False)
52 | #
53 | # us1, us2 = uu.shape
54 | # vs1, vs2 = vt.shape
55 | #
56 | # ssize = ss.size
57 | # s_hat[:ssize, :ssize, i] = np.diag(ss)
58 | # u_hat[:us1, :us2, i] = uu.copy()
59 | # v_hat[:vs2, :vs1, i] = vt.T.copy()
60 |
61 | u_hat, s_hat, v_hat = np.linalg.svd(a_hat.transpose(2, 0, 1), full_matrices=False)
62 | u_hat, s_hat, v_hat = u_hat.transpose(1, 2, 0), s_hat.transpose(), v_hat.transpose(2, 1, 0)
63 |
64 | # sreshape = s_hat.copy().reshape(1, m, n)
65 | # sreshape = sreshape.transpose(1, 0, 2)
66 | # idreshape = np.eye(m, p).reshape(m, p, 1)
67 |
68 | # s_hat = idreshape @ sreshape
69 |
70 | if hats:
71 | return u_hat, s_hat, v_hat
72 |
73 | u = inv_m(u_hat)
74 | v = inv_m(v_hat)
75 | s = inv_m(s_hat)
76 |
77 | return u, s, v
78 |
79 |
80 | def tsvdmii(tens_a: NumpynDArray,
81 | fun_m: MatrixTensorProduct,
82 | inv_m: MatrixTensorProduct,
83 | gamma: float = 1,
84 | n_components: int = None) -> \
85 | Tuple[Dict[int, NumpynDArray], Dict[int, NumpynDArray], Dict[int, NumpynDArray], float, Dict[int, int], int]:
86 | assert not ((gamma is not None) and (
87 | n_components is not None)), "Arguments gamma and n_components are mutually exclusive"
88 | assert (gamma is not None) or (
89 |         n_components is not None), "Exactly one of arguments gamma, n_components must be defined"
90 |
91 | m, p, n = tens_a.shape
92 |
93 | # execute full decomposition
94 | u_hat, s_hat, v_hat = svdm(tens_a, fun_m, inv_m, hats=True)
95 |
96 | # compute variation in the decomposition
97 | # var is the sorted (hat) squared singular values
98 |     # cumm_var is the cumulative sum of `var` (the scree curve)
99 | # w_idx is an array of indices for `cumm_var` and `var`
100 | # total_var is the (float) sum of squared singular values `var`
101 | var = np.concatenate([np.diagonal(s_hat[:, :, i]) for i in range(n)]) ** 2
102 | var = np.sort(var.reshape(-1))[::-1]
103 | cumm_var = var.cumsum(axis=0)
104 | w_idx = np.arange(0, cumm_var.size, dtype=int)
105 | total_variance = var.sum()
106 |
107 |     # Find the truncation index according to gamma (explained-variance threshold) or n_components
108 | if gamma is not None:
109 | reduced_ind = w_idx[(cumm_var / total_variance) > gamma]
110 | if reduced_ind.size == 0:
111 | j = 0
112 | else:
113 | j = reduced_ind.min()
114 | else:
115 | j = n_components
116 |
117 | tau = np.sqrt(var[j - 1])
118 | rho = {}
119 |
120 | u_hat_rho_dict = {}
121 | s_hat_rho_dict = {}
122 | v_hat_rho_dict = {}
123 |
124 | max_rho = 0
125 | r = 0
126 | for i in range(n):
127 | diag_shat_i = np.diagonal(s_hat[:, :, i])
128 | tau_mask = (diag_shat_i >= tau)
129 | rho_i = tau_mask.sum()
130 | if rho_i > 0:
131 | u_hat_rho_dict[i] = u_hat[:, :rho_i, i].copy()
132 | s_hat_rho_dict[i] = s_hat[:rho_i, :rho_i, i].copy()
133 | v_hat_rho_dict[i] = v_hat[:, :rho_i, i].copy()
134 | rho[i] = rho_i
135 |
136 | if rho_i > max_rho:
137 | max_rho = rho_i
138 | r += rho_i
139 |
140 | if n_components is not None:
141 | assert r == n_components, f"expected multirank {n_components} got {r}"
142 |
143 | return u_hat_rho_dict, s_hat_rho_dict, v_hat_rho_dict, total_variance, rho, r
144 |
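145 | 
146 | if __name__ == "__main__":
147 |     # Minimal usage sketch (not part of the library API): run the thin tsvdm
148 |     # with a DCT-based transform and check the facewise reconstruction in the
149 |     # transform domain. `generate_dct` comes from mprod._base.
150 |     from mprod._base import generate_dct
151 | 
152 |     rng = np.random.default_rng(0)
153 |     tens = rng.standard_normal((10, 5, 8))
154 |     fun_m, inv_m = generate_dct(8)
155 | 
156 |     u_hat, s_hat, v_hat = svdm(tens, fun_m, inv_m, hats=True)
157 |     a_hat = fun_m(tens)
158 |     for i in range(8):
159 |         # every frontal slice satisfies a_hat[:,:,i] = U_i @ diag(s_i) @ V_i.T
160 |         rec = u_hat[:, :, i] @ np.diag(s_hat[:, i]) @ v_hat[:, :, i].T
161 |         assert np.allclose(a_hat[:, :, i], rec)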
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mprod_package
2 |
3 | [](https://github.com/UriaMorP/mprod_package/actions/workflows/build.yaml)
4 | 
5 | [](https://mprod-package.readthedocs.io/en/latest/?badge=latest)
6 | 
7 | [](https://anaconda.org/conda-forge/mprod-package)
8 | [](
9 | https://pypi.org/project/mprod-package/)
10 |
11 |
12 | Software implementation of the tensor-tensor m-product framework [[1]](#1).
13 | The library currently contains tubal QR and tSVDM decompositions, and the TCAM method for dimensionality reduction.
14 |
15 |
16 |
17 |
18 |
19 |
20 | ## Installation
21 |
22 | ### Conda
23 | The `mprod-package` is hosted on the [conda-forge](https://conda-forge.org/) channel.
24 |
25 | ```
26 | conda install -c conda-forge mprod-package
27 | ```
28 |
29 | ### pip
30 | ```
31 | pip install mprod-package
32 | ```
33 | See `mprod-package`'s [PyPI entry](https://pypi.org/project/mprod-package/).
34 |
35 | ### From source
36 |
37 | * Make sure that all dependencies listed in the `requirements.txt` file are installed.
38 | * Clone the repository, then from the package directory, run
39 | ```
40 | pip install -e .
41 | ```
42 |
43 | The dependencies in `requirements.txt` are pinned to the exact versions used when testing `mprod-package` locally; these packages were obtained from the conda-forge channel.
44 |
45 | The snippet below loads the example dataset used throughout this README:
46 | 
47 | ```python
48 | import pandas as pd
49 | 
50 | file_path = "https://raw.githubusercontent.com/UriaMorP/" \
51 |             "tcam_analysis_notebooks/main/Schirmer2018/Schirmer2018.tsv"
52 | data_table = pd.read_csv(file_path, index_col=[0, 1], sep="\t",
53 |                          dtype={'Week': int})
54 | data_table = data_table.loc[:, data_table.median() > 1e-7]
55 | data_table.rename(columns={k: f"Feature_{e+1}" for e, k in enumerate(data_table.columns)},
56 |                   inplace=True)
57 | data_table.shape
58 | ```
59 |
60 | ## How to use `TCAM`
61 |
62 | The input is a `pandas.DataFrame` like the one below, with a 2-level index: the first level is the subject identifier (mouse, human, image) and the second level denotes the sample repetition identity - in this case, the week during the experiment in which the sample was collected.
63 |
64 |
65 | ```python
66 | display(data_table.iloc[:2,:2].round(3))
67 |
68 | ```
69 |
70 | | SubjectID | Week | Feature_1 | Feature_2 |
71 | |-----------|------|-----------|-----------|
72 | | P_10343   | 0    | 0.001     | 0.023     |
73 | |           | 4    | 0.020     | 0.000     |
102 | ### Shape the data into tensor
103 |
104 | We use the `table2tensor` helper function to transform a 2-level (multi)-indexed `pandas.DataFrame` into a 3rd order tensor.
105 |
106 |
107 | ```python
108 | from mprod import table2tensor
109 | data_tensor, map1, map3 = table2tensor(data_table)
110 | ```
111 |
112 | To inspect the `table2tensor` operation, we use the resulting *"mode mappings"* `map1` and `map3`, which associate each line in the input table with its coordinates in the resulting tensor.
113 | In the following example, we use the mappings to extract the tensor coordinates corresponding to subject P_7218's sample from week 52:
114 |
115 |
116 | ```python
117 | (data_tensor[map1['P_7218'],:, map3[52]] == data_table.loc[('P_7218',52)].values).all() # True
118 | ```
119 |
120 | ### Applying `TCAM`
121 |
122 | ```python
123 | from mprod.dimensionality_reduction import TCAM
124 |
125 | tca = TCAM()
126 | tca_trans = tca.fit_transform(data_tensor)
127 | ```
128 |
129 | And that's all there is to it... Really!
130 |
131 | Note how similar the code above is to what we would have written had we applied scikit-learn's `PCA` to the initial tabular data:
132 |
133 |
134 | ```python
135 | from sklearn.decomposition import PCA
136 |
137 | pca = PCA()
138 | pca_trans = pca.fit_transform(data_table)
139 | ```
140 |
141 | The similarity between `TCAM`'s interface and that of scikit-learn's `PCA` is not coincidental.
142 | We did our best to make `TCAM` as familiar as possible, and to allow high compatibility of `TCAM` with the existing Python ML ecosystem.
143 |
144 | ### Accessing properties of the transformation
145 |
146 |
147 | ```python
148 | tca_loadings = tca.mode2_loadings # Obtain TCAM loadings
149 | pca_loadings = pca.components_ # Obtain PCA loadings
150 | 
151 | tca_var = tca.explained_variance_ratio_*100 # % explained variation per TCAM factor
152 | pca_var = pca.explained_variance_ratio_*100 # % explained variation per principal component
153 | 
154 | tca_df = pd.DataFrame(tca_trans) # Cast TCAM scores to dataframe
155 | tca_df.rename(index = dict(map(reversed, map1.items()))
156 |               , inplace = True) # use the inverse of map1 to annotate each row
157 |                                 # of the TCAM scores with its subject ID
158 | 
159 | pca_df = pd.DataFrame(pca_trans) # Cast PCA scores to dataframe
160 | pca_df.index = data_table.index # annotate PC scores with sample names
161 | ```
162 |
163 |
164 |
165 |
166 |
167 | ## References
168 | [1]
169 | Misha E. Kilmer, Lior Horesh, Haim Avron, and Elizabeth Newman. Tensor-tensor algebra for optimal representation and compression of multiway data. Proceedings of the National Academy of Sciences, 118(28):e2015851118, jul 2021.
170 |
--------------------------------------------------------------------------------
/mprod/_pytester.py:
--------------------------------------------------------------------------------
1 | """
2 | Pytest test running.
3 |
4 | This module implements the ``test()`` function for modules. The usual
5 | boilerplate for doing that is to put the following in the module
6 | ``__init__.py`` file::
7 |
8 | from mprod._pytesttester import PytestTester
9 | test = PytestTester(__name__)
10 | del PytestTester
11 |
12 |
13 | Warnings filtering and other runtime settings should be dealt with in the
14 | ``pytest.ini`` file in the repo root. The behavior of the test depends on
15 | whether or not that file is found as follows:
16 |
17 | * ``pytest.ini`` is present (develop mode)
18 | All warnings except those explicitly filtered out are raised as error.
19 | * ``pytest.ini`` is absent (release mode)
20 | DeprecationWarnings and PendingDeprecationWarnings are ignored, other
21 | warnings are passed through.
22 |
23 | In practice, tests run from the repo are run in develop mode. That
24 | includes the standard ``python runtests.py`` invocation.
25 |
26 | This module is imported by every mprod subpackage, so lies at the top level to
27 | simplify circular import issues. For the same reason, it contains no numpy
28 | imports at module scope, instead importing numpy within function calls.
29 | """
30 | import sys
31 | import os
32 |
33 | __all__ = ['PytestTester']
34 |
35 | # def _show_numpy_info():
36 | # import numpy as np
37 | #
38 | # print("NumPy version %s" % np.__version__)
39 | # relaxed_strides = np.ones((10, 1), order="C").flags.f_contiguous
40 | # print("NumPy relaxed strides checking option:", relaxed_strides)
41 | # info = np.lib.utils._opt_info()
42 | # print("NumPy CPU features: ", (info if info else 'nothing enabled'))
43 |
44 |
45 | class PytestTester:
46 | """
47 | Pytest test runner.
48 |
49 | A test function is typically added to a package's __init__.py like so::
50 |
51 | from numpy._pytesttester import PytestTester
52 | test = PytestTester(__name__).test
53 | del PytestTester
54 |
55 | Calling this test function finds and runs all tests associated with the
56 | module and all its sub-modules.
57 |
58 | Attributes
59 | ----------
60 | module_name : str
61 | Full path to the package to test.
62 |
63 | Parameters
64 | ----------
65 | module_name : module name
66 | The name of the module to test.
67 |
68 | Notes
69 | -----
70 | Unlike the previous ``nose``-based implementation, this class is not
71 | publicly exposed as it performs some ``numpy``-specific warning
72 | suppression.
73 |
74 | """
75 |
76 | def __init__(self, module_name):
77 | self.module_name = module_name
78 |
79 | def __call__(self, label='fast', verbose=1, extra_argv=None,
80 | doctests=False, coverage=False, durations=-1, tests=None):
81 | """
82 | Run tests for module using pytest.
83 |
84 | Parameters
85 | ----------
86 | label : {'fast', 'full'}, optional
87 | Identifies the tests to run. When set to 'fast', tests decorated
88 | with `pytest.mark.slow` are skipped, when 'full', the slow marker
89 | is ignored.
90 | verbose : int, optional
91 | Verbosity value for test outputs, in the range 1-3. Default is 1.
92 | extra_argv : list, optional
93 | List with any extra arguments to pass to pytests.
94 | doctests : bool, optional
95 | .. note:: Not supported
96 | coverage : bool, optional
97 | If True, report coverage of NumPy code. Default is False.
98 | Requires installation of (pip) pytest-cov.
99 | durations : int, optional
100 | If < 0, do nothing, If 0, report time of all tests, if > 0,
101 | report the time of the slowest `timer` tests. Default is -1.
102 | tests : test or list of tests
103 | Tests to be executed with pytest '--pyargs'
104 |
105 | Returns
106 | -------
107 | result : bool
108 |             Return True on success, False otherwise.
109 |
110 | Notes
111 | -----
112 | Each module exposes `test` in its namespace to run all tests for
113 |         it. For example, to run all tests for ``mprod.lib``::
114 | 
115 |             mprod.lib.test()
116 |
117 | Examples
118 | --------
119 | >>> result = mprod.lib.test() #doctest: +SKIP
120 | ...
121 | 1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds
122 | >>> result
123 | True
124 |
125 | """
126 | import pytest
127 | import warnings
128 |
129 | module = sys.modules[self.module_name]
130 | module_path = os.path.abspath(module.__path__[0])
131 |
132 | # setup the pytest arguments
133 | pytest_args = ["-l"]
134 |
135 | # offset verbosity. The "-q" cancels a "-v".
136 | pytest_args += ["-q"]
137 |
138 | # Filter out distutils cpu warnings (could be localized to
139 | # distutils tests). ASV has problems with top level import,
140 | # so fetch module for suppression here.
141 | with warnings.catch_warnings():
142 | warnings.simplefilter("always")
143 | from numpy.distutils import cpuinfo
144 |
145 | # Filter out annoying import messages. Want these in both develop and
146 | # release mode.
147 | pytest_args += [
148 | "-W ignore:Not importing directory",
149 | "-W ignore:numpy.dtype size changed",
150 | "-W ignore:numpy.ufunc size changed",
151 | "-W ignore::UserWarning:cpuinfo",
152 | ]
153 |
154 | # When testing matrices, ignore their PendingDeprecationWarnings
155 | pytest_args += [
156 | "-W ignore:the matrix subclass is not",
157 | "-W ignore:Importing from numpy.matlib is",
158 | ]
159 |
160 | if doctests:
161 | raise ValueError("Doctests not supported")
162 |
163 | if extra_argv:
164 | pytest_args += list(extra_argv)
165 |
166 | if verbose > 1:
167 | pytest_args += ["-" + "v" * (verbose - 1)]
168 |
169 | if coverage:
170 | pytest_args += ["--cov=" + module_path]
171 |
172 | if label == "fast":
173 | # not importing at the top level to avoid circular import of module
174 | from numpy.testing import IS_PYPY
175 | if IS_PYPY:
176 | pytest_args += ["-m", "not slow and not slow_pypy"]
177 | else:
178 | pytest_args += ["-m", "not slow"]
179 |
180 | elif label != "full":
181 | pytest_args += ["-m", label]
182 |
183 | if durations >= 0:
184 | pytest_args += ["--durations=%s" % durations]
185 |
186 | if tests is None:
187 | tests = [self.module_name]
188 |
189 | pytest_args += ["--pyargs"] + list(tests)
190 |
191 | # # run tests.
192 | # _show_numpy_info()
193 |
194 | try:
195 | code = pytest.main(pytest_args)
196 | except SystemExit as exc:
197 | code = exc.code
198 |
199 | return code == 0
200 |
--------------------------------------------------------------------------------
/mprod/_pytesttester.py:
--------------------------------------------------------------------------------
1 | """
2 | Pytest test running.
3 |
4 | This module implements the ``test()`` function for modules. The usual
5 | boilerplate for doing that is to put the following in the module
6 | ``__init__.py`` file::
7 |
8 | from mprod._pytesttester import PytestTester
9 | test = PytestTester(__name__)
10 | del PytestTester
11 |
12 |
13 | Warnings filtering and other runtime settings should be dealt with in the
14 | ``pytest.ini`` file in the repo root. The behavior of the test depends on
15 | whether or not that file is found as follows:
16 |
17 | * ``pytest.ini`` is present (develop mode)
18 | All warnings except those explicitly filtered out are raised as error.
19 | * ``pytest.ini`` is absent (release mode)
20 | DeprecationWarnings and PendingDeprecationWarnings are ignored, other
21 | warnings are passed through.
22 |
23 | In practice, tests run from the repo are run in develop mode. That
24 | includes the standard ``python runtests.py`` invocation.
25 |
26 | This module is imported by every mprod subpackage, so lies at the top level to
27 | simplify circular import issues. For the same reason, it contains no numpy
28 | imports at module scope, instead importing numpy within function calls.
29 | """
30 | import sys
31 | import os
32 |
33 | __all__ = ['PytestTester']
34 |
35 |
36 | # def _show_numpy_info():
37 | # import numpy as np
38 | #
39 | # print("NumPy version %s" % np.__version__)
40 | # relaxed_strides = np.ones((10, 1), order="C").flags.f_contiguous
41 | # print("NumPy relaxed strides checking option:", relaxed_strides)
42 | # info = np.lib.utils._opt_info()
43 | # print("NumPy CPU features: ", (info if info else 'nothing enabled'))
44 |
45 |
46 | class PytestTester:
47 | """
48 | Pytest test runner.
49 |
50 | A test function is typically added to a package's __init__.py like so::
51 |
52 | from numpy._pytesttester import PytestTester
53 | test = PytestTester(__name__).test
54 | del PytestTester
55 |
56 | Calling this test function finds and runs all tests associated with the
57 | module and all its sub-modules.
58 |
59 | Attributes
60 | ----------
61 | module_name : str
62 | Full path to the package to test.
63 |
64 | Parameters
65 | ----------
66 | module_name : module name
67 | The name of the module to test.
68 |
69 | Notes
70 | -----
71 | Unlike the previous ``nose``-based implementation, this class is not
72 | publicly exposed as it performs some ``numpy``-specific warning
73 | suppression.
74 |
75 | """
76 |
77 | def __init__(self, module_name):
78 | self.module_name = module_name
79 |
80 | def __call__(self, label='fast', verbose=3, extra_argv=None,
81 | doctests=False, coverage=False, durations=-1, tests=None):
82 | """
83 | Run tests for module using pytest.
84 |
85 | Parameters
86 | ----------
87 | label : {'fast', 'full'}, optional
88 | Identifies the tests to run. When set to 'fast', tests decorated
89 | with `pytest.mark.slow` are skipped, when 'full', the slow marker
90 | is ignored.
91 | verbose : int, optional
92 |             Verbosity value for test outputs, in the range 1-3. Default is 3.
93 | extra_argv : list, optional
94 | List with any extra arguments to pass to pytests.
95 | doctests : bool, optional
96 | .. note:: Not supported
97 | coverage : bool, optional
98 | If True, report coverage of NumPy code. Default is False.
99 | Requires installation of (pip) pytest-cov.
100 | durations : int, optional
101 | If < 0, do nothing, If 0, report time of all tests, if > 0,
102 | report the time of the slowest `timer` tests. Default is -1.
103 | tests : test or list of tests
104 | Tests to be executed with pytest '--pyargs'
105 |
106 | Returns
107 | -------
108 | result : bool
109 |             Return True on success, False otherwise.
110 |
111 | Notes
112 | -----
113 | Each module exposes `test` in its namespace to run all tests for
114 |         it. For example, to run all tests for ``mprod.lib``::
115 | 
116 |             mprod.lib.test()
117 |
118 | Examples
119 | --------
120 | >>> result = mprod.lib.test() #doctest: +SKIP
121 | ...
122 | 1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds
123 | >>> result
124 | True
125 |
126 | """
127 | import pytest
128 | import warnings
129 |
130 | module = sys.modules[self.module_name]
131 | module_path = os.path.abspath(module.__path__[0])
132 |
133 | # if os.path.islink(module_path):
134 | # module_path = os.path.realpath(module_path)
135 |
136 |
137 | # setup the pytest arguments
138 | pytest_args = ["-l"]
139 |
140 | # offset verbosity. The "-q" cancels a "-v".
141 | pytest_args += ["-q"]
142 |
143 | # Filter out distutils cpu warnings (could be localized to
144 | # distutils tests). ASV has problems with top level import,
145 | # so fetch module for suppression here.
146 | with warnings.catch_warnings():
147 | warnings.simplefilter("always")
148 | from numpy.distutils import cpuinfo
149 |
150 | # Filter out annoying import messages. Want these in both develop and
151 | # release mode.
152 | pytest_args += [
153 | "-W ignore:Not importing directory",
154 | "-W ignore:numpy.dtype size changed",
155 | "-W ignore:numpy.ufunc size changed",
156 | "-W ignore::UserWarning:cpuinfo",
157 | ]
158 |
159 | # When testing matrices, ignore their PendingDeprecationWarnings
160 | pytest_args += [
161 | "-W ignore:the matrix subclass is not",
162 | "-W ignore:Importing from numpy.matlib is",
163 | ]
164 |
165 | if doctests:
166 | raise ValueError("Doctests not supported")
167 |
168 | if extra_argv:
169 | pytest_args += list(extra_argv)
170 |
171 | if verbose > 1:
172 | pytest_args += ["-" + "v" * (verbose - 1)]
173 |
174 | if coverage:
175 | pytest_args += ["--cov=" + module_path]
176 |
177 | if label == "fast":
178 | # not importing at the top level to avoid circular import of module
179 | from numpy.testing import IS_PYPY
180 | if IS_PYPY:
181 | pytest_args += ["-m", "not slow and not slow_pypy"]
182 | else:
183 | pytest_args += ["-m", "not slow"]
184 |
185 | elif label != "full":
186 | pytest_args += ["-m", label]
187 |
188 | if durations >= 0:
189 | pytest_args += ["--durations=%s" % durations]
190 |
191 | if tests is None:
192 | tests = [self.module_name]
193 |
194 | pytest_args += ["--pyargs"] + list(tests)
195 |
196 | # # run tests.
197 | # _show_numpy_info()
198 |
199 | try:
200 | code = pytest.main(pytest_args)
201 | except SystemExit as exc:
202 | code = exc.code
203 |
204 | return code == 0
205 |
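206 | 
207 | if __name__ == "__main__":
208 |     # Minimal usage sketch (not part of the module): run the mprod test suite
209 |     # directly. Assumes mprod and pytest are importable in this environment.
210 |     import mprod  # noqa: F401 -- puts the package in sys.modules
211 |     ok = PytestTester("mprod")(label="fast", verbose=2)
212 |     sys.exit(0 if ok else 1)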
--------------------------------------------------------------------------------
/mprod/_base.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import Callable, Tuple, Dict, List
3 |
4 | import scipy.fft
5 | from scipy.fft import dct, idct, rfft, irfft
6 | from scipy.stats import ortho_group
7 |
8 | NumpynDArray = np.ndarray
9 | MatrixTensorProduct = Callable[[NumpynDArray], NumpynDArray]
10 |
11 |
12 | def _default_transform(tube_size: int) -> Tuple[MatrixTensorProduct, MatrixTensorProduct]:
13 | def fun_m(x):
14 | return dct(x, type=2, n=tube_size, axis=-1, norm='ortho')
15 |
16 | def inv_m(x):
17 | return idct(x, type=2, n=tube_size, axis=-1, norm='ortho')
18 |
19 | return fun_m, inv_m
20 |
21 |
22 | def generate_dct(tube_size: int, dct_type: int = 2) -> Tuple[MatrixTensorProduct, MatrixTensorProduct]:
23 | """Generates a DCT based tensor-matrix operation (forward and inverse)
24 |
25 | Parameters
26 | ----------
27 |
28 | tube_size: int
29 | the fiber-tube size of the tensors of interest
30 |
31 | dct_type: int, default = 2
32 | The choice of dct type, see scipy.fft.dct.__doc__ for details
33 |
34 | Returns
35 | -------
36 |
37 | fun_m: MatrixTensorProduct
38 | A tensor transform
39 |
40 | inv_m: MatrixTensorProduct
41 | A tensor transform (the inverse of `fun_m`)
42 |
43 | """
44 |
45 | def fun_m(x):
46 | return dct(x, type=dct_type, n=tube_size, axis=-1, norm='ortho')
47 |
48 | def inv_m(x):
49 | return idct(x, type=dct_type, n=tube_size, axis=-1, norm='ortho')
50 |
51 | return fun_m, inv_m
52 |
53 |
54 | # noinspection PyPep8Naming
55 | def _mod3prod(A: NumpynDArray, funM: MatrixTensorProduct) -> NumpynDArray:
56 | """Maps a tensor `A` to the tensor domain transform defined by the operation of a mapping `funM` on
57 | the tube fibers of `A`
58 |
59 | Parameters
60 | ----------
61 |
62 | A: NumpynDArray
63 | Tensor with `A.shape[2] == n`
64 |
65 | funM: MatrixTensorProduct
66 | Picklable mapping that operates on (n dimensional) tube fibers of a tensor
67 |
68 | Returns
69 | -------
70 |
71 |     hatA: NumpynDArray
72 |         The domain transform of `A` defined by the operation of `funM`
73 | """
74 | m, p, n = A.shape
75 | return funM(A.transpose((2, 1, 0)).reshape(n, m * p)).reshape((n, p, m)).transpose((2, 1, 0))
76 |
77 |
78 | def x_m3(M: NumpynDArray) -> MatrixTensorProduct:
79 | """
80 | Creates a picklable tensor transformation forming the mod3 tensor-matrix multiplication required in the M product
81 | definition.
82 |
83 | Parameters
84 | ----------
85 | M: np.ndarray
86 | A matrix of shape `(n,n)`
87 |
88 | Returns
89 | -------
90 | fun: Callable[[NumpynDArray], NumpynDArray]
91 | Picklable mapping that operates on (n dimensional) tube fibers of a tensor
92 |
93 | """
94 | assert len(M.shape) == 2, "M must be a 2 dimensional matrix"
95 | assert M.shape[0] == M.shape[1], "M must be a square matrix"
96 |
97 | tube_size = M.shape[0]
98 | def fun(A: NumpynDArray) -> NumpynDArray:
99 | assert A.shape[-1] == tube_size, "The last dimension of A must be the same as the tube size "
100 | if len(A.shape) == 2:
101 | # the case where A is a matrix representation of f-diagonal tensor
102 | return A @ M.T
103 | elif len(A.shape) == 3:
104 | m, p, n = A.shape
105 | return (M @ A.transpose((2, 1, 0)).reshape(n, m * p)).reshape((n, p, m)).transpose((2, 1, 0))
106 | else:
107 | raise NotImplementedError("We only work with 3d tensors for now!")
108 | return fun
109 |
110 |
111 | def generate_haar(tube_size: int, random_state = None) -> Tuple[MatrixTensorProduct, MatrixTensorProduct]:
112 |     """Generates a tensor-matrix transformation based on random sampling of a unitary matrix
113 |     (according to the Haar distribution on the orthogonal group O(n); see `scipy.stats.ortho_group`)
114 |
115 | Parameters
116 | ----------
117 |
118 | tube_size: int
119 | the fiber-tube size of the tensors of interest
120 |
121 | Returns
122 | -------
123 |
124 | fun_m: MatrixTensorProduct
125 | A tensor transform
126 |
127 | inv_m: MatrixTensorProduct
128 | A tensor transform (the inverse of `fun_m`)
129 |
130 | """
131 |
132 | M = ortho_group.rvs(tube_size, random_state=random_state)
133 |
134 | fun_m = x_m3(M)
135 | inv_m = x_m3(M.T)
136 |
137 | return fun_m, inv_m
138 |
139 |
140 | def m_prod(tens_a: NumpynDArray,
141 | tens_b: NumpynDArray,
142 | fun_m: MatrixTensorProduct,
143 | inv_m: MatrixTensorProduct) -> NumpynDArray:
144 | """
145 | Returns the :math:`\\star_{\\mathbf{M}}` product of tensors `A` and `B`
146 | where ``A.shape == (m,p,n)`` and ``B.shape == (p,r,n)``.
147 |
148 | Parameters
149 | ----------
150 | tens_a: array-like
151 |         3rd order tensor with shape `m x p x n`
152 |
153 | tens_b: array-like
154 |         3rd order tensor with shape `p x r x n`
155 |
156 | fun_m: MatrixTensorProduct, Callable[[NumpynDArray], NumpynDArray]
157 | Invertible linear mapping from `R^n` to `R^n`
158 |
159 | inv_m: MatrixTensorProduct, Callable[[NumpynDArray], NumpynDArray]
160 | Invertible linear mapping from R^n to R^n ( `fun_m(inv_m(x)) = inv_m(fun_m(x)) = x` )
161 |
162 | Returns
163 | -------
164 | tensor: array-like
165 |         3rd order tensor of shape `m x r x n` that is the :math:`\\star_{\\mathbf{M}}`
166 |         product of `A` and `B`
167 | """
168 |
169 | assert tens_a.shape[1] == tens_b.shape[0]
170 | assert tens_a.shape[-1] == tens_b.shape[-1]
171 |
172 | a_hat = fun_m(tens_a)
173 | b_hat = fun_m(tens_b)
174 |
175 | c_hat = np.einsum('mpi,pli->mli', a_hat, b_hat)
176 | return inv_m(c_hat)
177 |
178 |
179 | # copied version from transformers.py
180 | # def m_prod(A: NumpynDArray, B: NumpynDArray, funM: MatrixTensorProduct, invM: MatrixTensorProduct) -> NumpynDArray:
181 | # # assert A.shape[1] == B.shape[0]
182 | # # assert A.shape[-1] == B.shape[-1]
183 | # A_hat = funM(A)
184 | # B_hat = funM(B)
185 | #
186 | # calE_hat = np.einsum('mpi,pli->mli', A_hat, B_hat)
187 | # return invM(calE_hat)
188 |
189 | def tensor_mtranspose(tensor, mfun, minv):
190 | tensor_hat = mfun(tensor)
191 | tensor_hat_t = tensor_hat.transpose((1, 0, 2))
192 | tensor_t = minv(tensor_hat_t)
193 | return tensor_t
194 |
195 |
196 | def _t_pinv_fdiag(F, Mfun, Minv) -> NumpynDArray:
197 | m, p, n = F.shape
198 | hat_f = Mfun(F)
199 |
200 | pinv_hat_f = np.zeros_like(hat_f)
201 | for i in range(n):
202 | fi_diag = np.diagonal(hat_f[:, :, i]).copy()
203 | fi_diag[(fi_diag ** 2) > 1e-6] = 1 / fi_diag[(fi_diag ** 2) > 1e-6]
204 |
205 | pinv_hat_f[:fi_diag.size, :fi_diag.size, i] = np.diag(fi_diag)
206 |
207 | pinv_f = Minv(pinv_hat_f)
208 |
209 | return tensor_mtranspose(pinv_f, Mfun, Minv)
210 |
211 | # # TODO: Is TensorArray needed ?
212 | # # noinspection PyPep8Naming
213 | # class TensorArray(np.ndarray):
214 | # def __new__(cls, input_array):
215 | # # Input array is an already formed ndarray instance
216 | # # We first cast to be our class type
217 | # obj = np.asarray(input_array).view(cls)
218 | # # add the new attribute to the created instance
219 | # # Finally, we must return the newly created object:
220 | # return obj
221 | #
222 | # @property
223 | # def TT(self):
224 | # return self.transpose((1, 0, 2))
225 | #
226 | # def __array_finalize__(self, obj):
227 | # # see InfoArray.__array_finalize__ for comments
228 | # if obj is None: return
229 | # self.info = getattr(obj, 'info', None)
230 |
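231 | 
232 | if __name__ == "__main__":
233 |     # Minimal usage sketch (not part of the library API): check that the
234 |     # Haar-based transform pair is a genuine inverse pair and that m_prod
235 |     # maps an (m,p,n) and a (p,r,n) tensor to an (m,r,n) tensor.
236 |     rng = np.random.default_rng(0)
237 |     m, p, r, n = 4, 3, 5, 6
238 |     tens_a = rng.standard_normal((m, p, n))
239 |     tens_b = rng.standard_normal((p, r, n))
240 | 
241 |     fun_m, inv_m = generate_haar(n, random_state=0)
242 |     assert np.allclose(inv_m(fun_m(tens_a)), tens_a)  # round trip through M
243 | 
244 |     tens_c = m_prod(tens_a, tens_b, fun_m, inv_m)
245 |     assert tens_c.shape == (m, r, n)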
--------------------------------------------------------------------------------
/mprod/_ml_helpers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import List, Tuple, Dict, Mapping
3 | from ._base import NumpynDArray
4 | from sklearn.base import TransformerMixin, BaseEstimator
5 | import pandas as pd
6 | from itertools import product
7 |
8 |
9 | def table2tensor(table: pd.DataFrame, missing_flag: bool = False) -> Tuple[np.ma.core.MaskedArray, Mapping, Mapping]:
10 | """
11 |     Reshapes an `nm x p` (`(samples x reps) x features`) multi-indexed dataframe to form an `m x p x n` tensor
12 | `(subjects, features, reps)`
13 |
14 | Parameters
15 | ----------
16 | table: pd.DataFrame
17 |         an `nm x p` table of samples x features
18 |
19 | missing_flag: `bool`, default = False
20 | When set to `False` (default), the function will raise an error in case there are missing samples.
21 | Setting to `True` will result in a tensor with masked entries.
22 |
23 | Returns
24 | -------
25 |     tensor: ndarray, np.ma.array
26 |         3rd order tensor `m x p x n` (subjects, features, reps)
27 | 
28 |     mode1_mapping : dict
29 |         Maps each of the table's subject names to its mode-1 (first axis) index in the tensor
30 | 
31 |     mode3_mapping : dict
32 |         Maps each of the table's rep ids to its mode-3 (third axis) index in the tensor
33 |
34 |
35 | Examples
36 | --------
37 | Suppose that ``table_data`` is a dataframe with no missing values.
38 |
39 | >>> from mprod import table2tensor
40 |     >>> import numpy as np
41 |     >>> np.random.seed(0)
42 | >>> table_data.iloc[:5,:4]
43 | f1 f2 f3 f4
44 | SubjetID rep
45 | a t1 0.251259 0.744838 -0.45889 -0.208525
46 | t10 2.39831 0.248772 0.65873 1.36994
47 | t2 -0.303154 -0.337603 -0.568608 -1.0239
48 | t3 1.36369 0.978895 0.161972 -0.804368
49 | t4 1.8548 1.52954 0.78576 0.538041
50 | >>> msk_tensor, mode1_mapping, mode3_mapping = table2tensor(table_data, missing_flag=False)
51 | >>> msk_tensor[:3,:3,:2]
52 | [[[0.25125853442243695 2.398308745102709]
53 | [0.7448378210349296 0.2487716728987871]
54 | [-0.4588901621837434 0.6587302072601999]]
55 | [[-0.5689263433318329 -0.06564253839123065]
56 | [1.0017636851038796 -0.49265853128383713]
57 | [0.45266517056628647 -1.4812390563653883]]
58 | [[0.7690616486878629 0.49302719962677855]
59 | [0.3186320585255899 1.469576084933633]
60 | [0.9609169837347897 -0.19564077520234632]]]
61 | >>> mode1_mapping
62 | {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}
63 | >>> mode3_mapping
64 | {'t1': 0,
65 | 't10': 1,
66 | 't2': 2,
67 | 't3': 3,
68 | 't4': 4,
69 | 't5': 5,
70 | 't6': 6,
71 | 't7': 7,
72 | 't8': 8,
73 | 't9': 9}
74 |
75 | **missing values**
76 |
77 | >>> msk_tensor, mode1_mapping, mode3_mapping = table2tensor(table_data.sample(40)
78 | ... , missing_flag=True)
79 | >>> msk_tensor[:3,:3,:2]
80 | masked_array(
81 | data=[[[0.07664420134210018, --],
82 | [-0.7358062254334045, --],
83 | [0.5562074188402509, --]],
84 | [[2.088982483926928, -0.06564253839123065],
85 | [0.7697757466063808, -0.49265853128383713],
86 | [0.4147812728859107, -1.4812390563653883]],
87 | [[-0.004794963866429985, 1.2262908375944879],
88 | [-0.15033350807209261, -0.3068131758163276],
89 | [0.6461670563178799, 0.1769508046682527]]],
90 | mask=[[[False, True],
91 | [False, True],
92 | [False, True]],
93 | [[False, False],
94 | [False, False],
95 | [False, False]],
96 | [[False, False],
97 | [False, False],
98 | [False, False]]], fill_value=0.0)
99 | >>> mode1_mapping
100 | {'a': 3, 'b': 1, 'c': 0, 'd': 4, 'e': 2}
101 | >>> mode3_mapping
102 | {'t1': 2,
103 | 't10': 1,
104 | 't2': 3,
105 | 't3': 6,
106 | 't4': 5,
107 | 't5': 7,
108 | 't6': 8,
109 | 't7': 4,
110 | 't8': 0,
111 | 't9': 9}
112 | """
113 |
114 | samples_map, usamples = table.index.get_level_values(0).factorize()
115 | reps_map, ureps = table.index.get_level_values(1).factorize()
116 |
117 | m, p, n = usamples.size, table.shape[1], ureps.size
118 |
119 | samples_map_dict = pd.Series(np.unique(samples_map), usamples).to_dict()
120 | reps_map_dict = pd.Series(np.unique(reps_map), ureps).to_dict()
121 |
122 | if missing_flag:
123 | tensor = np.ma.array(np.zeros((m, p, n)), mask=np.ones((m, p, n)), fill_value=0)
124 | index_iterator = table.iterrows()
125 | else:
126 | tensor = np.zeros((m, p, n))
127 | index_iterator = (((i, j), table.loc[(i, j)].copy()) for i, j in product(usamples, ureps))
128 |
129 | try:
130 | for (m1, m3), val in index_iterator:
131 | tensor[samples_map_dict[m1], :, reps_map_dict[m3]] = val.values
132 | except KeyError as ke:
133 | raise KeyError("Discovered missing data in the table, which is not allowed by default. "
134 |                        "To work with missing data and have a masked array returned, set missing_flag to True") from ke
135 |
136 | return tensor, samples_map_dict, reps_map_dict
137 |
138 |
139 | # noinspection PyPep8Naming
140 | # noinspection PyUnusedLocal
141 | class MeanDeviationForm(TransformerMixin, BaseEstimator):
142 |     """Standardize the data by subtracting the mean (or empiric mean) sample.
143 |     The mean deviation form of a tensor :math:`X \\in \\mathbb{R}^{m \\times p \\times n}` is calculated as:
144 | 
145 |     .. math:: Z = X - U
146 |
147 | where `U` is the mean sample of `X` , calculated as follows:
148 |
149 | .. math::
150 | U = \\frac{1}{m} \\sum_{i=1}^{m} X[i,:,:]
151 |
152 | and for the empiric mean deviation form:
153 |
154 | .. math::
155 | U = \\frac{1}{m-1} \\sum_{i=1}^{m} X[i,:,:]
156 |
157 |
158 | Attributes
159 | ----------
160 | _mean_sample : ndarray of shape (p_features, n_repeats), or `None`
161 | The mean sample of the dataset
162 |
163 |
164 | Methods
165 | -------
166 | fit:
167 | Fits a MeanDeviationForm transformer by computing the mean sample of a training dataset
168 | transform:
169 | Shift dataset by fitted sample mean
170 | fit_transform:
171 | Compute the mean sample of a dataset and transform it to its mean deviation form
172 | inverse_transform:
173 | Add precomputed mean sample to a dataset
174 |
175 |
176 |
177 |
178 | """
179 |
180 | def __init__(self):
181 | # super(MeanDeviationForm, self).__init__()
182 |
183 | self._mean_sample = None
184 |
185 | def _fit(self, X):
186 | denum = X.shape[0]
187 | self._mean_sample = np.nansum(X, axis=0, keepdims=True) / denum
188 |
189 | def fit(self, X, y=None, **fit_param):
190 | """Compute the mean (or empiric mean) sample of a tensor
191 |
192 | Parameters
193 | ----------
194 | X : {array-like} of shape (m_samples, p_features, n_repeats)
195 | The data used to compute the mean sample
196 |             used for later centering along the features-repeats axes.
197 | y : None
198 | Ignored.
199 |
200 | Returns
201 | -------
202 | self : object
203 | Fitted MeanDeviationForm object
204 |
205 | Examples
206 | --------
207 | >>> from mprod import MeanDeviationForm
208 | >>> import numpy as np
209 | >>> X = np.random.randn(10,20,4)
210 | >>> mdf = MeanDeviationForm()
211 | >>> mdf = mdf.fit(X)
212 | """
213 | self._fit(X)
214 | return self
215 |
216 | def transform(self, X, y=None):
217 | """Perform standardization by centering.
218 |
219 | Parameters
220 | ----------
221 | X : array-like of shape (k_samples, p_features, n_repeats)
222 | The data used to center along the features-repeats axes.
223 |
224 | Returns
225 | -------
226 | X_tr : ndarray of shape (k_samples, p_features, n_repeats)
227 | Transformed tensor.
228 |
229 | Examples
230 | --------
231 | >>> from mprod import MeanDeviationForm
232 | >>> import numpy as np
233 | >>> X = np.random.randn(10,20,4)
234 | >>> y = np.random.randn(50,20,4)
235 | >>> mdf = MeanDeviationForm()
236 | >>> mdf_fit = mdf.fit(X)
237 |         >>> yt = mdf.transform(y)
238 |
239 | """
240 |
241 | X_transform = X - self._mean_sample
242 |         if isinstance(X_transform, np.ma.core.MaskedArray):
243 |             return X_transform.filled()  # fill masked entries, return a plain ndarray
244 | else:
245 | return X_transform
246 |
247 | def fit_transform(self, X, y=None, **fit_params):
248 | self.fit(X, y, **fit_params)
249 | return self.transform(X)
250 |
251 | def inverse_transform(self, Y):
252 | """Undo the centering of X according to mean sample.
253 |
254 | Parameters
255 | ----------
256 |         Y : array-like of shape (m_samples, p_features, n_repeats)
257 | Input data that will be transformed.
258 |
259 | Returns
260 | -------
261 |         Yt : ndarray of shape (m_samples, p_features, n_repeats)
262 | Transformed data.
263 |
264 | Examples
265 | --------
266 | >>> from mprod import MeanDeviationForm
267 | >>> import numpy as np
268 | >>> X = np.random.randn(10,20,4)
269 | >>> mdf = MeanDeviationForm()
270 | >>> Xt = mdf.fit_transform(X)
271 | >>> mdf.inverse_transform(Xt) - X
272 |
273 | """
274 | Y_transform = Y + self._mean_sample
275 |
276 |         if isinstance(Y, np.ma.core.MaskedArray):
277 |             return Y_transform.filled()  # fill masked entries, return a plain ndarray
278 | else:
279 | return Y_transform
280 |
281 |
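282 | if __name__ == "__main__":
283 |     # Minimal usage sketch (not part of the library API): tensorize a small
284 |     # 2-level indexed table and verify the MeanDeviationForm round trip.
285 |     # Run with `python -m mprod._ml_helpers` (the relative import above
286 |     # requires package context).
287 |     idx = pd.MultiIndex.from_product([list("abc"), ["t1", "t2"]],
288 |                                      names=["SubjectID", "rep"])
289 |     demo_table = pd.DataFrame(np.random.randn(6, 4), index=idx,
290 |                               columns=[f"f{i}" for i in range(1, 5)])
291 | 
292 |     tensor, mode1_map, mode3_map = table2tensor(demo_table)
293 |     assert tensor.shape == (3, 4, 2)  # (subjects, features, reps)
294 | 
295 |     mdf = MeanDeviationForm()
296 |     centered = mdf.fit_transform(tensor)
297 |     assert np.allclose(mdf.inverse_transform(centered), tensor)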
--------------------------------------------------------------------------------
/mprod/dimensionality_reduction/_tcam.py:
--------------------------------------------------------------------------------
1 | """TCAM
2 | """
3 | import numpy as np
4 | from dataclasses import dataclass
5 | from sklearn.base import TransformerMixin, BaseEstimator
6 |
7 | from .._base import m_prod, tensor_mtranspose, _default_transform, _t_pinv_fdiag
8 | from .._base import MatrixTensorProduct, NumpynDArray
9 | from ..decompositions import svdm
10 | from .._misc import _assert_order_and_mdim
11 | from .._ml_helpers import MeanDeviationForm
12 |
13 | _float_types = [np.sctypeDict[c] for c in 'efdg'] + [float]
14 | _int_types = [np.sctypeDict[c] for c in 'bhip'] + [int]
15 |
16 |
17 | def _pinv_diag(diag_tensor):
18 | sinv = diag_tensor.copy()
19 | sinv += ((diag_tensor ** 2) <= 1e-6) * 1e+20
20 | sinv = (((diag_tensor ** 2) > 1e-6) * (1 / sinv))
21 | return sinv
22 |
23 |
24 | @dataclass
25 | class TensorSVDResults:
26 | u: np.ndarray
27 | s: np.ndarray
28 | v: np.ndarray
29 |
30 | def astuple(self):
31 | return self.u.copy(), self.s.copy(), self.v.copy()
32 |
33 |
34 | # noinspection PyPep8Naming
35 | class TCAM(TransformerMixin, BaseEstimator):
36 | """tsvdm based tensor component analysis (TCAM).
37 | Linear dimensionality reduction using tensor Singular Value Decomposition of the
38 | data to project it to a lower dimensional space. The input data is centered
39 |     but not scaled for each feature before applying the tSVDM (using :class:`mprod.MeanDeviationForm`).
40 |     It uses the :func:`mprod.decompositions.svdm` function as the basis for the ``TSVDMII`` algorithm from Kilmer et al.
41 |     (https://doi.org/10.1073/pnas.2015851118), then offers CP-like transformations of the data accordingly.
42 |     See https://arxiv.org/abs/2111.14159 for theoretical results and case studies, and the :ref:`Tutorials `
43 |     for elaborated examples.
44 |
45 | Parameters
46 | ----------
47 | n_components : int, float, default=None
48 | Number of components to keep.
49 | if n_components is not set all components are kept::
50 |
51 | n_components == min(m_samples, p_features) * n_reps - 1
52 |
53 | If ``0 < n_components < 1`` , select the number of components such that the
54 | amount of variance that needs to be explained is greater than the percentage specified
55 | by n_components. In case ``n_components >= 1`` is an integer then the estimated number
56 | of components will be::
57 |
58 | n_components_ == min(n_components, min(m_samples, p_features) * n_reps - 1)
59 |
60 |
61 | Attributes
62 | ----------
63 | n_components_ : int
64 | The estimated number of components. When n_components is set
65 | to a number between 0 and 1. this number is estimated from input data.
66 | Otherwise it equals the parameter n_components,
67 | or `min(m_samples, p_features) * n_reps -1` if n_components is None.
68 |
69 | explained_variance_ratio_ : ndarray of shape (`n_components_`,)
70 | The amount of variance explained by each of the selected components.
71 |
72 | mode2_loadings : ndarray (float) of shape (`n_components_`, `n_features` )
73 |         A matrix representing the contribution (coefficient) of each feature in the original
74 |         feature space (2nd mode of the tensor) to each of the TCAM factors.
75 |
76 |
77 | Methods
78 | -------
79 | fit:
80 | Compute the TCAM transformation for a given dataset
81 | transform:
82 | Transform a given dataset using a fitted TCAM
83 | fit_transform:
84 | Fit a TCAM to a dataset then return its TCAM transformation
85 | inverse_transform:
86 | Given points in the reduced TCAM space, compute the points pre-image in the original features space.
87 |
88 |
89 | """
90 |
91 | def __init__(self, fun_m: MatrixTensorProduct = None,
92 | inv_m: MatrixTensorProduct = None,
93 | n_components=None):
94 | assert (type(n_components) in _int_types and (n_components >= 1)) or \
95 | ((type(n_components) in _float_types) and (0 < n_components <= 1)) \
96 |             or (n_components is None), f"`n_components` must be a positive integer, a float between 0 and 1," \
97 |                                        f" or `None`; got {n_components} of type {type(n_components)}"
98 |
99 |         assert (fun_m is None) == (inv_m is None), "fun_m and inv_m must either both be defined " \
100 |                                                    "or both be None"
101 |
102 | self.n_components = n_components
103 |
104 | self.fun_m = fun_m
105 | self.inv_m = inv_m
106 | self._mdf = MeanDeviationForm()
107 |
108 | def _mprod(self, a, b) -> NumpynDArray:
109 | return m_prod(a, b, self.fun_m, self.inv_m)
110 |
111 | def _fit(self, X: np.ndarray):
112 | max_rank = self._n * min(self._m, self._p) - 1
113 |
114 | self._hat_svdm = TensorSVDResults(*svdm(X, self.fun_m, self.inv_m, hats=True))
115 |
116 | # get factors order
117 | diagonals = self._hat_svdm.s.transpose().copy()
118 | self._factors_order = np.unravel_index(np.argsort(- (diagonals ** 2), axis=None), diagonals.shape)
119 | self._sorted_singular_vals = diagonals[self._factors_order]
120 | self._total_variation = (self._sorted_singular_vals ** 2).sum()
121 | self.explained_variance_ratio_ = ((self._sorted_singular_vals ** 2) / self._total_variation)
122 |
123 | # populate n_components if not given
124 | if self.n_components is None:
125 | self.n_components_ = max_rank
126 | elif type(self.n_components) in _int_types and self.n_components > 0:
127 | self.n_components_ = min(max_rank, self.n_components)
128 | elif type(self.n_components) in _float_types and self.n_components == 1.:
129 | self.n_components_ = max_rank
130 | elif 0 < self.n_components < 1 and type(self.n_components) in _float_types:
131 | var_cumsum = (self._sorted_singular_vals ** 2).cumsum() # w in the paper
132 | w_idx = np.arange(0, var_cumsum.size, dtype=int) # w index
133 | self.n_components_ = min(max_rank,
134 | w_idx[(var_cumsum / self._total_variation) > self.n_components].min() + 1)
135 | else:
136 | raise ValueError("Unexpected edge case for the value of `n_components`")
137 |
138 | self.n_components_ = max(1, self.n_components_)
139 |
140 | self._n_factors_order = tuple([self._factors_order[0][:self.n_components_].copy(),
141 | self._factors_order[1][:self.n_components_].copy()])
142 |
143 | self.explained_variance_ratio_ = self.explained_variance_ratio_[:self.n_components_]
144 | self._rrho = np.array([0 for _ in range(self._n)])
145 | for nn, rr in zip(*self._n_factors_order):
146 | self._rrho[nn] = max(self._rrho[nn], rr + 1)
147 | # self._rrho += 1
148 | # populate truncations
149 | # _tau = self._sorted_singular_vals[self.n_components_ + 1]
150 | # self._rrho = (diagonals > _tau).sum(axis=1)
151 | self._truncated_hat_svdm = TensorSVDResults(*self._hat_svdm.astuple())
152 |
153 | self._truncated_hat_svdm.u = self._truncated_hat_svdm.u[:, :self._rrho.max(), :]
154 | self._truncated_hat_svdm.s = self._truncated_hat_svdm.s[:self._rrho.max(), :]
155 | self._truncated_hat_svdm.v = self._truncated_hat_svdm.v[:, :self._rrho.max(), :]
156 |
157 | for i, rho_i in enumerate(self._rrho):
158 | self._truncated_hat_svdm.u[:, rho_i:, i] = 0
159 | self._truncated_hat_svdm.s[rho_i:, i] = 0
160 | self._truncated_hat_svdm.v[:, rho_i:, i] = 0
161 |
162 | self._truncated_svdm = TensorSVDResults(self.inv_m(self._truncated_hat_svdm.u),
163 | self.inv_m(self._truncated_hat_svdm.s),
164 | self.inv_m(self._truncated_hat_svdm.v))
165 |
166 | self._truncS_pinv = self._truncated_svdm.s.copy()
167 | self._truncS_pinv[(self._truncS_pinv ** 2) <= 1e-6] = 0
168 | self._truncS_pinv[(self._truncS_pinv ** 2) > 1e-6] = 1 / self._truncS_pinv[(self._truncS_pinv ** 2) > 1e-6]
169 |
170 | return self
171 |
172 | # noinspection PyUnusedLocal
173 | def fit(self, X, y=None, **fit_params):
174 | """Fit the model with X.
175 |
176 | Parameters
177 | ----------
178 | X : array-like of shape (m_samples, p_features, n_modes)
179 | Training data, where m_samples is the number of samples,
180 | p_features is the number of features and n_modes is the
181 | number of modes (timepoints/locations etc...)
182 |
183 | y : Ignored
184 | Ignored.
185 |
186 | Returns
187 | -------
188 | self : object
189 | Returns the instance itself.
190 |
191 |
192 | Examples
193 | --------
194 | >>> from mprod.dimensionality_reduction import TCAM
195 | >>> import numpy as np
196 | >>> X = np.random.randn(10,20,4)
197 | >>> tca = TCAM()
198 |         >>> tca = tca.fit(X)
199 |
200 |
201 | """
202 |
203 |         assert len(X.shape) == 3, "X must be a 3rd order tensor"
204 | self._m, self._p, self._n = X.shape
205 |
206 | if self.fun_m is None:
207 | self.fun_m, self.inv_m = _default_transform(self._n)
208 | _X = self._mdf.fit_transform(X)
209 |
210 | return self._fit(_X)
211 |
212 | def _mode0_reduce(self, tU):
213 | return np.concatenate(
214 | [self._sorted_singular_vals[e] * tU[:, [fj], [fi]] for e, (fi, fj) in
215 | enumerate(zip(*self._n_factors_order))],
216 | axis=1)
217 |
218 | def _mode1_reduce(self, tV):
219 | return np.concatenate(
220 | [self._sorted_singular_vals[e] * tV[:, [fj], [fi]] for e, (fi, fj) in
221 | enumerate(zip(*self._n_factors_order))],
222 | axis=1)
223 |
224 | def _mode0_projector(self, X):
225 |
226 | trunc_U, trunc_S, trunc_V = self._truncated_hat_svdm.astuple()
227 | # trunc_Spinv = _t_pinv_fdiag(trunc_S, self.fun_m, self.inv_m)
228 | # XV = self._mprod(X, trunc_V)
229 | # XVS = self._mprod(XV, trunc_Spinv)
230 | # XVS_hat = self.fun_m(XVS)
231 |
232 | XV_hat = np.matmul(self.fun_m(X).transpose(2, 0, 1), trunc_V.transpose(2, 0, 1)).transpose(1, 2, 0)
233 | Y = XV_hat[:, self._n_factors_order[1], self._n_factors_order[0]].copy()
234 |
235 | # XV_hat = np.matmul(self.fun_m(X).transpose(2, 0, 1), trunc_V.transpose(2, 0, 1))
236 | # XVS_hat = XV_hat * _pinv_diag(trunc_S).transpose().reshape(self._n, 1, self._rrho.max())
237 | # XVS_hat = XVS_hat.transpose(1, 2, 0)
238 | # Y = XVS_hat[:, self._n_factors_order[1], self._n_factors_order[0]].copy()
239 |
240 | # X_transformed_0 = self._mprod(X, self._truncated_svdm.v)
241 | # X_transformed_0 = self._mprod(X_transformed_0, self._truncS_pinv)
242 | # X_transformed = self.fun_m(X_transformed_0)
243 | return Y
244 |
245 | # def _mode1_projector(self, X):
246 | # truncU_mtranspose = tensor_mtranspose(self._truncated_svdm.u, self.fun_m, self.inv_m)
247 | # X_transformed_0 = self._mprod(truncU_mtranspose, X)
248 | # X_transformed_0 = tensor_mtranspose(self._mprod(self._truncS_pinv, X_transformed_0), self.fun_m, self.inv_m)
249 | # X_transformed = self.fun_m(X_transformed_0)
250 | # return self._mode1_reduce(X_transformed)
251 |
252 | def transform(self, X):
253 | """Apply mode-1 dimensionality reduction to X.
254 |
255 | X is projected on the first mode-1 tensor components previously extracted
256 | from a training set.
257 |
258 | Parameters
259 | ----------
260 | X : array-like of shape (m_samples, p_features, n_modes)
261 |             New data to project, where m_samples is the number of samples,
262 | p_features is the number of features and n_modes is the
263 | number of modes (timepoints/locations etc...)
264 |
265 | Returns
266 | -------
267 | X_new : array-like of shape (m_samples, `n_components_`)
268 | Projection of X in the first principal components, where m_samples
269 | is the number of samples and n_components is the number of the components.
270 |
271 | """
272 | _assert_order_and_mdim(X, 'X', 3, [(1, self._p), (2, self._n)])
273 | return self._mode0_projector(self._mdf.transform(X))
274 |
275 | @property
276 | def mode2_loadings(self):
277 | """ The weights driving the variation in each of the obtained factors with respect to
278 | each feature
279 | """
280 |
281 | return self._truncated_hat_svdm.v[:,self._n_factors_order[1], self._n_factors_order[0]].copy()
282 |
283 | def fit_transform(self, X: np.ndarray, y=None, **fit_params):
284 |
285 | """Fit the model with X and apply the dimensionality reduction on X.
286 |
287 | Parameters
288 | ----------
289 | X : array-like of shape (m_samples, p_features, n_modes)
290 | Training data, where m_samples is the number of samples,
291 | p_features is the number of features and n_modes is the
292 | number of modes (timepoints/locations etc...)
293 |
294 | y : Ignored
295 | Ignored.
296 |
297 | Returns
298 | -------
299 | X_new : ndarray of shape (m_samples, `n_components_`)
300 | Transformed values.
301 |
302 | """
303 |
304 | self.fit(X)
305 | return self.transform(X)
306 |
307 | # noinspection PyPep8Naming
308 | def inverse_transform(self, Y: NumpynDArray):
309 | """
310 | Inverts TCAM scores back to the original features space
311 |
312 | Parameters
313 | ----------
314 | Y: np.ndarray
315 | 2d array with shape (k, `n_components_`)
316 |
317 | Returns
318 | -------
319 | Y_inv: NumpynDArray
320 | 3rd order tensor that is the inverse transform of Y to the original features space
321 |
322 | """
323 |
324 | trunc_U, trunc_S, trunc_V = self._truncated_hat_svdm.astuple()
325 |
326 | # Suppose YY = X * V * pinv(S)
327 | # and the matrix Y is an ordering of YYs columns according to the factors order
328 |
329 | YY_hat = np.zeros((Y.shape[0], self._rrho.max(), self._n))
330 | YY_hat[:, self._n_factors_order[1], self._n_factors_order[0]] = Y.copy()
331 | # YYS_hat = YY_hat.transpose(2, 0, 1) * trunc_S.transpose().reshape(self._n, 1, self._rrho.max())
332 | X_hat = np.matmul(YY_hat.transpose(2, 0, 1), trunc_V.transpose(2, 1, 0)).transpose(1, 2, 0)
333 | XX = self.inv_m(X_hat)
334 |
335 | # Note that
336 | # YY*S*V' = X * V * pinv(S) * S * V'
337 | # = X * V * (JJ) * V'
338 | # = X * (V * JJ) * V'
339 | # = X * (VV) * V'
340 | # = X * (JJ) \approx X
341 | #
342 | # where JJ is "almost" the identity tensor
343 |
344 |
345 | # #################################### OLD CODE #################################################
346 | # YY_hat = np.zeros((trunc_U.shape[0], trunc_U.shape[1], trunc_U.shape[-1])) #
347 | # YY_hat[:, self._n_factors_order[1], self._n_factors_order[0]] = Y.copy() #
348 | # YY = self.inv_m(YY_hat) # get YY from YY_hat #
349 | # YYs = self._mprod(YY, trunc_S) # YY*S #
350 | # Yinv = self._mprod(YYs, tensor_mtranspose(trunc_V, self.fun_m, self.inv_m)) # YY*S*V' #
351 | # # return self._mdf.inverse_transform(Yinv) #
352 | # ###############################################################################################
353 |
354 | return self._mdf.inverse_transform(XX)
355 |
356 |
357 |
--------------------------------------------------------------------------------
/docs/examples/mprod_primer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | ".. note::\n",
8 | " The following content provides technical and mathematical background for the `mprod-package`. \n",
9 | " Most users of downstream applications such as `TCAM` would probably like to skip this part\n",
10 | "\n",
11 | "$\\newcommand{\\mat}[1]{\\mathbf{#1}}$\n",
12 | "$\\newcommand{\\matM}{\\mat{M}}$\n",
13 | "$\\newcommand{\\matMt}{\\matM^{\\T}}$\n",
14 | "$\\newcommand{\\matMi}{\\matM^{-1}}$\n",
15 | "$\\newcommand{\\T}{\\mat{T}}$\n",
16 | "$\\newcommand{\\xx}{\\times}$\n",
17 | "$\\newcommand{\\mpn}{m \\xx p \\xx n}$\n",
18 | "$\\newcommand{\\pmn}{p \\xx m \\xx n}$\n",
19 | "$\\newcommand{\\tens}[1]{\\mathcal{#1}}$\n",
20 | "$\\newcommand{\\tA}{\\tens{A}}$\n",
21 | "$\\newcommand{\\tAt}{\\tA^{\\T}}$\n",
22 | "$\\newcommand{\\thA}{\\widehat{\\tA}}$\n",
23 | "$\\newcommand{\\thAt}{\\thA^{\\T}}$\n",
24 | "$\\newcommand{\\tC}{\\tens{C}}$\n",
25 | "$\\newcommand{\\tCt}{\\tC^{\\T}}$\n",
26 | "$\\newcommand{\\thC}{\\widehat{\\tC}}$\n",
27 | "$\\newcommand{\\thCt}{\\thC^{\\T}}$\n",
28 | "$\\newcommand{\\tB}{\\tens{B}}$\n",
29 | "$\\newcommand{\\tBt}{\\tB^{\\T}}$\n",
30 | "$\\newcommand{\\thB}{\\widehat{\\tB}}$\n",
31 | "$\\newcommand{\\thBt}{\\thB^{\\T}}$\n",
32 | "$\\newcommand{\\tsub}[1]{\\xx_{#1}}$\n",
33 | "$\\newcommand{\\tsM}{\\tsub{3}\\matM}$\n",
34 | "$\\newcommand{\\tsMinv}{\\tsub{3}\\matM^{-1}}$\n",
35 | "$\\newcommand{\\mm}{\\star_{\\scriptscriptstyle \\matM } }$\n",
36 | "$\\newcommand{\\RR}{\\mathbb{R}}$\n",
37 | "$\\newcommand{\\tI}{\\tens{I}}$\n",
38 | "$\\newcommand{\\thI}{\\widehat{\\tI}}$\n",
39 | "$\\newcommand{\\tE}{\\tens{E}}$\n",
40 | "$\\newcommand{\\tQ}{\\tens{Q}}$\n",
41 | "$\\newcommand{\\tQt}{\\tQ^{\\T}}$\n",
42 | "$\\newcommand{\\thQ}{\\widehat{\\tQ}}$\n",
43 | "$\\newcommand{\\thQt}{\\thQ^{\\T}}$\n",
44 | "$\\newcommand{\\tV}{\\tens{V}}$\n",
45 | "$\\newcommand{\\tVt}{\\tV^{\\T}}$\n",
46 | "$\\newcommand{\\thV}{\\widehat{\\tV}}$\n",
47 | "$\\newcommand{\\thVt}{\\thV^{\\T}}$\n",
48 | "$\\newcommand{\\tU}{\\tens{U}}$\n",
49 | "$\\newcommand{\\tUt}{\\tU^{\\T}}$\n",
50 | "$\\newcommand{\\thU}{\\widehat{\\tU}}$\n",
51 | "$\\newcommand{\\thUt}{\\thU^{\\T}}$\n",
52 | "$\\newcommand{\\tS}{\\tens{S}}$\n",
53 | "$\\newcommand{\\tSt}{\\tS^{\\T}}$\n",
54 | "$\\newcommand{\\thS}{\\widehat{\\tS}}$\n",
55 | "$\\newcommand{\\thSt}{\\thS^{\\T}}$\n",
56 | "$\\newcommand{\\hsigma}{\\hat{\\sigma}}$\n",
57 | "$\\newcommand{\\rnk}{\\operatorname{rank}}$\n",
58 | "$\\newcommand{\\rrho}{\\boldsymbol{\\rho}}$\n",
59 | "$\\newcommand{\\TNorm}[1]{\\|#1\\|_{2}}$\n",
60 | "$\\newcommand{\\FNorm}[1]{\\|#1\\|_{F}}$\n",
61 | "$\\newcommand{\\NNorm}[1]{\\|#1\\|_{*}}$\n",
62 | "$\\newcommand{\\FNormS}[1]{\\FNorm{#1}^2}$\n",
63 | "$\\newcommand{\\TNormS}[1]{\\TNorm{#1}^2}$"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "The main functionality of ``mprod-package`` is the factorization of tensors, that is, expressing a tensor $\\tA \\in \\RR^{d_1 \\xx ... \\xx d_N}$ as a product of other, \"simpler\" tensors. \n",
71 | "To this end, one must first establish a notion of tensor-tensor multiplication.\n",
72 | "The \"M-product\" (denoted by $\\mm$), defined in Kilmer et al., refers to a \"family\" of tensor-tensor products, and provides the notion of multiplication that enables the factorization of tensors. \n",
73 | "Here, we briefly walk through the steps of the $\\mm$-product's formal construction. "
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "# The M-product\n",
81 | "\n",
82 | "We begin with some definitions. \n",
83 | "Let $\\matM$ be an $n\\xx n$ unitary matrix ($\\matM \\matMt = \\mat{I}_n = \\matMt \\matM$), and let $\\tA \\in \\RR^{\\mpn}$ be a tensor. \n",
84 | "We define the **domain transform** specified by $\\matM$ as $\\thA := \\tA \\tsM$, where $\\tsM$ denotes the tensor-matrix multiplication that applies $\\matM$ to each of the tensor's $n$-dimensional tube fibers ($\\tA_{i,j,:}$).\n",
85 | "\n",
86 | "A practical demonstration using the `scipy` and `numpy` libraries:"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 2,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "import numpy as np\n",
96 | "from scipy.stats import ortho_group # used for sampling random unitary matrices \n",
97 | " # from the Haar distribution\n",
98 | "\n",
99 | "m, p, n = 10, 5, 8\n",
100 | "\n",
101 | "A = np.random.randn(m, p, n) # generate a random tensor\n",
102 | "M = ortho_group.rvs(n)          # sample a random unitary M\n",
103 | "\n",
104 | "A_hat = np.zeros_like(A)\n",
105 | "for i in range(m):\n",
106 | " for j in range(p):\n",
107 | " A_hat[i,j,:] = M @ A[i,j,:]"
108 | ]
109 | },
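{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "For reference, here is a vectorized sketch of the same transform (reusing the `A`, `M` and `A_hat` defined above): since $\\matM$ acts along the last axis of $\\tA$, the whole loop collapses into a single matrix product."
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Vectorized sketch: (A @ M.T)[i,j,:] equals M @ A[i,j,:],\n",
  "# so a single matmul applies M to every tube fiber at once\n",
  "A_hat_vec = A @ M.T\n",
  "assert np.allclose(A_hat, A_hat_vec)"
 ]
},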
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | ".. attention::\n",
115 | "    The tensor-matrix product implementation is much more efficient than the above for-loop.\n",
116 | "\n",
117 | "\n",
118 | "\n",
119 | "The **transpose** of a real $\\mpn$ tensor $\\tA$ with respect to $\\matM$, denoted by $\\tA^{\\T}$, is a $\\pmn$ tensor for which \n",
120 | "$$[\\widehat{\\tA^{\\T}}]_{:,:,i} = [\\thA^{\\T}]_{:,:,i} = {[\\thA]_{:,:,i}}^{\\T}$$\n",
121 | "\n",
122 | "Given two tensors $\\tA \\in \\RR^{\\mpn}$ and $\\tB \\in \\RR^{p \\xx r \\xx n}$ , the facewise tensor-tensor product of $\\tA$ and $\\tB$, denoted by $\\tA \\vartriangle \\tB$ , is the $m \\xx r \\xx n$ tensor for which \n",
123 | "$$[\\tA \\vartriangle \\tB]_{:,:,i} = \\tA_{:,:,i} \\tB_{:,:,i}$$ \n",
124 | "\n",
125 | "The $\\mm$ **-product** of $\\tA \\in \\RR^{\\mpn}$ and $\\tB \\in \\RR^{p \\xx r \\xx n}$ is defined by \n",
126 | "$$\\tA \\mm \\tB := (\\thA \\vartriangle \\thB) \\tsMinv \\in \\RR^{m \\xx r \\xx n}$$ \n",
127 | "\n",
128 | "\n",
129 | "The `mprod-package` offers utility functions such as `m_prod`, which implements $\\mm$, as well as random and spectral-analysis-based generators of unitary transforms."
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 3,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "129.30020497750468\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "from mprod import m_prod\n",
147 | "from mprod import generate_haar, generate_dct\n",
148 | "\n",
149 | "funm_haar, invm_haar = generate_haar(n)  # Utility wrapper around\n",
150 | "                                          # scipy.stats.ortho_group\n",
151 | "funm_dct, invm_dct = generate_dct(n)  # Generates DCT and IDCT transforms using scipy's\n",
152 | "                                       # fft module. The default DCT type is 2\n",
153 | "\n",
154 | "# generate random tensor B \n",
155 | "r = 15\n",
156 | "B = np.random.randn(p,r,n)\n",
157 | "\n",
158 | "# Multiply A and B with respect to a randomly sampled M\n",
159 | "C_haar = m_prod(A,B,funm_haar, invm_haar)\n",
160 | "\n",
161 | "# Multiply A and B with respect to M = dct\n",
162 | "C_dct = m_prod(A,B,funm_dct, invm_dct)\n",
163 | "\n",
164 | "print(np.linalg.norm(C_haar - C_dct))"
165 | ]
166 | },
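{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "To connect the code with the definition, here is a minimal sketch (reusing `A`, `B`, `funm_haar` and `invm_haar` from above) checking that `m_prod` indeed computes $(\\thA \\vartriangle \\thB) \\tsMinv$:"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Recompute C_haar directly from the definition of the m-product\n",
  "hatA = funm_haar(A.copy())                           # domain transform of A\n",
  "hatB = funm_haar(B.copy())                           # domain transform of B\n",
  "C_facewise = np.einsum('ipk,prk->irk', hatA, hatB)   # facewise product\n",
  "assert np.allclose(invm_haar(C_facewise), C_haar)    # matches m_prod"
 ]
},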
167 | {
168 | "cell_type": "markdown",
169 | "metadata": {},
170 | "source": [
171 | "As shown above, given two distinct transforms ${\\matM}_1, {\\matM}_2$ , we have that $\\tA \\star_{\\scriptstyle \\matM_1} \\tB$ and $\\tA \\star_{\\scriptstyle \\matM_2} \\tB$ are not equal in general.\n",
172 | "This fact, as we will see, provides high flexibility when applying $\\mm$-based dimensionality reduction schemes.\n",
173 | "\n",
174 | "Two tensors $\\tA, \\tB \\in \\RR^{m \\xx 1 \\xx n}$ are called $\\mm$ **-orthogonal slices** if $\\tA^{\\T} \\mm \\tB = \\mathbf{0}$, where $\\mathbf{0} \\in \\RR^{1\\xx 1 \\xx n}$ is the zero tube fiber, while $\\tQ \\in \\RR^{m \\xx m \\xx n}$ is called $\\mm$ **-unitary** if $\\tQ^{\\T} \\mm \\tQ = \\tI = \\tQ \\mm \\tQ^{\\T}$ .\n",
175 | "\n",
176 | "A tensor $\\tB \\in \\RR^{p \\xx k \\xx n}$ is said to be a pseudo $\\mm$ -unitary tensor (or pseudo $\\mm$-orthogonal) if $\\tB^{\\T} \\mm \\tB$ is f-diagonal (i.e., all frontal slices are diagonal), and all frontal slices of $(\\tB^{\\T} \\mm \\tB) \\tsM$ are diagonal matrices with entries that are either ones or zeros.\n",
177 | "\n",
178 | "\n",
179 | "# TSVDM\n",
180 | "\n",
181 | "Let $\\tA \\in \\RR^{\\mpn}$ be a real tensor; then it is possible to write the full **tubal singular value decomposition** of $\\tA$ as \n",
182 | "$$\\tA = \\tU \\mm \\tS \\mm \\tV^{\\T}$$ \n",
183 | "\n",
184 | "where $\\tU, \\tV$ are $(m \\xx m \\xx n)$ and $(p \\xx p \\xx n)$ $\\mm$-unitary tensors, respectively, and $\\tS \\in \\RR^{\\mpn}$ is an **f-diagonal** tensor, that is, a tensor whose frontal slices ( $\\tS_{:,:,i}$ ) are matrices with zeros outside their main diagonal.\n",
185 | "\n",
186 | "We use the notation $\\hsigma_{j}^{(i)}$ to denote the $j^{th}$ largest singular value on the $i^{th}$ frontal slice of $\\thS$: \n",
187 | "$$\\hsigma_{j}^{(i)} := \\thS_{j,j,i}$$\n"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 4,
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "name": "stdout",
197 | "output_type": "stream",
198 | "text": [
199 | "U: 10x5x8\n",
200 | "S: 5x8\n",
201 | "V: 5x5x8 \n",
202 | "\n",
203 | "||A - USV'||^2 = 5.159366909775574e-27\n"
204 | ]
205 | }
206 | ],
207 | "source": [
208 | "from mprod.decompositions import svdm\n",
209 | "from mprod import tensor_mtranspose\n",
210 | "\n",
211 | "U,S,V = svdm(A, funm_haar, invm_haar)\n",
212 | "\n",
213 | "print(\"U:\", \"x\".join(map(str, U.shape)))\n",
214 | "print(\"S:\", \"x\".join(map(str, S.shape)))\n",
215 | "print(\"V:\", \"x\".join(map(str, V.shape)),\"\\n\")\n",
216 | "\n",
217 | "# Note that for practical reasons, S is stored in a lean data structure.\n",
218 | "# To obtain the \"tensorial\" representation of S, we do as follows\n",
219 | "tens_S = np.zeros((p,p,n))\n",
220 | "for i in range(n):\n",
221 | " tens_S[:S.shape[0],:S.shape[0],i] = np.diag(S[:,i])\n",
222 | "\n",
223 | "\n",
224 | "# reconstruct the tensor\n",
225 | "Vt = tensor_mtranspose(V,funm_haar, invm_haar)\n",
226 | "US = m_prod(U, tens_S, funm_haar, invm_haar)\n",
227 | "USVt = m_prod(US, Vt, funm_haar, invm_haar)\n",
228 | "\n",
229 | "print(\"||A - USV'||^2 =\",np.linalg.norm(A - USVt)**2) # practically 0"
230 | ]
231 | },
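{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "As a small sketch of the $\\hsigma_{j}^{(i)}$ notation (reusing `tens_S` and `funm_haar` from above), we can collect all the singular values into a single $p \\xx n$ array whose $(j,i)$ entry is $\\hsigma_{j}^{(i)}$:"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# sigma_hat[j, i] is the (j,j) entry of the i-th frontal slice of S\n",
  "# in the transform domain, i.e. the notation defined above\n",
  "S_hat = funm_haar(tens_S.copy())\n",
  "sigma_hat = np.stack([np.diag(S_hat[:, :, i]) for i in range(n)], axis=1)\n",
  "print(sigma_hat.shape)  # (p, n)"
 ]
},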
232 | {
233 | "cell_type": "markdown",
234 | "metadata": {},
235 | "source": [
236 | "# Tensor ranks and truncations\n",
237 | "\n",
238 | "* The **t-rank** of $\\tA$ is the number of nonzero tubes of $\\tS$: \n",
239 | "$$\n",
240 | "r = | \\left\\{ i = 1, \\dots, \\min(m,p) ~;~ \\FNormS{\\tS_{i,i,:}} > 0 \\right\\} |\n",
241 | "$$\n",
242 | "\n",
243 | "$\\tA^{(q)} = \\tU_{:,1:q, :} \\mm \\tS_{1:q,1:q,:} \\mm {\\tV_{:,1:q,:}}^{\\T}$ denotes the t-rank $q$ truncation of $\\tA$ under $\\mm$\n",
244 | " \n",
245 | "* The **multi-rank** of $\\tA$ under $\\mm$ is the vector $\\rrho \\in \\mathbb{N}^{n}$ whose $i^{th}$ entry is \n",
246 | "$$\n",
247 | "\\rrho_i = \\rnk (\\thA_{:,:,i})\n",
248 | "$$\n",
249 | "\n",
250 | "The multi-rank $\\rrho$ truncation of $\\tA$ under $\\mm$ is given by the tensor $\\tA_{\\rrho}$ for which \n",
251 | "$$\n",
252 | "\\widehat{\\tA_{\\rrho}}_{:,:,i} = \\thU_{:,1:\\rrho_i, i} \\thS_{1:\\rrho_i,1:\\rrho_i,i} {\\thV_{:,1:\\rrho_i,i}}^{\\T}\n",
253 | "$$ \n",
254 | "\n",
255 | "* The **implicit rank** under $\\mm$ of a tensor $\\tA$ with multi-rank $\\rrho$ under $\\mm$ is \n",
256 | "$$\n",
257 | "r = \\sum_{i=1}^{n} \\rrho_i\n",
258 | "$$\n",
259 | "\n",
260 | "Note that for t-rank truncation the $\\tU$ and $\\tV$ factors are $\\mm$-orthogonal, while for multi-rank truncation they are only pseudo $\\mm$-orthogonal."
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": 5,
266 | "metadata": {},
267 | "outputs": [],
268 | "source": [
269 | "# t-rank 4 truncation\n",
270 | "q = 4\n",
271 | "tens_S_t_hat = funm_haar(tens_S.copy())\n",
272 | "tens_S_t_hat[q:,q:,:] = 0\n",
273 | "tens_S_t = invm_haar(tens_S_t_hat)\n",
274 | "A4 = m_prod(m_prod(U, tens_S_t, funm_haar, invm_haar), Vt, funm_haar, invm_haar)\n",
275 | "\n",
276 | "\n",
277 | "# multi-rank rho truncation\n",
278 | "rho = [1,3,2,2,3,1,4,3] # this is the multi-rank vector\n",
279 | "tens_S_rho_hat = funm_haar(tens_S.copy())\n",
280 | "for i in range(n):\n",
281 | " tens_S_rho_hat[rho[i]:,rho[i]:,i] = 0\n",
282 | "\n",
283 | "tens_S_rho = invm_haar(tens_S_rho_hat)\n",
284 | "A_rho = m_prod(m_prod(U, tens_S_rho, funm_haar, invm_haar), Vt, funm_haar, invm_haar)"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "\n",
292 | "Let $\\tA = \\tU \\mm \\tS \\mm \\tV^{\\T} \\in \\RR^{\\mpn}$. \n",
293 | "We use $(j_1, i_1),\\dots, (j_{np}, i_{np})$ to denote the indices of the nonzero entries of $\\thS$, ordered by decreasing value. That is, \n",
294 | "$$\\hsigma_{\\ell} := \\hsigma_{j_{\\ell}}^{(i_{\\ell})}$$\n",
295 | "\n",
296 | "where $\\hsigma_1 \\geq \\hsigma_2 \\geq \\dots \\geq \\hsigma_{np}$ .\n",
297 | "\n",
298 | "For $q = 1 , \\dots , p n$ , the **explicit rank-** $q$ **truncation** under $\\mm$ of a tensor $\\tA$, denoted by $\\tA_q$, is the tensor $\\tA_{\\rrho}$ of multi-rank $\\rrho$ under $\\mm$ such that \n",
299 | "$$\\rrho_i = \\max \\{ j = 1, \\dots ,p ~|~ (j,i) \\in \\{(j_1, i_1), \\dots, (j_q, i_q)\\} \\} .$$ \n",
300 | "\n",
301 | "In words, we keep the $q$ largest singular values across all frontal slices of $\\thS$, and zero out the rest. \n",
302 | "\n",
303 | "\n",
304 | "\n"
305 | ]
306 | },
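{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "A minimal sketch of the explicit rank-$q$ truncation (reusing `U`, `Vt`, `tens_S`, `funm_haar` and `invm_haar` from above; the variable names here are illustrative):"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# explicit rank-q truncation: keep the q largest singular values of S-hat\n",
  "# across all frontal slices, zero out the rest (ties at the cutoff are kept)\n",
  "q = 10\n",
  "S_hat_q = funm_haar(tens_S.copy())\n",
  "cutoff = np.sort([np.diag(S_hat_q[:, :, i]) for i in range(n)], axis=None)[-q]\n",
  "for i in range(n):\n",
  "    d = np.diag(S_hat_q[:, :, i]).copy()\n",
  "    d[d < cutoff] = 0\n",
  "    S_hat_q[:, :, i] = np.diag(d)\n",
  "A_q = m_prod(m_prod(U, invm_haar(S_hat_q), funm_haar, invm_haar),\n",
  "             Vt, funm_haar, invm_haar)"
 ]
},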
307 | {
308 | "cell_type": "markdown",
309 | "metadata": {},
310 | "source": [
311 | ".. note::\n",
312 | " We have that $\\tA^{(q)}, \\tA_{\\rrho}$ and $\\tA_{q}$ are the best t-rank $q$, multi-rank $\\rrho$ and explicit-rank $q$ (under $\\mm$) approximations of $\\tA$, respectively.\n",
313 | "\n",
314 | "\n",
315 | "\n",
316 | "\n",
317 | "# The effect of choosing different transforms \n",
318 | "\n",
319 | "To demonstrate how the choice of $\\matM$ might influence the resulting decomposition, we use a real-world time-series dataset obtained from a study on Pediatric Ulcerative Colitis (PUC) by Schirmer et al.\n",
320 | "\n",
321 | "First, we obtain the data table from our analysis GitHub repo, construct a tensor from the data, and apply TSVDM with respect to both a randomly sampled $\\matM$ and the DCT.\n",
322 | "\n",
323 | "Note that in the `generate_haar` function call, we set the `random_state` parameter to an integer (123) so that the results are reproducible."
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 57,
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | "shape of S, by randomly sampled transform: (87, 4)\n",
336 | "shape of S, by DCT: (4, 4)\n"
337 | ]
338 | }
339 | ],
340 | "source": [
341 | "import pandas as pd\n",
342 | "from mprod import table2tensor\n",
343 | "\n",
344 | "file_path = \"https://raw.githubusercontent.com/UriaMorP/\" \\\n",
345 | " \"tcam_analysis_notebooks/main/Schirmer2018/Schirmer2018.tsv\"\n",
346 | "\n",
347 | "data_raw = pd.read_csv(file_path, index_col=[0,1], sep=\"\\t\"\n",
348 | " , dtype={'Week':int})\n",
349 | "\n",
350 | "data_tensor, map1, map3 = table2tensor(data_raw)\n",
351 | "\n",
352 | "m,p,n = data_tensor.shape\n",
353 | "\n",
354 | "# Generate transforms according to the \n",
355 | "# relevant dimensions\n",
356 | "funm_haar, invm_haar = generate_haar(n,random_state=123)\n",
357 | "funm_dct, invm_dct = generate_dct(n)\n",
358 | "\n",
359 | "\n",
360 | "# Haar\n",
361 | "Uhaar, Shaar, Vhaar = svdm(data_tensor, funm_haar, invm_haar)\n",
362 | "print(\"shape of S, by randomly sampled transform:\", Shaar.shape)\n",
363 | "# DCT\n",
364 | "Udct, Sdct, Vdct = svdm(data_tensor, funm_dct, invm_dct)\n",
365 | "print(\"shape of S, by DCT:\", Sdct.shape)\n"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "In this case, the t-rank of our data is 4 under the DCT domain transform, and 87 under $\\mm$ where $\\matM$ is randomly sampled from the Haar distribution. \n",
373 | "\n",
374 | "Even though it is not generally true that choosing $\\matM$ as the DCT results in better compression, the fact that it does so for time-series data makes sense: we assume that time-series data are samples of continuous functions, which are well approximated using very few DCT basis elements."
375 | ]
376 | },
377 | {
378 | "cell_type": "raw",
379 | "metadata": {
380 | "raw_mimetype": "text/restructuredtext"
381 | },
382 | "source": [
383 | ".. footbibliography::"
384 | ]
385 | }
386 | ],
387 | "metadata": {
388 | "celltoolbar": "Raw Cell Format",
389 | "kernelspec": {
390 | "display_name": "mprod",
391 | "language": "python",
392 | "name": "mprod"
393 | },
394 | "language_info": {
395 | "codemirror_mode": {
396 | "name": "ipython",
397 | "version": 3
398 | },
399 | "file_extension": ".py",
400 | "mimetype": "text/x-python",
401 | "name": "python",
402 | "nbconvert_exporter": "python",
403 | "pygments_lexer": "ipython3",
404 | "version": "3.6.8"
405 | },
406 | "toc": {
407 | "base_numbering": 1,
408 | "nav_menu": {},
409 | "number_sections": true,
410 | "sideBar": true,
411 | "skip_h1_title": false,
412 | "title_cell": "Table of Contents",
413 | "title_sidebar": "Contents",
414 | "toc_cell": false,
415 | "toc_position": {},
416 | "toc_section_display": true,
417 | "toc_window_display": false
418 | }
419 | },
420 | "nbformat": 4,
421 | "nbformat_minor": 4
422 | }
423 |
--------------------------------------------------------------------------------
/runtests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | runtests.py [OPTIONS] [-- ARGS]
4 |
5 | Run tests, building the project first.
6 |
7 | Examples::
8 |
9 | $ python runtests.py
10 | $ python runtests.py -s {SAMPLE_SUBMODULE}
11 | $ # Run a standalone test function:
12 | $ python runtests.py -t {SAMPLE_TEST}
14 |     $ python runtests.py --ipython
15 | $ python runtests.py --python somescript.py
16 | $ python runtests.py --bench
17 | $ python runtests.py --durations 20
18 |
19 | Run a debugger:
20 |
21 | $ gdb --args python runtests.py [...other args...]
22 |
23 | Disable pytest capturing of output by using its '-s' option:
24 |
25 | $ python runtests.py -- -s
26 |
27 | Generate C code coverage listing under build/lcov/:
28 | (requires http://ltp.sourceforge.net/coverage/lcov.php)
29 |
30 | $ python runtests.py --gcov [...other args...]
31 | $ python runtests.py --lcov-html
32 |
33 | Run lint checks.
34 | Provide target branch name or `uncommitted` to check before committing:
35 |
36 | $ python runtests.py --lint main
37 | $ python runtests.py --lint uncommitted
38 |
39 | """
40 | #
41 | # This is a generic test runner script for projects using NumPy's test
42 | # framework. Change the following values to adapt to your project:
43 | #
44 | import numpy.lib
45 |
46 | PROJECT_MODULE = "mprod"
47 | PROJECT_ROOT_FILES = ['mprod', 'setup.py']
48 | SAMPLE_TEST = "mprod/decompositions/tests/test_decompositions.py::test_tqrm"
49 | SAMPLE_SUBMODULE = "decompositions"
50 |
51 | # EXTRA_PATH = ['/home/labs/elinav/uria/lib/ccache', '/home/labs/elinav/uria/lib/f90cache',
52 | # '/home/labs/elinav/uria/local/lib/ccache', '/home/labs/elinav/uria/local/lib/f90cache']
53 |
54 | EXTRA_PATH = ['~/lib/ccache', '~/lib/f90cache',
55 | '~/local/lib/ccache', '~/local/lib/f90cache']
56 |
57 |
58 | # ---------------------------------------------------------------------
59 |
60 |
61 | if __doc__ is None:
62 | __doc__ = "Run without -OO if you want usage info"
63 | else:
64 | __doc__ = __doc__.format(**globals())
65 |
66 | import sys
67 | import os, glob
68 |
69 | # In case we are run from the source directory, we don't want to import the
70 | # project from there:
71 | sys.path.pop(0)
72 |
73 | import shutil
74 | import subprocess
75 | import time
76 | from argparse import ArgumentParser, REMAINDER
77 |
78 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
79 |
80 |
81 | def main(argv):
82 | parser = ArgumentParser(usage=__doc__.lstrip())
83 | parser.add_argument("--verbose", "-v", action="count", default=1,
84 | help="Add one verbosity level to pytest. Default is 0")
85 | parser.add_argument("--debug-info", action="store_true",
86 | help=("Add --verbose-cfg to build_src to show "
87 | "compiler configuration output while creating "
88 | "_numpyconfig.h and config.h"))
89 | parser.add_argument("--no-build", "-n", action="store_true", default=False,
90 | help="Do not build the project (use system installed "
91 | "version)")
92 | parser.add_argument("--build-only", "-b", action="store_true",
93 | default=False, help="Just build, do not run any tests")
94 | parser.add_argument("--doctests", action="store_true", default=False,
95 | help="Run doctests in module")
96 | parser.add_argument("--refguide-check", action="store_true", default=False,
97 | help="Run refguide (doctest) check (do not run "
98 |                         "regular tests)")
99 | parser.add_argument("--coverage", action="store_true", default=False,
100 | help=("Report coverage of project code. HTML output "
101 | "goes under build/coverage"))
102 | parser.add_argument("--lint", default=None,
103 | help="'' or 'uncommitted', passed to "
104 | "tools/linter.py [--branch BRANCH] "
105 | "[--uncommitted]")
106 | parser.add_argument("--durations", action="store", default=-1, type=int,
107 | help=("Time N slowest tests, time all if 0, time none "
108 | "if < 0"))
109 | parser.add_argument("--gcov", action="store_true", default=False,
110 | help=("Enable C code coverage via gcov (requires "
111 | "GCC). gcov output goes to build/**/*.gc*"))
112 | parser.add_argument("--lcov-html", action="store_true", default=False,
113 | help=("Produce HTML for C code coverage information "
114 | "from a previous run with --gcov. "
115 | "HTML output goes to build/lcov/"))
116 | parser.add_argument("--mode", "-m", default="fast",
117 | help="'fast', 'full', or something that could be "
118 | "passed to nosetests -A [default: fast]")
119 | parser.add_argument("--submodule", "-s", default=None,
120 | help="Submodule whose tests to run (cluster, "
121 | "constants, ...)")
122 | parser.add_argument("--pythonpath", "-p", default=None,
123 | help="Paths to prepend to PYTHONPATH")
124 | parser.add_argument("--tests", "-t", action='append',
125 | help="Specify tests to run")
126 | parser.add_argument("--python", action="store_true",
127 | help="Start a Python shell with PYTHONPATH set")
128 | parser.add_argument("--ipython", "-i", action="store_true",
129 | help="Start IPython shell with PYTHONPATH set")
130 | parser.add_argument("--shell", action="store_true",
131 | help="Start Unix shell with PYTHONPATH set")
132 | parser.add_argument("--mypy", action="store_true",
133 | help="Run mypy on files with NumPy on the MYPYPATH")
134 | parser.add_argument("--debug", "-g", action="store_true",
135 | help="Debug build")
136 | parser.add_argument("--parallel", "-j", type=int, default=0,
137 | help="Number of parallel jobs during build")
138 | parser.add_argument("--warn-error", action="store_true",
139 | help="Set -Werror to convert all compiler warnings to "
140 | "errors")
141 | parser.add_argument("--cpu-baseline", default=None,
142 | help="Specify a list of enabled baseline CPU "
143 |                         "optimizations")
144 | parser.add_argument("--cpu-dispatch", default=None,
145 |                         help="Specify a list of dispatched CPU optimizations")
146 | parser.add_argument("--disable-optimization", action="store_true",
147 | help="Disable CPU optimized code (dispatch, simd, "
148 |                         "fast, ...)")
149 | parser.add_argument("--simd-test", default=None,
150 | help="Specify a list of CPU optimizations to be "
151 |                         "tested against NumPy SIMD interface")
152 | parser.add_argument("--show-build-log", action="store_true",
153 | help="Show build output rather than using a log file")
154 | parser.add_argument("--bench", action="store_true",
155 | help="Run benchmark suite instead of test suite")
156 | parser.add_argument("--bench-compare", action="store", metavar="COMMIT",
157 | help=("Compare benchmark results of current HEAD to "
158 | "BEFORE. Use an additional "
159 | "--bench-compare=COMMIT to override HEAD with "
160 | "COMMIT. Note that you need to commit your "
161 | "changes first!"))
162 | parser.add_argument("args", metavar="ARGS", default=[], nargs=REMAINDER,
163 | help="Arguments to pass to pytest, asv, mypy, Python "
164 | "or shell")
165 | args = parser.parse_args(argv)
166 |
167 | if args.durations < 0:
168 | args.durations = -1
169 |
170 | if args.bench_compare:
171 | args.bench = True
172 | args.no_build = True # ASV does the building
173 |
174 | if args.lcov_html:
175 | # generate C code coverage output
176 | lcov_generate()
177 | sys.exit(0)
178 |
179 | if args.pythonpath:
180 | for p in reversed(args.pythonpath.split(os.pathsep)):
181 | sys.path.insert(0, p)
182 |
183 | if args.gcov:
184 | gcov_reset_counters()
185 |
186 | if args.debug and args.bench:
187 | print("*** Benchmarks should not be run against debug "
188 | "version; remove -g flag ***")
189 |
190 | if args.lint:
191 | check_lint(args.lint)
192 |
193 | if not args.no_build:
194 | # we need the noarch path in case the package is pure python.
195 | site_dir, site_dir_noarch = build_project(args)
196 | sys.path.insert(0, site_dir)
197 | sys.path.insert(0, site_dir_noarch)
198 | os.environ['PYTHONPATH'] = site_dir + os.pathsep + site_dir_noarch
199 | else:
200 | _temp = __import__(PROJECT_MODULE)
201 | site_dir = os.path.sep.join(_temp.__file__.split(os.path.sep)[:-2])
202 |
203 | extra_argv = args.args[:]
204 | if not args.bench:
205 |         # extra_argv may also list selected benchmarks
206 | if extra_argv and extra_argv[0] == '--':
207 | extra_argv = extra_argv[1:]
208 |
209 | if args.python:
210 | # Debugging issues with warnings is much easier if you can see them
211 | print("Enabling display of all warnings")
212 | import warnings
213 | import types
214 |
215 | warnings.filterwarnings("always")
216 | if extra_argv:
217 | # Don't use subprocess, since we don't want to include the
218 | # current path in PYTHONPATH.
219 | sys.argv = extra_argv
220 | with open(extra_argv[0], 'r') as f:
221 | script = f.read()
222 | sys.modules['__main__'] = types.ModuleType('__main__')
223 | ns = dict(__name__='__main__',
224 | __file__=extra_argv[0])
225 | exec(script, ns)
226 | sys.exit(0)
227 | else:
228 | import code
229 | code.interact()
230 | sys.exit(0)
231 |
232 | if args.ipython:
233 | # Debugging issues with warnings is much easier if you can see them
234 | print("Enabling display of all warnings and pre-importing numpy as np")
235 |         import warnings
236 | warnings.filterwarnings("always")
237 | import IPython
238 | import numpy as np
239 | IPython.embed(colors='neutral', user_ns={"np": np})
240 | sys.exit(0)
241 |
242 | if args.shell:
243 | shell = os.environ.get('SHELL', 'cmd' if os.name == 'nt' else 'sh')
244 | print("Spawning a shell ({})...".format(shell))
245 | subprocess.call([shell] + extra_argv)
246 | sys.exit(0)
247 |
248 | if args.mypy:
249 | try:
250 | import mypy.api
251 | except ImportError:
252 | raise RuntimeError(
253 | "Mypy not found. Please install it by running "
254 | "pip install -r test_requirements.txt from the repo root"
255 | )
256 |
257 | os.environ['MYPYPATH'] = site_dir
258 | # By default mypy won't color the output since it isn't being
259 | # invoked from a tty.
260 | os.environ['MYPY_FORCE_COLOR'] = '1'
261 |
262 | config = os.path.join(
263 | site_dir,
264 | "numpy",
265 | "typing",
266 | "tests",
267 | "data",
268 | "mypy.ini",
269 | )
270 |
271 | report, errors, status = mypy.api.run(
272 | ['--config-file', config] + args.args
273 | )
274 | print(report, end='')
275 | print(errors, end='', file=sys.stderr)
276 | sys.exit(status)
277 |
278 | if args.coverage:
279 | dst_dir = os.path.join(ROOT_DIR, 'build', 'coverage')
280 | fn = os.path.join(dst_dir, 'coverage_html.js')
281 | if os.path.isdir(dst_dir) and os.path.isfile(fn):
282 | shutil.rmtree(dst_dir)
283 | extra_argv += ['--cov-report=html:' + dst_dir]
284 |
285 | if args.refguide_check:
286 | cmd = [os.path.join(ROOT_DIR, 'tools', 'refguide_check.py'),
287 | '--doctests']
288 | if args.submodule:
289 | cmd += [args.submodule]
290 | os.execv(sys.executable, [sys.executable] + cmd)
291 | sys.exit(0)
292 |
293 | if args.bench:
294 | # Run ASV
295 | for i, v in enumerate(extra_argv):
296 | if v.startswith("--"):
297 | items = extra_argv[:i]
298 | if v == "--":
299 |                 i += 1  # skip '--'; the remaining args are passed on.
300 | bench_args = extra_argv[i:]
301 | break
302 | else:
303 | items = extra_argv
304 | bench_args = []
305 |
306 | if args.tests:
307 | items += args.tests
308 | if args.submodule:
309 | items += [args.submodule]
310 | for a in items:
311 | bench_args.extend(['--bench', a])
312 |
313 | if not args.bench_compare:
314 | cmd = ['asv', 'run', '-n', '-e', '--python=same'] + bench_args
315 | ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks'))
316 | sys.exit(ret)
317 | else:
318 | commits = [x.strip() for x in args.bench_compare.split(',')]
319 | if len(commits) == 1:
320 | commit_a = commits[0]
321 | commit_b = 'HEAD'
322 | elif len(commits) == 2:
323 | commit_a, commit_b = commits
324 | else:
325 |             parser.error("Too many commits to compare benchmarks for")
326 |
327 | # Check for uncommitted files
328 | if commit_b == 'HEAD':
329 | r1 = subprocess.call(['git', 'diff-index', '--quiet',
330 | '--cached', 'HEAD'])
331 | r2 = subprocess.call(['git', 'diff-files', '--quiet'])
332 | if r1 != 0 or r2 != 0:
333 | print("*" * 80)
334 | print("WARNING: you have uncommitted changes --- "
335 | "these will NOT be benchmarked!")
336 | print("*" * 80)
337 |
338 | # Fix commit ids (HEAD is local to current repo)
339 | out = subprocess.check_output(['git', 'rev-parse', commit_b])
340 | commit_b = out.strip().decode('ascii')
341 |
342 | out = subprocess.check_output(['git', 'rev-parse', commit_a])
343 | commit_a = out.strip().decode('ascii')
344 |
345 | # generate config file with the required build options
346 | asv_cfpath = [
347 | '--config', asv_compare_config(
348 | os.path.join(ROOT_DIR, 'benchmarks'), args,
349 | # to clear the cache if the user changed build options
350 | (commit_a, commit_b)
351 | )
352 | ]
353 | cmd = ['asv', 'continuous', '-e', '-f', '1.05',
354 | commit_a, commit_b] + asv_cfpath + bench_args
355 | ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks'))
356 | sys.exit(ret)
357 |
358 | if args.build_only:
359 | sys.exit(0)
360 | else:
361 | __import__(PROJECT_MODULE)
362 | # from numpy.testing._private.nosetester import NoseTester
363 | # test = NoseTester.test
364 | test = sys.modules[PROJECT_MODULE].test
365 |
366 | if args.submodule:
367 | tests = [PROJECT_MODULE + "." + args.submodule]
368 | elif args.tests:
369 | tests = args.tests
370 | else:
371 | tests = None
372 |
373 | # Run the tests under build/test
374 |
375 | if not args.no_build:
376 | test_dir = site_dir
377 | else:
378 | test_dir = os.path.join(ROOT_DIR, 'build', 'test')
379 | if not os.path.isdir(test_dir):
380 | os.makedirs(test_dir)
381 |
382 | shutil.copyfile(os.path.join(ROOT_DIR, '.coveragerc'),
383 | os.path.join(test_dir, '.coveragerc'))
384 |
385 | cwd = os.getcwd()
386 | try:
387 | os.chdir(test_dir)
388 |
389 | result = test(args.mode,
390 | verbose=args.verbose,
391 | extra_argv=extra_argv,
392 | doctests=args.doctests,
393 | coverage=args.coverage,
394 | durations=args.durations,
395 | tests=tests)
396 |
397 | finally:
398 | os.chdir(cwd)
399 |
400 | if isinstance(result, bool):
401 | sys.exit(0 if result else 1)
402 | elif result.wasSuccessful():
403 | sys.exit(0)
404 | else:
405 | sys.exit(1)
406 |
407 |
408 | def build_project(args):
409 | """
410 | Build a dev version of the project.
411 |
412 | Returns
413 | -------
414 | site_dir
415 | site-packages directory where it was installed
416 |
417 | """
418 |
419 | import sysconfig
420 |
421 | root_ok = [os.path.exists(os.path.join(ROOT_DIR, fn))
422 | for fn in PROJECT_ROOT_FILES]
423 | if not all(root_ok):
424 | print("To build the project, run runtests.py in "
425 | "git checkout or unpacked source")
426 | sys.exit(1)
427 |
428 | dst_dir = os.path.join(ROOT_DIR, 'build', 'testenv')
429 |
430 | env = dict(os.environ)
431 | cmd = [sys.executable, 'setup.py']
432 |
433 | # Always use ccache, if installed
434 | env['PATH'] = os.pathsep.join(EXTRA_PATH + env.get('PATH', '').split(os.pathsep))
435 | cvars = sysconfig.get_config_vars()
436 | compiler = env.get('CC') or cvars.get('CC', '')
437 | if 'gcc' in compiler:
438 | # Check that this isn't clang masquerading as gcc.
439 | if sys.platform != 'darwin' or 'gnu-gcc' in compiler:
440 | # add flags used as werrors
441 | warnings_as_errors = ' '.join([
442 | # from tools/travis-test.sh
443 | '-Werror=vla',
444 | '-Werror=nonnull',
445 | '-Werror=pointer-arith',
446 | '-Wlogical-op',
447 | # from sysconfig
448 | '-Werror=unused-function',
449 | ])
450 | env['CFLAGS'] = warnings_as_errors + ' ' + env.get('CFLAGS', '')
451 | if args.debug or args.gcov:
452 | # assume everyone uses gcc/gfortran
453 | env['OPT'] = '-O0 -ggdb'
454 | env['FOPT'] = '-O0 -ggdb'
455 | if args.gcov:
456 | env['OPT'] = '-O0 -ggdb'
457 | env['FOPT'] = '-O0 -ggdb'
458 | env['CC'] = cvars['CC'] + ' --coverage'
459 | env['CXX'] = cvars['CXX'] + ' --coverage'
460 | env['F77'] = 'gfortran --coverage '
461 | env['F90'] = 'gfortran --coverage '
462 | env['LDSHARED'] = cvars['LDSHARED'] + ' --coverage'
463 | env['LDFLAGS'] = " ".join(cvars['LDSHARED'].split()[1:]) + ' --coverage'
464 |
465 | cmd += ["build"]
466 | if args.parallel > 1:
467 | cmd += ["-j", str(args.parallel)]
468 | if args.warn_error:
469 | cmd += ["--warn-error"]
470 | if args.cpu_baseline:
471 | cmd += ["--cpu-baseline", args.cpu_baseline]
472 | if args.cpu_dispatch:
473 | cmd += ["--cpu-dispatch", args.cpu_dispatch]
474 | if args.disable_optimization:
475 | cmd += ["--disable-optimization"]
476 | if args.simd_test is not None:
477 | cmd += ["--simd-test", args.simd_test]
478 | if args.debug_info:
479 | cmd += ["build_src", "--verbose-cfg"]
480 | # Install; avoid producing eggs so numpy can be imported from dst_dir.
481 | cmd += ['install', '--prefix=' + dst_dir,
482 | '--single-version-externally-managed',
483 |             '--record=' + os.path.join(dst_dir, 'tmp_install_log.txt')]
484 |
485 | from distutils.sysconfig import get_python_lib
486 | site_dir = get_python_lib(prefix=dst_dir, plat_specific=True)
487 | site_dir_noarch = get_python_lib(prefix=dst_dir, plat_specific=False)
488 |     # easy_install won't install to a path that Python cannot see by default
489 |     # and that isn't on the PYTHONPATH. Plus, the path has to exist.
490 | if not os.path.exists(site_dir):
491 | os.makedirs(site_dir)
492 | if not os.path.exists(site_dir_noarch):
493 | os.makedirs(site_dir_noarch)
494 | env['PYTHONPATH'] = site_dir + os.pathsep + site_dir_noarch
495 |
496 | log_filename = os.path.join(ROOT_DIR, 'build.log')
497 |
498 | if args.show_build_log:
499 | ret = subprocess.call(cmd, env=env, cwd=ROOT_DIR)
500 | else:
501 | log_filename = os.path.join(ROOT_DIR, 'build.log')
502 | print("Building, see build.log...")
503 | with open(log_filename, 'w') as log:
504 | p = subprocess.Popen(cmd, env=env, stdout=log, stderr=log,
505 | cwd=ROOT_DIR)
506 | try:
507 | # Wait for it to finish, and print something to indicate the
508 | # process is alive, but only if the log file has grown (to
509 |             # allow continuous-integration environments to kill a hanging
510 | # process accurately if it produces no output)
511 | last_blip = time.time()
512 | last_log_size = os.stat(log_filename).st_size
513 | while p.poll() is None:
514 | time.sleep(0.5)
515 | if time.time() - last_blip > 60:
516 | log_size = os.stat(log_filename).st_size
517 | if log_size > last_log_size:
518 | print(" ... build in progress")
519 | last_blip = time.time()
520 | last_log_size = log_size
521 |
522 | ret = p.wait()
523 | except:
524 | p.kill()
525 | p.wait()
526 | raise
527 |
528 | if ret == 0:
529 | print("Build OK")
530 | else:
531 | if not args.show_build_log:
532 | with open(log_filename, 'r') as f:
533 | print(f.read())
534 | print("Build failed!")
535 | sys.exit(1)
536 |
537 | return site_dir, site_dir_noarch
538 |
539 |
540 | def asv_compare_config(bench_path, args, h_commits):
541 | """
542 | Fill the required build options through custom variable
543 | 'numpy_build_options' and return the generated config path.
544 | """
545 | conf_path = os.path.join(bench_path, "asv_compare.conf.json.tpl")
546 | nconf_path = os.path.join(bench_path, "_asv_compare.conf.json")
547 |
548 | # add custom build
549 | build = []
550 | if args.parallel > 1:
551 | build += ["-j", str(args.parallel)]
552 | if args.cpu_baseline:
553 | build += ["--cpu-baseline", args.cpu_baseline]
554 | if args.cpu_dispatch:
555 | build += ["--cpu-dispatch", args.cpu_dispatch]
556 | if args.disable_optimization:
557 | build += ["--disable-optimization"]
558 |
559 | is_cached = asv_substitute_config(conf_path, nconf_path,
560 | numpy_build_options=' '.join([f'\\"{v}\\"' for v in build]),
561 | numpy_global_options=' '.join(
562 | [f'--global-option=\\"{v}\\"' for v in ["build"] + build])
563 | )
564 | if not is_cached:
565 | asv_clear_cache(bench_path, h_commits)
566 | return nconf_path
567 |
568 |
569 | def asv_clear_cache(bench_path, h_commits, env_dir="env"):
570 | """
571 | Force ASV to clear the cache according to specified commit hashes.
572 | """
573 | # FIXME: only clear the cache from the current environment dir
574 | asv_build_pattern = os.path.join(bench_path, env_dir, "*", "asv-build-cache")
575 | for asv_build_cache in glob.glob(asv_build_pattern, recursive=True):
576 | for c in h_commits:
577 | try:
578 | shutil.rmtree(os.path.join(asv_build_cache, c))
579 | except OSError:
580 | pass
581 |
582 |
583 | def asv_substitute_config(in_config, out_config, **custom_vars):
584 | """
585 | A workaround to allow substituting custom tokens within
586 | ASV configuration file since there's no official way to add custom
587 |     variables (e.g. env vars).
588 |
589 | Parameters
590 | ----------
591 | in_config : str
592 | The path of ASV configuration file, e.g. '/path/to/asv.conf.json'
593 | out_config : str
594 | The path of generated configuration file,
595 | e.g. '/path/to/asv_substituted.conf.json'.
596 |
597 | The other keyword arguments represent the custom variables.
598 |
599 | Returns
600 | -------
601 |     True (is cached) if 'out_config' is already generated with
602 | the same '**custom_vars' and updated with latest 'in_config',
603 | False otherwise.
604 |
605 | Examples
606 | --------
607 | See asv_compare_config().
608 | """
609 | assert in_config != out_config
610 | assert len(custom_vars) > 0
611 |
612 | def sdbm_hash(*factors):
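        # Classic sdbm string hash (chash = c + (chash << 6) + (chash << 16) - chash),
        # folded to 32 bits, over the string form of every factor; used as a cheap
        # fingerprint of the custom vars and the config file's mtime.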
613 | chash = 0
614 | for f in factors:
615 | for char in str(f):
616 | chash = ord(char) + (chash << 6) + (chash << 16) - chash
617 | chash &= 0xFFFFFFFF
618 | return chash
619 |
620 | vars_hash = sdbm_hash(custom_vars, os.path.getmtime(in_config))
621 | try:
622 | with open(out_config, "r") as wfd:
623 | hash_line = wfd.readline().split('hash:')
624 | if len(hash_line) > 1 and int(hash_line[1]) == vars_hash:
625 | return True
626 | except IOError:
627 | pass
628 |
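    # Wrap each key as a literal '{key}' token; these tokens are then
    # substituted line by line in the template config file below.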
629 | custom_vars = {f'{{{k}}}': v for k, v in custom_vars.items()}
630 | with open(in_config, "r") as rfd, open(out_config, "w") as wfd:
631 | wfd.write(f"// hash:{vars_hash}\n")
632 | wfd.write("// This file is automatically generated by runtests.py\n")
633 | for line in rfd:
634 | for key, val in custom_vars.items():
635 | line = line.replace(key, val)
636 | wfd.write(line)
637 | return False
638 |
639 |
640 | #
641 | # GCOV support
642 | #
643 | def gcov_reset_counters():
644 | print("Removing previous GCOV .gcda files...")
645 | build_dir = os.path.join(ROOT_DIR, 'build')
646 | for dirpath, dirnames, filenames in os.walk(build_dir):
647 | for fn in filenames:
648 | if fn.endswith('.gcda') or fn.endswith('.da'):
649 | pth = os.path.join(dirpath, fn)
650 | os.unlink(pth)
651 |
652 |
653 | #
654 | # LCOV support
655 | #
656 |
657 | LCOV_OUTPUT_FILE = os.path.join(ROOT_DIR, 'build', 'lcov.out')
658 | LCOV_HTML_DIR = os.path.join(ROOT_DIR, 'build', 'lcov')
659 |
660 |
661 | def lcov_generate():
662 | try:
663 | os.unlink(LCOV_OUTPUT_FILE)
664 | except OSError:
665 | pass
666 | try:
667 | shutil.rmtree(LCOV_HTML_DIR)
668 | except OSError:
669 | pass
670 |
671 | print("Capturing lcov info...")
672 | subprocess.call(['lcov', '-q', '-c',
673 | '-d', os.path.join(ROOT_DIR, 'build'),
674 | '-b', ROOT_DIR,
675 | '--output-file', LCOV_OUTPUT_FILE])
676 |
677 | print("Generating lcov HTML output...")
678 | ret = subprocess.call(['genhtml', '-q', LCOV_OUTPUT_FILE,
679 | '--output-directory', LCOV_HTML_DIR,
680 | '--legend', '--highlight'])
681 | if ret != 0:
682 | print("genhtml failed!")
683 | else:
684 | print("HTML output generated under build/lcov/")
685 |
686 |
687 | def check_lint(lint_args):
688 | """
689 | Adds ROOT_DIR to path and performs lint checks.
690 | This functions exits the program with status code of lint check.
691 | """
692 | sys.path.append(ROOT_DIR)
693 | try:
694 | from tools.linter import DiffLinter
695 | except ModuleNotFoundError as e:
696 | print(f"Error: {e.msg}. "
697 | "Install using linter_requirements.txt.")
698 | sys.exit(1)
699 |
700 | uncommitted = lint_args == "uncommitted"
701 | branch = "main" if uncommitted else lint_args
702 |
703 | DiffLinter(branch).run_lint(uncommitted)
704 |
705 |
706 | if __name__ == "__main__":
707 | main(argv=sys.argv[1:])
708 |
--------------------------------------------------------------------------------