├── mprod ├── tests │ ├── __init__.py │ ├── test_base.py │ ├── _base_tests.py │ └── _utils.py ├── decompositions │ ├── tests │ │ ├── __init__.py │ │ ├── test__qr.py │ │ └── test_decompositions.py │ ├── __init__.py │ ├── _qr.py │ └── _tsvdm.py ├── dimensionality_reduction │ ├── tests │ │ ├── __init__.py │ │ └── test_TCAM.py │ ├── __init__.py │ └── _tcam.py ├── __init__.py ├── _misc.py ├── _pytester.py ├── _pytesttester.py ├── _base.py └── _ml_helpers.py ├── docs ├── _static │ └── img │ │ ├── mprod_logo.png │ │ ├── ._mprod_logo.png │ │ ├── mprod_logo_fav.png │ │ ├── ._mprod_logo_fav.png │ │ ├── mprod_logo_small.png │ │ ├── ._mprod_logo_small.png │ │ ├── mprod_tcam_cartoon.png │ │ └── ._mprod_tcam_cartoon.png ├── modules │ ├── stubs │ │ ├── mprod.x_m3.rst │ │ ├── mprod.m_prod.rst │ │ ├── mprod.table2tensor.rst │ │ ├── mprod.tensor_mtranspose.rst │ │ ├── mprod.decompositions.svdm.rst │ │ ├── mprod.decompositions.tqrm.rst │ │ ├── mprod.MeanDeviationForm.rst │ │ └── mprod.dimensionality_reduction.TCAM.rst │ ├── classes.rst │ ├── mprod.decompositions.rst │ ├── mprod.rst │ └── mprod.dimensionality_reduction.rst ├── examples │ ├── data │ │ ├── readme_graphs.png │ │ ├── single_curve_demo.pdf │ │ ├── single_curve_demo.png │ │ ├── multiple_curves_demo.png │ │ ├── mermaid-diagram-pipeline.png │ │ └── ._mermaid-diagram-pipeline.png │ ├── intro.ipynb │ ├── examples.rst │ └── mprod_primer.ipynb ├── _templates │ ├── class.rst │ └── function.rst ├── requirements.txt ├── environment.yml ├── Makefile ├── run_livereload.py ├── make.bat ├── conf.py └── index.rst ├── requirements.txt ├── .gitignore ├── .readthedocs.yaml ├── setup.cfg ├── .github └── workflows │ └── build.yaml ├── LICENSE ├── setup.py ├── azure-pipelines.yml ├── README.md └── runtests.py /mprod/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mprod/tests/test_base.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mprod/decompositions/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mprod/decompositions/tests/test__qr.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mprod/dimensionality_reduction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/img/mprod_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo.png -------------------------------------------------------------------------------- /docs/_static/img/._mprod_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo.png -------------------------------------------------------------------------------- /docs/_static/img/mprod_logo_fav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo_fav.png -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.x_m3.rst: -------------------------------------------------------------------------------- 1 | mprod.x\_m3 2 | =========== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. 
autofunction:: x_m3 -------------------------------------------------------------------------------- /docs/_static/img/._mprod_logo_fav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo_fav.png -------------------------------------------------------------------------------- /docs/_static/img/mprod_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_logo_small.png -------------------------------------------------------------------------------- /docs/examples/data/readme_graphs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/readme_graphs.png -------------------------------------------------------------------------------- /docs/_static/img/._mprod_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_logo_small.png -------------------------------------------------------------------------------- /docs/_static/img/mprod_tcam_cartoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/mprod_tcam_cartoon.png -------------------------------------------------------------------------------- /docs/examples/data/single_curve_demo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/single_curve_demo.pdf -------------------------------------------------------------------------------- /docs/examples/data/single_curve_demo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/single_curve_demo.png -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.m_prod.rst: -------------------------------------------------------------------------------- 1 | mprod.m\_prod 2 | ============= 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autofunction:: m_prod -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | scikit-learn>=0.24.1 3 | scipy>=1.5.3 4 | dataclasses>=0.7; python_version < '3.7' 5 | pandas>=1.1.5 -------------------------------------------------------------------------------- /docs/_static/img/._mprod_tcam_cartoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/_static/img/._mprod_tcam_cartoon.png -------------------------------------------------------------------------------- /docs/examples/data/multiple_curves_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/multiple_curves_demo.png -------------------------------------------------------------------------------- /docs/examples/data/mermaid-diagram-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/mermaid-diagram-pipeline.png -------------------------------------------------------------------------------- /docs/examples/data/._mermaid-diagram-pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UriaMorP/mprod_package/HEAD/docs/examples/data/._mermaid-diagram-pipeline.png -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.table2tensor.rst: -------------------------------------------------------------------------------- 1 | mprod.table2tensor 2 | ================== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autofunction:: table2tensor -------------------------------------------------------------------------------- /mprod/dimensionality_reduction/__init__.py: -------------------------------------------------------------------------------- 1 | """.. mprod.dimensionality_reduction 2 | """ 3 | 4 | from ._tcam import TCAM 5 | 6 | __all__ = [ 7 | "TCAM" 8 | ] -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.tensor_mtranspose.rst: -------------------------------------------------------------------------------- 1 | mprod.tensor\_mtranspose 2 | ======================== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autofunction:: tensor_mtranspose -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.decompositions.svdm.rst: -------------------------------------------------------------------------------- 1 | mprod.decompositions.svdm 2 | ========================= 3 | 4 | .. currentmodule:: mprod.decompositions 5 | 6 | .. autofunction:: svdm -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.decompositions.tqrm.rst: -------------------------------------------------------------------------------- 1 | mprod.decompositions.tqrm 2 | ========================= 3 | 4 | .. currentmodule:: mprod.decompositions 5 | 6 | .. 
autofunction:: tqrm -------------------------------------------------------------------------------- /mprod/decompositions/__init__.py: -------------------------------------------------------------------------------- 1 | """.. mprod.decompositions 2 | """ 3 | 4 | from ._tsvdm import svdm 5 | from ._qr import tqrm 6 | 7 | __all__ = [ 8 | "svdm", 9 | "tqrm" 10 | ] 11 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}.{{objname}}` 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. :function:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.MeanDeviationForm.rst: -------------------------------------------------------------------------------- 1 | :mod:`mprod.MeanDeviationForm` 2 | ===================================== 3 | 4 | .. currentmodule:: mprod 5 | 6 | .. autoclass:: MeanDeviationForm 7 | 8 | .. include:: mprod.MeanDeviationForm.examples 9 | 10 | .. raw:: html 11 | 12 |
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-gallery==0.9.0 2 | numpydoc==1.1.0 3 | sphinxcontrib-bibtex==2.3.0 4 | sphinx-prompt==1.4.0 5 | nbsphinx==0.8.6 6 | sphinx_rtd_theme==0.5.2 7 | ipykernel==5.4.3 8 | m2r2==0.3.1 9 | ipykernel==5.4.3 10 | seaborn==0.11.1 11 | jupyter==1.0.0 12 | myst-parser==0.15.2 13 | livereload==2.6.3 14 | pandoc==2.0.1 -------------------------------------------------------------------------------- /docs/modules/stubs/mprod.dimensionality_reduction.TCAM.rst: -------------------------------------------------------------------------------- 1 | :mod:`mprod.dimensionality_reduction.TCAM` 2 | ================================================= 3 | 4 | .. currentmodule:: mprod.dimensionality_reduction 5 | 6 | .. autoclass:: TCAM 7 | 8 | .. include:: mprod.dimensionality_reduction.TCAM.examples 9 | 10 | .. raw:: html 11 | 12 |
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.pytest_cache/ 2 | 3 | **/__pycache__/ 4 | build.log 5 | **/.DS_Store 6 | **/._.DS_Store 7 | **/.ipynb_checkpoints/ 8 | .coveragerc 9 | _doc/ 10 | coding_stuff/ 11 | test_notebooks/ 12 | docs/trashed_docs/ 13 | docs/_build/ 14 | docs/_conf.py 15 | dist/ 16 | build/ 17 | mprod_package.egg-info/ 18 | pytest.xml 19 | 20 | docs/examples/data/._*.png 21 | doc_exp/ 22 | docs_exp/ 23 | docs/run_livereload.py 24 | trashed_docs/ -------------------------------------------------------------------------------- /docs/modules/classes.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | This is the class and function reference of M product framework. Please refer to 6 | the full user guide for further details, as the class and 7 | function raw specifications may not be enough to give full guidelines on their 8 | uses. 9 | For reference on concepts repeated across the API, see 10 | 11 | .. toctree:: 12 | :maxdepth: 4 13 | 14 | mprod 15 | mprod.dimensionality_reduction 16 | mprod.decompositions 17 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.6" 7 | 8 | 9 | python: 10 | install: 11 | - requirements: docs/requirements.txt 12 | - method: pip 13 | path: . 14 | extra_requirements: 15 | - docs 16 | # # version: "3.6" 17 | # install: 18 | # - method: pip 19 | # path: . 
20 | # extra_requirements: 21 | # - docs 22 | # system_packages: true 23 | 24 | 25 | # Build documentation in the docs/ directory with Sphinx 26 | sphinx: 27 | configuration: docs/conf.py 28 | -------------------------------------------------------------------------------- /docs/modules/mprod.decompositions.rst: -------------------------------------------------------------------------------- 1 | Tensor decompositions 2 | ===================== 3 | 4 | The `mprod.decomposition` module includes tensor decomposition 5 | algorithms. Currently, the tsvdm and tqrm decompositions is implemented. 6 | Future plans are to implement additional factorizations such as non negative tensor factorization (NTF) . 7 | 8 | .. currentmodule:: mprod.decompositions 9 | 10 | 11 | .. autosummary:: 12 | :toctree: stubs 13 | 14 | svdm 15 | tqrm 16 | 17 | 18 | .. automodule:: mprod.decompositions 19 | :members: 20 | :toctree: stubs 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/modules/mprod.rst: -------------------------------------------------------------------------------- 1 | :mod:`mprod` base module 2 | ========================= 3 | 4 | 5 | The :mod:`mprod` module includes tensor decomposition utilities 6 | and algorithms, such as TCAM, MeanDeviationForm. 7 | 8 | .. currentmodule:: mprod 9 | 10 | 11 | 12 | .. autosummary:: 13 | :toctree: stubs 14 | :template: class.rst 15 | 16 | MeanDeviationForm 17 | 18 | 19 | .. autosummary:: 20 | :toctree: stubs 21 | 22 | m_prod 23 | tensor_mtranspose 24 | x_m3 25 | table2tensor 26 | 27 | 28 | 29 | .. 
automodule:: mprod 30 | :members:
automodule:: mprod.dimensionality_reduction 21 | :members: 22 | :undoc-members:
/docs/run_livereload.py: -------------------------------------------------------------------------------- 1 | from livereload import Server, shell 2 | 3 | if __name__ == '__main__': 4 | server = Server() 5 | server.watch('*.rst', shell('make html'), delay=1) 6 | server.watch('modules/*.rst', shell('make html'), delay=1) 7 | server.watch('modules/*/*.rst', shell('make html'), delay=1) 8 | server.watch('*.md', shell('make html'), delay=1) 9 | server.watch('*.py', shell('make html'), delay=1) 10 | server.watch('*.ipynb', shell('make html'), delay=.1) 11 | server.watch('examples/*.ipynb', shell('make html'), delay=.1) 12 | server.watch('_static/*', shell('make html'), delay=1) 13 | server.watch('_templates/*', shell('make html'), delay=1) 14 | server.serve(root='_build/html', host="cn240.wexac.weizmann.ac.il", port=8888) 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = mprod-package-uriamorP 3 | version = 0.0.5a1 4 | author = 5 | Uria Mor 6 | Rafael Valdes Mas 7 | Yotam Cohen 8 | Haim Avron 9 | project_url = https://github.com/UriaMorP/mprod_package 10 | author_email = uriamo@gmail.com, 11 | description = Software implementation for tensor-tensor m-product framework 12 | long_description_content_type = text/markdown 13 | long_description = file: README.md 14 | license = BSD 15 | classifiers = 16 | Development Status :: 3 - Alpha 17 | Intended Audience :: Scientists 18 | Programming Language :: Python :: 3.6 19 | Programming Language :: Python :: 3.7 20 | Programming Language :: Python :: 3.8 21 | Programming Language :: Python :: 3.9 22 | Programming Language :: Python :: 3.10 23 | Programming Language :: Python :: 3 :: Only 24 | 25 | 26 | python_requires = >=3.6.8 27 | -------------------------------------------------------------------------------- /docs/make.bat: 
-------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | python-version: [3.6.8, 3.7, 3.8, 3.9, 3.10.0, 3.11.0, 3.12.0] 14 | os: [macos-latest, ubuntu-20.04] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Build using Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: pip install dependencies [pip] 24 | run: | 25 | python -m pip install pip --upgrade pip; 26 | pip install pytest; 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | if [ ${{ matrix.python-version }} == 3.6.8 ]; then pip install dataclasses>=0.7; fi 29 | pip install -e . 
30 | 31 | - name: unit tests [pytest] 32 | run: | 33 | pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml 34 | -------------------------------------------------------------------------------- /mprod/_misc.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Tuple 2 | from numpy import ndarray 3 | 4 | 5 | def _assert_order(tensor: ndarray, tensor_varname: str, order: int): 6 | got_order = len(tensor.shape) 7 | assert got_order == order, f"{tensor_varname} must be a order {order} tensor, found order {got_order}" 8 | 9 | 10 | def _assert_size(tensor: ndarray, tensor_varname: str, axis: int, dim: int): 11 | got_dim = tensor.shape[axis] 12 | assert got_dim == dim, f"Dimension {axis} of {tensor_varname} must equal {dim}, found {got_dim}" 13 | 14 | 15 | def _assert_order_and_mdim(tensor: ndarray, 16 | tensor_varname: str, 17 | order: int, 18 | dim_inspection_list: Iterable[Tuple[int, int]]): 19 | """ 20 | 21 | Parameters 22 | ---------- 23 | tensor: np.ndarray 24 | The tensor for inpection 25 | tensor_varname: str 26 | The variable name of the tensor as it appears in the code 27 | order: int 28 | The intended order of `tensor` 29 | dim_inspection_list 30 | 31 | 32 | """ 33 | _assert_order(tensor, tensor_varname, order) 34 | for ax, dim in dim_inspection_list: 35 | assert ax < order, f"Trying to assert the dimension of mode {ax} of a {order} order tensor {tensor_varname}" 36 | _assert_size(tensor, tensor_varname, ax, dim) 37 | 38 | -------------------------------------------------------------------------------- /mprod/tests/_base_tests.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from mprod import m_prod, x_m3, tensor_mtranspose 5 | from numpy.testing import ( 6 | assert_, assert_equal, assert_raises, assert_array_equal, 7 | assert_almost_equal, assert_allclose, suppress_warnings, 8 | 
def gen_m_transpose(mpair):
    """Return a closure computing the M-transpose under the (mfun, minv) pair."""
    mfun, minv = mpair

    def _do(a):
        return tensor_mtranspose(a, mfun, minv)

    return _do


def gen_m_product(mpair):
    """Return a closure computing the m-product under the (mfun, minv) pair."""
    mfun, minv = mpair

    def _do(a, b):
        return m_prod(a, b, mfun, minv)

    return _do


def assert_identity(J, tensor, mproduct):
    """Assert that `J` acts as an identity on `tensor` under `mproduct`."""
    tensor2 = mproduct(J, tensor)
    assert_almost_equal(tensor, tensor2)


def assert_m_orth(tensor, mfun, minv):
    """
    Assert that `tensor` is M-orthogonal with respect to the (mfun, minv)
    transform pair: the m-product of the tensor with its M-transpose (taken
    on the smaller mode) must act as an identity on random test tensors.

    NOTE(review): the previous version wrapped the check in a nested,
    pytest-parametrized function that was never invoked, so no identity
    assertion was ever executed; additionally ``range(1, 10, 100)`` yields
    only ``1``.  The cases are now checked directly in a loop.  The size
    range was presumably meant to sweep several mode-2 sizes -- confirm
    the intended sizes with the original author.
    """
    m, p, n = tensor.shape

    _t = gen_m_transpose((mfun, minv))
    _m = gen_m_product((mfun, minv))

    # Form the (expected) identity tensor on the smaller of the two modes.
    if m <= p:
        J = _m(tensor, _t(tensor))
    else:
        J = _m(_t(tensor), tensor)

    for mode2_size in range(1, 100, 10):
        for i in range(10):
            rng = np.random.default_rng(seed=i + int(np.log10(mode2_size)))
            tens = rng.random((J.shape[1], mode2_size, n))
            assert_identity(J, tens, _m)
_primer:\n", 10 | "\n", 11 | "Brief Intro\n", 12 | "-----------" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 5, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# TODO" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | } 31 | ], 32 | "metadata": { 33 | "celltoolbar": "Edit Metadata", 34 | "kernelspec": { 35 | "display_name": "mprod", 36 | "language": "python", 37 | "name": "mprod" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.6.8" 50 | }, 51 | "toc": { 52 | "base_numbering": 1, 53 | "nav_menu": {}, 54 | "number_sections": true, 55 | "sideBar": true, 56 | "skip_h1_title": false, 57 | "title_cell": "Table of Contents", 58 | "title_sidebar": "Contents", 59 | "toc_cell": false, 60 | "toc_position": {}, 61 | "toc_section_display": true, 62 | "toc_window_display": false 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 4 67 | } 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, UriaMorP 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /mprod/decompositions/tests/test_decompositions.py: -------------------------------------------------------------------------------- 1 | """ Test functions for decompositions module 2 | 3 | """ 4 | 5 | import pytest 6 | import numpy as np 7 | 8 | from itertools import product 9 | 10 | from mprod.decompositions import svdm, tqrm 11 | from mprod.tests._utils import (_make_mprod_op_cases, _make_tensor_cases, gen_m_product, gen_m_transpose, assert_m_orth) 12 | 13 | from numpy.testing import ( 14 | assert_, assert_equal, assert_raises, assert_array_equal, 15 | assert_almost_equal, assert_allclose, suppress_warnings, 16 | assert_raises_regex, HAS_LAPACK64, 17 | ) 18 | 19 | M_FUN_CASES = _make_mprod_op_cases() 20 | TENSOR_CASES = _make_tensor_cases() 21 | 22 | 23 | @pytest.mark.parametrize('tensor, m_pair', product(TENSOR_CASES, M_FUN_CASES)) 24 | def test_tsvdm(tensor, m_pair): 25 | mfun, minv = m_pair 26 | # _m = gen_m_product(m_pair) 27 | # _t = gen_m_transpose(m_pair) 28 | 29 | u, s, v = svdm(tensor, mfun, minv) 30 | m, p, n = tensor.shape 31 | rk = min(m, p) 32 | 33 | assert s.shape[0] == rk, f"expected shape[0] of s to be {rk}, got {s.shape[0]}" 34 | assert s.shape[1] == tensor.shape[-1], f"expected shape[1] of s to be {tensor.shape[-1]}, got {s.shape[1]}" 35 | 36 | 37 | # tensor2 = _m(_m(u, s), _t(v)) 38 | shat = mfun(s) 39 | us = mfun(u).transpose(2, 0, 1) * shat.T.reshape(n, 1, m) 40 | usv = np.matmul(us, mfun(v).transpose(2, 1, 0)) 41 | usv = usv.transpose(1, 2, 0) 42 | tensor2 = minv(usv) 43 | assert_almost_equal(tensor, tensor2) 44 | 45 | assert_m_orth(u, *m_pair) 46 | assert_m_orth(v, *m_pair) 47 | 48 | 49 | @pytest.mark.parametrize('tensor, m_pair', product(TENSOR_CASES, M_FUN_CASES)) 50 | def test_tqrm(tensor, m_pair): 51 | mfun, minv = m_pair 52 | 53 | _m = gen_m_product(m_pair) 54 | _t = gen_m_transpose(m_pair) 55 | 56 | Q, R = tqrm(tensor, mfun, minv) 57 | 58 | tensor2 
def tqrm(tens_a: np.ndarray, fun_m: "MatrixTensorProduct", inv_m: "MatrixTensorProduct", hats: bool = False) \
        -> "Tuple[NumpynDArray, NumpynDArray]":
    """
    The ``tqrm`` implements tensor-QR decomposition:
    ``Q,R = tqrm(tensor_a, m, inv_m)`` where ``Q`` is an M-orthogonal tensor of shape ``(m, min(m,p), n)``
    and ``R`` is an f-upper triangular tensor of shape ``(min(m,p), p, n)``.

    Parameters
    ----------
    tens_a: np.ndarray
        Tensor of shape ``(m,p,n)``
    fun_m: MatrixTensorProduct
        Invertible mat-vec operation for transforming ``tens_a`` tube fibers
    inv_m: MatrixTensorProduct
        Invertible mat-vec operation for transforming ``tens_a`` tube fibers. This operation is the inverse of ``fun_m``
    hats: bool
        Setting this to ``True`` will cause the function to return the tqrm factors in the tensor domain transform
        (i.e. without applying ``inv_m``).

    Returns
    -------
    tens_q: np.ndarray
        M-orthogonal tensor of shape ``(m, min(m,p), n)``
    tens_r: np.ndarray
        f-upper triangular tensor of shape ``(min(m,p), p, n)``

    """

    m, p, n = tens_a.shape
    # Reduced QR of an (m, p) face always yields factors (m, k) and (k, p)
    # with k = min(m, p), so the result size is known up front -- no need
    # for the per-face bookkeeping / post-hoc truncation of the old code.
    k = min(m, p)

    a_hat = fun_m(tens_a)

    # Allocate in the dtype produced by the transform: the previous float64
    # zeros would break (or silently degrade) complex or non-double fun_m.
    q_hat = np.zeros((m, k, n), dtype=a_hat.dtype)
    r_hat = np.zeros((k, p, n), dtype=a_hat.dtype)

    # Face-wise QR in the transform domain.
    for i in range(n):
        q_hat[:, :, i], r_hat[:, :, i] = np.linalg.qr(a_hat[:, :, i])

    if hats:
        return q_hat, r_hat

    tens_q = inv_m(q_hat)
    tens_r = inv_m(r_hat)

    return tens_q, tens_r
"tSVD", "tSVDM", "tensor decomposition"], 28 | "name": 'mprod-package', 29 | "version": '0.0.5a1', 30 | "packages": find_packages(), 31 | "long_description": readme, 32 | "url": "https://github.com/UriaMorP/mprod_package", 33 | "python_requires": '>=3.6.8', 34 | "install_requires": [ 35 | "numpy >= 1.19.2", 36 | "scikit-learn >= 0.24.1", 37 | "scipy >= 1.5.3", 38 | "dataclasses >= 0.7; python_version < '3.7'", 39 | "pandas >= 1.1.5" 40 | ], 41 | "extras_require": { 42 | "dev": ["pytest==6.2.2", ], 43 | "docs": [ 44 | "sphinx-gallery == 0.9.0", 45 | "numpydoc == 1.1.0", 46 | "sphinxcontrib-bibtex == 2.3.0", 47 | "sphinx-prompt == 1.4.0", 48 | "nbsphinx == 0.8.6", 49 | "ipykernel == 5.4.3", 50 | "seaborn == 0.11.1", 51 | "jupyter == 1.0.0", 52 | "myst-parser == 0.15.2", 53 | "m2r2 == 0.3.1", 54 | "livereload == 2.6.3", 55 | "pandoc == 2.0.1", 56 | ] 57 | } 58 | } 59 | 60 | setup(**configuration) 61 | -------------------------------------------------------------------------------- /mprod/tests/_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from mprod import m_prod, x_m3, tensor_mtranspose, generate_dct 5 | from numpy.testing import ( 6 | assert_, assert_equal, assert_raises, assert_array_equal, 7 | assert_almost_equal, assert_allclose, suppress_warnings, 8 | assert_raises_regex, HAS_LAPACK64, 9 | ) 10 | 11 | 12 | m, p, n = 100, 300, 10 13 | 14 | 15 | def _make_mprod_op_cases(): 16 | mprod_cases = [] 17 | for i in range(2): 18 | rng = np.random.default_rng(seed=i) 19 | mat = rng.random((n, n)) 20 | mat_m = np.linalg.qr(mat)[0] 21 | mfun = x_m3(mat_m) 22 | minv = x_m3(mat_m.T) 23 | mprod_cases.append((mfun,minv)) 24 | 25 | # add dct based transforms 26 | mfun, minv = generate_dct(n) 27 | mprod_cases.append((mfun,minv)) 28 | return mprod_cases 29 | 30 | 31 | def _make_tensor_cases(): 32 | tensor_cases = [] 33 | 34 | for i in range(2): 35 | rng = np.random.default_rng(seed=i) 36 
| X = rng.random((m, p, n)) 37 | tensor_cases.append(X) 38 | return tensor_cases 39 | 40 | 41 | def gen_m_transpose(mpair): 42 | mfun, minv = mpair 43 | 44 | def _do(a): 45 | return tensor_mtranspose(a, mfun, minv) 46 | 47 | return _do 48 | 49 | 50 | def gen_m_product(mpair): 51 | mfun, minv = mpair 52 | 53 | def _do(a, b): 54 | return m_prod(a, b, mfun, minv) 55 | 56 | return _do 57 | 58 | 59 | def assert_identity(J, tensor, mproduct): 60 | tensor2 = mproduct(J, tensor) 61 | assert_almost_equal(tensor, tensor2) 62 | 63 | 64 | def assert_m_orth(tensor, mfun, minv): 65 | m, p, n = tensor.shape 66 | 67 | _t = gen_m_transpose((mfun, minv)) 68 | _m = gen_m_product((mfun, minv)) 69 | 70 | if m <= p: 71 | J = _m(tensor, _t(tensor)) 72 | else: 73 | J = _m(_t(tensor), tensor) 74 | 75 | TENSOR_CASES = [] 76 | for mode2_size in range(1, 10, 100): 77 | for i in range(2): 78 | rng = np.random.default_rng(seed=i + int(np.log10(mode2_size))) 79 | TENSOR_CASES.append(rng.random((J.shape[1], mode2_size, n))) 80 | 81 | @pytest.mark.parametrize('tens', TENSOR_CASES) 82 | def _assert_id(tens): 83 | assert_identity(J, tens, _m) 84 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Python package 2 | # Create and test a Python package on multiple Python versions. 
3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: 4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python 5 | 6 | trigger: 7 | - main 8 | 9 | #jobs: 10 | # - job: Linux 11 | pool: 12 | vmImage: 'ubuntu-latest' 13 | strategy: 14 | matrix: 15 | Python36: 16 | python.version: '3.6' 17 | Python37: 18 | python.version: '3.7' 19 | Python38: 20 | python.version: '3.8' 21 | Python39: 22 | python.version: '3.9' 23 | Python310: 24 | python.version: '3.10' 25 | steps: 26 | - task: UsePythonVersion@0 27 | inputs: 28 | versionSpec: '$(python.version)' 29 | displayName: 'Use Python $(python.version)' 30 | 31 | - script: | 32 | python -m pip install --upgrade pip 33 | pip install -r requirements.txt 34 | displayName: 'Install dependencies' 35 | 36 | - script: | 37 | pip install -e . 38 | displayName: 'Install package' 39 | 40 | - script: | 41 | pip install pytest pytest-azurepipelines 42 | pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml 43 | displayName: 'pytest' 44 | 45 | - task: PublishTestResults@2 46 | inputs: 47 | testResultsFiles: 'pytest.xml' 48 | testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' 49 | condition: succeededOrFailed() 50 | 51 | 52 | # - job: MacOS 53 | # pool: 54 | # vmImage: 'macOS-latest' 55 | # strategy: 56 | # matrix: 57 | # Python36: 58 | # python.version: '3.6' 59 | # Python37: 60 | # python.version: '3.7' 61 | # Python38: 62 | # python.version: '3.8' 63 | # Python39: 64 | # python.version: '3.9' 65 | # Python310: 66 | # python.version: '3.10' 67 | # steps: 68 | # - task: UsePythonVersion@0 69 | # inputs: 70 | # versionSpec: '$(python.version)' 71 | # displayName: 'Use Python $(python.version)' 72 | 73 | # - script: | 74 | # python -m pip install --upgrade pip 75 | # pip install -r requirements.txt 76 | # displayName: 'Install dependencies' 77 | # - script: | 78 | # pip install -e . 
79 | # displayName: 'Install package' 80 | # - script: | 81 | # pip install pytest pytest-azurepipelines 82 | # pytest --show-capture=no -v --disable-warnings --junitxml=pytest.xml 83 | # displayName: 'pytest' 84 | # - task: PublishTestResults@2 85 | # inputs: 86 | # testResultsFiles: 'pytest.xml' 87 | # testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' 88 | # condition: succeededOrFailed() 89 | -------------------------------------------------------------------------------- /docs/examples/examples.rst: -------------------------------------------------------------------------------- 1 | .. _tutorials: 2 | 3 | ========= 4 | Tutorials 5 | ========= 6 | 7 | .. rubric:: Scope and intention 8 | 9 | This page presents a collection of tutorials written by the authors of the mprod package 10 | and intended to help newcomers in incorporating the machinery offered by the library 11 | in their analysis workflows. 12 | 13 | The main (and only) data-scientific tool currently implemented is the TCAM 14 | dimensionality reduction algorithm :footcite:p:`mor2021`. We intend to keep expanding the 15 | package content by adding :math:`\star_{\mathbf{M}}`-product based tools 16 | (such as tensor-PLS, tensor-CCA), and we encourage any form of collaboration, 17 | hoping to get good responses, feedback and help from the data-science community. 18 | 19 | .. rubric:: Target audience 20 | 21 | We do not expect expertise in Machine Learning, or data science, in order to use this package. 22 | In fact, it is aimed at non-experts. 23 | 24 | That said, the library is not - by any means - meant to serve as a **black magic tensor package for dummies**. 25 | Just like with almost everything in machine-learning, using this library for ML related tasks requires **some** general 26 | mathematical understanding of ML concepts.
27 | The implementation of dimensionality reduction methods (currently TCAM) is made consistent with 28 | `scikit-learn `_ library to the maximum possible extent, in order to enable smooth 29 | integration within the pythonic ML ecosystem. 30 | For this reason, the users are assumed to know the `scikit-learn `_ library. 31 | Scikit-learn package offers fantastic documentation, tutorials and examples that are more than enough in order to get 32 | started with machine learning in no time. 33 | 34 | .. note:: 35 | 36 | We acknowledge that many potential users might find R more familiar. 37 | However, we urge them to take the time and try the alternative. 38 | 39 | In addition, deep understanding of the mathematical theory underlying mprod based tensor algorithms is always a good 40 | idea. Below, you can find a short :ref:`Primer` section about the idea behind tensor-tensor algebra via the 41 | :math:`\star_{\bf{M}}` -product framework (For a thorough introduction, we refer the interested readers to 42 | :footcite:p:`Kilmer`) 43 | 44 | The :ref:`TCAM` section contains tutorials for working with :class:`mprod.dimensionality_reduction.TCAM`. 45 | For construction and showcase of TCAM refer to :footcite:p:`mor2021` 46 | 47 | 48 | -------------------------------- 49 | 50 | 51 | .. _TCAM: 52 | 53 | ---- 54 | TCAM 55 | ---- 56 | .. toctree:: 57 | :maxdepth: 8 58 | 59 | basic_example 60 | supervised_learning 61 | 62 | .. Schirmer2018 63 | 64 | 65 | 66 | 67 | .. _Primer: 68 | 69 | ------------ 70 | ⚙ Background 71 | ------------ 72 | .. toctree:: 73 | :maxdepth: 4 74 | 75 | mprod_primer 76 | 77 | 78 | ..
footbibliography:: -------------------------------------------------------------------------------- /mprod/dimensionality_reduction/tests/test_TCAM.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from itertools import product 5 | 6 | from mprod.dimensionality_reduction import TCAM 7 | from mprod import MeanDeviationForm 8 | from mprod.tests._utils import (_make_mprod_op_cases, _make_tensor_cases, gen_m_product, gen_m_transpose, assert_m_orth, 9 | m, n, p) 10 | 11 | from numpy.testing import ( 12 | assert_, assert_equal, assert_raises, assert_array_equal, 13 | assert_almost_equal, assert_allclose, suppress_warnings, 14 | assert_raises_regex, HAS_LAPACK64, 15 | ) 16 | 17 | M_FUN_CASES = _make_mprod_op_cases()[:1] 18 | TENSOR_CASES = _make_tensor_cases()[:1] 19 | 20 | @pytest.mark.parametrize('X', TENSOR_CASES) 21 | @pytest.mark.parametrize('n_components', np.linspace(1, min(m, p) * n - 1, 3, dtype=int)) 22 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None]) 23 | def test_tcam_fit_transform(X, n_components, mpair): 24 | print(min(m, p) * n - 1) 25 | if mpair is None: 26 | tca = TCAM(n_components=n_components) 27 | else: 28 | mfun, minv = mpair 29 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components) 30 | 31 | X_r = tca.fit(X).transform(X) 32 | assert X_r.shape[1] == n_components 33 | 34 | # check the equivalence of fit.transform and fit_transform 35 | X_r2 = tca.fit_transform(X) 36 | assert_allclose(X_r, X_r2) 37 | # X_r = tca.transform(X) 38 | assert_allclose(X_r, X_r2) 39 | 40 | 41 | @pytest.mark.parametrize('X', TENSOR_CASES) 42 | @pytest.mark.parametrize('n_components', np.linspace(.1, 1., 3, dtype=float)) 43 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None]) 44 | def test_tcam_reconstruction_err(X, n_components, mpair): 45 | print(min(m, p) * n - 1) 46 | if mpair is None: 47 | tca = TCAM(n_components=n_components) 48 | 49 | else: 50 | mfun, minv = mpair 51 
| tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components) 52 | # check the shape of fit.transform 53 | Y = tca.fit(X).transform(X) 54 | X2 = tca.inverse_transform(Y) 55 | 56 | assert np.round(1 - ((X2 - X) ** 2).sum() / (X ** 2).sum(), 20) >= n_components 57 | 58 | 59 | @pytest.mark.parametrize('X', TENSOR_CASES) 60 | @pytest.mark.parametrize('n_components', range(1, min(m, p) * n - 1, 200)) 61 | @pytest.mark.parametrize('mpair', M_FUN_CASES + [None]) 62 | def test_tcam_residue_m_orth(X, n_components, mpair): 63 | print(min(m, p) * n - 1) 64 | if mpair is None: 65 | tca = TCAM(n_components=n_components) 66 | 67 | else: 68 | mfun, minv = mpair 69 | tca = TCAM(fun_m=mfun, inv_m=minv, n_components=n_components) 70 | 71 | Y = tca.fit(X).transform(X) 72 | X2 = tca.inverse_transform(Y) 73 | _t = gen_m_transpose((tca.fun_m, tca.inv_m)) 74 | 75 | res_prod_norm = (tca._mprod(_t(X - X2), X2) ** 2).sum() 76 | assert_almost_equal(res_prod_norm, 0, err_msg=f"got {res_prod_norm} instead of 0", verbose=True, ) 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | import sphinx_gallery 16 | sys.path.insert(0, os.path.abspath('.')) 17 | sys.path.insert(0, os.path.abspath('../')) 18 | import sphinx_rtd_theme 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'mprod' 23 | copyright = '2021, Elinav&Avron groups' 24 | author = 'Uria Mor' 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | # extensions = [ 32 | # 'sphinx.ext.autodoc', 33 | # 'sphinx.ext.viewcode', 34 | # 'sphinx.ext.todo', 35 | # 'sphinx.ext.autodoc', 36 | # 'sphinx.ext.imgmath', 37 | # 'sphinx.ext.napoleon' 38 | # ] 39 | 40 | extensions = [ 41 | "sphinx.ext.autodoc", 42 | "sphinx.ext.autosummary", 43 | "numpydoc", 44 | 'sphinx.ext.viewcode', 45 | # "sphinx.ext.linkcode", 46 | "sphinx.ext.doctest", 47 | "sphinx.ext.intersphinx", 48 | "sphinx.ext.mathjax", 49 | "sphinxcontrib.bibtex", 50 | # 'sphinx.ext.imgmath', 51 | # "sphinx.ext.imgconverter", 52 | # "sphinx_gallery.gen_gallery", 53 | "sphinx-prompt", 54 | 'sphinx.ext.napoleon', 55 | "nbsphinx", 56 | # "myst_parser", 57 | 'm2r2', 58 | ] 59 | 60 | bibtex_bibfiles = ['refs.bib'] 61 | 62 | # The suffix(es) of source filenames. 
63 | # You can specify multiple suffix as a list of string: 64 | # 65 | # source_suffix = ['.rst', '.md'] 66 | source_suffix = [".rst", ".ipynb", ".md"] 67 | 68 | # do not execute cells 69 | nbsphinx_execute = "always" 70 | nbsphinx_kernel_name = 'python3' 71 | # nbsphinx_execute = "never" 72 | 73 | 74 | # allow errors because not all tutorials build 75 | nbsphinx_allow_errors = True 76 | 77 | # napoleon related 78 | 79 | napoleon_google_docstring = False 80 | napoleon_use_param = False 81 | napoleon_use_ivar = True 82 | 83 | # Add any paths that contain templates here, relative to this directory. 84 | templates_path = ['_templates'] 85 | 86 | # generate autosummary even if no references 87 | autosummary_generate = False 88 | 89 | # The language for content autogenerated by Sphinx. Refer to documentation 90 | # for a list of supported languages. 91 | # 92 | # This is also used if you do content translation via gettext catalogs. 93 | # Usually you set "language" from the command line for these cases. 94 | language = 'en' 95 | 96 | # List of patterns, relative to source directory, that match files and 97 | # directories to ignore when looking for source files. 98 | # This pattern also affects html_static_path and html_extra_path. 99 | exclude_patterns = ['_build', 100 | 'Thumbs.db', 101 | '.DS_Store', 102 | 'trashed_docs', 103 | '.ipynb_checkpoints', 104 | "examples/.ipynb_checkpoints"] 105 | 106 | autodoc_default_options = {"members": True, "inherited-members": False, "methods": True} 107 | 108 | # -- Options for HTML output ------------------------------------------------- 109 | 110 | # The theme to use for HTML and HTML Help pages. See the documentation for 111 | # a list of builtin themes. 
112 | # 113 | # html_theme = 'alabaster' 114 | html_theme = 'sphinx_rtd_theme' 115 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 116 | # html_theme_options = {"logo_only": True} 117 | # html_logo = "_static/img/mprod_logo_small.png" 118 | # html_favicon = "_static/img/mprod_logo_fav.png" 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # -- Extension configuration ------------------------------------------------- 126 | 127 | # -- Options for todo extension ---------------------------------------------- 128 | 129 | # If true, `todo` and `todoList` produce output, else they produce nothing. 130 | todo_include_todos = False 131 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. mprod documentation master file, created by 2 | sphinx-quickstart on Sun Aug 1 10:11:11 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. 7 | _.. figure:: _static/img/mprod_logo_fav.png 8 | 9 | 10 | =================================================== 11 | :code:`mprod`\: Tensor - Tensor algebraic framework 12 | =================================================== 13 | 14 | mprod is a software implementation for tensor-tensor algebraic framework derived from the 15 | :math:`\star_{\bf{M}}`-product :footcite:p:`Kilmer`. 16 | The package builds on NumPy\ :footcite:p:`Harris2020` and Scipy\ :footcite:p:`Virtanen2020` libraries to realize 17 | core operations and components required for the algebraic framework. 
18 | 19 | 20 | :mod:`mprod-package` implements the fundamental components required for the :math:`\star_{\mathbf{M}}`-product algebraic 21 | framework; tensor-transpose, tensor-matrix multiplication (domain transforms), face-wise tensor multiplication, and, of 22 | course, the :math:`\star_{\mathbf{M}}` tensor-tensor product (See intro) 23 | 24 | In addition, the library offers several basic tensor factorizations such as :mod:`mprod.decompostions.tsvdm` 25 | :footcite:p:`Kilmer` , and :math:`\star_{\mathbf{M}}`-product based dimensionality reduction methods like the 26 | :mod:`mprod.dimensionality_reduction.TCAM` :footcite:p:`mor2021` 27 | 28 | 29 | .. figure:: _static/img/mprod_tcam_cartoon.png 30 | :alt: TCAM cartoon 31 | :class: with-shadow 32 | :width: 90% 33 | :align: center 34 | 35 | An introductory cartoon for the TCAM :footcite:p:`mor2021` - an :math:`\star_{\mathbf{M}}`-product based 36 | dimensionality reduction method for multi-way data. 37 | 38 | You can find the software `on github `_. 39 | 40 | 41 | 42 | --------------------------------------------------------- 43 | 44 | 45 | **Installation** 46 | ================ 47 | 48 | Conda install, with the great help of the conda-forge team: 49 | 50 | .. code:: bash 51 | 52 | conda install -c conda-forge mprod-package 53 | 54 | The conda-forge packages are available for Linux, OS X, and Windows 64 bit. Local testing was done only on Linux. 55 | 56 | PyPI install, presuming you have requirements installed (numpy, scipy, pandas, scikit-learn) installed: 57 | 58 | .. code:: bash 59 | 60 | pip install mprod-package 61 | 62 | ------------------------------------------------------------- 63 | 64 | Scientific context 65 | ------------------ 66 | 67 | *We live in a multi-dimensional world, immersed in huge volumes of data. This data often involves complex interlinked 68 | structures that span across multiple dimensions. 
Processes and phenomena also exhibit multi-dimensional behavior, 69 | requiring their models to operate in high dimensional settings*\ . 70 | 71 | *Typically, we use matrix algebra to manipulate data, in so-called vector embedded spaces. But such representations 72 | usually don’t take into account the underlying integrity of an object’s dimension, either missing out on high-order 73 | links that go beyond pairwise relations or requiring an overhead in encoding such relations. This is where tensor 74 | algebra comes into play, addressing multiple dimensions*\ . 75 | 76 | *But there is a problem. Despite a broad consensus, distilled over centuries of mathematical research, for matrix 77 | algebra, there is no such standard for its multidimensional counterpart, tensor algebra. There have been several 78 | propositions for tensor algebra frameworks over the years* :footcite:p:`Kolda2009`. *Existing techniques that decompose 79 | tensor constructs into simpler tangible entities have limitations and inconsistencies compared to matrix algebra* 80 | :footcite:p:`Hitchcock1927,DeLathauwer2000,Oseledets2011,Tuck1963a`. *These issues have been hindering broad 81 | adoption of tensor algebra into mainstream use*\ . 82 | 83 | **The tensor-tensor** :math:`\star_{\bf{M}}`\ **-product framework aims to change that**\ . 84 | 85 | *The paper* “**Tensor-Tensor Algebra for Optimal Representation and Compression of Multiway Data**” 86 | :footcite:p:`Kilmer` *describes a way to bridge the gap between matrix and tensor algebra, resulting in new algebraic 87 | constructs that natively represent and manipulate high-dimensional entities, while preserving their multi-order 88 | integrity*\ . 89 | 90 | -- \ **Lior Horesh, IBM research** :footcite:p:`LHoresh` 91 | 92 | ------------------------- 93 | 94 | 95 | ..
toctree:: 96 | :caption: Contents 97 | 98 | examples/examples 99 | modules/classes 100 | 101 | ------------------------- 102 | 103 | Indices and tables 104 | ================== 105 | 106 | * :ref:`genindex` 107 | * :ref:`modindex` 108 | * :ref:`search` 109 | 110 | ---------------------- 111 | 112 | .. footbibliography:: 113 | -------------------------------------------------------------------------------- /mprod/decompositions/_tsvdm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple, Dict 3 | 4 | from mprod._base import NumpynDArray, MatrixTensorProduct 5 | 6 | 7 | def svdm(tens_a: np.ndarray, fun_m: MatrixTensorProduct, inv_m: MatrixTensorProduct 8 | , hats: bool = False) \ 9 | -> Tuple[NumpynDArray, NumpynDArray, NumpynDArray]: 10 | """ 11 | The svdm function is a helper function for computing the tsvdmII. 12 | This function does the **THIN** tsvdm: 13 | ``u,s,b = tsvdm(tensor_a, m, inv_m)`` where ``u,v`` are 14 | ``(m,k,n)`` and ``(p,k,n)`` M-orthogonal tensors and ``s`` 15 | is an f-diagonal tensor of shape ``(k,k,n)`` and ``k=min(p,m)`` 16 | 17 | Parameters 18 | ---------- 19 | tens_a: np.ndarray 20 | Tensor of shape ``(m,p,n)`` 21 | fun_m: MatrixTensorProduct 22 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers 23 | inv_m: MatrixTensorProduct 24 | Invertible mat-vec operation for transforming ``tens_a`` tube fibers. 25 | This operation is the inverse of ``fun_m`` 26 | hats: bool 27 | Setting this to ``True`` will cause the function to return the tsvdm 28 | factors in the tensor domain transform. 
29 | 30 | Returns 31 | ------- 32 | tens_u: np.ndarray 33 | M-orthogonal tensor of shape ``(m,k,n)`` 34 | tens_s: np.ndarray 35 | A ``(k,n)`` matrix representation of the f-diagonal tensor of 36 | shape ``(k,k,n)`` 37 | tens_v: np.ndarray 38 | M-orthogonal Tensor of shape ``(p,k,n)`` 39 | 40 | """ 41 | m, p, n = tens_a.shape 42 | a_hat = fun_m(tens_a) 43 | 44 | # The code bellow is a super efficient numpy trick for performing the following 45 | # 46 | # u_hat = np.zeros((m, m, n)) 47 | # s_hat = np.zeros((m, p, n)) 48 | # v_hat = np.zeros((p, p, n)) 49 | # 50 | # for i in range(n): 51 | # uu, ss, vt = np.linalg.svd(a_hat[:, :, i], full_matrices=False) 52 | # 53 | # us1, us2 = uu.shape 54 | # vs1, vs2 = vt.shape 55 | # 56 | # ssize = ss.size 57 | # s_hat[:ssize, :ssize, i] = np.diag(ss) 58 | # u_hat[:us1, :us2, i] = uu.copy() 59 | # v_hat[:vs2, :vs1, i] = vt.T.copy() 60 | 61 | u_hat, s_hat, v_hat = np.linalg.svd(a_hat.transpose(2, 0, 1), full_matrices=False) 62 | u_hat, s_hat, v_hat = u_hat.transpose(1, 2, 0), s_hat.transpose(), v_hat.transpose(2, 1, 0) 63 | 64 | # sreshape = s_hat.copy().reshape(1, m, n) 65 | # sreshape = sreshape.transpose(1, 0, 2) 66 | # idreshape = np.eye(m, p).reshape(m, p, 1) 67 | 68 | # s_hat = idreshape @ sreshape 69 | 70 | if hats: 71 | return u_hat, s_hat, v_hat 72 | 73 | u = inv_m(u_hat) 74 | v = inv_m(v_hat) 75 | s = inv_m(s_hat) 76 | 77 | return u, s, v 78 | 79 | 80 | def tsvdmii(tens_a: NumpynDArray, 81 | fun_m: MatrixTensorProduct, 82 | inv_m: MatrixTensorProduct, 83 | gamma: float = 1, 84 | n_components: int = None) -> \ 85 | Tuple[Dict[int, NumpynDArray], Dict[int, NumpynDArray], Dict[int, NumpynDArray], float, Dict[int, int], int]: 86 | assert not ((gamma is not None) and ( 87 | n_components is not None)), "Arguments gamma and n_components are mutually exclusive" 88 | assert (gamma is not None) or ( 89 | n_components is not None), "Exactely one of arguments gamma, n_components must be defined" 90 | 91 | m, p, n = tens_a.shape 92 | 
93 | # execute full decomposition 94 | u_hat, s_hat, v_hat = svdm(tens_a, fun_m, inv_m, hats=True) 95 | 96 | # compute variation in the decomposition 97 | # var is the sorted (hat) squared singular values 98 | # cumm_var is scre 99 | # w_idx is an array of indices for `cumm_var` and `var` 100 | # total_var is the (float) sum of squared singular values `var` 101 | var = np.concatenate([np.diagonal(s_hat[:, :, i]) for i in range(n)]) ** 2 102 | var = np.sort(var.reshape(-1))[::-1] 103 | cumm_var = var.cumsum(axis=0) 104 | w_idx = np.arange(0, cumm_var.size, dtype=int) 105 | total_variance = var.sum() 106 | 107 | # Find truncation threshold according to 108 | if gamma is not None: 109 | reduced_ind = w_idx[(cumm_var / total_variance) > gamma] 110 | if reduced_ind.size == 0: 111 | j = 0 112 | else: 113 | j = reduced_ind.min() 114 | else: 115 | j = n_components 116 | 117 | tau = np.sqrt(var[j - 1]) 118 | rho = {} 119 | 120 | u_hat_rho_dict = {} 121 | s_hat_rho_dict = {} 122 | v_hat_rho_dict = {} 123 | 124 | max_rho = 0 125 | r = 0 126 | for i in range(n): 127 | diag_shat_i = np.diagonal(s_hat[:, :, i]) 128 | tau_mask = (diag_shat_i >= tau) 129 | rho_i = tau_mask.sum() 130 | if rho_i > 0: 131 | u_hat_rho_dict[i] = u_hat[:, :rho_i, i].copy() 132 | s_hat_rho_dict[i] = s_hat[:rho_i, :rho_i, i].copy() 133 | v_hat_rho_dict[i] = v_hat[:, :rho_i, i].copy() 134 | rho[i] = rho_i 135 | 136 | if rho_i > max_rho: 137 | max_rho = rho_i 138 | r += rho_i 139 | 140 | if n_components is not None: 141 | assert r == n_components, f"expected multirank {n_components} got {r}" 142 | 143 | return u_hat_rho_dict, s_hat_rho_dict, v_hat_rho_dict, total_variance, rho, r 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mprod_package 2 | 3 | [![Build and 
test](https://github.com/UriaMorP/mprod_package/actions/workflows/build.yaml/badge.svg)](https://github.com/UriaMorP/mprod_package/actions/workflows/build.yaml) 4 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mprod-package) 5 | [![Documentation Status](https://readthedocs.org/projects/mprod-package/badge/?version=latest)](https://mprod-package.readthedocs.io/en/latest/?badge=latest) 6 | ![Conda](https://img.shields.io/conda/dn/conda-forge/mprod-package?label=downloads%28conda-forge%29) 7 | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/mprod-package.svg)](https://anaconda.org/conda-forge/mprod-package) 8 | [![Pypi Downloads](https://img.shields.io/pypi/dm/mprod-package.svg?label=Pypi%20downloads)]( 9 | https://pypi.org/project/mprod-package/) 10 | 11 | 12 | Software implementation for tensor-tensor m-product framework [[1]](#1). 13 | The library currently contains tubal QR and tSVDM decompositions, and the TCAM method for dimensionality reduction. 14 | 15 | 16 |

17 | 18 |

19 | 20 | ## Installation 21 | 22 | ### Conda 23 | The `mprod-package` is hosted in [conda-forge](https://conda-forge.org/) channel. 24 | 25 | ``` 26 | conda install -c conda-forge mprod-package 27 | ``` 28 | 29 | ### pip 30 | ``` 31 | pip install mprod-package 32 | ``` 33 | See `mprod-package`s [pypi entry](https://pypi.org/project/mprod-package/) 34 | 35 | ### From source 36 | 37 | * Make sure that all dependencies listed in `requirements.txt` file are installed . 38 | * Clone the repository, then from the package directory, run 39 | ``` 40 | pip install -e . 41 | ``` 42 | 43 | The dependencies in `requirements.txt` are stated with exact versions used for locally test `mprod-package`, these packages were obtained from conda-forge channel. 44 | 45 | ```python 46 | import pandas as pd 47 | 48 | file_path = "https://raw.githubusercontent.com/UriaMorP/" \ 49 | "tcam_analysis_notebooks/main/Schirmer2018/Schirmer2018.tsv" 50 | 51 | data_table = pd.read_csv(file_path, index_col=[0,1], sep="\t" 52 | , dtype={'Week':int}) 53 | data_table = data_table.loc[:,data_table.median() > 1e-7] 54 | data_table.rename(columns= {k:f"Fature_{e+1}" for e,k in enumerate(data_table.columns)}, inplace=True) 55 | data_table.shape 56 | 57 | %matplotlib inline 58 | ``` 59 | 60 | ## How to use `TCAM` 61 | 62 | Given with a `pandas.DataFrame` of the data as below, with 2-level index, where the first level as subject identifier (mouse, human, image) and the second level of the index denotes sample repetition identity, in this case - the week during experiment, in which the sample was collected. 63 | 64 | 65 | ```python 66 | display(data_table.iloc[:2,:2].round(3)) 67 | 68 | ``` 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
Fature_1Fature_2
SubjectIDWeek
P_1034300.0010.023
40.0200.000
100 | 101 | 102 | ### Shape the data into tensor 103 | 104 | We use the `table2tensor` helper function to transform a 2-level (multi)-indexed `pandas.DataFrame` into a 3rd order tensor. 105 | 106 | 107 | ```python 108 | from mprod import table2tensor 109 | data_tensor, map1, map3 = table2tensor(data_table) 110 | ``` 111 | 112 | To inspect `table2tensor` operation, we use the resulting *\"mode mappings\"*; `map1` and `map3` associating each line in the input table to it's coordinates in the resulting tensor. 113 | In the following example, we use the mappings to extract the tensor coordinates corresponding to subject P\_7218's sample from week 52 114 | 115 | 116 | ```python 117 | (data_tensor[map1['P_7218'],:, map3[52]] == data_table.loc[('P_7218',52)].values).all() # True 118 | ``` 119 | 120 | ### Applying `TCAM` 121 | 122 | ```python 123 | from mprod.dimensionality_reduction import TCAM 124 | 125 | tca = TCAM() 126 | tca_trans = tca.fit_transform(data_tensor) 127 | ``` 128 | 129 | And that's all there is to it... Really! 130 | 131 | Note how similar the code above to what we would have written if we were to apply scikit-lean's `PCA` to the initial tabular data: 132 | 133 | 134 | ```python 135 | from sklearn.decomposition import PCA 136 | 137 | pca = PCA() 138 | pca_trans = pca.fit_transform(data_table) 139 | ``` 140 | 141 | The similarity between `TCAM`s interface to that of scikit-learn's `PCA` is not coincidental. 142 | We did our best in order to make `TCAM` as familiar as possible, and allow for high compatibility of `TCAM` with the existing Python ML framework. 
143 | 144 | ### Accessing properties of the transformation 145 | 146 | 147 | ```python 148 | tca_loadings = tca.mode2_loadings # Obtain TCAM loadings 149 | pca_loadings = pca.components_ # Obtain PCA loadings 150 | 151 | tca_var = tca.explained_variance_ratio_*100 # % explained variation per TCA factor 152 | pca_var = pca.explained_variance_ratio_*100 # % explained variation per PCA factor 153 | 154 | tca_df = pd.DataFrame(tca_trans) # Cast TCA scores to dataframe 155 | tca_df.rename(index = dict(map(reversed, map1.items())) 156 | , inplace = True) # use the inverse of map1 to denote each row 157 | # of the TCAM scores with its subject ID 158 | 159 | pca_df = pd.DataFrame(pca_trans) # Cast PCA scores to dataframe 160 | pca_df.index = data_table.index # annotate PC scores with sample names 161 | ``` 162 | 163 | 164 | 165 | 166 | 167 | ## References 168 | [1] 169 | Misha E. Kilmer, Lior Horesh, Haim Avron, and Elizabeth Newman. Tensor-tensor algebra for optimal representation and compression of multiway data. Proceedings of the National Academy of Sciences, 118(28):e2015851118, jul 2021. 170 | -------------------------------------------------------------------------------- /mprod/_pytester.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pytest test running. 3 | 4 | This module implements the ``test()`` function for modules. The usual 5 | boiler plate for doing that is to put the following in the module 6 | ``__init__.py`` file:: 7 | 8 | from mprod._pytesttester import PytestTester 9 | test = PytestTester(__name__) 10 | del PytestTester 11 | 12 | 13 | Warnings filtering and other runtime settings should be dealt with in the 14 | ``pytest.ini`` file in the numpy repo root. The behavior of the test depends on 15 | whether or not that file is found as follows: 16 | 17 | * ``pytest.ini`` is present (develop mode) 18 | All warnings except those explicitly filtered out are raised as error.
class PytestTester:
    """
    Pytest test runner, exposed as ``test`` in a package's namespace.

    A test function is typically added to a package's ``__init__.py`` like so::

        from mprod._pytesttester import PytestTester
        test = PytestTester(__name__).test
        del PytestTester

    Calling this test function finds and runs all tests associated with the
    package and all its sub-packages.

    Attributes
    ----------
    module_name : str
        Dotted import path of the package to test.

    Parameters
    ----------
    module_name : str
        The name of the module to test.

    Notes
    -----
    This class is vendored from ``numpy._pytesttester``; it is not publicly
    exposed because it performs some numpy-specific warning suppression.
    """

    def __init__(self, module_name: str) -> None:
        # Resolved through sys.modules when the runner is invoked, so the
        # package must already be imported by then.
        self.module_name = module_name

    def __call__(self, label='fast', verbose=1, extra_argv=None,
                 doctests=False, coverage=False, durations=-1, tests=None) -> bool:
        """
        Run tests for the module using pytest.

        Parameters
        ----------
        label : {'fast', 'full'}, optional
            Identifies the tests to run. When set to 'fast', tests decorated
            with ``pytest.mark.slow`` are skipped; when 'full', the slow
            marker is ignored.
        verbose : int, optional
            Verbosity value for test outputs, in the range 1-3. Default is 1.
        extra_argv : list, optional
            List with any extra arguments to pass to pytest.
        doctests : bool, optional
            Not supported; raises ``ValueError`` when True.
        coverage : bool, optional
            If True, report coverage of the tested package. Default is False.
            Requires installation of (pip) pytest-cov.
        durations : int, optional
            If < 0, do nothing; if 0, report time of all tests; if > 0,
            report the time of that many slowest tests. Default is -1.
        tests : test or list of tests
            Tests to be executed with pytest '--pyargs'.

        Returns
        -------
        result : bool
            True on success, False otherwise.

        Examples
        --------
        >>> result = mprod.lib.test() #doctest: +SKIP
        ...
        1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds
        >>> result
        True
        """
        import pytest
        import warnings

        module = sys.modules[self.module_name]
        module_path = os.path.abspath(module.__path__[0])

        # setup the pytest arguments; "-l" shows locals in tracebacks
        pytest_args = ["-l"]

        # offset verbosity. The "-q" cancels a "-v".
        pytest_args += ["-q"]

        # Import numpy.distutils.cpuinfo here with all warnings forced on, so
        # its import-time warnings fire now and the cpuinfo filter added below
        # can suppress them during the test run itself.
        # NOTE(review): numpy.distutils is deprecated and removed in recent
        # Python/NumPy combinations -- confirm this import still resolves.
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            from numpy.distutils import cpuinfo

        # Filter out annoying import messages. Want these in both develop and
        # release mode.
        pytest_args += [
            "-W ignore:Not importing directory",
            "-W ignore:numpy.dtype size changed",
            "-W ignore:numpy.ufunc size changed",
            "-W ignore::UserWarning:cpuinfo",
        ]

        # When testing matrices, ignore their PendingDeprecationWarnings
        pytest_args += [
            "-W ignore:the matrix subclass is not",
            "-W ignore:Importing from numpy.matlib is",
        ]

        if doctests:
            raise ValueError("Doctests not supported")

        if extra_argv:
            pytest_args += list(extra_argv)

        if verbose > 1:
            # pytest's "-v" flags stack; one level was already offset by "-q"
            pytest_args += ["-" + "v" * (verbose - 1)]

        if coverage:
            pytest_args += ["--cov=" + module_path]

        if label == "fast":
            # not importing at the top level to avoid circular import of module
            from numpy.testing import IS_PYPY
            if IS_PYPY:
                pytest_args += ["-m", "not slow and not slow_pypy"]
            else:
                pytest_args += ["-m", "not slow"]

        elif label != "full":
            # any other label is forwarded as a pytest marker expression
            pytest_args += ["-m", label]

        if durations >= 0:
            pytest_args += ["--durations=%s" % durations]

        if tests is None:
            tests = [self.module_name]

        pytest_args += ["--pyargs"] + list(tests)

        try:
            # pytest.main returns an exit code; 0 means all tests passed
            code = pytest.main(pytest_args)
        except SystemExit as exc:
            # some pytest versions exit via SystemExit instead of returning
            code = exc.code

        return code == 0
class PytestTester:
    """
    Callable pytest runner attached to a package namespace.

    Instances are typically published as ``test`` from a package's
    ``__init__.py``::

        from numpy._pytesttester import PytestTester
        test = PytestTester(__name__).test
        del PytestTester

    Invoking the instance collects and runs every test belonging to the
    package and its sub-packages.

    Attributes
    ----------
    module_name : str
        Dotted import path of the package under test.

    Parameters
    ----------
    module_name : module name
        The name of the module to test.

    Notes
    -----
    Not publicly exposed: it performs some numpy-specific warning
    suppression, unlike the previous ``nose``-based implementation.
    """

    def __init__(self, module_name):
        self.module_name = module_name

    def __call__(self, label='fast', verbose=3, extra_argv=None,
                 doctests=False, coverage=False, durations=-1, tests=None):
        """
        Run the package's test suite with pytest.

        Parameters
        ----------
        label : {'fast', 'full'}, optional
            'fast' skips tests marked ``pytest.mark.slow``; 'full' runs them.
        verbose : int, optional
            Output verbosity, in the range 1-3. Default is 3.
        extra_argv : list, optional
            Extra command-line arguments forwarded to pytest.
        doctests : bool, optional
            Not supported; raises ``ValueError`` when True.
        coverage : bool, optional
            When True, report coverage of the tested package (requires
            pytest-cov). Default is False.
        durations : int, optional
            Negative: no timing report; 0: report all test durations;
            positive: report that many slowest tests. Default is -1.
        tests : test or list of tests
            Explicit test targets handed to pytest via '--pyargs'.

        Returns
        -------
        result : bool
            True when the pytest run succeeded, False otherwise.

        Examples
        --------
        >>> result = mprod.lib.test() #doctest: +SKIP
        ...
        1023 passed, 2 skipped, 6 deselected, 1 xfailed in 10.39 seconds
        >>> result
        True
        """
        import pytest
        import warnings

        target_module = sys.modules[self.module_name]
        target_path = os.path.abspath(target_module.__path__[0])

        # "-l" shows locals in tracebacks; "-q" offsets one "-v" of verbosity.
        cli_args = ["-l", "-q"]

        # Import numpy.distutils.cpuinfo up front (all warnings enabled) so
        # its noisy import-time warnings fire here, where the cpuinfo filter
        # below can silence them for the actual run.
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            from numpy.distutils import cpuinfo

        # Silence known-noisy import warnings in develop and release mode.
        cli_args.extend([
            "-W ignore:Not importing directory",
            "-W ignore:numpy.dtype size changed",
            "-W ignore:numpy.ufunc size changed",
            "-W ignore::UserWarning:cpuinfo",
        ])

        # np.matrix usage in tests raises PendingDeprecationWarnings; ignore.
        cli_args.extend([
            "-W ignore:the matrix subclass is not",
            "-W ignore:Importing from numpy.matlib is",
        ])

        if doctests:
            raise ValueError("Doctests not supported")

        if extra_argv:
            cli_args.extend(extra_argv)

        if verbose > 1:
            cli_args.append("-" + "v" * (verbose - 1))

        if coverage:
            cli_args.append("--cov=" + target_path)

        if label == "fast":
            # imported lazily to avoid a circular import at module load
            from numpy.testing import IS_PYPY
            marker = "not slow and not slow_pypy" if IS_PYPY else "not slow"
            cli_args.extend(["-m", marker])
        elif label != "full":
            cli_args.extend(["-m", label])

        if durations >= 0:
            cli_args.append("--durations=%s" % durations)

        selected = [self.module_name] if tests is None else list(tests)
        cli_args.append("--pyargs")
        cli_args.extend(selected)

        try:
            exit_status = pytest.main(cli_args)
        except SystemExit as exc:
            exit_status = exc.code

        return exit_status == 0
def x_m3(M: NumpynDArray) -> MatrixTensorProduct:
    """
    Build a picklable callable that multiplies tensors along mode 3 by ``M``,
    as required by the M-product definition.

    Parameters
    ----------
    M: np.ndarray
        A square matrix of shape `(n, n)`.

    Returns
    -------
    fun: Callable[[NumpynDArray], NumpynDArray]
        Picklable mapping acting on the (n dimensional) tube fibers of a
        tensor. A 2d input is treated as the matrix representation of an
        f-diagonal tensor (rows are tube fibers).
    """
    assert len(M.shape) == 2, "M must be a 2 dimensional matrix"
    assert M.shape[0] == M.shape[1], "M must be a square matrix"

    tube_size = M.shape[0]

    def fun(A: NumpynDArray) -> NumpynDArray:
        assert A.shape[-1] == tube_size, "The last dimension of A must be the same as the tube size "
        rank = len(A.shape)
        if rank == 2:
            # f-diagonal tensor stored as a matrix: each row is a tube fiber.
            return A @ M.T
        if rank == 3:
            m, p, n = A.shape
            # Flatten the first two modes so the mode-3 product becomes a
            # single matrix multiplication, then restore the layout.
            tubes_as_columns = A.transpose((2, 1, 0)).reshape(n, m * p)
            return (M @ tubes_as_columns).reshape((n, p, m)).transpose((2, 1, 0))
        raise NotImplementedError("We only work with 3d tensors for now!")

    return fun
def m_prod(tens_a: NumpynDArray,
           tens_b: NumpynDArray,
           fun_m: MatrixTensorProduct,
           inv_m: MatrixTensorProduct) -> NumpynDArray:
    """
    Compute the :math:`\\star_{\\mathbf{M}}` product of tensors `A` and `B`,
    where ``A.shape == (m,p,n)`` and ``B.shape == (p,r,n)``.

    Parameters
    ----------
    tens_a: array-like
        3'rd order tensor with shape `m x p x n`

    tens_b: array-like
        3'rd order tensor with shape `p x r x n`

    fun_m: MatrixTensorProduct, Callable[[NumpynDArray], NumpynDArray]
        Invertible linear mapping from `R^n` to `R^n`

    inv_m: MatrixTensorProduct, Callable[[NumpynDArray], NumpynDArray]
        Inverse of `fun_m` ( `fun_m(inv_m(x)) = inv_m(fun_m(x)) = x` )

    Returns
    -------
    tensor: array-like
        3'rd order tensor of shape `m x r x n`: the :math:`\\star_{\\mathbf{M}}`
        product of `A` and `B`
    """
    assert tens_a.shape[1] == tens_b.shape[0]
    assert tens_a.shape[-1] == tens_b.shape[-1]

    # Move to the transform (hat) domain, multiply facewise, and map back.
    hat_a, hat_b = fun_m(tens_a), fun_m(tens_b)
    facewise = np.einsum('mpi,pli->mli', hat_a, hat_b)
    return inv_m(facewise)
A_hat = funM(A) 184 | # B_hat = funM(B) 185 | # 186 | # calE_hat = np.einsum('mpi,pli->mli', A_hat, B_hat) 187 | # return invM(calE_hat) 188 | 189 | def tensor_mtranspose(tensor, mfun, minv): 190 | tensor_hat = mfun(tensor) 191 | tensor_hat_t = tensor_hat.transpose((1, 0, 2)) 192 | tensor_t = minv(tensor_hat_t) 193 | return tensor_t 194 | 195 | 196 | def _t_pinv_fdiag(F, Mfun, Minv) -> NumpynDArray: 197 | m, p, n = F.shape 198 | hat_f = Mfun(F) 199 | 200 | pinv_hat_f = np.zeros_like(hat_f) 201 | for i in range(n): 202 | fi_diag = np.diagonal(hat_f[:, :, i]).copy() 203 | fi_diag[(fi_diag ** 2) > 1e-6] = 1 / fi_diag[(fi_diag ** 2) > 1e-6] 204 | 205 | pinv_hat_f[:fi_diag.size, :fi_diag.size, i] = np.diag(fi_diag) 206 | 207 | pinv_f = Minv(pinv_hat_f) 208 | 209 | return tensor_mtranspose(pinv_f, Mfun, Minv) 210 | 211 | # # TODO: Is TensorArray needed ? 212 | # # noinspection PyPep8Naming 213 | # class TensorArray(np.ndarray): 214 | # def __new__(cls, input_array): 215 | # # Input array is an already formed ndarray instance 216 | # # We first cast to be our class type 217 | # obj = np.asarray(input_array).view(cls) 218 | # # add the new attribute to the created instance 219 | # # Finally, we must return the newly created object: 220 | # return obj 221 | # 222 | # @property 223 | # def TT(self): 224 | # return self.transpose((1, 0, 2)) 225 | # 226 | # def __array_finalize__(self, obj): 227 | # # see InfoArray.__array_finalize__ for comments 228 | # if obj is None: return 229 | # self.info = getattr(obj, 'info', None) 230 | -------------------------------------------------------------------------------- /mprod/_ml_helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List, Tuple, Dict, Mapping 3 | from ._base import NumpynDArray 4 | from sklearn.base import TransformerMixin, BaseEstimator 5 | import pandas as pd 6 | from itertools import product 7 | 8 | 9 | def table2tensor(table: 
def table2tensor(table: pd.DataFrame, missing_flag: bool = False) -> Tuple[np.ma.core.MaskedArray, Mapping, Mapping]:
    """
    Reshape an ``nm x p`` (``(samples x reps) x features``) 2-level
    multi-indexed dataframe into an ``m x p x n`` tensor
    ``(subjects, features, reps)``.

    Parameters
    ----------
    table: pd.DataFrame
        An ``nm x p`` table of samples x features. The index must be a
        2-level MultiIndex: level 0 is the subject id, level 1 the
        repetition id.

    missing_flag: `bool`, default = False
        When set to `False` (default), the function raises an error if some
        (subject, rep) combination is missing from the table. Setting it to
        `True` returns a tensor with the missing entries masked.

    Returns
    -------
    tensor: ndarray, np.ma.array
        3'rd order tensor `m x p x n` (subjects, features, reps). A masked
        array (fill value 0) when ``missing_flag`` is True.

    mode1_mapping : dict
        Maps each subject name to its mode1 (frontal) slice index.

    mode3_mapping : dict
        Maps each rep id to its mode3 (lateral) slice index.

    Raises
    ------
    KeyError
        If ``missing_flag`` is False and a (subject, rep) pair has no row.

    Examples
    --------
    >>> from mprod import table2tensor
    >>> tensor, mode1_mapping, mode3_mapping = table2tensor(table_data)  # doctest: +SKIP
    """
    samples_map, usamples = table.index.get_level_values(0).factorize()
    reps_map, ureps = table.index.get_level_values(1).factorize()

    m, p, n = usamples.size, table.shape[1], ureps.size

    # factorize assigns codes in order of appearance, so the i'th unique
    # label corresponds to slice i of the tensor.
    samples_map_dict = {name: i for i, name in enumerate(usamples)}
    reps_map_dict = {rep: i for i, rep in enumerate(ureps)}

    if missing_flag:
        # Start fully masked; each assignment below unmasks its entries.
        tensor = np.ma.array(np.zeros((m, p, n)), mask=np.ones((m, p, n)), fill_value=0)
        index_iterator = table.iterrows()
    else:
        # Iterate over the full (subject, rep) grid so a missing combination
        # surfaces as a KeyError from .loc; rows are only read, no copy needed.
        tensor = np.zeros((m, p, n))
        index_iterator = (((i, j), table.loc[(i, j)]) for i, j in product(usamples, ureps))

    try:
        for (m1, m3), val in index_iterator:
            tensor[samples_map_dict[m1], :, reps_map_dict[m3]] = val.values
    except KeyError as ke:
        # Chain the original lookup error so the offending key stays visible.
        raise KeyError("Discovered missing data in the table, which is not allowed by default. "
                       "To work with missing data and have a masked array returned, set missing_flag to True") from ke

    return tensor, samples_map_dict, reps_map_dict
# noinspection PyPep8Naming
# noinspection PyUnusedLocal
class MeanDeviationForm(TransformerMixin, BaseEstimator):
    """Standardize a data tensor by subtracting its mean sample.

    The mean deviation form of a tensor :math:`X \\in \\mathbb{R}^{m \\times p \\times n}`
    is calculated as:

    .. math::
        Z = X - U

    where `U` is the mean sample of `X`:

    .. math::
        U = \\frac{1}{m} \\sum_{i=1}^{m} X[i,:,:]

    Attributes
    ----------
    _mean_sample : ndarray of shape (1, p_features, n_repeats), or `None`
        The mean sample of the fitted dataset (`None` before ``fit``).

    Methods
    -------
    fit:
        Compute the mean sample of a training dataset.
    transform:
        Shift a dataset by the fitted mean sample.
    fit_transform:
        Compute the mean sample of a dataset and transform it to its mean
        deviation form.
    inverse_transform:
        Add the precomputed mean sample back to a dataset.
    """

    def __init__(self):
        # Mean sample of shape (1, p, n); populated by fit().
        self._mean_sample = None

    def _fit(self, X):
        # NOTE(review): np.nansum treats NaNs as 0 while the denominator is
        # still the full sample count m -- confirm this is intended rather
        # than a NaN-aware mean (np.nanmean).
        denum = X.shape[0]
        self._mean_sample = np.nansum(X, axis=0, keepdims=True) / denum

    def fit(self, X, y=None, **fit_param):
        """Compute the mean sample of a tensor.

        Parameters
        ----------
        X : {array-like} of shape (m_samples, p_features, n_repeats)
            The data used to compute the mean sample,
            used for later centering along the features-repeats axes.
        y : None
            Ignored.

        Returns
        -------
        self : object
            Fitted MeanDeviationForm object

        Examples
        --------
        >>> from mprod import MeanDeviationForm
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> mdf = MeanDeviationForm()
        >>> mdf = mdf.fit(X)
        """
        self._fit(X)
        return self

    def transform(self, X, y=None):
        """Perform standardization by centering.

        Parameters
        ----------
        X : array-like of shape (k_samples, p_features, n_repeats)
            The data to center along the features-repeats axes.
        y : None
            Ignored.

        Returns
        -------
        X_tr : ndarray of shape (k_samples, p_features, n_repeats)
            Transformed tensor.

        Examples
        --------
        >>> from mprod import MeanDeviationForm
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> y = np.random.randn(50,20,4)
        >>> mdf = MeanDeviationForm()
        >>> mdf_fit = mdf.fit(X)
        >>> yt = mdf.transform(y)
        """
        X_transform = X - self._mean_sample
        if isinstance(X_transform, np.ma.MaskedArray):
            # Fill masked (missing) entries with the array's fill value and
            # return a plain ndarray. The previous ``.filled().data``
            # returned a memoryview (ndarray.data), not an ndarray.
            return X_transform.filled()
        return X_transform

    def fit_transform(self, X, y=None, **fit_params):
        """Compute the mean sample of ``X`` and return its mean deviation form."""
        self.fit(X, y, **fit_params)
        return self.transform(X)

    def inverse_transform(self, Y):
        """Undo the centering of ``Y`` according to the fitted mean sample.

        Parameters
        ----------
        Y : array-like of shape (m_samples, p_features, n_repeats)
            Input data that will be shifted back.

        Returns
        -------
        Yt : ndarray of shape (m_samples, p_features, n_repeats)
            Transformed data.

        Examples
        --------
        >>> from mprod import MeanDeviationForm
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> mdf = MeanDeviationForm()
        >>> Xt = mdf.fit_transform(X)
        >>> mdf.inverse_transform(Xt) - X

        """
        Y_transform = Y + self._mean_sample
        if isinstance(Y_transform, np.ma.MaskedArray):
            # Same memoryview fix as in ``transform``.
            return Y_transform.filled()
        return Y_transform
# noinspection PyPep8Naming
class TCAM(TransformerMixin, BaseEstimator):
    """tsvdm based tensor component analysis (TCAM).

    Linear dimensionality reduction using tensor Singular Value Decomposition of the
    data to project it to a lower dimensional space. The input data is centered
    but not scaled for each feature before applying the tSVDM (using :mod:`mprod.MeanDeviationForm` ) .
    It uses the :mod:`mprod.decompositions.svdm` function as basis for the ``TSVDMII`` algorithm from Kilmer et. al.
    (https://doi.org/10.1073/pnas.2015851118) then offers a CP like transformations of the data accordingly.
    See https://arxiv.org/abs/2111.14159 for theoretical results and case studies, and the :ref:`Tutorials `
    for elaborated examples

    Parameters
    ----------
    n_components : int, float, default=None
        Number of components to keep.
        if n_components is not set all components are kept::

            n_components == min(m_samples, p_features) * n_reps - 1

        If ``0 < n_components < 1`` , select the number of components such that the
        amount of variance that needs to be explained is greater than the percentage specified
        by n_components. In case ``n_components >= 1`` is an integer then the estimated number
        of components will be::

            n_components_ == min(n_components, min(m_samples, p_features) * n_reps - 1)


    Attributes
    ----------
    n_components_ : int
        The estimated number of components. When n_components is set
        to a number between 0 and 1. this number is estimated from input data.
        Otherwise it equals the parameter n_components,
        or `min(m_samples, p_features) * n_reps -1` if n_components is None.

    explained_variance_ratio_ : ndarray of shape (`n_components_`,)
        The amount of variance explained by each of the selected components.

    mode2_loadings : ndarray (float) of shape (`p_features`, `n_components_`)
        A matrix representing the contribution (coefficient) of each feature in the original
        features space (2'nd mode of the tensor) to each of the TCAM factors.
        NOTE(review): the indexing in the property yields features on axis 0 and
        factors on axis 1 (i.e. ``(p_features, n_components_)``), not the transposed
        shape previously documented here.


    Methods
    -------
    fit:
        Compute the TCAM transformation for a given dataset
    transform:
        Transform a given dataset using a fitted TCAM
    fit_transform:
        Fit a TCAM to a dataset then return its TCAM transformation
    inverse_transform:
        Given points in the reduced TCAM space, compute the points pre-image in the original features space.


    """

    def __init__(self, fun_m: MatrixTensorProduct = None,
                 inv_m: MatrixTensorProduct = None,
                 n_components=None):
        # n_components may be: None (keep everything), a positive int (hard cap),
        # or a float in (0, 1] (explained-variance threshold).
        assert (type(n_components) in _int_types and (n_components >= 1)) or \
               ((type(n_components) in _float_types) and (0 < n_components <= 1)) \
               or (n_components is None), f"`n_components` must be positive integer or a float between 0 and 1" \
                                          f" or `None`, got {n_components} of type {type(n_components)}"

        # The transform and its inverse come as a pair; supplying only one of
        # them is rejected here (if both are None, `fit` picks a default pair).
        assert (fun_m is None) == (inv_m is None), "Only one of fun_m,inv_m is None. " \
                                                   "Both must be defined (or both None)"

        self.n_components = n_components

        self.fun_m = fun_m
        self.inv_m = inv_m
        # Centering helper; fitted in `fit` and reused by transform/inverse_transform.
        self._mdf = MeanDeviationForm()

    def _mprod(self, a, b) -> NumpynDArray:
        # M-product of two tensors under this estimator's (fun_m, inv_m) pair.
        return m_prod(a, b, self.fun_m, self.inv_m)

    def _fit(self, X: np.ndarray):
        """Core fitting routine; expects X already centered by `_mdf`.

        Computes the hat-domain tsvdm, orders all singular values globally,
        resolves ``n_components_`` and derives the per-slice multi-rank
        truncation of the decomposition.
        """
        # Largest number of usable factors (see class docstring).
        max_rank = self._n * min(self._m, self._p) - 1

        # hats=True: factors are requested in the transform ("hat") domain —
        # presumably the domain in which slices decouple; TODO confirm against svdm docs.
        self._hat_svdm = TensorSVDResults(*svdm(X, self.fun_m, self.inv_m, hats=True))

        # get factors order
        # `s` is stored lean (one column of singular values per frontal slice);
        # after the transpose, rows index slices and columns index the rank
        # position within a slice — confirmed below by how `_rrho` is built.
        diagonals = self._hat_svdm.s.transpose().copy()
        # Pair of index arrays (slice_idx, within_slice_idx) enumerating ALL
        # singular values in decreasing order of squared magnitude.
        self._factors_order = np.unravel_index(np.argsort(- (diagonals ** 2), axis=None), diagonals.shape)
        self._sorted_singular_vals = diagonals[self._factors_order]
        self._total_variation = (self._sorted_singular_vals ** 2).sum()
        self.explained_variance_ratio_ = ((self._sorted_singular_vals ** 2) / self._total_variation)

        # populate n_components if not given
        if self.n_components is None:
            self.n_components_ = max_rank
        elif type(self.n_components) in _int_types and self.n_components > 0:
            self.n_components_ = min(max_rank, self.n_components)
        elif type(self.n_components) in _float_types and self.n_components == 1.:
            # float 1.0 means "all the variance", i.e. keep everything.
            self.n_components_ = max_rank
        elif 0 < self.n_components < 1 and type(self.n_components) in _float_types:
            # Smallest prefix of the sorted factors whose cumulative share of
            # variance exceeds the requested fraction.
            var_cumsum = (self._sorted_singular_vals ** 2).cumsum()  # w in the paper
            w_idx = np.arange(0, var_cumsum.size, dtype=int)  # w index
            self.n_components_ = min(max_rank,
                                     w_idx[(var_cumsum / self._total_variation) > self.n_components].min() + 1)
        else:
            raise ValueError("Unexpected edge case for the value of `n_components`")

        # Always retain at least one factor.
        self.n_components_ = max(1, self.n_components_)

        # Truncated (slice_idx, within_slice_idx) ordering of the kept factors.
        self._n_factors_order = tuple([self._factors_order[0][:self.n_components_].copy(),
                                       self._factors_order[1][:self.n_components_].copy()])

        self.explained_variance_ratio_ = self.explained_variance_ratio_[:self.n_components_]
        # Multi-rank vector: for each slice, the deepest rank position among the
        # kept factors (+1 turns a 0-based index into a count).
        self._rrho = np.array([0 for _ in range(self._n)])
        for nn, rr in zip(*self._n_factors_order):
            self._rrho[nn] = max(self._rrho[nn], rr + 1)
        # self._rrho += 1
        # populate truncations
        # _tau = self._sorted_singular_vals[self.n_components_ + 1]
        # self._rrho = (diagonals > _tau).sum(axis=1)
        # Work on copies so the full decomposition in `_hat_svdm` stays intact.
        self._truncated_hat_svdm = TensorSVDResults(*self._hat_svdm.astuple())

        # Crop every factor to the largest per-slice rank...
        self._truncated_hat_svdm.u = self._truncated_hat_svdm.u[:, :self._rrho.max(), :]
        self._truncated_hat_svdm.s = self._truncated_hat_svdm.s[:self._rrho.max(), :]
        self._truncated_hat_svdm.v = self._truncated_hat_svdm.v[:, :self._rrho.max(), :]

        # ...then zero out, slice by slice, everything past that slice's own rank.
        for i, rho_i in enumerate(self._rrho):
            self._truncated_hat_svdm.u[:, rho_i:, i] = 0
            self._truncated_hat_svdm.s[rho_i:, i] = 0
            self._truncated_hat_svdm.v[:, rho_i:, i] = 0

        # Spatial-domain counterparts of the truncated factors.
        self._truncated_svdm = TensorSVDResults(self.inv_m(self._truncated_hat_svdm.u),
                                                self.inv_m(self._truncated_hat_svdm.s),
                                                self.inv_m(self._truncated_hat_svdm.v))

        # Entrywise pseudo-inverse of the truncated singular values (same 1e-6
        # tolerance as `_pinv_diag`); currently only consumed by commented-out
        # code paths below.
        self._truncS_pinv = self._truncated_svdm.s.copy()
        self._truncS_pinv[(self._truncS_pinv ** 2) <= 1e-6] = 0
        self._truncS_pinv[(self._truncS_pinv ** 2) > 1e-6] = 1 / self._truncS_pinv[(self._truncS_pinv ** 2) > 1e-6]

        return self

    # noinspection PyUnusedLocal
    def fit(self, X, y=None, **fit_params):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (m_samples, p_features, n_modes)
            Training data, where m_samples is the number of samples,
            p_features is the number of features and n_modes is the
            number of modes (timepoints/locations etc...)

        y : Ignored
            Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.


        Examples
        --------
        >>> from mprod.dimensionality_reduction import TCAM
        >>> import numpy as np
        >>> X = np.random.randn(10,20,4)
        >>> tca = TCAM()
        >>> mdf = tca.fit(X)


        """

        assert len(X.shape) == 3, "X must be a 3'rd order tensor"
        self._m, self._p, self._n = X.shape

        # No transform pair supplied at construction: fall back to the package
        # default for tensors with n frontal slices.
        if self.fun_m is None:
            self.fun_m, self.inv_m = _default_transform(self._n)
        # Center the data (per feature) before decomposing.
        _X = self._mdf.fit_transform(X)

        return self._fit(_X)

    def _mode0_reduce(self, tU):
        # Scale each kept factor column by its singular value and stack the
        # columns in global factor order. NOTE(review): identical in body to
        # `_mode1_reduce`; neither is called by the active code paths visible here.
        return np.concatenate(
            [self._sorted_singular_vals[e] * tU[:, [fj], [fi]] for e, (fi, fj) in
             enumerate(zip(*self._n_factors_order))],
            axis=1)

    def _mode1_reduce(self, tV):
        # Same reduction as `_mode0_reduce`, kept for the (commented-out)
        # mode-1 projector below.
        return np.concatenate(
            [self._sorted_singular_vals[e] * tV[:, [fj], [fi]] for e, (fi, fj) in
             enumerate(zip(*self._n_factors_order))],
            axis=1)

    def _mode0_projector(self, X):
        """Project a (centered) tensor X onto the kept factors.

        Computes the facewise hat-domain product ``X_hat @ V_hat`` and gathers
        the scores at the kept (rank, slice) positions. Note the singular
        values are NOT divided out — the pinv(S) variants are commented out.
        """

        trunc_U, trunc_S, trunc_V = self._truncated_hat_svdm.astuple()
        # trunc_Spinv = _t_pinv_fdiag(trunc_S, self.fun_m, self.inv_m)
        # XV = self._mprod(X, trunc_V)
        # XVS = self._mprod(XV, trunc_Spinv)
        # XVS_hat = self.fun_m(XVS)

        # Move the slice axis to the front so `matmul` batches over slices,
        # then restore the (samples, rank, slice) layout.
        XV_hat = np.matmul(self.fun_m(X).transpose(2, 0, 1), trunc_V.transpose(2, 0, 1)).transpose(1, 2, 0)
        # Fancy-index the kept factors: axis 1 by rank position, axis 2 by slice.
        Y = XV_hat[:, self._n_factors_order[1], self._n_factors_order[0]].copy()

        # XV_hat = np.matmul(self.fun_m(X).transpose(2, 0, 1), trunc_V.transpose(2, 0, 1))
        # XVS_hat = XV_hat * _pinv_diag(trunc_S).transpose().reshape(self._n, 1, self._rrho.max())
        # XVS_hat = XVS_hat.transpose(1, 2, 0)
        # Y = XVS_hat[:, self._n_factors_order[1], self._n_factors_order[0]].copy()

        # X_transformed_0 = self._mprod(X, self._truncated_svdm.v)
        # X_transformed_0 = self._mprod(X_transformed_0, self._truncS_pinv)
        # X_transformed = self.fun_m(X_transformed_0)
        return Y

    # def _mode1_projector(self, X):
    #     truncU_mtranspose = tensor_mtranspose(self._truncated_svdm.u, self.fun_m, self.inv_m)
    #     X_transformed_0 = self._mprod(truncU_mtranspose, X)
    #     X_transformed_0 = tensor_mtranspose(self._mprod(self._truncS_pinv, X_transformed_0), self.fun_m, self.inv_m)
    #     X_transformed = self.fun_m(X_transformed_0)
    #     return self._mode1_reduce(X_transformed)

    def transform(self, X):
        """Apply mode-1 dimensionality reduction to X.

        X is projected on the first mode-1 tensor components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like of shape (m_samples, p_features, n_modes)
            Training data, where m_samples is the number of samples,
            p_features is the number of features and n_modes is the
            number of modes (timepoints/locations etc...)

        Returns
        -------
        X_new : array-like of shape (m_samples, `n_components_`)
            Projection of X in the first principal components, where m_samples
            is the number of samples and n_components is the number of the components.

        """
        # Validate order and the feature/mode dimensions against the fitted shape,
        # center with the training-set statistics, then project.
        # (Naming note: the docstring's "mode-1" is 1-based; `_mode0_projector` is 0-based.)
        _assert_order_and_mdim(X, 'X', 3, [(1, self._p), (2, self._n)])
        return self._mode0_projector(self._mdf.transform(X))

    @property
    def mode2_loadings(self):
        """The weights driving the variation in each of the obtained factors with respect to
        each feature.

        Returns an ndarray of shape (`p_features`, `n_components_`): column j holds
        the hat-domain right-factor column of the j'th kept factor.
        """

        return self._truncated_hat_svdm.v[:, self._n_factors_order[1], self._n_factors_order[0]].copy()

    def fit_transform(self, X: np.ndarray, y=None, **fit_params):

        """Fit the model with X and apply the dimensionality reduction on X.

        Parameters
        ----------
        X : array-like of shape (m_samples, p_features, n_modes)
            Training data, where m_samples is the number of samples,
            p_features is the number of features and n_modes is the
            number of modes (timepoints/locations etc...)

        y : Ignored
            Ignored.

        Returns
        -------
        X_new : ndarray of shape (m_samples, `n_components_`)
            Transformed values.

        """

        self.fit(X)
        return self.transform(X)

    # noinspection PyPep8Naming
    def inverse_transform(self, Y: NumpynDArray):
        """
        Inverts TCAM scores back to the original features space

        Parameters
        ----------
        Y: np.ndarray
            2d array with shape (k, `n_components_`)

        Returns
        -------
        Y_inv: NumpynDArray
            3rd order tensor that is the inverse transform of Y to the original features space

        """

        trunc_U, trunc_S, trunc_V = self._truncated_hat_svdm.astuple()

        # Suppose YY = X * V * pinv(S)
        # and the matrix Y is an ordering of YYs columns according to the factors order

        # Scatter the scores back to their hat-domain (rank, slice) positions;
        # entries of factors that were truncated away stay zero.
        YY_hat = np.zeros((Y.shape[0], self._rrho.max(), self._n))
        YY_hat[:, self._n_factors_order[1], self._n_factors_order[0]] = Y.copy()
        # YYS_hat = YY_hat.transpose(2, 0, 1) * trunc_S.transpose().reshape(self._n, 1, self._rrho.max())
        # Facewise product with V_hat^T (note transpose(2, 1, 0) swaps the two
        # matrix axes per slice), then return to the spatial domain.
        X_hat = np.matmul(YY_hat.transpose(2, 0, 1), trunc_V.transpose(2, 1, 0)).transpose(1, 2, 0)
        XX = self.inv_m(X_hat)

        # Note that
        # YY*S*V' = X * V * pinv(S) * S * V'
        #         = X * V * (JJ) * V'
        #         = X * (V * JJ) * V'
        #         = X * (VV) * V'
        #         = X * (JJ) \approx X
        #
        # where JJ is "almost" the identity tensor


        # #################################### OLD CODE #################################################
        # YY_hat = np.zeros((trunc_U.shape[0], trunc_U.shape[1], trunc_U.shape[-1]))                    #
        # YY_hat[:, self._n_factors_order[1], self._n_factors_order[0]] = Y.copy()                      #
        # YY = self.inv_m(YY_hat)  # get YY from YY_hat                                                 #
        # YYs = self._mprod(YY, trunc_S)  # YY*S                                                        #
        # Yinv = self._mprod(YYs, tensor_mtranspose(trunc_V, self.fun_m, self.inv_m))  # YY*S*V'        #
        # # return self._mdf.inverse_transform(Yinv)                                                    #
        # ###############################################################################################

        # Finally undo the mean-deviation centering applied in `fit`.
        return self._mdf.inverse_transform(XX)
"$\\newcommand{\\tQt}{\\tQ^{\\T}}$\n", 42 | "$\\newcommand{\\thQ}{\\widehat{\\tQ}}$\n", 43 | "$\\newcommand{\\thQt}{\\thQ^{\\T}}$\n", 44 | "$\\newcommand{\\tV}{\\tens{V}}$\n", 45 | "$\\newcommand{\\tVt}{\\tV^{\\T}}$\n", 46 | "$\\newcommand{\\thV}{\\widehat{\\tV}}$\n", 47 | "$\\newcommand{\\thVt}{\\thV^{\\T}}$\n", 48 | "$\\newcommand{\\tU}{\\tens{U}}$\n", 49 | "$\\newcommand{\\tUt}{\\tU^{\\T}}$\n", 50 | "$\\newcommand{\\thU}{\\widehat{\\tU}}$\n", 51 | "$\\newcommand{\\thUt}{\\thU^{\\T}}$\n", 52 | "$\\newcommand{\\tS}{\\tens{S}}$\n", 53 | "$\\newcommand{\\tSt}{\\tS^{\\T}}$\n", 54 | "$\\newcommand{\\thS}{\\widehat{\\tS}}$\n", 55 | "$\\newcommand{\\thSt}{\\thS^{\\T}}$\n", 56 | "$\\newcommand{\\hsigma}{\\hat{\\sigma}}$\n", 57 | "$\\newcommand{\\rnk}{\\operatorname{rank}}$\n", 58 | "$\\newcommand{\\rrho}{\\boldsymbol{\\rho}}$\n", 59 | "$\\newcommand{\\TNorm}[1]{\\|#1\\|_{2}}$\n", 60 | "$\\newcommand{\\FNorm}[1]{\\|#1\\|_{F}}$\n", 61 | "$\\newcommand{\\NNorm}[1]{\\|#1\\|_{*}}$\n", 62 | "$\\newcommand{\\FNormS}[1]{\\FNorm{#1}^2}$\n", 63 | "$\\newcommand{\\TNormS}[1]{\\TNorm{#1}^2}$" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "The main functionality of ``mprod-package`` is factorization of tensors, that is, expressing a tensor $\\tA \\in \\RR^{d_1 \\xx ... \\xx d_N}$ as a product of other, \"simpler\" tensors. \n", 71 | "For this aim, one must first obtain some notion of tensor-tensor multiplication.\n", 72 | "The \"M-product\" (denoted by $\\mm$ ), defined in Kilmer et al., refers to a \"family\" of tensor-tensor products, and provides the notion of multiplication which enables the factorization of tensors. \n", 73 | "Here, we briefly walk through the steps of $\\mm$-product's formal construction. " 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# The M-product\n", 81 | "\n", 82 | "We begin with some definitions.
\n", 83 | "Let $\\matM$ be an $n\\xx n$ unitary matrix ($\\matM \\matMt = \\mat{I}_n = \\matMt \\matM$), and a tensor $\\tA \\in \\RR^{\\mpn}$. \n", 84 | "We define the **domain transform** specified by $\\matM$ as $\\thA := \\tA \\tsM$, where $\\tsM$ denotes the tensor-matrix multiplication of applying $\\matM$ to each of the tensor $n$ dimensional tube fibers ($\\tA_{i,j,:}$).\n", 85 | "\n", 86 | "A practical demonstration using `scipy` and `numpy` libraries:" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "import numpy as np\n", 96 | "from scipy.stats import ortho_group # used for sampling random unitary matrices \n", 97 | " # from the Haar distribution\n", 98 | "\n", 99 | "m, p, n = 10, 5, 8\n", 100 | "\n", 101 | "A = np.random.randn(m, p, n) # generate a random tensor\n", 102 | "M = ortho_group.rvs(n) # random sample unitary M\n", 103 | "\n", 104 | "A_hat = np.zeros_like(A)\n", 105 | "for i in range(m):\n", 106 | " for j in range(p):\n", 107 | " A_hat[i,j,:] = M @ A[i,j,:]" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | ".. 
attention::\n", 115 | " The tensor-matrix product implementation is much more efficient than the above for loop\n", 116 | "\n", 117 | "\n", 118 | "\n", 119 | "The **transpose** of a real $\\mpn$ tensor $\\tA$ with respect to $\\matM$, denoted by $\\tA^{\\T}$, is a $\\pmn$ tensor for which \n", 120 | "$$[\\widehat{\\tA^{\\T}}]_{:,:,i} = [\\thA^{\\T}]_{:,:,i} = {[\\thA]_{:,:,i}}^{\\T}$$\n", 121 | "\n", 122 | "Given two tensors $\\tA \\in \\RR^{\\mpn}$ and $\\tB \\in \\RR^{p \\xx r \\xx n}$ , the facewise tensor-tensor product of $\\tA$ and $\\tB$, denoted by $\\tA \\vartriangle \\tB$ , is the $m \\xx r \\xx n$ tensor for which \n", 123 | "$$[\\tA \\vartriangle \\tB]_{:,:,i} = \\tA_{:,:,i} \\tB_{:,:,i}$$ \n", 124 | "\n", 125 | "The $\\mm$ **-product** of $\\tA \\in \\RR^{\\mpn}$ and $\\tB \\in \\RR^{p \\xx r \\xx n}$ is defined by \n", 126 | "$$\\tA \\mm \\tB := (\\thA \\vartriangle \\thB) \\tsMinv \\in \\RR^{m \\xx r \\xx n}$$ \n", 127 | "\n", 128 | "\n", 129 | "The `mprod-package` offers utility functions like `m_prod` implementing $\\mm$ as well as random and spectral analysis based generators of unitary transforms" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 3, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "129.30020497750468\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "from mprod import m_prod\n", 147 | "from mprod import generate_haar, generate_dct\n", 148 | "\n", 149 | "funm_haar, invm_haar = generate_haar(n) # Utility wrapper arround \n", 150 | " # scipy.stats.ortho_group \n", 151 | "funm_dct, invm_dct = generate_dct(n) # Generates dct and idct transforms using scipy's\n", 152 | " # fft module. 
the default dct type is 2\n", 153 | "\n", 154 | "# generate random tensor B \n", 155 | "r = 15\n", 156 | "B = np.random.randn(p,r,n)\n", 157 | "\n", 158 | "# Multiply A and B with respect to a randomly sampled M\n", 159 | "C_haar = m_prod(A,B,funm_haar, invm_haar)\n", 160 | "\n", 161 | "# Multiply A and B with respect to M = dct\n", 162 | "C_dct = m_prod(A,B,funm_dct, invm_dct)\n", 163 | "\n", 164 | "print(np.linalg.norm(C_haar - C_dct))" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "As shown above, given two distinct transforms ${\\matM}_1, {\\matM}_2$ , we have that $\\tA \\star_{\\scriptstyle \\matM_1} \\tB$ and $\\tA \\star_{\\scriptstyle \\matM_2} \\tB$ are not equal in general.\n", 172 | "This fact, as we will see, provides high flexibility when applying $\\mm$ based dimensionality reduction schemes.\n", 173 | "\n", 174 | "Two tensors $\\tA, \\tB \\in \\RR^{1 \\xx m \\xx n}$ are called $\\mm$ **-orthogonal slices** if $\\tA^{\\T} \\mm \\tB = \\mathbf{0}$, where $\\mathbf{0} \\in \\RR^{1\\xx 1 \\xx n}$ is the zero tube fiber, while $\\tQ \\in \\RR^{m \\xx m \\xx n}$ is called $\\mm$ **-unitary** if $\\tQ^{\\T} \\mm \\tQ = \\tI = \\tQ \\mm \\tQ^{\\T}$ .\n", 175 | "
\n", 176 | "A tensor $\\tB \\in \\RR^{p \\xx k \\xx n}$ is said to be a pseudo $\\mm$ -unitary tensor (or pseudo $\\mm$-orthogonal) if $\\tB^{\\T} \\mm \\tB$ is f-diagonal (i.e., all frontal slices are diagonal), and all frontal slices of $(\\tB^{\\T} \\mm \\tB) \\tsM$ are diagonal matrices with entries that are either ones or zeros.\n", 177 | "\n", 178 | "\n", 179 | "# TSVDM\n", 180 | "\n", 181 | "Let $\\tA \\in \\RR^{\\mpn}$ be a real tensor, then is possible to write the full **tubal singular value decomposition** of $\\tA$ as \n", 182 | "$$\\tA = \\tU \\mm \\tS \\mm \\tV^{\\T}$$ \n", 183 | "\n", 184 | "where $\\tU, \\tV$ are $(m \\xx m \\xx n)$ and $(p \\xx p \\xx n)$ , $\\mm$-unitary tensors respectively, and $\\tS \\in \\RR^{\\mpn}$ is an **f-diagonal** tensor, that is, a tensor whose frontal slices ( $\\tS_{:,:,i}$ ) are matrices with zeros outside their main diagonal.
\n", 185 | "\n", 186 | "We use the notation $\\hsigma_{j}^{(i)}$ do denote the $j^{th}$ largest singular value on the $i^{th}$ lateral face of $\\thS$: \n", 187 | "$$\\hsigma_{j}^{(i)} := \\thS_{j,j,i}$$\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 4, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "U: 10x5x8\n", 200 | "S: 5x8\n", 201 | "V: 5x5x8 \n", 202 | "\n", 203 | "||A - USV'||^2 = 5.159366909775574e-27\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "from mprod.decompositions import svdm\n", 209 | "from mprod import tensor_mtranspose\n", 210 | "\n", 211 | "U,S,V = svdm(A, funm_haar, invm_haar)\n", 212 | "\n", 213 | "print(\"U:\", \"x\".join(map(str, U.shape)))\n", 214 | "print(\"S:\", \"x\".join(map(str, S.shape)))\n", 215 | "print(\"V:\", \"x\".join(map(str, V.shape)),\"\\n\")\n", 216 | "\n", 217 | "# Note that for practical reasons, S is stored in a lean datastructure\n", 218 | "# To obtain the \"tensorial\" representation of S, we do as follows\n", 219 | "tens_S = np.zeros((p,p,n))\n", 220 | "for i in range(n):\n", 221 | " tens_S[:S.shape[0],:S.shape[0],i] = np.diag(S[:,i])\n", 222 | "\n", 223 | "\n", 224 | "# reconstruct the tensor\n", 225 | "Vt = tensor_mtranspose(V,funm_haar, invm_haar)\n", 226 | "US = m_prod(U, tens_S, funm_haar, invm_haar)\n", 227 | "USVt = m_prod(US, Vt, funm_haar, invm_haar)\n", 228 | "\n", 229 | "print(\"||A - USV'||^2 =\",np.linalg.norm(A - USVt)**2) # practically 0" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "# Tensor ranks and truncations\n", 237 | "\n", 238 | "* The **t-rank** of $\\tA$ is the number of nonezero tubes of $\\tS$: \n", 239 | "$$\n", 240 | "r = | \\left\\{ i = 1, \\dots, n ~;~ \\FNormS{\\tS_{i,i,:}} > 0 \\right\\} |\n", 241 | "$$\n", 242 | "\n", 243 | "$\\tA^{(q)} = \\tU_{:,1:q, :} \\mm \\tS_{1:q,1:q,:} \\mm {\\tV_{:,1:q,:}}^{\\T}$ denotes 
"* The **t-rank** of $\tA$ is the number of nonzero tubes of $\tS$: \n",
$i_1,\\dots, i_{np}$ to denote the indexes of the non-zeros of $\\thS$ ordered in decreasing order. That is \n", 294 | "$$\\hsigma_{\\ell} := \\hsigma_{j_{\\ell}}^{(i_{\\ell})}$$\n", 295 | "\n", 296 | "where $\\hsigma_1 \\geq \\hsigma_2 \\geq \\dots \\geq \\hsigma_{np}$ .\n", 297 | "\n", 298 | "For $q = 1 , \\dots , p n$ , the **explicit rank-** $q$ **truncation** under $\\mm$ of a tensor $\\tA$, denoted by $\\tA_q = \\tA_{\\rrho}$ , where $\\tA_{\\rrho}$ is the tensor of multi-rank $\\rrho$ under $\\mm$ such that \n", 299 | "$$\\rrho_i = \\max \\{ j = 1, \\dots ,p ~|~ (j,i) \\in \\{(j_1, j_1), \\dots, (j_q, i_q)\\} \\} .$$ \n", 300 | "\n", 301 | "In words, we keep the $q$ top singular values of any frontal slice of $\\thS$, and zero out the rest. \n", 302 | "\n", 303 | "\n", 304 | "\n" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | ".. note::\n", 312 | " We have that $\\tA^{(q)}, \\tA_{\\rrho}$ and $\\tA_{q}$ are the best t-rank $q$, multi-rank $\\rrho$ and explicit-rank $q$ (under $\\mm$) approximations of $\\tA$, respectively.\n", 313 | "\n", 314 | "\n", 315 | "\n", 316 | "\n", 317 | "# The effect of choosing different transforms \n", 318 | "\n", 319 | "To demonstrate how might the choice of $\\matM$ influence the resulting decomposition, we use the real-world time-series dataset obtained from a study on Pediatric Ulcerative Colitis (PUC) by Schirmer et al..\n", 320 | "\n", 321 | "First, we obtain the data table from our analysis GitHub repo, construct a tensor from the data and apply TSVDM with respect to both randomly sampled $\\matM$ and the DCT.\n", 322 | "\n", 323 | "Note that in `generate_haar` function call, we set the `random_state` parameter to an integer (123) just so that the results are reproducible." 
324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 57, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "shape of S, by randomly sampled transform: (87, 4)\n", 336 | "shape of S, by DCT: (4, 4)\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "import pandas as pd\n", 342 | "from mprod import table2tensor\n", 343 | "\n", 344 | "file_path = \"https://raw.githubusercontent.com/UriaMorP/\" \\\n", 345 | " \"tcam_analysis_notebooks/main/Schirmer2018/Schirmer2018.tsv\"\n", 346 | "\n", 347 | "data_raw = pd.read_csv(file_path, index_col=[0,1], sep=\"\\t\"\n", 348 | " , dtype={'Week':int})\n", 349 | "\n", 350 | "data_tensor, map1, map3 = table2tensor(data_raw)\n", 351 | "\n", 352 | "m,p,n = data_tensor.shape\n", 353 | "\n", 354 | "# Generate transforms according to the \n", 355 | "# relevant dimensions\n", 356 | "funm_haar, invm_haar = generate_haar(n,random_state=123)\n", 357 | "funm_dct, invm_dct = generate_dct(n)\n", 358 | "\n", 359 | "\n", 360 | "# Haar\n", 361 | "Uhaar, Shaar, Vhaar = svdm(data_tensor, funm_haar, invm_haar)\n", 362 | "print(\"shape of S, by randomly sampled transform:\", Shaar.shape)\n", 363 | "# DCT\n", 364 | "Udct, Sdct, Vdct = svdm(data_tensor, funm_dct, invm_dct)\n", 365 | "print(\"shape of S, by DCT:\", Sdct.shape)\n" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "In this case, we have that the t-rank of our data under the DCT domain transform is 4, and 87 under $\\mm$ where $\\matM$ is obtained from randomly sampling the Haar distribution. \n", 373 | "\n", 374 | "Even though it is not generally true that choosing $\\matM$ as DCT (the t-product) results in better compression, the fact that it does so for time-series data makes perfect sense; Since we assume that time-series data are samples of continuous functions, which, are easy to approximate well using very few DCT basis elements." 
#!/usr/bin/env python3
# NOTE: the module docstring below is consumed at runtime via
# `__doc__.format(**globals())` to build the usage text, so its `{...}`
# placeholders and wording are behavior and must not be edited casually.
"""
runtests.py [OPTIONS] [-- ARGS]

Run tests, building the project first.

Examples::

    $ python runtests.py
    $ python runtests.py -s {SAMPLE_SUBMODULE}
    $ # Run a standalone test function:
    $ python runtests.py -t {SAMPLE_TEST}
    $ # Run a test defined as a method of a TestXXX class:
    $ python runtests.py --ipython
    $ python runtests.py --python somescript.py
    $ python runtests.py --bench
    $ python runtests.py --durations 20

Run a debugger:

    $ gdb --args python runtests.py [...other args...]

Disable pytest capturing of output by using its '-s' option:

    $ python runtests.py -- -s

Generate C code coverage listing under build/lcov/:
(requires http://ltp.sourceforge.net/coverage/lcov.php)

    $ python runtests.py --gcov [...other args...]
    $ python runtests.py --lcov-html

Run lint checks.
Provide target branch name or `uncommitted` to check before committing:

    $ python runtests.py --lint main
    $ python runtests.py --lint uncommitted

"""
#
# This is a generic test runner script for projects using NumPy's test
# framework. Change the following values to adapt to your project:
#
# NOTE(review): `numpy.lib` does not appear to be used in this prelude —
# possibly imported only to fail fast when numpy is missing; confirm.
import numpy.lib

# Project-specific knobs for the generic runner template.
PROJECT_MODULE = "mprod"
PROJECT_ROOT_FILES = ['mprod', 'setup.py']
SAMPLE_TEST = "mprod/decompositions/tests/test_decompositions.py::test_tqrm"
SAMPLE_SUBMODULE = "decompositions"

# EXTRA_PATH = ['/home/labs/elinav/uria/lib/ccache', '/home/labs/elinav/uria/lib/f90cache',
#               '/home/labs/elinav/uria/local/lib/ccache', '/home/labs/elinav/uria/local/lib/f90cache']

# Extra PATH entries for compiler caches, searched when building.
EXTRA_PATH = ['~/lib/ccache', '~/lib/f90cache',
              '~/local/lib/ccache', '~/local/lib/f90cache']


# ---------------------------------------------------------------------


# Under `python -OO` docstrings are stripped and __doc__ is None; otherwise
# expand the {SAMPLE_*} placeholders into the usage text.
if __doc__ is None:
    __doc__ = "Run without -OO if you want usage info"
else:
    __doc__ = __doc__.format(**globals())

import sys
import os, glob

# In case we are run from the source directory, we don't want to import the
# project from there:
sys.path.pop(0)

import shutil
import subprocess
import time
from argparse import ArgumentParser, REMAINDER

ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
one verbosity level to pytest. Default is 0") 85 | parser.add_argument("--debug-info", action="store_true", 86 | help=("Add --verbose-cfg to build_src to show " 87 | "compiler configuration output while creating " 88 | "_numpyconfig.h and config.h")) 89 | parser.add_argument("--no-build", "-n", action="store_true", default=False, 90 | help="Do not build the project (use system installed " 91 | "version)") 92 | parser.add_argument("--build-only", "-b", action="store_true", 93 | default=False, help="Just build, do not run any tests") 94 | parser.add_argument("--doctests", action="store_true", default=False, 95 | help="Run doctests in module") 96 | parser.add_argument("--refguide-check", action="store_true", default=False, 97 | help="Run refguide (doctest) check (do not run " 98 | "regular tests.)") 99 | parser.add_argument("--coverage", action="store_true", default=False, 100 | help=("Report coverage of project code. HTML output " 101 | "goes under build/coverage")) 102 | parser.add_argument("--lint", default=None, 103 | help="'' or 'uncommitted', passed to " 104 | "tools/linter.py [--branch BRANCH] " 105 | "[--uncommitted]") 106 | parser.add_argument("--durations", action="store", default=-1, type=int, 107 | help=("Time N slowest tests, time all if 0, time none " 108 | "if < 0")) 109 | parser.add_argument("--gcov", action="store_true", default=False, 110 | help=("Enable C code coverage via gcov (requires " 111 | "GCC). gcov output goes to build/**/*.gc*")) 112 | parser.add_argument("--lcov-html", action="store_true", default=False, 113 | help=("Produce HTML for C code coverage information " 114 | "from a previous run with --gcov. 
" 115 | "HTML output goes to build/lcov/")) 116 | parser.add_argument("--mode", "-m", default="fast", 117 | help="'fast', 'full', or something that could be " 118 | "passed to nosetests -A [default: fast]") 119 | parser.add_argument("--submodule", "-s", default=None, 120 | help="Submodule whose tests to run (cluster, " 121 | "constants, ...)") 122 | parser.add_argument("--pythonpath", "-p", default=None, 123 | help="Paths to prepend to PYTHONPATH") 124 | parser.add_argument("--tests", "-t", action='append', 125 | help="Specify tests to run") 126 | parser.add_argument("--python", action="store_true", 127 | help="Start a Python shell with PYTHONPATH set") 128 | parser.add_argument("--ipython", "-i", action="store_true", 129 | help="Start IPython shell with PYTHONPATH set") 130 | parser.add_argument("--shell", action="store_true", 131 | help="Start Unix shell with PYTHONPATH set") 132 | parser.add_argument("--mypy", action="store_true", 133 | help="Run mypy on files with NumPy on the MYPYPATH") 134 | parser.add_argument("--debug", "-g", action="store_true", 135 | help="Debug build") 136 | parser.add_argument("--parallel", "-j", type=int, default=0, 137 | help="Number of parallel jobs during build") 138 | parser.add_argument("--warn-error", action="store_true", 139 | help="Set -Werror to convert all compiler warnings to " 140 | "errors") 141 | parser.add_argument("--cpu-baseline", default=None, 142 | help="Specify a list of enabled baseline CPU " 143 | "optimizations"), 144 | parser.add_argument("--cpu-dispatch", default=None, 145 | help="Specify a list of dispatched CPU optimizations"), 146 | parser.add_argument("--disable-optimization", action="store_true", 147 | help="Disable CPU optimized code (dispatch, simd, " 148 | "fast, ...)"), 149 | parser.add_argument("--simd-test", default=None, 150 | help="Specify a list of CPU optimizations to be " 151 | "tested against NumPy SIMD interface"), 152 | parser.add_argument("--show-build-log", action="store_true", 153 | 
help="Show build output rather than using a log file") 154 | parser.add_argument("--bench", action="store_true", 155 | help="Run benchmark suite instead of test suite") 156 | parser.add_argument("--bench-compare", action="store", metavar="COMMIT", 157 | help=("Compare benchmark results of current HEAD to " 158 | "BEFORE. Use an additional " 159 | "--bench-compare=COMMIT to override HEAD with " 160 | "COMMIT. Note that you need to commit your " 161 | "changes first!")) 162 | parser.add_argument("args", metavar="ARGS", default=[], nargs=REMAINDER, 163 | help="Arguments to pass to pytest, asv, mypy, Python " 164 | "or shell") 165 | args = parser.parse_args(argv) 166 | 167 | if args.durations < 0: 168 | args.durations = -1 169 | 170 | if args.bench_compare: 171 | args.bench = True 172 | args.no_build = True # ASV does the building 173 | 174 | if args.lcov_html: 175 | # generate C code coverage output 176 | lcov_generate() 177 | sys.exit(0) 178 | 179 | if args.pythonpath: 180 | for p in reversed(args.pythonpath.split(os.pathsep)): 181 | sys.path.insert(0, p) 182 | 183 | if args.gcov: 184 | gcov_reset_counters() 185 | 186 | if args.debug and args.bench: 187 | print("*** Benchmarks should not be run against debug " 188 | "version; remove -g flag ***") 189 | 190 | if args.lint: 191 | check_lint(args.lint) 192 | 193 | if not args.no_build: 194 | # we need the noarch path in case the package is pure python. 
195 | site_dir, site_dir_noarch = build_project(args) 196 | sys.path.insert(0, site_dir) 197 | sys.path.insert(0, site_dir_noarch) 198 | os.environ['PYTHONPATH'] = site_dir + os.pathsep + site_dir_noarch 199 | else: 200 | _temp = __import__(PROJECT_MODULE) 201 | site_dir = os.path.sep.join(_temp.__file__.split(os.path.sep)[:-2]) 202 | 203 | extra_argv = args.args[:] 204 | if not args.bench: 205 | # extra_argv may also lists selected benchmarks 206 | if extra_argv and extra_argv[0] == '--': 207 | extra_argv = extra_argv[1:] 208 | 209 | if args.python: 210 | # Debugging issues with warnings is much easier if you can see them 211 | print("Enabling display of all warnings") 212 | import warnings 213 | import types 214 | 215 | warnings.filterwarnings("always") 216 | if extra_argv: 217 | # Don't use subprocess, since we don't want to include the 218 | # current path in PYTHONPATH. 219 | sys.argv = extra_argv 220 | with open(extra_argv[0], 'r') as f: 221 | script = f.read() 222 | sys.modules['__main__'] = types.ModuleType('__main__') 223 | ns = dict(__name__='__main__', 224 | __file__=extra_argv[0]) 225 | exec(script, ns) 226 | sys.exit(0) 227 | else: 228 | import code 229 | code.interact() 230 | sys.exit(0) 231 | 232 | if args.ipython: 233 | # Debugging issues with warnings is much easier if you can see them 234 | print("Enabling display of all warnings and pre-importing numpy as np") 235 | import warnings; 236 | warnings.filterwarnings("always") 237 | import IPython 238 | import numpy as np 239 | IPython.embed(colors='neutral', user_ns={"np": np}) 240 | sys.exit(0) 241 | 242 | if args.shell: 243 | shell = os.environ.get('SHELL', 'cmd' if os.name == 'nt' else 'sh') 244 | print("Spawning a shell ({})...".format(shell)) 245 | subprocess.call([shell] + extra_argv) 246 | sys.exit(0) 247 | 248 | if args.mypy: 249 | try: 250 | import mypy.api 251 | except ImportError: 252 | raise RuntimeError( 253 | "Mypy not found. 
Please install it by running " 254 | "pip install -r test_requirements.txt from the repo root" 255 | ) 256 | 257 | os.environ['MYPYPATH'] = site_dir 258 | # By default mypy won't color the output since it isn't being 259 | # invoked from a tty. 260 | os.environ['MYPY_FORCE_COLOR'] = '1' 261 | 262 | config = os.path.join( 263 | site_dir, 264 | "numpy", 265 | "typing", 266 | "tests", 267 | "data", 268 | "mypy.ini", 269 | ) 270 | 271 | report, errors, status = mypy.api.run( 272 | ['--config-file', config] + args.args 273 | ) 274 | print(report, end='') 275 | print(errors, end='', file=sys.stderr) 276 | sys.exit(status) 277 | 278 | if args.coverage: 279 | dst_dir = os.path.join(ROOT_DIR, 'build', 'coverage') 280 | fn = os.path.join(dst_dir, 'coverage_html.js') 281 | if os.path.isdir(dst_dir) and os.path.isfile(fn): 282 | shutil.rmtree(dst_dir) 283 | extra_argv += ['--cov-report=html:' + dst_dir] 284 | 285 | if args.refguide_check: 286 | cmd = [os.path.join(ROOT_DIR, 'tools', 'refguide_check.py'), 287 | '--doctests'] 288 | if args.submodule: 289 | cmd += [args.submodule] 290 | os.execv(sys.executable, [sys.executable] + cmd) 291 | sys.exit(0) 292 | 293 | if args.bench: 294 | # Run ASV 295 | for i, v in enumerate(extra_argv): 296 | if v.startswith("--"): 297 | items = extra_argv[:i] 298 | if v == "--": 299 | i += 1 # skip '--' indicating further are passed on. 
300 | bench_args = extra_argv[i:] 301 | break 302 | else: 303 | items = extra_argv 304 | bench_args = [] 305 | 306 | if args.tests: 307 | items += args.tests 308 | if args.submodule: 309 | items += [args.submodule] 310 | for a in items: 311 | bench_args.extend(['--bench', a]) 312 | 313 | if not args.bench_compare: 314 | cmd = ['asv', 'run', '-n', '-e', '--python=same'] + bench_args 315 | ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks')) 316 | sys.exit(ret) 317 | else: 318 | commits = [x.strip() for x in args.bench_compare.split(',')] 319 | if len(commits) == 1: 320 | commit_a = commits[0] 321 | commit_b = 'HEAD' 322 | elif len(commits) == 2: 323 | commit_a, commit_b = commits 324 | else: 325 | p.error("Too many commits to compare benchmarks for") 326 | 327 | # Check for uncommitted files 328 | if commit_b == 'HEAD': 329 | r1 = subprocess.call(['git', 'diff-index', '--quiet', 330 | '--cached', 'HEAD']) 331 | r2 = subprocess.call(['git', 'diff-files', '--quiet']) 332 | if r1 != 0 or r2 != 0: 333 | print("*" * 80) 334 | print("WARNING: you have uncommitted changes --- " 335 | "these will NOT be benchmarked!") 336 | print("*" * 80) 337 | 338 | # Fix commit ids (HEAD is local to current repo) 339 | out = subprocess.check_output(['git', 'rev-parse', commit_b]) 340 | commit_b = out.strip().decode('ascii') 341 | 342 | out = subprocess.check_output(['git', 'rev-parse', commit_a]) 343 | commit_a = out.strip().decode('ascii') 344 | 345 | # generate config file with the required build options 346 | asv_cfpath = [ 347 | '--config', asv_compare_config( 348 | os.path.join(ROOT_DIR, 'benchmarks'), args, 349 | # to clear the cache if the user changed build options 350 | (commit_a, commit_b) 351 | ) 352 | ] 353 | cmd = ['asv', 'continuous', '-e', '-f', '1.05', 354 | commit_a, commit_b] + asv_cfpath + bench_args 355 | ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks')) 356 | sys.exit(ret) 357 | 358 | if args.build_only: 359 | sys.exit(0) 360 | 
else: 361 | __import__(PROJECT_MODULE) 362 | # from numpy.testing._private.nosetester import NoseTester 363 | # test = NoseTester.test 364 | test = sys.modules[PROJECT_MODULE].test 365 | 366 | if args.submodule: 367 | tests = [PROJECT_MODULE + "." + args.submodule] 368 | elif args.tests: 369 | tests = args.tests 370 | else: 371 | tests = None 372 | 373 | # Run the tests under build/test 374 | 375 | if not args.no_build: 376 | test_dir = site_dir 377 | else: 378 | test_dir = os.path.join(ROOT_DIR, 'build', 'test') 379 | if not os.path.isdir(test_dir): 380 | os.makedirs(test_dir) 381 | 382 | shutil.copyfile(os.path.join(ROOT_DIR, '.coveragerc'), 383 | os.path.join(test_dir, '.coveragerc')) 384 | 385 | cwd = os.getcwd() 386 | try: 387 | os.chdir(test_dir) 388 | 389 | result = test(args.mode, 390 | verbose=args.verbose, 391 | extra_argv=extra_argv, 392 | doctests=args.doctests, 393 | coverage=args.coverage, 394 | durations=args.durations, 395 | tests=tests) 396 | 397 | finally: 398 | os.chdir(cwd) 399 | 400 | if isinstance(result, bool): 401 | sys.exit(0 if result else 1) 402 | elif result.wasSuccessful(): 403 | sys.exit(0) 404 | else: 405 | sys.exit(1) 406 | 407 | 408 | def build_project(args): 409 | """ 410 | Build a dev version of the project. 
411 | 412 | Returns 413 | ------- 414 | site_dir 415 | site-packages directory where it was installed 416 | 417 | """ 418 | 419 | import sysconfig 420 | 421 | root_ok = [os.path.exists(os.path.join(ROOT_DIR, fn)) 422 | for fn in PROJECT_ROOT_FILES] 423 | if not all(root_ok): 424 | print("To build the project, run runtests.py in " 425 | "git checkout or unpacked source") 426 | sys.exit(1) 427 | 428 | dst_dir = os.path.join(ROOT_DIR, 'build', 'testenv') 429 | 430 | env = dict(os.environ) 431 | cmd = [sys.executable, 'setup.py'] 432 | 433 | # Always use ccache, if installed 434 | env['PATH'] = os.pathsep.join(EXTRA_PATH + env.get('PATH', '').split(os.pathsep)) 435 | cvars = sysconfig.get_config_vars() 436 | compiler = env.get('CC') or cvars.get('CC', '') 437 | if 'gcc' in compiler: 438 | # Check that this isn't clang masquerading as gcc. 439 | if sys.platform != 'darwin' or 'gnu-gcc' in compiler: 440 | # add flags used as werrors 441 | warnings_as_errors = ' '.join([ 442 | # from tools/travis-test.sh 443 | '-Werror=vla', 444 | '-Werror=nonnull', 445 | '-Werror=pointer-arith', 446 | '-Wlogical-op', 447 | # from sysconfig 448 | '-Werror=unused-function', 449 | ]) 450 | env['CFLAGS'] = warnings_as_errors + ' ' + env.get('CFLAGS', '') 451 | if args.debug or args.gcov: 452 | # assume everyone uses gcc/gfortran 453 | env['OPT'] = '-O0 -ggdb' 454 | env['FOPT'] = '-O0 -ggdb' 455 | if args.gcov: 456 | env['OPT'] = '-O0 -ggdb' 457 | env['FOPT'] = '-O0 -ggdb' 458 | env['CC'] = cvars['CC'] + ' --coverage' 459 | env['CXX'] = cvars['CXX'] + ' --coverage' 460 | env['F77'] = 'gfortran --coverage ' 461 | env['F90'] = 'gfortran --coverage ' 462 | env['LDSHARED'] = cvars['LDSHARED'] + ' --coverage' 463 | env['LDFLAGS'] = " ".join(cvars['LDSHARED'].split()[1:]) + ' --coverage' 464 | 465 | cmd += ["build"] 466 | if args.parallel > 1: 467 | cmd += ["-j", str(args.parallel)] 468 | if args.warn_error: 469 | cmd += ["--warn-error"] 470 | if args.cpu_baseline: 471 | cmd += ["--cpu-baseline", 
args.cpu_baseline] 472 | if args.cpu_dispatch: 473 | cmd += ["--cpu-dispatch", args.cpu_dispatch] 474 | if args.disable_optimization: 475 | cmd += ["--disable-optimization"] 476 | if args.simd_test is not None: 477 | cmd += ["--simd-test", args.simd_test] 478 | if args.debug_info: 479 | cmd += ["build_src", "--verbose-cfg"] 480 | # Install; avoid producing eggs so numpy can be imported from dst_dir. 481 | cmd += ['install', '--prefix=' + dst_dir, 482 | '--single-version-externally-managed', 483 | '--record=' + dst_dir + 'tmp_install_log.txt'] 484 | 485 | from distutils.sysconfig import get_python_lib 486 | site_dir = get_python_lib(prefix=dst_dir, plat_specific=True) 487 | site_dir_noarch = get_python_lib(prefix=dst_dir, plat_specific=False) 488 | # easy_install won't install to a path that Python by default cannot see 489 | # and isn't on the PYTHONPATH. Plus, it has to exist. 490 | if not os.path.exists(site_dir): 491 | os.makedirs(site_dir) 492 | if not os.path.exists(site_dir_noarch): 493 | os.makedirs(site_dir_noarch) 494 | env['PYTHONPATH'] = site_dir + os.pathsep + site_dir_noarch 495 | 496 | log_filename = os.path.join(ROOT_DIR, 'build.log') 497 | 498 | if args.show_build_log: 499 | ret = subprocess.call(cmd, env=env, cwd=ROOT_DIR) 500 | else: 501 | log_filename = os.path.join(ROOT_DIR, 'build.log') 502 | print("Building, see build.log...") 503 | with open(log_filename, 'w') as log: 504 | p = subprocess.Popen(cmd, env=env, stdout=log, stderr=log, 505 | cwd=ROOT_DIR) 506 | try: 507 | # Wait for it to finish, and print something to indicate the 508 | # process is alive, but only if the log file has grown (to 509 | # allow continuous integration environments kill a hanging 510 | # process accurately if it produces no output) 511 | last_blip = time.time() 512 | last_log_size = os.stat(log_filename).st_size 513 | while p.poll() is None: 514 | time.sleep(0.5) 515 | if time.time() - last_blip > 60: 516 | log_size = os.stat(log_filename).st_size 517 | if log_size > 
last_log_size: 518 | print(" ... build in progress") 519 | last_blip = time.time() 520 | last_log_size = log_size 521 | 522 | ret = p.wait() 523 | except: 524 | p.kill() 525 | p.wait() 526 | raise 527 | 528 | if ret == 0: 529 | print("Build OK") 530 | else: 531 | if not args.show_build_log: 532 | with open(log_filename, 'r') as f: 533 | print(f.read()) 534 | print("Build failed!") 535 | sys.exit(1) 536 | 537 | return site_dir, site_dir_noarch 538 | 539 | 540 | def asv_compare_config(bench_path, args, h_commits): 541 | """ 542 | Fill the required build options through custom variable 543 | 'numpy_build_options' and return the generated config path. 544 | """ 545 | conf_path = os.path.join(bench_path, "asv_compare.conf.json.tpl") 546 | nconf_path = os.path.join(bench_path, "_asv_compare.conf.json") 547 | 548 | # add custom build 549 | build = [] 550 | if args.parallel > 1: 551 | build += ["-j", str(args.parallel)] 552 | if args.cpu_baseline: 553 | build += ["--cpu-baseline", args.cpu_baseline] 554 | if args.cpu_dispatch: 555 | build += ["--cpu-dispatch", args.cpu_dispatch] 556 | if args.disable_optimization: 557 | build += ["--disable-optimization"] 558 | 559 | is_cached = asv_substitute_config(conf_path, nconf_path, 560 | numpy_build_options=' '.join([f'\\"{v}\\"' for v in build]), 561 | numpy_global_options=' '.join( 562 | [f'--global-option=\\"{v}\\"' for v in ["build"] + build]) 563 | ) 564 | if not is_cached: 565 | asv_clear_cache(bench_path, h_commits) 566 | return nconf_path 567 | 568 | 569 | def asv_clear_cache(bench_path, h_commits, env_dir="env"): 570 | """ 571 | Force ASV to clear the cache according to specified commit hashes. 
572 | """ 573 | # FIXME: only clear the cache from the current environment dir 574 | asv_build_pattern = os.path.join(bench_path, env_dir, "*", "asv-build-cache") 575 | for asv_build_cache in glob.glob(asv_build_pattern, recursive=True): 576 | for c in h_commits: 577 | try: 578 | shutil.rmtree(os.path.join(asv_build_cache, c)) 579 | except OSError: 580 | pass 581 | 582 | 583 | def asv_substitute_config(in_config, out_config, **custom_vars): 584 | """ 585 | A workaround to allow substituting custom tokens within 586 | ASV configuration file since there's no official way to add custom 587 | variables(e.g. env vars). 588 | 589 | Parameters 590 | ---------- 591 | in_config : str 592 | The path of ASV configuration file, e.g. '/path/to/asv.conf.json' 593 | out_config : str 594 | The path of generated configuration file, 595 | e.g. '/path/to/asv_substituted.conf.json'. 596 | 597 | The other keyword arguments represent the custom variables. 598 | 599 | Returns 600 | ------- 601 | True(is cached) if 'out_config' is already generated with 602 | the same '**custom_vars' and updated with latest 'in_config', 603 | False otherwise. 604 | 605 | Examples 606 | -------- 607 | See asv_compare_config(). 
608 | """ 609 | assert in_config != out_config 610 | assert len(custom_vars) > 0 611 | 612 | def sdbm_hash(*factors): 613 | chash = 0 614 | for f in factors: 615 | for char in str(f): 616 | chash = ord(char) + (chash << 6) + (chash << 16) - chash 617 | chash &= 0xFFFFFFFF 618 | return chash 619 | 620 | vars_hash = sdbm_hash(custom_vars, os.path.getmtime(in_config)) 621 | try: 622 | with open(out_config, "r") as wfd: 623 | hash_line = wfd.readline().split('hash:') 624 | if len(hash_line) > 1 and int(hash_line[1]) == vars_hash: 625 | return True 626 | except IOError: 627 | pass 628 | 629 | custom_vars = {f'{{{k}}}': v for k, v in custom_vars.items()} 630 | with open(in_config, "r") as rfd, open(out_config, "w") as wfd: 631 | wfd.write(f"// hash:{vars_hash}\n") 632 | wfd.write("// This file is automatically generated by runtests.py\n") 633 | for line in rfd: 634 | for key, val in custom_vars.items(): 635 | line = line.replace(key, val) 636 | wfd.write(line) 637 | return False 638 | 639 | 640 | # 641 | # GCOV support 642 | # 643 | def gcov_reset_counters(): 644 | print("Removing previous GCOV .gcda files...") 645 | build_dir = os.path.join(ROOT_DIR, 'build') 646 | for dirpath, dirnames, filenames in os.walk(build_dir): 647 | for fn in filenames: 648 | if fn.endswith('.gcda') or fn.endswith('.da'): 649 | pth = os.path.join(dirpath, fn) 650 | os.unlink(pth) 651 | 652 | 653 | # 654 | # LCOV support 655 | # 656 | 657 | LCOV_OUTPUT_FILE = os.path.join(ROOT_DIR, 'build', 'lcov.out') 658 | LCOV_HTML_DIR = os.path.join(ROOT_DIR, 'build', 'lcov') 659 | 660 | 661 | def lcov_generate(): 662 | try: 663 | os.unlink(LCOV_OUTPUT_FILE) 664 | except OSError: 665 | pass 666 | try: 667 | shutil.rmtree(LCOV_HTML_DIR) 668 | except OSError: 669 | pass 670 | 671 | print("Capturing lcov info...") 672 | subprocess.call(['lcov', '-q', '-c', 673 | '-d', os.path.join(ROOT_DIR, 'build'), 674 | '-b', ROOT_DIR, 675 | '--output-file', LCOV_OUTPUT_FILE]) 676 | 677 | print("Generating lcov HTML 
output...") 678 | ret = subprocess.call(['genhtml', '-q', LCOV_OUTPUT_FILE, 679 | '--output-directory', LCOV_HTML_DIR, 680 | '--legend', '--highlight']) 681 | if ret != 0: 682 | print("genhtml failed!") 683 | else: 684 | print("HTML output generated under build/lcov/") 685 | 686 | 687 | def check_lint(lint_args): 688 | """ 689 | Adds ROOT_DIR to path and performs lint checks. 690 | This functions exits the program with status code of lint check. 691 | """ 692 | sys.path.append(ROOT_DIR) 693 | try: 694 | from tools.linter import DiffLinter 695 | except ModuleNotFoundError as e: 696 | print(f"Error: {e.msg}. " 697 | "Install using linter_requirements.txt.") 698 | sys.exit(1) 699 | 700 | uncommitted = lint_args == "uncommitted" 701 | branch = "main" if uncommitted else lint_args 702 | 703 | DiffLinter(branch).run_lint(uncommitted) 704 | 705 | 706 | if __name__ == "__main__": 707 | main(argv=sys.argv[1:]) 708 | --------------------------------------------------------------------------------