├── .gitattributes
├── variational
│   ├── solvers
│   │   ├── __init__.py
│   │   ├── eig_qr
│   │   │   ├── __init__.py
│   │   │   └── eig_qr.pyx
│   │   ├── tests
│   │   │   ├── __init__.py
│   │   │   └── test_direct.py
│   │   └── direct.py
│   ├── estimators
│   │   ├── tests
│   │   │   ├── __init__.py
│   │   │   ├── benchmark_moments.py
│   │   │   ├── test_running_moments.py
│   │   │   └── test_moments.py
│   │   ├── covar_c
│   │   │   ├── __init__.py
│   │   │   ├── _covartools.h
│   │   │   ├── covartools.pyx
│   │   │   └── _covartools.c
│   │   ├── __init__.py
│   │   ├── running_moments.py
│   │   └── moments.py
│   ├── __init__.py
│   ├── util.py
│   └── _version.py
├── devtools
│   ├── conda-recipe
│   │   ├── build.sh
│   │   ├── bld.bat
│   │   ├── meta.yaml
│   │   └── run_test.py
│   └── ci
│       ├── travis
│       │   └── install_miniconda.sh
│       └── appveyor
│           ├── appveyor
│           │   ├── runTestsuite.ps1
│           │   ├── run_with_env.cmd
│           │   └── transform_xunit_to_appveyor.xsl
│           ├── process_test_results.ps1
│           ├── run_with_env.cmd
│           └── transform_xunit_to_appveyor.xsl
├── docs
│   ├── Interface.pdf
│   ├── Updating_Formulas.pdf
│   ├── Interface.lyx
│   └── Updating_Formulas.lyx
├── MANIFEST.in
├── examples
│   └── basissets_ramachandran
│       ├── torsion_A.npy
│       ├── torsion_FGAIL.npy
│       └── Example.py
├── .gitignore
├── setup.cfg
├── .travis.yml
├── appveyor.yml
├── README.md
└── setup.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | variational/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/variational/solvers/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'noe'
2 |
--------------------------------------------------------------------------------
/variational/solvers/eig_qr/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'noe'
--------------------------------------------------------------------------------
/variational/estimators/tests/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'noe'
2 |
--------------------------------------------------------------------------------
/variational/solvers/tests/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'noe'
2 |
--------------------------------------------------------------------------------
/variational/estimators/covar_c/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'noe'
2 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | $PYTHON setup.py install
3 |
--------------------------------------------------------------------------------
/docs/Interface.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/variational/master/docs/Interface.pdf
--------------------------------------------------------------------------------
/docs/Updating_Formulas.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/variational/master/docs/Updating_Formulas.pdf
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # versioneer files
2 | include versioneer.py
3 | include variational/_version.py
4 |
5 | recursive-include variational *.pyx *.c *.h
6 |
--------------------------------------------------------------------------------
/examples/basissets_ramachandran/torsion_A.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/variational/master/examples/basissets_ramachandran/torsion_A.npy
--------------------------------------------------------------------------------
/examples/basissets_ramachandran/torsion_FGAIL.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/variational/master/examples/basissets_ramachandran/torsion_FGAIL.npy
--------------------------------------------------------------------------------
/variational/estimators/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .moments import moments_XX, moments_XXXY, moments_block
4 | from .moments import covar, covars
5 | from .running_moments import RunningCovar, running_covar
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # compiled files
2 | *.pyc
3 | *.so
4 | build
5 | variational.egg-info
6 | dist
7 |
8 | # project files
9 | .idea
10 |
11 | # generated files
12 | variational/version.py
13 | variational/estimators/covar_c/covartools.c
14 | variational/solvers/eig_qr/eig_qr.c
15 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/bld.bat:
--------------------------------------------------------------------------------
1 | if not defined APPVEYOR (
2 | echo not on appveyor
3 | "%PYTHON%" setup.py install
4 | ) else (
5 | echo on appveyor
6 | cmd /E:ON /V:ON /C %APPVEYOR_BUILD_FOLDER%\devtools\ci\appveyor\run_with_env.cmd "%PYTHON%" setup.py install
7 | )
8 | if errorlevel 1 exit 1
9 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 |
2 | # See the docstring in versioneer.py for instructions. Note that you must
3 | # re-run 'versioneer.py setup' after changing this section, and commit the
4 | # resulting files.
5 |
6 | [versioneer]
7 | VCS = git
8 | style = pep440
9 | versionfile_source = variational/_version.py
10 | #versionfile_build =
11 | tag_prefix =
12 | parentdir_prefix = variational-
13 |
14 |
--------------------------------------------------------------------------------
/devtools/ci/travis/install_miniconda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # make TARGET overrideable with env
4 | : ${TARGET:=$HOME/miniconda}
5 |
6 | function install_miniconda {
7 | echo "installing miniconda to $TARGET"
8 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O mc.sh -o /dev/null
9 | bash mc.sh -b -f -p $TARGET
10 | }
11 |
12 | install_miniconda
13 | export PATH=$TARGET/bin:$PATH
14 |
--------------------------------------------------------------------------------
/variational/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | __author__ = 'noe'
3 |
4 | # import subpackages such that they are available after the main package import
5 | from . import estimators
6 | from . import solvers
7 |
8 | # direct imports of important functions/classes to top-level API
9 | from .solvers.direct import eig_corr
10 | from .solvers.direct import sort_by_norm
11 | from .solvers.eig_qr.eig_qr import eig_qr
12 |
13 | from ._version import get_versions
14 | __version__ = get_versions()['version']
15 | del get_versions
16 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: variational
3 | version: !!str dev
4 | source:
5 | path: ../..
6 |
7 | build:
8 | preserve_egg_dir: True
9 |
10 | requirements:
11 | build:
12 | - python
13 | - setuptools
14 | - cython >=0.20
15 | - numpy >=1.7
16 | - scipy
17 | - six
18 |
19 | run:
20 | - python
21 | - setuptools
22 | - numpy >=1.7
23 | - scipy
24 | - six
25 |
26 | test:
27 | requires:
28 | - nose
29 | - coverage
30 | imports:
31 | - variational
32 |
33 |
--------------------------------------------------------------------------------
/variational/util.py:
--------------------------------------------------------------------------------
1 | """ Add convenience functions here if needed
2 | """
3 |
4 | __author__ = 'noe'
5 |
6 |
7 |
8 | def features_to_basis(infiles, basisset, outfiles):
9 | """Reads input files
10 |
11 | basisset : BasisSet object
12 | basis set tob e used
13 |
14 | References
15 | ---------
16 | .. [5] Vitalini, F., Noe, F. and Keller, B. (2015):
17 | A basis set for peptides for the variational approach to conformational kinetics. (In review).
18 |
19 | """
20 | # cycle through input files; a minimal sketch assuming .npy input/output files
21 | # and a basisset providing a map() method (see examples/basissets_ramachandran/Example.py)
22 | import numpy as np
23 | for infile, outfile in zip(infiles, outfiles):
24 |     X = np.load(infile)      # read infile
25 |     Y = basisset.map(X)      # map to basis function values
26 |     np.save(outfile, Y)      # write outfile
27 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 |
3 | sudo: false
4 |
5 | env:
6 | global:
7 | - PATH=$HOME/miniconda/bin:$PATH
8 | - common_py_deps="pylint jinja2 conda-build"
9 | matrix:
10 | - python=2.7 CONDA_PY=27 CONDA_NPY=17
11 | - python=3.4 CONDA_PY=34 CONDA_NPY=18
12 | - python=3.4 CONDA_PY=34 CONDA_NPY=19
13 | - python=3.5 CONDA_PY=35 CONDA_NPY=19
14 |
15 | before_install:
16 | - devtools/ci/travis/install_miniconda.sh
17 | - conda config --set always_yes true
18 | - conda config --add channels omnia
19 | - conda install -q $common_py_deps
20 |
21 | script:
22 | - conda build -q devtools/conda-recipe
23 |
24 | after_success:
25 | # coverage report: needs .coverage file generated by testsuite and git src
26 | - pip install coveralls
27 | - coveralls
28 |
29 |
--------------------------------------------------------------------------------
/variational/estimators/covar_c/_covartools.h:
--------------------------------------------------------------------------------
1 | #ifndef _covartools_h_
2 | #define _covartools_h_
3 |
4 | void _subtract_row_double(double* X, double* row, int M, int N);
5 | void _subtract_row_float(float* X, float* row, int M, int N);
6 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N);
7 | void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N);
8 | int* _bool_to_list(int* b, int N, int nnz);
9 | void _variable_cols_char(int* cols, char* X, int M, int N, int min_constant);
10 | void _variable_cols_int(int* cols, int* X, int M, int N, int min_constant);
11 | void _variable_cols_long(int* cols, long* X, int M, int N, int min_constant);
12 | void _variable_cols_float(int* cols, float* X, int M, int N, int min_constant);
13 | void _variable_cols_double(int* cols, double* X, int M, int N, int min_constant);
14 | void _variable_cols_float_approx(int* cols, float* X, int M, int N, float tol, int min_constant);
15 | void _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant);
16 |
17 | #endif
--------------------------------------------------------------------------------
/devtools/ci/appveyor/appveyor/runTestsuite.ps1:
--------------------------------------------------------------------------------
1 | function xslt_transform($xml, $xsl, $output)
2 | {
3 | trap [Exception]
4 | {
5 | Write-Host $_.Exception
6 | }
7 |
8 | $xslt = New-Object System.Xml.Xsl.XslCompiledTransform
9 | $xslt.Load($xsl)
10 | $xslt.Transform($xml, $output)
11 | }
12 |
13 | function upload($file) {
14 | trap [Exception]
15 | {
16 | Write-Host $_.Exception
17 | }
18 |
19 | $wc = New-Object 'System.Net.WebClient'
20 | $wc.UploadFile("https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)", $file)
21 | }
22 |
23 | function run {
24 | cd $env:APPVEYOR_BUILD_FOLDER
25 | $stylesheet = "devtools/ci/appveyor/transform_xunit_to_appveyor.xsl"
26 | $input = "nosetests.xml"
27 | $output = "transformed.xml"
28 |
29 | if ( -not (Test-Path $input) ) {
30 | Write-Host "$input does not exist"
31 | return
32 | }
33 | xslt_transform $input $stylesheet $output
34 |
35 | upload $output
36 | Push-AppveyorArtifact $input
37 | Push-AppveyorArtifact $output
38 | }
39 |
40 | run
41 |
--------------------------------------------------------------------------------
/devtools/ci/appveyor/process_test_results.ps1:
--------------------------------------------------------------------------------
1 | function xslt_transform($xml, $xsl, $output)
2 | {
3 | trap [Exception]
4 | {
5 | Write-Host $_.Exception
6 | }
7 |
8 | $xslt = New-Object System.Xml.Xsl.XslCompiledTransform
9 | $xslt.Load($xsl)
10 | $xslt.Transform($xml, $output)
11 | }
12 |
13 | function upload($file) {
14 | trap [Exception]
15 | {
16 | Write-Host $_.Exception
17 | }
18 |
19 | $wc = New-Object 'System.Net.WebClient'
20 | $wc.UploadFile("https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)", $file)
21 | }
22 |
23 | function run {
24 | cd $env:APPVEYOR_BUILD_FOLDER
25 | $stylesheet = "devtools/ci/appveyor/transform_xunit_to_appveyor.xsl"
26 | $input = "nosetests.xml"
27 | $output = "transformed.xml"
28 | # if ( -not Test-Path $input ) {
29 | # throw "input file missing"
30 | # }
31 | xslt_transform $input $stylesheet $output
32 |
33 | upload $output
34 | Push-AppveyorArtifact $input
35 | Push-AppveyorArtifact $output
36 |
37 | # return exit code of testsuite
38 | if ( -not $success) {
39 | throw "testsuite not successful"
40 | }
41 | }
42 |
43 | run
44 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 | global:
3 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
4 | # /E:ON and /V:ON options are not enabled in the batch script intepreter
5 | # See: http://stackoverflow.com/a/13751649/163740
6 | # this is being set in bld.bat of conda-recipe...
7 | #CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\devtools\\ci\\appveyor\\run_with_env.cmd"
8 | PYTHONUNBUFFERED: 1
9 |
10 | matrix:
11 | - PYTHON: "C:\\Miniconda"
12 | CONDA_PY: "27"
13 |
14 | - PYTHON: "C:\\Miniconda-x64"
15 | CONDA_PY: "27"
16 | ARCH: "64"
17 |
18 | - PYTHON: "C:\\Miniconda3"
19 | CONDA_PY: "34"
20 |
21 | - PYTHON: "C:\\Miniconda3-x64"
22 | CONDA_PY: "34"
23 | ARCH: "64"
24 |
25 | install:
26 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%;"
27 |
28 | - conda config --set always_yes true
29 | - conda config --add channels omnia
30 | - conda install -q conda-build jinja2
31 |
32 |
33 | build: false # Not a C# project, build stuff at the test step instead.
34 |
35 | test_script:
36 | # run testsuite and upload test results to AppVeyor; return exit code of testsuite
37 | - conda build -q devtools/conda-recipe
38 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/run_test.py:
--------------------------------------------------------------------------------
1 |
2 | import subprocess
3 | import os
4 | import sys
5 | import shutil
6 | import re
7 |
8 | src_dir = os.getenv('SRC_DIR')
9 |
10 |
11 | # matplotlib headless backend
12 | with open('matplotlibrc', 'w') as fh:
13 | fh.write('backend: Agg')
14 |
15 |
16 | def coverage_report():
17 | fn = '.coverage'
18 | assert os.path.exists(fn)
19 | build_dir = os.getenv('TRAVIS_BUILD_DIR')
20 | dest = os.path.join(build_dir, fn)
21 | print( "copying coverage report to", dest)
22 | shutil.copy(fn, dest)
23 | assert os.path.exists(dest)
24 |
25 | # fix paths in .coverage file
26 | with open(dest, 'r') as fh:
27 | data = fh.read()
28 | match= '"/home/travis/miniconda/envs/_test/lib/python.+?/site-packages/.+?/(variational/.+?)"'
29 | repl = '"%s/\\1"' % build_dir
30 | data = re.sub(match, repl, data)
31 | os.unlink(dest)
32 | with open(dest, 'w+') as fh:
33 | fh.write(data)
34 |
35 | nose_run = "nosetests variational -vv" \
36 | " --with-coverage --cover-inclusive --cover-package=variational" \
37 | " --with-doctest --doctest-options=+NORMALIZE_WHITESPACE,+ELLIPSIS" \
38 | .split(' ')
39 |
40 | res = subprocess.call(nose_run)
41 |
42 |
43 | # move .coverage file to git clone on Travis CI
44 | if os.getenv('TRAVIS', False):
45 | coverage_report()
46 |
47 | if False: #os.getenv('APPVEYOR', False):
48 | call = ('powershell ' + os.path.join(os.getenv('APPVEYOR_BUILD_FOLDER'),
49 | 'devtools', 'ci', 'appveyor',
50 | 'process_test_results.ps1')).split(' ')
51 | res |= subprocess.call(call)
52 |
53 | sys.exit(res)
54 |
55 |
--------------------------------------------------------------------------------
/devtools/ci/appveyor/run_with_env.cmd:
--------------------------------------------------------------------------------
1 | :: To build extensions for 64 bit Python 3, we need to configure environment
2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of:
3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1)
4 | ::
5 | :: To build extensions for 64 bit Python 2, we need to configure environment
6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of:
7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0)
8 | ::
9 | :: 32 bit builds do not require specific environment configurations.
10 | ::
11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the
12 | :: cmd interpreter, at least for (SDK v7.0)
13 | ::
14 | :: More details at:
15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows
16 | :: http://stackoverflow.com/a/13751649/163740
17 | ::
18 | :: Author: Olivier Grisel
19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
20 | @ECHO OFF
21 |
22 | SET COMMAND_TO_RUN=%*
23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows
24 |
25 | SET MAJOR_PYTHON_VERSION="%CONDA_PY:~0,1%"
26 | IF %MAJOR_PYTHON_VERSION% == "2" (
27 | SET WINDOWS_SDK_VERSION="v7.0"
28 | ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" (
29 | SET WINDOWS_SDK_VERSION="v7.1"
30 | ) ELSE (
31 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%"
32 | EXIT 1
33 | )
34 |
35 | IF "%ARCH%"=="64" (
36 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture
37 | SET DISTUTILS_USE_SDK=1
38 | SET MSSdk=1
39 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
41 | ECHO Executing: %COMMAND_TO_RUN%
42 | call %COMMAND_TO_RUN% || EXIT 1
43 | ) ELSE (
44 | ECHO Using default MSVC build environment for 32 bit architecture
45 | ECHO Executing: %COMMAND_TO_RUN%
46 | call %COMMAND_TO_RUN% || EXIT 1
47 | )
48 |
--------------------------------------------------------------------------------
/devtools/ci/appveyor/appveyor/run_with_env.cmd:
--------------------------------------------------------------------------------
1 | :: To build extensions for 64 bit Python 3, we need to configure environment
2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of:
3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1)
4 | ::
5 | :: To build extensions for 64 bit Python 2, we need to configure environment
6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of:
7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0)
8 | ::
9 | :: 32 bit builds do not require specific environment configurations.
10 | ::
11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the
12 | :: cmd interpreter, at least for (SDK v7.0)
13 | ::
14 | :: More details at:
15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows
16 | :: http://stackoverflow.com/a/13751649/163740
17 | ::
18 | :: Author: Olivier Grisel
19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
20 | @ECHO OFF
21 |
22 | SET COMMAND_TO_RUN=%*
23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows
24 |
25 | SET MAJOR_PYTHON_VERSION="%CONDA_PY:~0,1%"
26 | IF %MAJOR_PYTHON_VERSION% == "2" (
27 | SET WINDOWS_SDK_VERSION="v7.0"
28 | ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" (
29 | SET WINDOWS_SDK_VERSION="v7.1"
30 | ) ELSE (
31 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%"
32 | EXIT 1
33 | )
34 |
35 | IF "%ARCH%"=="64" (
36 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture
37 | SET DISTUTILS_USE_SDK=1
38 | SET MSSdk=1
39 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
41 | ECHO Executing: %COMMAND_TO_RUN%
42 | call %COMMAND_TO_RUN% || EXIT 1
43 | ) ELSE (
44 | ECHO Using default MSVC build environment for 32 bit architecture
45 | ECHO Executing: %COMMAND_TO_RUN%
46 | call %COMMAND_TO_RUN% || EXIT 1
47 | )
48 |
--------------------------------------------------------------------------------
/variational/solvers/eig_qr/eig_qr.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport scipy.linalg.cython_lapack as scc
3 |
4 | def eig_qr(A):
5 | """ Compute eigenvalues and eigenvectors of symmetric matrix A using symmetric tridiagonal QR-algorithm
6 | with implicit shifts. The matrix is first transformed to tridiagonal shape using lapack's dsytrd routine.
7 | Then, the tridiagonal QR-iteration is performed using lapack's dsteqr routine.
8 |
9 | Parameters
10 | ----------
11 | A : ndarray (N, N)
12 | symmetric matrix.
13 |
14 | Returns
15 | -------
16 | D, ndarray(N,)
17 | array of eigenvalues of A
18 | B, ndarray(N, N)
19 | array of eigenvectors of A.
20 | """
21 |
22 | # handle 1x1 case
23 | if np.size(A) == 1: # size can handle 1x1 arrays and numbers
24 | return A*np.ones(1), np.ones((1, 1))
25 |
26 | # Definitions:
27 | cdef double[:,:] B = np.require(A, dtype=np.float64, requirements=["F", "A"])
28 | cdef int n=A.shape[0], lda=A.shape[0], info, lwork=-1
29 | cdef char[:] uplo = np.zeros(1, "S1")
30 | uplo[:] = "U"
31 | cdef double[:] D = np.require(np.zeros(n), dtype=np.float64, requirements=["F", "A"])
32 | cdef double[:] E = np.require(np.zeros(n-1), dtype=np.float64, requirements=["F", "A"])
33 | cdef double[:] Tau = np.require(np.zeros(n-1), dtype=np.float64, requirements=["F", "A"])
34 | cdef double[:] Work = np.require(np.zeros(1), dtype=np.float64, requirements=["F", "A"])
35 |
36 | # Transform to tridiagonal shape:
37 | scc.dsytrd(&uplo[0], &n, &B[0, 0], &lda, &D[0], &E[0], &Tau[0], &Work[0], &lwork, &info)
38 | lwork = np.int(Work[0])
39 | cdef double[:] Work2 = np.require(np.zeros(lwork), dtype=np.float64, requirements=["F", "A"])
40 | scc.dsytrd(&uplo[0], &n, &B[0, 0], &lda, &D[0], &E[0], &Tau[0], &Work2[0], &lwork, &info)
41 |
42 | # Extract transformation to tridiagonal shape:
43 | lwork = -1
44 | scc.dorgtr(&uplo[0], &n, &B[0, 0], &lda, &Tau[0], &Work[0], &lwork, &info)
45 | lwork = np.int(Work[0])
46 | cdef double[:] Work3 = np.require(np.zeros(lwork), dtype=np.float64, requirements=["F", "A"])
47 | scc.dorgtr(&uplo[0], &n, &B[0, 0], &lda, &Tau[0], &Work3[0], &lwork, &info)
48 |
49 | # Run QR-iteration.
50 | cdef double[:] Work4 = np.require(np.zeros(np.maximum(1,2*n-2)), dtype=np.float64, requirements=["F", "A"])
51 | cdef char[:] compz = np.zeros(1, "S1")
52 | compz[:] = "V"
53 | scc.dsteqr(&compz[0], &n, &D[0], &E[0], &B[0, 0], &n, &Work4[0], &info)
54 |
55 | return np.asarray(D), np.asarray(B)
56 |
57 |
58 |
59 |
60 |
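61 | # Minimal usage sketch (requires the compiled extension module):
62 | #   >>> import numpy as np
63 | #   >>> A = np.array([[2.0, 1.0], [1.0, 3.0]])
64 | #   >>> D, B = eig_qr(A)   # eigenvalues D and eigenvectors B (columns) of A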
--------------------------------------------------------------------------------
/variational/solvers/tests/test_direct.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import unittest
3 | import numpy as np
4 | from .. import direct
5 |
6 | __author__ = 'noe'
7 |
8 |
9 | def sort_by_norm_and_imag_sign(evals, evecs):
10 | arr = np.zeros((len(evals),), dtype=[('mag', np.float64), ('sign', np.float64)])
11 | arr['mag'] = np.abs(evals)
12 | arr['sign'] = np.sign((np.imag(evals)))
13 | I = np.argsort(arr, order=['mag', 'sign'])[::-1]
14 | return evals[I], evecs[:, I]
15 |
16 |
17 | class TestDirect(unittest.TestCase):
18 |
19 | @classmethod
20 | def setUpClass(cls):
21 | pass
22 |
23 | def test_spd_inv_split(self):
24 | W = np.array([[1.0, 0.3, 0.2],
25 | [0.3, 0.8, 0.5],
26 | [0.2, 0.5, 0.9]])
27 | for method in ['QR', 'schur']:
28 | L = direct.spd_inv_split(W, method=method)
29 | # Test if decomposition is correct: inv(W) == L L.T
30 | assert np.allclose(np.dot(L, L.T), np.linalg.inv(W))
31 | # Test if matrices are orthogonal
32 | C = np.dot(L.T, L)
33 | assert np.max(np.abs(C - np.diag(np.diag(C)))) < 1e-12
34 |
35 | # Test if fails when given a nonsymmetric matrix
36 | W = np.array([[1.0, 0.2],
37 | [0.3, 0.8]])
38 | with self.assertRaises(AssertionError):
39 | direct.spd_inv_split(W)
40 |
41 | def test_eig_corr(self):
42 | C0 = np.array([[1.0, 0.3, 0.2],
43 | [0.3, 0.8, 0.5],
44 | [0.2, 0.5, 0.9]])
45 | Ct_sym = np.array([[0.5, 0.1, 0.0],
46 | [0.1, 0.3, 0.3],
47 | [0.0, 0.3, 0.2]])
48 | Ct_nonsym = np.array([[0.5, 0.1, 0.3],
49 | [0.1, 0.3, 0.3],
50 | [0.0, 0.3, 0.2]])
51 | # reference solution
52 | import scipy.linalg
53 | for Ct in [Ct_sym, Ct_nonsym]:
54 | v0, R0 = scipy.linalg.eig(Ct, C0)
55 | v0, R0 = sort_by_norm_and_imag_sign(v0, R0)
56 | for method in ['QR', 'schur']:
57 | # Test correctness
58 | v, R = direct.eig_corr(C0, Ct, method=method)
59 | v, R = sort_by_norm_and_imag_sign(v, R)
60 | assert np.allclose(v0, v) # eigenvalues equal?
61 | # eigenvectors equivalent?
62 | for i in range(R0.shape[1]):
63 | assert np.allclose(R0[:, i] / R0[0, i], R[:, i] / R[0, i])
64 | # Test if eigenpair diagonalizes the Koopman matrix
65 | K = np.dot(np.linalg.inv(C0), Ct)
66 | assert np.allclose(K, R.dot(np.diag(v)).dot(np.linalg.inv(R)))
67 |
68 |
69 | if __name__ == "__main__":
70 | unittest.main()
71 |
--------------------------------------------------------------------------------
/examples/basissets_ramachandran/Example.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Jul 31 10:55:24 2015
4 |
5 | @author: fvitalini
6 | """
7 |
8 | """
9 | This script contains examples of usage for the classes:
10 | RamachandranBasis
11 | RamachandranProductBasis
12 | which are contained in the variational package.
13 | """
14 |
15 |
16 | import variational
17 | import numpy as np
18 |
19 | #Use of the function RamachandranBasis
20 |
21 | from variational.basissets.ramachandran import RamachandranBasis
22 | alabasis = RamachandranBasis('A', radians=False) #load the residue centered basis
23 | #function for residue Alanine and
24 | #default force field (ff_AMBER99SB_ILDN)
25 | #three eigenvectors are considered (order=2)
26 | #expects the timeseries in degrees.
27 | atraj = np.load('torsion_A.npy') #the file contains the phi/psi timeseries for residue A
28 | print(atraj[0:10, :]) #first 10 timesteps only
29 | ala_basis_traj=alabasis.map(atraj) # projects the trajectory onto the residue basis function
30 | print(ala_basis_traj[0:10, :]) #first 10 timesteps only
31 |
32 |
33 | #Use of the function RamachandranProductBasis
34 |
35 | # 1: Different number excitations
36 | from variational.basissets.ramachandran import RamachandranProductBasis
37 | FGAILbasis=RamachandranProductBasis('FGAIL', n_excite=3, radians=False) #load the residue centered basis
38 | #functions for residues F-G-A-I-L and
39 | #default force field (ff_AMBER99SB_ILDN)
40 | #three eigenvectors are considered (order=2)
41 | #up to 3 excited residue per basis function (n_excite=3)
42 | #expects the timeseries in degrees.
43 | FGAIL_traj = np.load('torsion_FGAIL.npy') #the file contains the phi/psi timeseries for residues FGAIL
44 | print(FGAIL_traj[0:10, :]) #first 10 timesteps only
45 | FGAIL_basis_set_traj, FGAIL_basis_set_list=FGAILbasis.map(FGAIL_traj) #projects the trajectory onto the residue basis functions
46 | print(FGAIL_basis_set_list)
47 | print(FGAIL_basis_set_traj[0:10, :]) #first 10 timesteps only
48 |
49 | # 2: Select only residues FG
50 | FGbasis=RamachandranProductBasis('FGAIL',include_res=[True,True,False,False,False], radians=False) #load the residue centered basis
51 | #functions for residues F-G and
52 | #default force field (ff_AMBER99SB_ILDN)
53 | #three eigenvectors are considered (order=2)
54 | #2 excited residue per basis function (n_excite=2)
55 | #expects the timeseries in degrees.
56 | FG_basis_set_traj, FG_basis_set_list=FGbasis.map(FGAIL_traj) #projects the trajectory onto the residue basis functions
57 | print(FG_basis_set_list)
58 | print(FG_basis_set_traj[0:10, :]) #first 10 timesteps only
59 | print(FG_basis_set_traj[0:10, 0]) #first 10 timesteps of basis function 00
60 | print(FG_basis_set_traj[0:10, 1]) #first 10 timesteps of basis function 01
61 | print(FG_basis_set_traj[0:10, 8]) #first 10 timesteps of basis function 22
--------------------------------------------------------------------------------
/devtools/ci/appveyor/transform_xunit_to_appveyor.xsl:
--------------------------------------------------------------------------------
[XSLT stylesheet (transforms xunit test results to the AppVeyor format); the markup was stripped in this dump — only the outcome labels "Fail", "Skip" and "Pass" remain.]
--------------------------------------------------------------------------------
/devtools/ci/appveyor/appveyor/transform_xunit_to_appveyor.xsl:
--------------------------------------------------------------------------------
[XSLT stylesheet (transforms xunit test results to the AppVeyor format); the markup was stripped in this dump — only the outcome labels "Fail", "Skip" and "Pass" remain.]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Variational Approach for conformation dynamics (VAC)
2 | ====================================================
3 |
4 | This package contains basis sets, estimators and solvers for the variational approach for
5 | conformation dynamics, a theory that has been proposed in [1] and was further developed in
6 | [2] and [3]. The variational approach is analogous to the Ritz method [4] that is
7 | employed in computational quantum chemistry. It differs in how the involved
8 | matrices are computed and in the meaning of the involved operators, eigenfunctions
9 | and eigenvalues - see [3] for a comparison.
10 |
11 | Roughly, the idea of the VAC is as follows: Given a (classical)
12 | molecular dynamics trajectory with configurations {x_1, ..., x_T}, and a
13 | set of basis functions defined on the space of configurations {chi_1(x), ..., chi_n(x)},
14 | we compute the two correlation matrices:
15 |
16 | c_ij (0) = < chi_i(x_t) chi_j(x_t) >_t
17 | c_ij (tau) = < chi_i(x_t) chi_j(x_t+tau) >_t
18 |
19 | where < . >_t is the average over time t. Of course this can be generalized to many trajectories.
20 | Then we solve the generalized eigenvalue problem
21 |
22 | C(tau) r = C(0) r l(tau)
23 |
24 | where the eigenvalues l(tau) approximate the dominant eigenvalues of the Markov propagator
25 | or Markov backward propagator of the underlying dynamics. The corresponding eigenfunction
26 | of the backward propagator is approximated by
27 |
28 | psi(x) = sum_i r_i chi_i(x)
29 |
30 | Package functionalities
31 | -----------------------
32 |
33 | This package aims at providing code to help address a number of key problems:
34 |
35 | 1. Basis sets for molecular dynamics (MD), and in particular protein dynamics. See [5] for a
36 | first approach in this direction.
37 |
38 | 2. Estimators for the correlation matrices C(0), C(tau). The trivial time-average that is usually
39 | employed has a number of problems especially for many short simulation trajectories that are
40 | initiated far from the equilibrium distribution (the usual case!).
41 |
42 | 3. Solvers for accurately solving the eigenvalue problem above, even for huge basis sets.
43 |
44 | At this time only a few of the above functionalities are implemented and we will go step by step.
45 | This package will undergo heavy development and there is currently no date for an official
46 | release, so don't be surprised if the API (the look + feel of functions and classes) changes.
47 | At the moment this package is purely intended for development purposes, so use it at your own
48 | risk.
49 |
50 | Applications
51 | ------------
52 | 1. The time-lagged independent component analysis (TICA) method originally developed in [6] and
53 | proposed as an optimal data transformation method for building Markov state models of MD
54 | in [3,7] is a VAC with mean-free basis functions. Therefore you can easily implement TICA with
55 | this package.
56 |
57 | 2. By transforming the internal coordinates such as torsion angles or interatomic distances into
58 | suitable basis functions, you can approximate experimentally-measurable relaxation timescales
59 | and determine the corresponding structural rearrangements for peptides and proteins [2,5].
60 |
61 | 3. ... more will follow.
62 |
63 | References
64 | ----------
65 | [1] Noe, F. and Nueske, F. (2013): A variational approach to modeling slow processes in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655.
66 |
67 | [2] Nueske, F., Keller, B., Perez-Hernandez, G., Mey, A.S.J.S. and Noe, F. (2014) Variational Approach to Molecular Kinetics. J. Chem. Theory Comput. 10, 1739-1752.
68 |
69 | [3] Perez-Hernandez, G., Paul, F., Giorgino, T., De Fabritiis, G. and Noe, F. (2013) Identification of slow molecular order parameters for Markov model construction. J. Chem. Phys. 139, 015102.
70 |
71 | [4] Ritz, W. (1909): Ueber eine neue Methode zur Loesung gewisser Variationsprobleme der mathematischen Physik. J. Reine Angew. Math., 135, 1–61.
72 |
73 | [5] Vitalini, F., Noé, F. and Keller, B. (2015): A basis set for peptides for the variational approach to conformational kinetics. (In review).
74 |
75 | [6] Molgedey, L. and Schuster, H. G. (1994): Separation of a mixture of independent signals using time delayed correlations. Phys. Rev. Lett. 72, 3634.
76 |
77 | [7] Schwantes, C. R. and Pande, V. S. (2013): Improvements in Markov State Model Construction Reveal Many Non-Native Interactions in the Folding of NTL9. J. Chem. Theory Comput. 9, 2000-2009.
78 |
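79 | Example usage
80 | -------------
81 | A minimal sketch of the solver API (toy matrices stand in for correlation
82 | matrices estimated from basis-function trajectories):
83 |
84 |     import numpy as np
85 |     import variational
86 |
87 |     # toy correlation matrices: C0 symmetric positive definite, Ct symmetric
88 |     C0 = np.array([[1.0, 0.3], [0.3, 0.8]])
89 |     Ct = np.array([[0.5, 0.1], [0.1, 0.3]])
90 |
91 |     # solve C(tau) r = C(0) r l(tau); eigenvalues sorted by descending norm
92 |     l, R = variational.eig_corr(C0, Ct)
93 |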
--------------------------------------------------------------------------------
/variational/solvers/direct.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as _np
3 | __author__ = 'noe'
4 |
5 |
6 | def sort_by_norm(evals, evecs):
7 | """
8 | Sorts the eigenvalues and eigenvectors by descending norm of the eigenvalues
9 |
10 | Parameters
11 | ----------
12 | evals: ndarray(n)
13 | eigenvalues
14 | evecs: ndarray(n,n)
15 | eigenvectors in a column matrix
16 |
17 | Returns
18 | -------
19 | (evals, evecs) : ndarray(m), ndarray(n,m)
20 | the sorted eigenvalues and eigenvectors
21 |
22 | """
23 | # norms
24 | evnorms = _np.abs(evals)
25 | # sort
26 | I = _np.argsort(evnorms)[::-1]
27 | # permute
28 | evals2 = evals[I]
29 | evecs2 = evecs[:, I]
30 | # done
31 | return evals2, evecs2
32 |
33 |
34 | def spd_inv_split(W, epsilon=1e-10, method='QR', canonical_signs=False):
35 | """
36 | Compute :math:`W^{-1} = L L^T` of the symmetric positive-definite matrix :math:`W`,
37 | by first reducing W to a low-rank approximation that is truly spd.
38 |
39 |
40 | Parameters
41 | ----------
42 | W : ndarray((m,m), dtype=float)
43 | Symmetric positive-definite (spd) matrix.
44 | epsilon : float
45 | Truncation parameter. Eigenvalues with norms smaller than this cutoff will
46 | be removed.
47 | method : str
48 | Method to perform the decomposition of :math:`W` before inverting. Options are:
49 |
50 | * 'QR': QR-based robust eigenvalue decomposition of W
51 | * 'schur': Schur decomposition of W
52 |
53 | canonical_signs : boolean, default = False
54 | Fix signs in L, such that the largest element in every column of L is positive.
55 |
56 | Returns
57 | -------
58 | L : ndarray((n, r))
59 | Matrix :math:`L` from the decomposition :math:`W^{-1} = L L^T`.
60 |
61 | """
62 | # check input
63 | assert _np.allclose(W.T, W), 'W is not a symmetric matrix'
64 |
65 | if (_np.shape(W)[0] == 1):
66 | L = 1./_np.sqrt(W[0,0])
67 | else:
68 | if method.lower() == 'qr':
69 | from .eig_qr.eig_qr import eig_qr
70 | s, V = eig_qr(W)
71 | # compute the Eigenvalues of C0 using Schur factorization
72 | elif method.lower() == 'schur':
73 | from scipy.linalg import schur
74 | S, V = schur(W)
75 | s = _np.diag(S)
76 | else:
77 | raise ValueError('method not implemented: ' + method)
78 |
79 | s, V = sort_by_norm(s, V) # sort them
80 |
81 | # determine the cutoff. We know that C0 is an spd matrix,
82 | # so we select the truncation threshold such that everything that is negative vanishes
83 | evmin = _np.min(s)
84 | if evmin < 0:
85 | epsilon = max(epsilon, -evmin + 1e-16)
86 |
87 | # determine effective rank m and perform low-rank approximations.
88 | evnorms = _np.abs(s)
89 | n = _np.shape(evnorms)[0]
90 | m = n - _np.searchsorted(evnorms[::-1], epsilon)
91 | Vm = V[:, 0:m]
92 | sm = s[0:m]
93 |
94 | if canonical_signs:
95 | # enforce canonical eigenvector signs
96 | for j in range(m):
97 | jj = _np.argmax(_np.abs(Vm[:, j]))
98 | Vm[:, j] *= _np.sign(Vm[jj, j])
99 |
100 | L = _np.dot(Vm, _np.diag(1.0/_np.sqrt(sm)))
101 |
102 | # return split
103 | return L
104 |
105 |
106 | def eig_corr(C0, Ct, epsilon=1e-10, method='QR', sign_maxelement=False):
107 | r""" Solve generalized eigenvalue problem with correlation matrices C0 and Ct
108 |
109 | Numerically robust solution of a generalized Hermitian (symmetric) eigenvalue
110 | problem of the form
111 |
112 | .. math::
113 | \mathbf{C}_t \mathbf{r}_i = \mathbf{C}_0 \mathbf{r}_i l_i
114 |
115 | Computes :math:`m` dominant eigenvalues :math:`l_i` and eigenvectors
116 | :math:`\mathbf{r}_i`, where :math:`m` is the numerical rank of the problem.
117 | This is done by first conducting a Schur decomposition of the symmetric
118 | positive matrix :math:`\mathbf{C}_0`, then truncating its spectrum to
119 | retain only eigenvalues that are numerically greater than zero, then using
120 | this decomposition to define an ordinary eigenvalue problem for
121 | :math:`\mathbf{C}_t` of size :math:`m`, and then solving this eigenvalue
122 | problem.
123 |
124 | Parameters
125 | ----------
126 | C0 : ndarray (n,n)
127 | time-instantaneous correlation matrix. Must be symmetric positive definite
128 | Ct : ndarray (n,n)
129 | time-lagged correlation matrix. Must be symmetric
130 | epsilon : float
131 | eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon will be
132 | cut off. The remaining number of Eigenvalues define the size of
133 | the output.
134 | method : str
135 | Method to perform the decomposition of :math:`W` before inverting. Options are:
136 |
137 | * 'QR': QR-based robust eigenvalue decomposition of W
138 | * 'schur': Schur decomposition of W
139 | sign_maxelement : bool
140 | If True, re-scale each eigenvector such that its entry with maximal absolute value
141 | is positive.
142 |
143 |
144 | Returns
145 | -------
146 | l : ndarray (m)
147 | The first m generalized eigenvalues, sorted by descending norm
148 | R : ndarray (n,m)
149 | The first m generalized eigenvectors, as a column matrix.
150 |
151 | """
152 | L = spd_inv_split(C0, epsilon=epsilon, method=method)
153 | Ct_trans = _np.dot(_np.dot(L.T, Ct), L)
154 |
155 | # solve the symmetric eigenvalue problem in the new basis
156 | if _np.allclose(Ct.T, Ct):
157 | from scipy.linalg import eigh
158 | l, R_trans = eigh(Ct_trans)
159 | else:
160 | from scipy.linalg import eig
161 | l, R_trans = eig(Ct_trans)
162 |
163 | # sort eigenpairs
164 | l, R_trans = sort_by_norm(l, R_trans)
165 |
166 | # transform the eigenvectors back to the old basis
167 | R = _np.dot(L, R_trans)
168 |
169 | # Change signs of eigenvectors:
170 | if sign_maxelement:
171 | for j in range(R.shape[1]):
172 | imax = _np.argmax(_np.abs(R[:, j]))
173 | R[:, j] *= _np.sign(R[imax, j])
174 |
175 | # return result
176 | return l, R
177 |
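178 |
179 | # Minimal usage sketch (toy spd matrix; mirrors the checks in tests/test_direct.py):
180 | #   >>> W = _np.array([[1.0, 0.3], [0.3, 0.8]])
181 | #   >>> L = spd_inv_split(W)
182 | #   >>> _np.allclose(_np.dot(L, L.T), _np.linalg.inv(W))
183 | #   True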
--------------------------------------------------------------------------------
/variational/estimators/tests/benchmark_moments.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | __author__ = 'noe'
4 |
5 | import time
6 | import numpy as np
7 | from .. import moments
8 |
9 | def genS(N):
10 | """ Generates sparsities given N (number of cols) """
11 | S = [10, 90, 100, 500, 900, 1000, 2000, 5000, 7500, 9000, 10000, 20000, 50000, 75000, 90000] # non-zero
12 | return [s for s in S if s <= N]
13 |
14 |
15 | def genX(L, N, n_var=None, const=False):
16 | X = np.random.rand(L, N) # random data
17 | if n_var is not None:
18 | if const:
19 | Xsparse = np.ones((L, N))
20 | else:
21 | Xsparse = np.zeros((L, N))
22 | Xsparse[:, :n_var] = X[:, :n_var]
23 | X = Xsparse
24 | return X
25 |
26 |
27 | def genY(L, N, n_var=None, const=False):
28 | X = np.random.rand(L, N) # random data
29 | if n_var is not None:
30 | if const:
31 | Xsparse = -np.ones((L, N))
32 | else:
33 | Xsparse = np.zeros((L, N))
34 | Xsparse[:, :n_var] = X[:, :n_var]
35 | X = Xsparse
36 | return X
37 |
38 |
39 | def reftime_momentsXX(X, remove_mean=False, nrep=3):
40 | # time for reference calculation
41 | t1 = time.time()
42 | for r in range(nrep):
43 | s_ref = X.sum(axis=0) # computation of mean
44 | if remove_mean:
45 | X = X - s_ref/float(X.shape[0])
46 | C_XX_ref = np.dot(X.T, X) # covariance matrix
47 | t2 = time.time()
48 | # return mean time
49 | return (t2-t1)/float(nrep)
50 |
51 |
52 | def mytime_momentsXX(X, remove_mean=False, nrep=3):
53 | # time for moments.moments_XX calculation
54 | t1 = time.time()
55 | for r in range(nrep):
56 | w, s, C_XX = moments.moments_XX(X, remove_mean=remove_mean)
57 | t2 = time.time()
58 | # return mean time
59 | return (t2-t1)/float(nrep)
60 |
61 |
62 | def reftime_momentsXXXY(X, Y, remove_mean=False, symmetrize=False, nrep=3):
63 | # time for reference calculation
64 | t1 = time.time()
65 | for r in range(nrep):
66 | sx = X.sum(axis=0) # computation of mean
67 | sy = Y.sum(axis=0) # computation of mean
68 | if symmetrize:
69 | sx = 0.5*(sx + sy)
70 | sy = sx
71 | if remove_mean:
72 | X = X - sx/float(X.shape[0])
73 | Y = Y - sy/float(Y.shape[0])
74 | if symmetrize:
75 | C_XX_ref = np.dot(X.T, X) + np.dot(Y.T, Y)
76 | C_XY = np.dot(X.T, Y)
77 | C_XY_ref = C_XY + C_XY.T
78 | else:
79 | C_XX_ref = np.dot(X.T, X)
80 | C_XY_ref = np.dot(X.T, Y)
81 | t2 = time.time()
82 | # return mean time
83 | return (t2-t1)/float(nrep)
84 |
85 |
86 | def mytime_momentsXXXY(X, Y, remove_mean=False, symmetrize=False, nrep=3):
87 | # time for moments.moments_XXXY calculation
88 | t1 = time.time()
89 | for r in range(nrep):
90 | w, sx, sy, C_XX, C_XY = moments.moments_XXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize)
91 | t2 = time.time()
92 | # return mean time
93 | return (t2-t1)/float(nrep)
94 |
95 |
96 | def benchmark_moments(L=10000, N=10000, nrep=5, xy=False, remove_mean=False, symmetrize=False, const=False):
97 | #S = [10, 100, 1000]
98 | S = genS(N)
99 |
100 | # time for reference calculation
101 | X = genX(L, N)
102 | if xy:
103 | Y = genY(L, N)
104 | reftime = reftime_momentsXXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize, nrep=nrep)
105 | else:
106 | reftime = reftime_momentsXX(X, remove_mean=remove_mean, nrep=nrep)
107 |
108 | # my time
109 | times = np.zeros(len(S))
110 | for k, s in enumerate(S):
111 | X = genX(L, N, n_var=s, const=const)
112 | if xy:
113 | Y = genY(L, N, n_var=s, const=const)
114 | times[k] = mytime_momentsXXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize, nrep=nrep)
115 | else:
116 | times[k] = mytime_momentsXX(X, remove_mean=remove_mean, nrep=nrep)
117 |
118 | # assemble report
119 | rows = ['L, data points', 'N, dimensions', 'S, nonzeros', 'time trivial', 'time moments_XX', 'speed-up']
120 | table = np.zeros((6, len(S)))
121 | table[0, :] = L
122 | table[1, :] = N
123 | table[2, :] = S
124 | table[3, :] = reftime
125 | table[4, :] = times
126 | table[5, :] = reftime / times
127 |
128 | # print table
129 | if xy:
130 | fname = 'moments_XXXY'
131 | else:
132 | fname = 'moments_XX'
133 | print(fname + '\tremove_mean = ' + str(remove_mean) + '\tsym = ' + str(symmetrize) + '\tconst = ' + str(const))
134 | print(rows[0] + ('\t%i' * table.shape[1])%tuple(table[0]))
135 | print(rows[1] + ('\t%i' * table.shape[1])%tuple(table[1]))
136 | print(rows[2] + ('\t%i' * table.shape[1])%tuple(table[2]))
137 | print(rows[3] + ('\t%.3f' * table.shape[1])%tuple(table[3]))
138 | print(rows[4] + ('\t%.3f' * table.shape[1])%tuple(table[4]))
139 | print(rows[5] + ('\t%.3f' * table.shape[1])%tuple(table[5]))
140 | print()
141 |
142 |
143 | def main():
144 | LNs = [(100000, 100, 10), (10000, 1000, 7), (1000, 2000, 5), (250, 5000, 5), (100, 10000, 5)]
145 | for L, N, nrep in LNs:
146 | benchmark_moments(L=L, N=N, nrep=nrep, xy=False, remove_mean=False, symmetrize=False, const=False)
147 | benchmark_moments(L=L, N=N, nrep=nrep, xy=False, remove_mean=False, symmetrize=False, const=True)
148 | benchmark_moments(L=L, N=N, nrep=nrep, xy=False, remove_mean=True, symmetrize=False, const=False)
149 | benchmark_moments(L=L, N=N, nrep=nrep, xy=False, remove_mean=True, symmetrize=False, const=True)
150 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=False, symmetrize=False, const=False)
151 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=False, symmetrize=False, const=True)
152 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=False, symmetrize=True, const=False)
153 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=False, symmetrize=True, const=True)
154 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=True, symmetrize=False, const=False)
155 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=True, symmetrize=False, const=True)
156 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=True, symmetrize=True, const=False)
157 | benchmark_moments(L=L, N=N, nrep=nrep, xy=True, remove_mean=True, symmetrize=True, const=True)
158 |
159 |
160 | if __name__ == "__main__":
161 | main()
--------------------------------------------------------------------------------
/variational/estimators/covar_c/covartools.pyx:
--------------------------------------------------------------------------------
1 | import numpy
2 | import ctypes
3 | cimport numpy
4 |
5 | cdef extern from "_covartools.h":
6 | void _variable_cols_char(int* cols, char* X, int M, int N, int min_constant)
7 | void _variable_cols_int(int* cols, int* X, int M, int N, int min_constant)
8 | void _variable_cols_long(int* cols, long* X, int M, int N, int min_constant)
9 | void _variable_cols_float(int* cols, float* X, int M, int N, int min_constant)
10 | void _variable_cols_double(int* cols, double* X, int M, int N, int min_constant)
11 | void _variable_cols_float_approx(int* cols, float* X, int M, int N, float tol, int min_constant)
12 | void _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant)
13 | void _subtract_row_double(double* X, double* row, int M, int N)
14 | void _subtract_row_float(float* X, float* row, int M, int N)
15 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N)
16 | void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N)
17 |
18 |
19 | # ================================================
20 | # Check for constant columns
21 | # ================================================
22 |
23 | def variable_cols_char(cols, X, M, N, min_constant=0):
24 | pcols = numpy.PyArray_DATA(cols)
25 | pX = numpy.PyArray_DATA(X)
26 | return _variable_cols_char(pcols, pX, M, N, min_constant)
27 |
28 | def variable_cols_int(cols, X, M, N, min_constant=0):
29 | pcols = numpy.PyArray_DATA(cols)
30 | pX = numpy.PyArray_DATA(X)
31 | return _variable_cols_int(pcols, pX, M, N, min_constant)
32 |
33 | def variable_cols_long(cols, X, M, N, min_constant=0):
34 | pcols = numpy.PyArray_DATA(cols)
35 | pX = numpy.PyArray_DATA(X)
36 | return _variable_cols_long(pcols, pX, M, N, min_constant)
37 |
38 | def variable_cols_float(cols, X, M, N, tol=0.0, min_constant=0):
39 | pcols = numpy.PyArray_DATA(cols)
40 | pX = numpy.PyArray_DATA(X)
41 | if tol == 0.0:
42 | return _variable_cols_float(pcols, pX, M, N, min_constant)
43 | else:
44 | return _variable_cols_float_approx(pcols, pX, M, N, numpy.float32(tol), min_constant)
45 |
46 | def variable_cols_double(cols, X, M, N, tol=0.0, min_constant=0):
47 | pcols = numpy.PyArray_DATA(cols)
48 | pX = numpy.PyArray_DATA(X)
49 | if tol == 0.0:
50 | return _variable_cols_double(pcols, pX, M, N, min_constant)
51 | else:
52 | return _variable_cols_double_approx(pcols, pX, M, N, tol, min_constant)
53 |
54 | def variable_cols(X, tol=0, min_constant=0):
55 | """ Evaluates which columns are constant (0) or variable (1)
56 |
57 | Parameters
58 | ----------
59 | X : ndarray
60 | Matrix whose columns will be checked for constant or variable.
61 | tol : float
62 | Tolerance for float-matrices. When set to 0, only columns whose
63 | values are all exactly equal are considered constant. When set to a
64 | positive value, columns where all elements differ from the first
65 | element of that column by less than tol are considered constant.
66 | min_constant : int
67 | Minimal number of constant columns required to continue the search.
68 | If at any point the number of constant columns drops below
69 | min_constant, the computation will stop and all columns will be
70 | assumed to be variable. In this case, an all-True array will be returned.
71 |
72 | Returns
73 | -------
74 | variable : bool-array
75 | Array with number of elements equal to the columns. True: column is
76 | variable / nonconstant. False: column is constant.
77 |
78 | """
79 | if X is None:
80 | return None
81 | M, N = X.shape
82 |
83 | # prepare column array
84 | cols = numpy.zeros( (N), dtype=ctypes.c_int, order='C' )
85 |
86 | if X.dtype == numpy.float64:
87 | completed = variable_cols_double(cols, X, M, N, tol=tol, min_constant=min_constant)
88 | elif X.dtype == numpy.float32:
89 | completed = variable_cols_float(cols, X, M, N, tol=tol, min_constant=min_constant)
90 | elif X.dtype == numpy.int32:
91 | completed = variable_cols_int(cols, X, M, N, min_constant=min_constant)
92 | elif X.dtype == numpy.int64:
93 | completed = variable_cols_long(cols, X, M, N, min_constant=min_constant)
94 | elif X.dtype == numpy.bool:
95 | completed = variable_cols_char(cols, X, M, N, min_constant=min_constant)
96 | else:
97 | raise TypeError('unsupported type of X: '+str(X.dtype))
98 |
99 | # if interrupted, return all ones. Otherwise return the variable columns as bool array
100 | if completed == 0:
101 | return numpy.ones(N, dtype=numpy.bool)
102 | else:
103 | return numpy.array(cols, dtype=numpy.bool)
104 |
105 | # ================================================
106 | # Row subtraction
107 | # ================================================
108 |
109 | def subtract_row_float(X, row, M, N):
110 | prow = numpy.PyArray_DATA(row)
111 | pX = numpy.PyArray_DATA(X)
112 | _subtract_row_float(pX, prow, M, N)
113 |
114 | def subtract_row_double(X, row, M, N):
115 | prow = numpy.PyArray_DATA(row)
116 | pX = numpy.PyArray_DATA(X)
117 | _subtract_row_double(pX, prow, M, N)
118 |
119 | def subtract_row_double_copy(X, row, M, N):
120 | X0 = numpy.zeros( X.shape, dtype=ctypes.c_double, order='C' )
121 | pX0 = numpy.PyArray_DATA(X0)
122 | pX = numpy.PyArray_DATA(X)
123 | prow = numpy.PyArray_DATA(row)
124 | _subtract_row_double_copy(pX0, pX, prow, M, N)
125 | return X0
126 |
127 | def subtract_row_float_copy(X, row, M, N):
128 | X0 = numpy.zeros( X.shape, dtype=ctypes.c_float, order='C' )
129 | pX0 = numpy.PyArray_DATA(X0)
130 | pX = numpy.PyArray_DATA(X)
131 | prow = numpy.PyArray_DATA(row)
132 | _subtract_row_float_copy(pX0, pX, prow, M, N)
133 | return X0
134 |
135 |
136 | def subtract_row(X, row, inplace=False):
137 | """ Subtracts given row from each row of array
138 |
139 | Parameters
140 | ----------
141 | X : ndarray (M, N)
142 | Matrix whose rows will be shifted.
143 | row : ndarray (N)
144 | Row vector that will be subtracted from each row of X.
145 | inplace : bool
146 | True: X will be changed. False: A copy of X will be created and X will remain unchanged.
147 |
148 | Returns
149 | -------
150 | X0 : ndarray (M, N)
151 | The row-shifted data
152 |
153 | """
154 | M, N = X.shape
155 |
156 | if X.dtype == numpy.float64 and row.dtype == numpy.float64:
157 | if inplace:
158 | subtract_row_double(X, row, M, N)
159 | else:
160 | X = subtract_row_double_copy(X, row, M, N)
161 | elif X.dtype == numpy.float32 and row.dtype == numpy.float32:
162 | if inplace:
163 | subtract_row_float(X, row, M, N)
164 | else:
165 | X = subtract_row_float_copy(X, row, M, N)
166 | else:
167 | raise TypeError('unsupported or inconsistent types: '+str(X.dtype)+' '+str(row.dtype))
168 |
169 | return X
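170 |
171 |
172 | # Minimal usage sketch (requires the compiled extension module):
173 | #   >>> import numpy as np
174 | #   >>> X = np.ones((100, 3)); X[:, 0] = np.random.rand(100)
175 | #   >>> variable_cols(X)                      # -> [ True, False, False]
176 | #   >>> X0 = subtract_row(X, X.mean(axis=0))  # mean-free copy of X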
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """ Variational Approach for conformation dynamics (VAC)
2 |
3 | This package contains basis sets, estimators and solvers for the variational approach for
4 | conformation dynamics, a theory that has been proposed in [1] and was further developed in
5 | [2] and [3]. The variational approach is analogous to the Ritz method [4] that is
6 | employed in computational quantum chemistry. It differs in how the involved
7 | matrices are computed and in the meaning of the involved operators, eigenfunctions
8 | and eigenvalues - see [3] for a comparison.
9 |
10 | Roughly, the idea of the VAC is as follows: Given a (classical)
11 | molecular dynamics trajectory with configurations {x_1, ..., x_T}, and a
12 | set of basis functions defined on the space of configurations {chi_1(x), ..., chi_n(x)},
13 | we compute the two correlation matrices:
14 |
15 | c_ij (0) = < chi_i(x_t) chi_j(x_t) >_t
16 | c_ij (tau) = < chi_i(x_t) chi_j(x_t+tau) >_t
17 |
18 | where < . >_t is the average over time t. Of course this can be generalized to many trajectories.
19 | Then we solve the generalized eigenvalue problem
20 |
21 | C(tau) r = C(0) r l(tau)
22 |
23 | where the eigenvalues l(tau) approximate the dominant eigenvalues of the Markov propagator
24 | or Markov backward propagator of the underlying dynamics. The corresponding eigenfunction
25 | of the backward propagator is approximated by
26 |
27 | psi(x) = sum_i r_i chi_i(x)
28 |
29 | Package functionalities
30 | -----------------------
31 |
32 | This package aims at providing code to help address a number of key problems:
33 |
34 | 1. Basis sets for molecular dynamics (MD), and in particular protein dynamics. See [5] for a
35 | first approach in this direction.
36 |
37 | 2. Estimators for the correlation matrices C(0), C(tau). The trivial time-average that is usually
38 | employed has a number of problems especially for many short simulation trajectories that are
39 | initiated far from the equilibrium distribution (the usual case!).
40 |
41 | 3. Solvers for accurately solving the eigenvalue problem above, even for huge basis sets.
42 |
43 | At this time only a few of the above functionalities are implemented and we will go step by step.
44 | This package will undergo heavy development and there is currently no date for an official
45 | release, so don't be surprised if the API (the look + feel of functions and classes) changes.
46 | At the moment this package is purely intended for development purposes, so use it at your own
47 | risk.
48 |
49 | Applications
50 | ------------
51 | 1. The time-lagged independent component analysis (TICA) method originally developed in [6] and
52 | proposed as an optimal data transformation method for building Markov state models of MD
53 | in [3,7] is a VAC with mean-free basis functions. Therefore you can easily implement TICA with
54 | this package.
55 |
56 | 2. By transforming the internal coordinates such as torsion angles or interatomic distances into
57 | suitable basis functions, you can approximate experimentally-measurable relaxation timescales
58 | and determine the corresponding structural rearrangements for peptides and proteins [2,5].
59 |
60 | 3. ... more will follow.
61 |
62 | References
63 | ----------
64 | [1] Noe, F. and Nueske, F. (2013): A variational approach to modeling slow processes
65 | in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655.
66 |
67 | [2] Nueske, F., Keller, B., Perez-Hernandez, G., Mey, A.S.J.S. and Noe, F. (2014)
68 | Variational Approach to Molecular Kinetics. J. Chem. Theory Comput. 10, 1739-1752.
69 |
70 | [3] Perez-Hernandez, G., Paul, F., Giorgino, T., De Fabritiis, G. and Noe, F. (2013)
71 | Identification of slow molecular order parameters for Markov model construction.
72 | J. Chem. Phys. 139, 015102.
73 |
74 | [4] Ritz, W. (1909): Ueber eine neue Methode zur Loesung gewisser
75 | Variationsprobleme der mathematischen Physik. J. Reine Angew. Math., 135, 1-61.
76 |
77 | [5] Vitalini, F., Noe, F. and Keller, B. (2015): A basis set for peptides for the
78 | variational approach to conformational kinetics. (In review).
79 |
80 | [6] Molgedey, L. and Schuster H. G. (1994): Phys. Rev. Lett. 72, 3634.
81 |
82 | [7] Schwantes, C. R. and Pande, V. S. (2013): J. Chem. Theory Comput. 9, 2000-2009.
83 |
84 | """
85 | from __future__ import print_function
86 | import os
87 | import versioneer
88 | from setuptools import setup, Extension, find_packages
89 | from os.path import relpath, join
90 |
91 | DOCLINES = __doc__.split("\n")
92 |
93 | CLASSIFIERS = """\
94 | Development Status :: 3 - Alpha
95 | Intended Audience :: Science/Research
96 | Intended Audience :: Developers
97 | License :: OSI Approved :: BSD License
98 | Programming Language :: Python
99 | Topic :: Scientific/Engineering :: Bio-Informatics
100 | Topic :: Scientific/Engineering :: Chemistry
101 | Topic :: Scientific/Engineering :: Physics
102 | Operating System :: Microsoft :: Windows
103 | Operating System :: POSIX
104 | Operating System :: Unix
105 | Operating System :: MacOS
106 | """
107 |
108 | ################################################################################
109 | # USEFUL SUBROUTINES
110 | ################################################################################
111 |
112 | def find_package_data(data_root, package_root):
113 | files = []
114 | for root, dirnames, filenames in os.walk(data_root):
115 | for fn in filenames:
116 | files.append(relpath(join(root, fn), package_root))
117 | return files
118 |
119 | ################################################################################
120 | # EXTENSIONS
121 | ################################################################################
122 |
123 | def extensions():
124 | from numpy import get_include as np_inc
125 | from scipy import get_include as sc_inc
126 | np_inc = np_inc()
127 | sc_inc = sc_inc()
128 | from Cython.Build import cythonize
129 | exts = [Extension('variational.estimators.covar_c.covartools',
130 | sources = ['./variational/estimators/covar_c/covartools.pyx',
131 | './variational/estimators/covar_c/_covartools.c'],
132 | include_dirs = ['./variational/estimators/covar_c/', np_inc],
133 | extra_compile_args=['-std=c99','-O3']),
134 | Extension('variational.solvers.eig_qr.eig_qr',
135 | sources=['./variational/solvers/eig_qr/eig_qr.pyx'],
136 | include_dirs=['./variational/solvers/eig_qr/', np_inc, sc_inc],
137 | extra_compile_args=['-std=c99','-O3'])
138 | ]
139 | return cythonize(exts)
140 |
141 |
142 | class lazy_cythonize(list):
143 | """evaluates extension list lazyly.
144 | pattern taken from http://tinyurl.com/qb8478q"""
145 | def __init__(self, callback):
146 | self._list, self.callback = None, callback
147 | def c_list(self):
148 | if self._list is None: self._list = self.callback()
149 | return self._list
150 | def __iter__(self):
151 | for e in self.c_list(): yield e
152 | def __getitem__(self, ii): return self.c_list()[ii]
153 | def __len__(self): return len(self.c_list())
154 |
155 | ################################################################################
156 | # SETUP
157 | ################################################################################
158 |
159 | metadata=dict(
160 | name = 'variational',
161 | author = 'Frank Noe, Fabian Paul and Feliks Nueske',
162 | author_email = 'frank.noe@fu-berlin.de',
163 | description = DOCLINES[0],
164 | long_description = "\n".join(DOCLINES[2:]),
165 | version=versioneer.get_version(),
166 | cmdclass=versioneer.get_cmdclass(),
167 | license='OpenBSD',
168 | url='https://github.com/markovmodel/variational',
169 | platforms=['Linux', 'Mac OS-X', 'Unix', 'Windows'],
170 | classifiers=CLASSIFIERS.splitlines(),
171 | #package_dir={'variational': 'variational'},
172 | packages=find_packages(),
173 | package_data={'variational.basisset':['ResiduesEigenvectors/*']
174 | },
175 | zip_safe=False,
176 | install_requires=[
177 | 'numpy',
178 | 'scipy',
179 | 'six',
180 | ],
181 | setup_requires=[
182 | 'cython>=0.24',
183 | 'numpy',
184 | ],
185 | ext_modules=lazy_cythonize(extensions),
186 | )
187 |
188 | setup(**metadata)
189 |
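
As an aside for readers of the module docstring above: the pipeline it describes can be sketched in a few lines of plain NumPy/SciPy. This is an illustrative, self-contained example, not part of this package's API; with mean-free basis functions, as below, it is exactly the TICA construction mentioned under Applications.

import numpy as np
from scipy.linalg import eigh

def vac_eigenpairs(chi, tau):
    # chi: (T, n) trajectory of the basis functions chi_1..chi_n; tau: lag in steps
    X, Y = chi[:-tau], chi[tau:]
    X = X - X.mean(axis=0)  # mean-free basis functions -> TICA
    Y = Y - Y.mean(axis=0)
    C0 = np.dot(X.T, X) / len(X)  # c_ij(0)   = < chi_i(x_t) chi_j(x_t) >_t
    Ct = np.dot(X.T, Y) / len(X)  # c_ij(tau) = < chi_i(x_t) chi_j(x_t+tau) >_t
    Ct = 0.5 * (Ct + Ct.T)        # symmetrized estimate (reversible dynamics)
    l, R = eigh(Ct, C0)           # generalized problem C(tau) r = C(0) r l(tau); C0 must be positive definite
    order = np.argsort(l)[::-1]   # dominant eigenvalues first
    return l[order], R[:, order]  # psi_k(x) = sum_i R[i, k] chi_i(x)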
--------------------------------------------------------------------------------
/docs/Interface.lyx:
--------------------------------------------------------------------------------
1 | #LyX 2.1 created this file. For more info see http://www.lyx.org/
2 | \lyxformat 474
3 | \begin_document
4 | \begin_header
5 | \textclass article
6 | \use_default_options true
7 | \maintain_unincluded_children false
8 | \language english
9 | \language_package default
10 | \inputencoding auto
11 | \fontencoding global
12 | \font_roman default
13 | \font_sans default
14 | \font_typewriter default
15 | \font_math auto
16 | \font_default_family default
17 | \use_non_tex_fonts false
18 | \font_sc false
19 | \font_osf false
20 | \font_sf_scale 100
21 | \font_tt_scale 100
22 | \graphics default
23 | \default_output_format default
24 | \output_sync 0
25 | \bibtex_command default
26 | \index_command default
27 | \paperfontsize default
28 | \use_hyperref false
29 | \papersize default
30 | \use_geometry false
31 | \use_package amsmath 1
32 | \use_package amssymb 1
33 | \use_package cancel 1
34 | \use_package esint 1
35 | \use_package mathdots 1
36 | \use_package mathtools 1
37 | \use_package mhchem 1
38 | \use_package stackrel 1
39 | \use_package stmaryrd 1
40 | \use_package undertilde 1
41 | \cite_engine basic
42 | \cite_engine_type default
43 | \biblio_style plain
44 | \use_bibtopic false
45 | \use_indices false
46 | \paperorientation portrait
47 | \suppress_date false
48 | \justification true
49 | \use_refstyle 1
50 | \index Index
51 | \shortcut idx
52 | \color #008000
53 | \end_index
54 | \secnumdepth 3
55 | \tocdepth 3
56 | \paragraph_separation indent
57 | \paragraph_indentation default
58 | \quotes_language english
59 | \papercolumns 1
60 | \papersides 1
61 | \paperpagestyle default
62 | \tracking_changes false
63 | \output_changes false
64 | \html_math_output 0
65 | \html_css_as_file 0
66 | \html_be_strict false
67 | \end_header
68 |
69 | \begin_body
70 |
71 | \begin_layout Title
72 | Interface for Variational Package
73 | \end_layout
74 |
75 | \begin_layout Standard
76 | Here, we briefly sketch the interface for all functions to appear in the
77 | variational package.
78 | The package consists of three main modules: A library of basis sets, estimators
79 | for the correlation matrices, and a solver for the resulting generalized
80 | eigenvalue problem.
81 | \end_layout
82 |
83 | \begin_layout Enumerate
84 | The basis sets library contains functions to evaluate specific classes of
85 | basis functions.
86 | Examples for these classes are Gaussian basis functions, Fourier waves
87 | defined on angles, or the MSM-eigenvector based functions and their products.
88 | The general interface is given by the function SomeBasisSet below.
89 |
90 | \end_layout
91 |
92 | \begin_layout Enumerate
93 | The estimator module contains a function that generates the correlation matrices
94 |
95 | \begin_inset Formula $\mathbf{C}^{\tau},\,\mathbf{C}^{0}$
96 | \end_inset
97 |
98 | from the basis function trajectories generated in the first step.
99 | \end_layout
100 |
101 | \begin_layout Enumerate
102 | The solver module contains a function to solve the generalized eigenvalue
103 | problem for the correlation matrices generated before.
104 | We will just use the function eig_corr implemented in pyemma.util.linalg.
105 | \end_layout
106 |
107 | \begin_layout Standard
108 | Below we describe the interfaces for these three modules.
109 | \end_layout
110 |
111 | \begin_layout Standard
112 | \begin_inset listings
113 | lstparams "language=Python,float,breaklines=true,tabsize=4"
114 | inline false
115 | status open
116 |
117 | \begin_layout Plain Layout
118 |
119 | def SomeBasisSet(list_of_trajectories, prefix, parameters):
120 | \end_layout
121 |
122 | \begin_layout Plain Layout
123 |
124 | """
125 | \end_layout
126 |
127 | \begin_layout Plain Layout
128 |
129 | Parameters
130 | \end_layout
131 |
132 | \begin_layout Plain Layout
133 |
134 | ----------
135 | \end_layout
136 |
137 | \begin_layout Plain Layout
138 |
139 | list_of_trajectories: list
140 | \end_layout
141 |
142 | \begin_layout Plain Layout
143 |
144 | List of .npy-files.
145 | Each file contains a feature trajectory, represented as an np-array of
146 | shape (T,N), where T is the number of time-steps in this trajectory and
147 | N is the number of features (distances, angles,...) on which the basis set
148 | is defined.
149 | \end_layout
150 |
151 | \begin_layout Plain Layout
152 |
153 | prefix: string
154 | \end_layout
155 |
156 | \begin_layout Plain Layout
157 |
158 | Common prefix for all files to be produced (see Output).
159 | \end_layout
160 |
161 | \begin_layout Plain Layout
162 |
163 | parameters:
164 | \end_layout
165 |
166 | \begin_layout Plain Layout
167 |
168 | Additional parameters needed for this basis set.
169 | \end_layout
170 |
171 | \begin_layout Plain Layout
172 |
173 | \end_layout
174 |
175 | \begin_layout Plain Layout
176 |
177 | Returns
178 | \end_layout
179 |
180 | \begin_layout Plain Layout
181 |
182 | -------
183 | \end_layout
184 |
185 | \begin_layout Plain Layout
186 |
187 | Returns a list of lists of filenames where the evaluations of all requested
188 | basis functions can be found.
189 | The files will be called "prefix_trajnum_fctnum.npy", where trajnum is the
190 | trajectory number and fctnum is the number of the basis function.
191 | Each sublist contains the files for one trajectory.
192 | \end_layout
193 |
194 | \begin_layout Plain Layout
195 |
196 | """
197 | \end_layout
198 |
199 | \end_inset
200 |
201 |
202 | \end_layout
203 |
204 | \begin_layout Standard
205 | \begin_inset listings
206 | lstparams "language=Python,float,breaklines=true,tabsize=4"
207 | inline false
208 | status open
209 |
210 | \begin_layout Plain Layout
211 |
212 | def Estimator(list_of_trajectories, list_of_taus):
213 | \end_layout
214 |
215 | \begin_layout Plain Layout
216 |
217 | """
218 | \end_layout
219 |
220 | \begin_layout Plain Layout
221 |
222 | Parameters
223 | \end_layout
224 |
225 | \begin_layout Plain Layout
226 |
227 | ----------
228 | \end_layout
229 |
230 | \begin_layout Plain Layout
231 |
232 | list_of_trajectories: list
233 | \end_layout
234 |
235 | \begin_layout Plain Layout
236 |
237 | List of lists of .npy-files, organized the same way as the output of a basis
238 | set function.
239 | Each sublist contains the files for all basis functions for one specific
240 | trajectory.
241 | \end_layout
242 |
243 | \begin_layout Plain Layout
244 |
245 | list_of_taus: ndarray (ntau,)
246 | \end_layout
247 |
248 | \begin_layout Plain Layout
249 |
250 | The lag-times for which the correlation matrices will be computed.
251 | \end_layout
252 |
253 | \begin_layout Plain Layout
254 |
255 | \end_layout
256 |
257 | \begin_layout Plain Layout
258 |
259 | Returns
260 | \end_layout
261 |
262 | \begin_layout Plain Layout
263 |
264 | -------
265 | \end_layout
266 |
267 | \begin_layout Plain Layout
268 |
269 | list of correlation matrices.
270 | \end_layout
271 |
272 | \begin_layout Plain Layout
273 |
274 | """
275 | \end_layout
276 |
277 | \end_inset
278 |
279 |
280 | \end_layout
281 |
282 | \begin_layout Standard
283 | \begin_inset listings
284 | lstparams "language=Python,float,breaklines=true,tabsize=4"
285 | inline false
286 | status open
287 |
288 | \begin_layout Plain Layout
289 |
290 | def eig_corr(C0, Ct, epsilon=1e-6):
291 | \end_layout
292 |
293 | \begin_layout Plain Layout
294 |
295 | """ Solve the generalized eigenvalues problem with correlation matrices
296 | C0 and Ct
297 | \end_layout
298 |
299 | \begin_layout Plain Layout
300 |
301 | Parameters
302 | \end_layout
303 |
304 | \begin_layout Plain Layout
305 |
306 | ----------
307 | \end_layout
308 |
309 | \begin_layout Plain Layout
310 |
311 | C0 : ndarray (n,n)
312 | \end_layout
313 |
314 | \begin_layout Plain Layout
315 |
316 | time-instantaneous correlation matrix.
317 | Must be symmetric positive definite
318 | \end_layout
319 |
320 | \begin_layout Plain Layout
321 |
322 | Ct : ndarray (n,n)
323 | \end_layout
324 |
325 | \begin_layout Plain Layout
326 |
327 | time-lagged correlation matrix.
328 | Must be symmetric
329 | \end_layout
330 |
331 | \begin_layout Plain Layout
332 |
333 | epsilon : float
334 | \end_layout
335 |
336 | \begin_layout Plain Layout
337 |
338 | eigenvalue norm cutoff.
339 | Eigenvalues of C0 with norms <= epsilon will be cut off.
340 | The remaining number of eigenvalues defines the size of the output.
341 | \end_layout
342 |
343 | \begin_layout Plain Layout
344 |
345 | Returns
346 | \end_layout
347 |
348 | \begin_layout Plain Layout
349 |
350 | -------
351 | \end_layout
352 |
353 | \begin_layout Plain Layout
354 |
355 | l : ndarray (m)
356 | \end_layout
357 |
358 | \begin_layout Plain Layout
359 |
360 | The first m generalized eigenvalues, sorted by descending norm
361 | \end_layout
362 |
363 | \begin_layout Plain Layout
364 |
365 | R : ndarray (n,m)
366 | \end_layout
367 |
368 | \begin_layout Plain Layout
369 |
370 | The first m generalized eigenvectors, as a column matrix.
371 | \end_layout
372 |
373 | \begin_layout Plain Layout
374 |
375 | """
376 | \end_layout
377 |
378 | \end_inset
379 |
380 |
381 | \end_layout
382 |
383 | \end_body
384 | \end_document
385 |
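
For orientation, a self-contained stand-in for the eig_corr function documented above, consistent with the documented signature and return values (a sketch only, not the pyemma.util.linalg implementation):

import numpy as np

def eig_corr(C0, Ct, epsilon=1e-6):
    # eigendecompose C0 and cut off eigenvalues with norm <= epsilon
    d, V = np.linalg.eigh(C0)
    keep = d > epsilon
    L = V[:, keep] / np.sqrt(d[keep])      # whitening transform, shape (n, m)
    # eigenpairs of the whitened Ct solve the generalized problem
    lam, W = np.linalg.eigh(L.T.dot(Ct).dot(L))
    order = np.argsort(np.abs(lam))[::-1]  # sort by descending norm
    return lam[order], L.dot(W[:, order])  # l: (m,) eigenvalues, R: (n, m) eigenvectors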
--------------------------------------------------------------------------------
/variational/estimators/tests/test_running_moments.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import unittest
3 | import numpy as np
4 | from .. import running_moments
5 |
6 | __author__ = 'noe'
7 |
8 |
9 | class TestRunningMoments(unittest.TestCase):
10 |
11 | @classmethod
12 | def setUpClass(cls):
13 | cls.X = np.random.rand(10000, 2)
14 | cls.Y = np.random.rand(10000, 2)
15 | cls.T = cls.X.shape[0]
16 | # Chunk size:
17 | cls.L = 1000
18 | # Number of chunks:
19 | cls.nchunks = cls.T // cls.L  # integer division: number of chunks
20 | # Set a lag time for time-lagged tests:
21 | #cls.lag = 50
22 | # Weights references:
23 | cls.weights = np.random.rand(10000)
24 | # Trajectory weights:
25 | cls.trajweights = 3*np.random.rand(cls.nchunks)
26 | # bias the first part
27 | cls.X[:2000] += 1.0
28 | cls.Y[:2000] -= 1.0
29 | # direct calculation, moments of X and Y
30 | cls.w = np.shape(cls.X)[0]
31 | cls.wsym = 2*np.shape(cls.X)[0]
32 | cls.sx = cls.X.sum(axis=0)
33 | cls.sy = cls.Y.sum(axis=0)
34 | cls.Mxx = np.dot(cls.X.T, cls.X)
35 | cls.Mxy = np.dot(cls.X.T, cls.Y)
36 | cls.Myy = np.dot(cls.Y.T, cls.Y)
37 | cls.mx = cls.sx / float(cls.w)
38 | cls.my = cls.sy / float(cls.w)
39 | cls.X0 = cls.X - cls.mx
40 | cls.Y0 = cls.Y - cls.my
41 | cls.Mxx0 = np.dot(cls.X0.T, cls.X0)
42 | cls.Mxy0 = np.dot(cls.X0.T, cls.Y0)
43 | cls.Myy0 = np.dot(cls.Y0.T, cls.Y0)
44 |
45 | # direct calculation, symmetric moments
46 | cls.s_sym = cls.sx + cls.sy
47 | cls.Mxx_sym = np.dot(cls.X.T, cls.X) + np.dot(cls.Y.T, cls.Y)
48 | cls.Mxy_sym = np.dot(cls.X.T, cls.Y) + np.dot(cls.Y.T, cls.X)
49 | cls.m_sym = cls.s_sym / float(cls.wsym)
50 | cls.X0_sym = cls.X - cls.m_sym
51 | cls.Y0_sym = cls.Y - cls.m_sym
52 | cls.Mxx0_sym = np.dot(cls.X0_sym.T, cls.X0_sym) + np.dot(cls.Y0_sym.T, cls.Y0_sym)
53 | cls.Mxy0_sym = np.dot(cls.X0_sym.T, cls.Y0_sym) + np.dot(cls.Y0_sym.T, cls.X0_sym)
54 |
55 | # direct calculation, weighted moments:
56 | cls.wesum = np.sum(cls.weights)
57 | cls.sx_w = (cls.weights[:, None] * cls.X).sum(axis=0)
58 | cls.sy_w = (cls.weights[:, None] * cls.Y).sum(axis=0)
59 | cls.Mxx_w = np.dot((cls.weights[:, None] * cls.X).T, cls.X)
60 | cls.Mxy_w = np.dot((cls.weights[:, None] * cls.X).T, cls.Y)
61 | cls.mx_w = cls.sx_w / float(cls.wesum)
62 | cls.my_w = cls.sy_w / float(cls.wesum)
63 | cls.X0_w = cls.X - cls.mx_w
64 | cls.Y0_w = cls.Y - cls.my_w
65 | cls.Mxx0_w = np.dot((cls.weights[:, None] * cls.X0_w).T, cls.X0_w)
66 | cls.Mxy0_w = np.dot((cls.weights[:, None] * cls.X0_w).T, cls.Y0_w)
67 | # direct calculation, weighted symmetric moments
68 | cls.s_sym_w = cls.sx_w + cls.sy_w
69 | cls.Mxx_sym_w = np.dot((cls.weights[:, None] * cls.X).T, cls.X) + np.dot((cls.weights[:, None] * cls.Y).T, cls.Y)
70 | cls.Mxy_sym_w = np.dot((cls.weights[:, None] * cls.X).T, cls.Y) + np.dot((cls.weights[:, None] * cls.Y).T, cls.X)
71 | cls.m_sym_w = cls.s_sym_w / float(2 * cls.wesum)
72 | cls.X0_sym_w = cls.X - cls.m_sym_w
73 | cls.Y0_sym_w = cls.Y - cls.m_sym_w
74 | cls.Mxx0_sym_w = np.dot((cls.weights[:, None] *cls.X0_sym_w).T, cls.X0_sym_w) + np.dot((cls.weights[:, None] *cls.Y0_sym_w).T, cls.Y0_sym_w)
75 | cls.Mxy0_sym_w = np.dot((cls.weights[:, None] *cls.X0_sym_w).T, cls.Y0_sym_w) + np.dot((cls.weights[:, None] *cls.Y0_sym_w).T, cls.X0_sym_w)
76 |
77 | return cls
78 |
79 | def test_XX_withmean(self):
80 | # many passes
81 | cc = running_moments.RunningCovar(remove_mean=False)
82 | for i in range(0, self.T, self.L):
83 | cc.add(self.X[i:i+self.L])
84 | assert np.allclose(cc.weight_XX(), self.T)
85 | assert np.allclose(cc.sum_X(), self.sx)
86 | assert np.allclose(cc.moments_XX(), self.Mxx)
87 |
88 | def test_XX_meanfree(self):
89 | # many passes
90 | cc = running_moments.RunningCovar(remove_mean=True)
91 | for i in range(0, self.T, self.L):
92 | cc.add(self.X[i:i+self.L])
93 | assert np.allclose(cc.weight_XX(), self.T)
94 | assert np.allclose(cc.sum_X(), self.sx)
95 | assert np.allclose(cc.moments_XX(), self.Mxx0)
96 |
97 | def test_XXXY_withmean(self):
98 | # many passes
99 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False)
100 | for i in range(0, self.T, self.L):
101 | cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
102 | assert np.allclose(cc.weight_XY(), self.T)
103 | assert np.allclose(cc.sum_X(), self.sx)
104 | assert np.allclose(cc.moments_XX(), self.Mxx)
105 | assert np.allclose(cc.moments_XY(), self.Mxy)
106 |
107 | def test_XXXY_meanfree(self):
108 | # many passes
109 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True)
110 | L = 1000
111 | for i in range(0, self.X.shape[0], L):
112 | cc.add(self.X[i:i+L], self.Y[i:i+L])
113 | assert np.allclose(cc.weight_XY(), self.T)
114 | assert np.allclose(cc.sum_X(), self.sx)
115 | assert np.allclose(cc.moments_XX(), self.Mxx0)
116 | assert np.allclose(cc.moments_XY(), self.Mxy0)
117 |
118 | def test_XXXY_weighted_withmean(self):
119 | # many passes
120 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False)
121 | for i in range(0, self.T, self.L):
122 | iX = self.X[i:i+self.L, :]
123 | iY = self.Y[i:i+self.L, :]
124 | iwe = self.weights[i:i+self.L]
125 | cc.add(iX, iY, weights=iwe)
126 | assert np.allclose(cc.weight_XY(), self.wesum)
127 | assert np.allclose(cc.sum_X(), self.sx_w)
128 | assert np.allclose(cc.moments_XX(), self.Mxx_w)
129 | assert np.allclose(cc.moments_XY(), self.Mxy_w)
130 |
131 | def test_XXXY_weighted_meanfree(self):
132 | # many passes
133 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True)
134 | for i in range(0, self.T, self.L):
135 | iX = self.X[i:i+self.L, :]
136 | iY = self.Y[i:i+self.L, :]
137 | iwe = self.weights[i:i+self.L]
138 | cc.add(iX, iY, weights=iwe)
139 | assert np.allclose(cc.weight_XY(), self.wesum)
140 | assert np.allclose(cc.sum_X(), self.sx_w)
141 | assert np.allclose(cc.moments_XX(), self.Mxx0_w)
142 | assert np.allclose(cc.moments_XY(), self.Mxy0_w)
143 |
144 | def test_XXXY_sym_withmean(self):
145 | # many passes
146 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False, symmetrize=True)
147 | for i in range(0, self.T, self.L):
148 | cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
149 | assert np.allclose(cc.weight_XY(), 2*self.T)
150 | assert np.allclose(cc.sum_X(), self.s_sym)
151 | assert np.allclose(cc.moments_XX(), self.Mxx_sym)
152 | assert np.allclose(cc.moments_XY(), self.Mxy_sym)
153 |
154 | def test_XXXY_sym_meanfree(self):
155 | # many passes
156 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True, symmetrize=True)
157 | for i in range(0, self.T, self.L):
158 | cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
159 | assert np.allclose(cc.weight_XY(), 2*self.T)
160 | assert np.allclose(cc.sum_X(), self.s_sym)
161 | assert np.allclose(cc.moments_XX(), self.Mxx0_sym)
162 | assert np.allclose(cc.moments_XY(), self.Mxy0_sym)
163 |
164 | def test_XXXY_weighted_sym_withmean(self):
165 | # many passes
166 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False, symmetrize=True)
167 | for i in range(0, self.T, self.L):
168 | iwe = self.weights[i:i+self.L]
169 | cc.add(self.X[i:i+self.L], self.Y[i:i+self.L], weights=iwe)
170 | assert np.allclose(cc.weight_XY(), 2 * self.wesum)
171 | assert np.allclose(cc.sum_X(), self.s_sym_w)
172 | assert np.allclose(cc.moments_XX(), self.Mxx_sym_w)
173 | assert np.allclose(cc.moments_XY(), self.Mxy_sym_w)
174 |
175 | def test_XXXY_weighted_sym_meanfree(self):
176 | # many passes
177 | cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True, symmetrize=True)
178 | for i in range(0, self.T, self.L):
179 | iwe = self.weights[i:i+self.L]
180 | cc.add(self.X[i:i+self.L], self.Y[i:i+self.L], weights=iwe)
181 | assert np.allclose(cc.weight_XY(), 2*self.wesum)
182 | assert np.allclose(cc.sum_X(), self.s_sym_w)
183 | assert np.allclose(cc.moments_XX(), self.Mxx0_sym_w)
184 | assert np.allclose(cc.moments_XY(), self.Mxy0_sym_w)
185 |
186 | if __name__ == "__main__":
187 | unittest.main()
--------------------------------------------------------------------------------
/variational/estimators/covar_c/_covartools.c:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | #include <stdio.h>
3 |
4 | /** Subtracts given row vector from each row of the matrix X
5 |
6 | @param X : (M, N) array
7 | @param row : (N) array
8 | @param M : int
9 | @param N : int
10 |
11 | */
12 | void _subtract_row_double(double* X, double* row, int M, int N)
13 | {
14 | int i, j, ro;
15 | for (i=0; i!=M; ++i)
16 | {
17 | ro = i*N;
18 | for (j=0; j!=N; ++j)
19 | {
20 | X[ro + j] -= row[j];
21 | }
22 | }
23 | }
24 |
25 | /** see above */
26 | void _subtract_row_float(float* X, float* row, int M, int N)
27 | {
28 | int i, j, ro;
29 | for (i=0; i!=M; ++i)
30 | {
31 | ro = i*N;
32 | for (j=0; j!=N; ++j)
33 | {
34 | X[ro + j] -= row[j];
35 | }
36 | }
37 | }
38 |
39 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N)
40 | {
41 | int i, j, ro;
42 | for (i=0; i!=M; ++i)
43 | {
44 | ro = i*N;
45 | for (j=0; j!=N; ++j)
46 | {
47 | X0[ro + j] = X[ro + j] - row[j];
48 | }
49 | }
50 | }
51 |
52 | void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N)
53 | {
54 | int i, j, ro;
55 | for (i=0; i!=M; ++i)
56 | {
57 | ro = i*N;
58 | for (j=0; j!=N; ++j)
59 | {
60 | X0[ro + j] = X[ro + j] - row[j];
61 | }
62 | }
63 | }
64 |
65 |
66 | int* _bool_to_list(int* b, int N, int nnz)
67 | {
68 | int i;
69 | int k=0;
70 | int* list = (int*)malloc(nnz*sizeof(int));
71 | for (i=0; i<N; ++i)
72 | {
73 | if (b[i] != 0)
74 | list[k++] = i;
75 | }
76 | return list;
77 | }
302 | if (diff > tol || -diff > tol)
303 | {
304 | if (cols[j] == 0)
305 | {
306 | cols[j] = 1;
307 | nconstant--;
308 | // are constant columns below threshold? Then interrupt.
309 | if (nconstant < min_constant)
310 | return 0;
311 | // do we have 0 constant columns? Then we can stop regularly.
312 | if (nconstant == 0)
313 | return 1;
314 | }
315 | }
316 | }
317 | }
318 |
319 | return 1;
320 | }
321 |
322 | /** see above */
323 | int _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant)
324 | {
325 | // compare first and last row to get constant candidates
326 | int i,j;
327 | int ro = (M-1)*N;
328 | double diff;
329 | int nconstant = N; // current number of constant columns
330 |
331 | // by default all 0 (constant)
332 | for (j=0; j<N; ++j)
333 | cols[j] = 0;
342 | if (diff > tol || -diff > tol)
343 | {
344 | if (cols[j] == 0)
345 | {
346 | cols[j] = 1;
347 | nconstant--;
348 | // are constant columns below threshold? Then interrupt.
349 | if (nconstant < min_constant)
350 | return 0;
351 | // do we have 0 constant columns? Then we can stop regularly.
352 | if (nconstant == 0)
353 | return 1;
354 | }
355 | }
356 | }
357 | }
358 |
359 | return 1;
360 | }
361 |
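
For reference, the NumPy semantics of the C kernels in this file (an illustrative sketch; in the package these kernels are reached through the Cython wrappers in covartools.pyx, and the baseline row used by the truncated exact variable-column check is assumed here to be row 0):

import numpy as np

X = np.random.rand(5, 3)
row = X.mean(axis=0)

Xi = X.copy(); Xi -= row  # _subtract_row_double: subtract row from each row of X, in place
X0 = X - row              # _subtract_row_double_copy: write the result to a new array

b = np.array([1, 0, 1, 1, 0], dtype=np.int32)
idx = np.flatnonzero(b)   # _bool_to_list: indices of the nonzero entries

tol = 1e-12
cols_exact = (np.abs(X - X[0]) > tol).any(axis=0).astype(np.int32)  # any row differs from row 0
cols_approx = (np.abs(X[0] - X[-1]) > tol).astype(np.int32)         # _variable_cols_double_approx: first vs. last row only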
--------------------------------------------------------------------------------
/variational/estimators/running_moments.py:
--------------------------------------------------------------------------------
1 | __author__ = 'noe'
2 |
3 | import warnings
4 | import numbers
5 | import numpy as np
6 | from .moments import moments_XX, moments_XXXY, moments_block
7 |
8 |
9 | class Moments(object):
10 |
11 | def __init__(self, w, sx, sy, Mxy):
12 | """
13 | Parameters
14 | ----------
15 | w : float
16 | statistical weight.
17 | w = \sum_t w_t
18 | In most cases, :math:`w_t=1`, and then w is just the number of samples that went into s1, S2.
19 | s : ndarray(n,)
20 | sum over samples:
21 | .. math:
22 | s = \sum_t w_t x_t
23 | M : ndarray(n, n)
24 | .. math:
25 | M = (X-s)^T (X-s)
26 | """
27 | self.w = float(w)
28 | self.sx = sx
29 | self.sy = sy
30 | self.Mxy = Mxy
31 |
32 | def copy(self):
33 | return Moments(self.w, self.sx.copy(), self.sy.copy(), self.Mxy.copy())
34 |
35 | def combine(self, other, mean_free=False):
36 | """
37 | References
38 | ----------
39 | [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
40 | """
41 | w1 = self.w
42 | w2 = other.w
43 | w = w1 + w2
44 | dsx = (w2/w1) * self.sx - other.sx  # equals w2 * (mean_x_1 - mean_x_2)
45 | dsy = (w2/w1) * self.sy - other.sy  # equals w2 * (mean_y_1 - mean_y_2)
46 | # update
47 | self.w = w1 + w2
48 | self.sx = self.sx + other.sx
49 | self.sy = self.sy + other.sy
50 | #
51 | if mean_free:
52 | self.Mxy += other.Mxy + (w1 / (w2 * w)) * np.outer(dsx, dsy)  # pairwise update of [1]
53 | else:
54 | self.Mxy += other.Mxy
55 | return self
56 |
57 | @property
58 | def mean_x(self):
59 | return self.sx / self.w
60 |
61 | @property
62 | def mean_y(self):
63 | return self.sy / self.w
64 |
65 | def covar(self, bessels_correction):
66 | """ Returns M / (w-1)
67 |
68 | Careful: The normalization w-1 assumes that we have counts as weights.
69 |
70 | """
71 | if bessels_correction:
72 | return self.Mxy / (self.w - 1)
73 | else:
74 | return self.Mxy / self.w
75 |
76 |
77 | class MomentsStorage(object):
78 | """
79 | """
80 |
81 | def __init__(self, nsave, remove_mean=False, rtol=1.5):
82 | """
83 | Parameters
84 | ----------
85 | rtol : float
86 | Decides when to merge two Moments. Ideally we would merge two Moments
87 | only when they carry equal statistical weight (i.e. equally many data
88 | points went into them). If chunks of equal weight are always added,
89 | this can be achieved with a binary tree: let M1 be the moment estimate
90 | from one chunk; two M1 are merged into an M2, two M2 into an M4, and
91 | so on. This way only log2(n_chunks) Moment estimates need to be
92 | stored.
93 | In practice, data may arrive in chunks of unequal length or weight, so
94 | we need a heuristic for deciding when two Moment estimates should be
95 | merged. This is the role of rtol.
96 |
97 | """
98 | self.nsave = nsave
99 | self.storage = []
100 | self.rtol = rtol
101 | self.remove_mean = remove_mean
102 |
103 | def _can_merge_tail(self):
104 | """ Checks if the two last list elements can be merged
105 | """
106 | if len(self.storage) < 2:
107 | return False
108 | return self.storage[-2].w <= self.storage[-1].w * self.rtol
109 |
110 | def store(self, moments):
111 | """ Store object X with weight w
112 | """
113 | if len(self.storage) == self.nsave: # merge if we must
114 | # print 'must merge'
115 | self.storage[-1].combine(moments, mean_free=self.remove_mean)
116 | else: # append otherwise
117 | # print 'append'
118 | self.storage.append(moments)
119 | # merge if possible
120 | while self._can_merge_tail():
121 | # print 'merge: ',self.storage
122 | M = self.storage.pop()
123 | # print 'pop last: ',self.storage
124 | self.storage[-1].combine(M, mean_free=self.remove_mean)
125 | # print 'merged: ',self.storage
126 |
127 | @property
128 | def moments(self):
129 | """
130 | """
131 | # collapse storage if necessary
132 | while len(self.storage) > 1:
133 | # print 'collapse'
134 | M = self.storage.pop()
135 | self.storage[-1].combine(M, mean_free=self.remove_mean)
136 | # print 'return first element'
137 | return self.storage[0]
138 |
139 |
140 | class RunningCovar(object):
141 | """ Running covariance estimator
142 |
143 | Estimator object that can be fed chunks of X and Y data, and
144 | that can generate on-the-fly estimates of mean, covariance, running sum
145 | and second moment matrix.
146 |
147 | Parameters
148 | ----------
149 | compute_XX : bool
150 | Estimate the covariance of X
151 | compute_XY : bool
152 | Estimate the cross-covariance of X and Y
153 | compute_YY : bool
154 | Estimate the covariance of Y
155 | remove_mean : bool
156 | Remove the data mean in the covariance estimation
157 | symmetrize : bool
158 | Use symmetric estimates with sum defined by sum_t x_t + y_t and
159 | second moment matrices defined by X'X + Y'Y and Y'X + X'Y.
160 | modify_data : bool
161 | If remove_mean=True, the mean will be removed in the input data,
162 | without creating an independent copy. This option is faster but should
163 | only be selected if the input data is not used elsewhere.
164 | sparse_mode : str
165 | one of:
166 | * 'dense' : always use dense mode
167 | * 'sparse' : always use sparse mode if possible
168 | * 'auto' : automatic
169 | nsave : int
170 | Depth of Moment storage. Moments computed from each chunk will be
171 | combined with Moments of similar statistical weight using the pairwise
172 | combination algorithm described in [1]_.
173 |
174 | References
175 | ----------
176 | .. [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
177 |
178 | """
179 |
180 | # NOTE: a separate Y storage would be needed to get the Y mean, but this is currently not stored.
181 | def __init__(self, compute_XX=True, compute_XY=False, compute_YY=False,
182 | remove_mean=False, symmetrize=False, sparse_mode='auto', modify_data=False, nsave=5):
183 | # check input
184 | if not compute_XX and not compute_XY:
185 | raise ValueError('One of compute_XX or compute_XY must be True.')
186 | if symmetrize and compute_YY:
187 | raise ValueError('Combining compute_YY and symmetrize=True is meaningless.')
188 | if symmetrize and not compute_XY:
189 | warnings.warn('symmetrize=True has no effect with compute_XY=False.')
190 | # storage
191 | self.compute_XX = compute_XX
192 | if compute_XX:
193 | self.storage_XX = MomentsStorage(nsave, remove_mean=remove_mean)
194 | self.compute_XY = compute_XY
195 | if compute_XY:
196 | self.storage_XY = MomentsStorage(nsave, remove_mean=remove_mean)
197 | self.compute_YY = compute_YY
198 | if compute_YY:
199 | self.storage_YY = MomentsStorage(nsave, remove_mean=remove_mean)
200 | # symmetry
201 | self.remove_mean = remove_mean
202 | self.symmetrize = symmetrize
203 | # flags
204 | self.sparse_mode = sparse_mode
205 | self.modify_data = modify_data
206 |
207 | def add(self, X, Y=None, weights=None):
208 | """
209 | Add trajectory to estimate.
210 |
211 | Parameters
212 | ----------
213 | X : ndarray(T, N)
214 | array of N time series.
215 | Y : ndarray(T, N)
216 | array of N time series, usually time shifted version of X.
217 | weights : None or float or ndarray(T,)
218 | weights assigned to each trajectory point. If None, all data points have weight one. If float,
219 | the same weight will be given to all data points. If ndarray, each data point is assigned a separate
220 | weight.
221 |
222 | """
223 |
224 | # check input
225 | T = X.shape[0]
226 | if Y is not None:
227 | assert Y.shape[0] == T, 'X and Y must have equal length'
228 | # Weights cannot be used for compute_YY:
229 | if weights is not None and self.compute_YY:
230 | raise ValueError('Use of weights is not implemented for compute_YY==True')
231 | if weights is not None:
232 | # Convert to array of length T if weights is a single number:
233 | if isinstance(weights, numbers.Real):
234 | weights = weights * np.ones(T, dtype=float)
235 | # Check appropriate length if weights is an array:
236 | elif isinstance(weights, np.ndarray):
237 | assert weights.shape[0] == T, 'weights and X must have equal length'
238 | else:
239 | raise TypeError('weights is of type %s, must be a number or ndarray'%(type(weights)))
240 | # estimate and add to storage
241 | if self.compute_XX and not self.compute_XY:
242 | w, s_X, C_XX = moments_XX(X, remove_mean=self.remove_mean, weights=weights, sparse_mode=self.sparse_mode, modify_data=self.modify_data)
243 | self.storage_XX.store(Moments(w, s_X, s_X, C_XX))
244 | elif self.compute_XX and self.compute_XY:
245 | assert Y is not None
246 | w, s_X, s_Y, C_XX, C_XY = moments_XXXY(X, Y, remove_mean=self.remove_mean, symmetrize=self.symmetrize,
247 | weights=weights, sparse_mode=self.sparse_mode, modify_data=self.modify_data)
248 | # make copy in order to get independently mergeable moments
249 | self.storage_XX.store(Moments(w, s_X, s_X, C_XX))
250 | self.storage_XY.store(Moments(w, s_X, s_Y, C_XY))
251 | else: # compute block
252 | assert Y is not None
253 | assert not self.symmetrize
254 | w, s, C = moments_block(X, Y, remove_mean=self.remove_mean,
255 | sparse_mode=self.sparse_mode, modify_data=self.modify_data)
256 | # make copy in order to get independently mergeable moments
257 | self.storage_XX.store(Moments(w, s[0], s[0], C[0, 0]))
258 | self.storage_XY.store(Moments(w, s[0], s[1], C[0, 1]))
259 | self.storage_YY.store(Moments(w, s[1], s[1], C[1, 1]))
260 |
261 | def sum_X(self):
262 | if self.compute_XX:
263 | return self.storage_XX.moments.sx
264 | elif self.compute_XY:
265 | return self.storage_XY.moments.sx
266 | else:
267 | raise RuntimeError('sum_X is not available')
268 |
269 | def sum_Y(self):
270 | if self.compute_XY:
271 | return self.storage_XY.moments.sy
272 | elif self.compute_YY:
273 | return self.storage_YY.moments.sy
274 | else:
275 | raise RuntimeError('sum_Y is not available')
276 |
277 | def mean_X(self):
278 | if self.compute_XX:
279 | return self.storage_XX.moments.mean_x
280 | elif self.compute_XY:
281 | return self.storage_XY.moments.mean_x
282 | else:
283 | raise RuntimeError('mean_X is not available')
284 |
285 | def mean_Y(self):
286 | if self.compute_XY:
287 | return self.storage_XY.moments.mean_y
288 | elif self.compute_YY:
289 | return self.storage_YY.moments.mean_y
290 | else:
291 | raise RuntimeError('mean_Y is not available')
292 |
293 | def weight_XX(self):
294 | return self.storage_XX.moments.w
295 |
296 | def weight_XY(self):
297 | return self.storage_XY.moments.w
298 |
299 | def weight_YY(self):
300 | return self.storage_YY.moments.w
301 |
302 | def moments_XX(self):
303 | return self.storage_XX.moments.Mxy
304 |
305 | def moments_XY(self):
306 | return self.storage_XY.moments.Mxy
307 |
308 | def moments_YY(self):
309 | return self.storage_YY.moments.Mxy
310 |
311 | def cov_XX(self, bessels_correction):
312 | return self.storage_XX.moments.covar(bessels_correction=bessels_correction)
313 |
314 | def cov_XY(self, bessels_correction):
315 | return self.storage_XY.moments.covar(bessels_correction=bessels_correction)
316 |
317 | def cov_YY(self, bessels_correction):
318 | return self.storage_YY.moments.covar(bessels_correction=bessels_correction)
319 |
320 |
321 | def running_covar(xx=True, xy=False, yy=False, remove_mean=False, symmetrize=False, sparse_mode='auto',
322 | modify_data=False, nsave=5):
323 | """ Returns a running covariance estimator
324 |
325 | Returns an estimator object that can be fed chunks of X and Y data, and
326 | that can generate on-the-fly estimates of mean, covariance, running sum
327 | and second moment matrix.
328 |
329 | Parameters
330 | ----------
331 | xx : bool
332 | Estimate the covariance of X
333 | xy : bool
334 | Estimate the cross-covariance of X and Y
335 | yy : bool
336 | Estimate the covariance of Y
337 | remove_mean : bool
338 | Remove the data mean in the covariance estimation
339 | symmetrize : bool
340 | Use symmetric estimates with sum defined by sum_t x_t + y_t and
341 | second moment matrices defined by X'X + Y'Y and Y'X + X'Y.
342 | modify_data : bool
343 | If remove_mean=True, the mean will be removed in the input data,
344 | without creating an independent copy. This option is faster but should
345 | only be selected if the input data is not used elsewhere.
346 | sparse_mode : str
347 | one of:
348 | * 'dense' : always use dense mode
349 | * 'sparse' : always use sparse mode if possible
350 | * 'auto' : automatic
351 | nsave : int
352 | Depth of Moment storage. Moments computed from each chunk will be
353 | combined with Moments of similar statistical weight using the pairwise
354 | combination algorithm described in [1]_.
355 |
356 | References
357 | ----------
358 | .. [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
359 |
360 | """
361 | return RunningCovar(compute_XX=xx, compute_XY=xy, compute_YY=yy, sparse_mode=sparse_mode, modify_data=modify_data,
362 | remove_mean=remove_mean, symmetrize=symmetrize, nsave=nsave)
363 |
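
A usage sketch for this module, mirroring the access pattern of the unit tests (array sizes and chunking are illustrative): stream data through running_covar in chunks and compare against the direct in-memory computation; the same data also verifies the pairwise Moments.combine update.

import numpy as np
from variational.estimators.running_moments import running_covar, Moments

# stream a trajectory through the estimator in chunks
X = np.random.rand(10000, 3)
cc = running_covar(xx=True, remove_mean=True)
for i in range(0, X.shape[0], 1000):
    cc.add(X[i:i+1000])

X0 = X - X.mean(axis=0)  # direct, in-memory reference
assert np.allclose(cc.sum_X(), X.sum(axis=0))
assert np.allclose(cc.moments_XX(), np.dot(X0.T, X0))
assert np.allclose(cc.cov_XX(bessels_correction=True), np.dot(X0.T, X0) / (len(X) - 1))

# combining the moments of two halves must reproduce the moments of all data
A, B = X[:4000], X[4000:]
A0, B0 = A - A.mean(axis=0), B - B.mean(axis=0)
m = Moments(len(A), A.sum(axis=0), A.sum(axis=0), np.dot(A0.T, A0))
m.combine(Moments(len(B), B.sum(axis=0), B.sum(axis=0), np.dot(B0.T, B0)), mean_free=True)
assert np.allclose(m.Mxy, np.dot(X0.T, X0))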
--------------------------------------------------------------------------------
/variational/_version.py:
--------------------------------------------------------------------------------
1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by githubs download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.15 (https://github.com/warner/python-versioneer)
10 |
11 | import errno
12 | import os
13 | import re
14 | import subprocess
15 | import sys
16 |
17 |
18 | def get_keywords():
19 | # these strings will be replaced by git during git-archive.
20 | # setup.py/versioneer.py will grep for the variable names, so they must
21 | # each be defined on a line of their own. _version.py will just call
22 | # get_keywords().
23 | git_refnames = " (HEAD -> master)"
24 | git_full = "491361e8e271df0e28b34549ab32e22546e18ce9"
25 | keywords = {"refnames": git_refnames, "full": git_full}
26 | return keywords
27 |
28 |
29 | class VersioneerConfig:
30 | pass
31 |
32 |
33 | def get_config():
34 | # these strings are filled in when 'setup.py versioneer' creates
35 | # _version.py
36 | cfg = VersioneerConfig()
37 | cfg.VCS = "git"
38 | cfg.style = "pep440"
39 | cfg.tag_prefix = ""
40 | cfg.parentdir_prefix = "variational-"
41 | cfg.versionfile_source = "variational/_version.py"
42 | cfg.verbose = False
43 | return cfg
44 |
45 |
46 | class NotThisMethod(Exception):
47 | pass
48 |
49 |
50 | LONG_VERSION_PY = {}
51 | HANDLERS = {}
52 |
53 |
54 | def register_vcs_handler(vcs, method): # decorator
55 | def decorate(f):
56 | if vcs not in HANDLERS:
57 | HANDLERS[vcs] = {}
58 | HANDLERS[vcs][method] = f
59 | return f
60 | return decorate
61 |
62 |
63 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
64 | assert isinstance(commands, list)
65 | p = None
66 | for c in commands:
67 | try:
68 | dispcmd = str([c] + args)
69 | # remember shell=False, so use git.cmd on windows, not just git
70 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
71 | stderr=(subprocess.PIPE if hide_stderr
72 | else None))
73 | break
74 | except EnvironmentError:
75 | e = sys.exc_info()[1]
76 | if e.errno == errno.ENOENT:
77 | continue
78 | if verbose:
79 | print("unable to run %s" % dispcmd)
80 | print(e)
81 | return None
82 | else:
83 | if verbose:
84 | print("unable to find command, tried %s" % (commands,))
85 | return None
86 | stdout = p.communicate()[0].strip()
87 | if sys.version_info[0] >= 3:
88 | stdout = stdout.decode()
89 | if p.returncode != 0:
90 | if verbose:
91 | print("unable to run %s (error)" % dispcmd)
92 | return None
93 | return stdout
94 |
95 |
96 | def versions_from_parentdir(parentdir_prefix, root, verbose):
97 | # Source tarballs conventionally unpack into a directory that includes
98 | # both the project name and a version string.
99 | dirname = os.path.basename(root)
100 | if not dirname.startswith(parentdir_prefix):
101 | if verbose:
102 | print("guessing rootdir is '%s', but '%s' doesn't start with "
103 | "prefix '%s'" % (root, dirname, parentdir_prefix))
104 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
105 | return {"version": dirname[len(parentdir_prefix):],
106 | "full-revisionid": None,
107 | "dirty": False, "error": None}
108 |
109 |
110 | @register_vcs_handler("git", "get_keywords")
111 | def git_get_keywords(versionfile_abs):
112 | # the code embedded in _version.py can just fetch the value of these
113 | # keywords. When used from setup.py, we don't want to import _version.py,
114 | # so we do it with a regexp instead. This function is not used from
115 | # _version.py.
116 | keywords = {}
117 | try:
118 | f = open(versionfile_abs, "r")
119 | for line in f.readlines():
120 | if line.strip().startswith("git_refnames ="):
121 | mo = re.search(r'=\s*"(.*)"', line)
122 | if mo:
123 | keywords["refnames"] = mo.group(1)
124 | if line.strip().startswith("git_full ="):
125 | mo = re.search(r'=\s*"(.*)"', line)
126 | if mo:
127 | keywords["full"] = mo.group(1)
128 | f.close()
129 | except EnvironmentError:
130 | pass
131 | return keywords
132 |
133 |
134 | @register_vcs_handler("git", "keywords")
135 | def git_versions_from_keywords(keywords, tag_prefix, verbose):
136 | if not keywords:
137 | raise NotThisMethod("no keywords at all, weird")
138 | refnames = keywords["refnames"].strip()
139 | if refnames.startswith("$Format"):
140 | if verbose:
141 | print("keywords are unexpanded, not using")
142 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
143 | refs = set([r.strip() for r in refnames.strip("()").split(",")])
144 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
145 | # just "foo-1.0". If we see a "tag: " prefix, prefer those.
146 | TAG = "tag: "
147 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
148 | if not tags:
149 | # Either we're using git < 1.8.3, or there really are no tags. We use
150 | # a heuristic: assume all version tags have a digit. The old git %d
151 | # expansion behaves like git log --decorate=short and strips out the
152 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish
153 | # between branches and tags. By ignoring refnames without digits, we
154 | # filter out many common branch names like "release" and
155 | # "stabilization", as well as "HEAD" and "master".
156 | tags = set([r for r in refs if re.search(r'\d', r)])
157 | if verbose:
158 | print("discarding '%s', no digits" % ",".join(refs-tags))
159 | if verbose:
160 | print("likely tags: %s" % ",".join(sorted(tags)))
161 | for ref in sorted(tags):
162 | # sorting will prefer e.g. "2.0" over "2.0rc1"
163 | if ref.startswith(tag_prefix):
164 | r = ref[len(tag_prefix):]
165 | if verbose:
166 | print("picking %s" % r)
167 | return {"version": r,
168 | "full-revisionid": keywords["full"].strip(),
169 | "dirty": False, "error": None
170 | }
171 | # no suitable tags, so version is "0+unknown", but full hex is still there
172 | if verbose:
173 | print("no suitable tags, using unknown + full revision id")
174 | return {"version": "0+unknown",
175 | "full-revisionid": keywords["full"].strip(),
176 | "dirty": False, "error": "no suitable tags"}
177 |
178 |
179 | @register_vcs_handler("git", "pieces_from_vcs")
180 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
181 | # this runs 'git' from the root of the source tree. This only gets called
182 | # if the git-archive 'subst' keywords were *not* expanded, and
183 | # _version.py hasn't already been rewritten with a short version string,
184 | # meaning we're inside a checked out source tree.
185 |
186 | if not os.path.exists(os.path.join(root, ".git")):
187 | if verbose:
188 | print("no .git in %s" % root)
189 | raise NotThisMethod("no .git directory")
190 |
191 | GITS = ["git"]
192 | if sys.platform == "win32":
193 | GITS = ["git.cmd", "git.exe"]
194 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
195 | # if there are no tags, this yields HEX[-dirty] (no NUM)
196 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
197 | "--always", "--long"],
198 | cwd=root)
199 | # --long was added in git-1.5.5
200 | if describe_out is None:
201 | raise NotThisMethod("'git describe' failed")
202 | describe_out = describe_out.strip()
203 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
204 | if full_out is None:
205 | raise NotThisMethod("'git rev-parse' failed")
206 | full_out = full_out.strip()
207 |
208 | pieces = {}
209 | pieces["long"] = full_out
210 | pieces["short"] = full_out[:7] # maybe improved later
211 | pieces["error"] = None
212 |
213 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
214 | # TAG might have hyphens.
215 | git_describe = describe_out
216 |
217 | # look for -dirty suffix
218 | dirty = git_describe.endswith("-dirty")
219 | pieces["dirty"] = dirty
220 | if dirty:
221 | git_describe = git_describe[:git_describe.rindex("-dirty")]
222 |
223 | # now we have TAG-NUM-gHEX or HEX
224 |
225 | if "-" in git_describe:
226 | # TAG-NUM-gHEX
227 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
228 | if not mo:
229 | # unparseable. Maybe git-describe is misbehaving?
230 | pieces["error"] = ("unable to parse git-describe output: '%s'"
231 | % describe_out)
232 | return pieces
233 |
234 | # tag
235 | full_tag = mo.group(1)
236 | if not full_tag.startswith(tag_prefix):
237 | if verbose:
238 | fmt = "tag '%s' doesn't start with prefix '%s'"
239 | print(fmt % (full_tag, tag_prefix))
240 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
241 | % (full_tag, tag_prefix))
242 | return pieces
243 | pieces["closest-tag"] = full_tag[len(tag_prefix):]
244 |
245 | # distance: number of commits since tag
246 | pieces["distance"] = int(mo.group(2))
247 |
248 | # commit: short hex revision ID
249 | pieces["short"] = mo.group(3)
250 |
251 | else:
252 | # HEX: no tags
253 | pieces["closest-tag"] = None
254 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
255 | cwd=root)
256 | pieces["distance"] = int(count_out) # total number of commits
257 |
258 | return pieces
259 |
260 |
261 | def plus_or_dot(pieces):
262 | if "+" in pieces.get("closest-tag", ""):
263 | return "."
264 | return "+"
265 |
266 |
267 | def render_pep440(pieces):
268 | # now build up version string, with post-release "local version
269 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
270 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
271 |
272 | # exceptions:
273 | # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
274 |
275 | if pieces["closest-tag"]:
276 | rendered = pieces["closest-tag"]
277 | if pieces["distance"] or pieces["dirty"]:
278 | rendered += plus_or_dot(pieces)
279 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
280 | if pieces["dirty"]:
281 | rendered += ".dirty"
282 | else:
283 | # exception #1
284 | rendered = "0+untagged.%d.g%s" % (pieces["distance"],
285 | pieces["short"])
286 | if pieces["dirty"]:
287 | rendered += ".dirty"
288 | return rendered
289 |
290 |
291 | def render_pep440_pre(pieces):
292 | # TAG[.post.devDISTANCE] . No -dirty
293 |
294 | # exceptions:
295 | # 1: no tags. 0.post.devDISTANCE
296 |
297 | if pieces["closest-tag"]:
298 | rendered = pieces["closest-tag"]
299 | if pieces["distance"]:
300 | rendered += ".post.dev%d" % pieces["distance"]
301 | else:
302 | # exception #1
303 | rendered = "0.post.dev%d" % pieces["distance"]
304 | return rendered
305 |
306 |
307 | def render_pep440_post(pieces):
308 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that
309 | # .dev0 sorts backwards (a dirty tree will appear "older" than the
310 | # corresponding clean one), but you shouldn't be releasing software with
311 | # -dirty anyways.
312 |
313 | # exceptions:
314 | # 1: no tags. 0.postDISTANCE[.dev0]
315 |
316 | if pieces["closest-tag"]:
317 | rendered = pieces["closest-tag"]
318 | if pieces["distance"] or pieces["dirty"]:
319 | rendered += ".post%d" % pieces["distance"]
320 | if pieces["dirty"]:
321 | rendered += ".dev0"
322 | rendered += plus_or_dot(pieces)
323 | rendered += "g%s" % pieces["short"]
324 | else:
325 | # exception #1
326 | rendered = "0.post%d" % pieces["distance"]
327 | if pieces["dirty"]:
328 | rendered += ".dev0"
329 | rendered += "+g%s" % pieces["short"]
330 | return rendered
331 |
332 |
333 | def render_pep440_old(pieces):
334 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty.
335 |
336 | # exceptions:
337 | # 1: no tags. 0.postDISTANCE[.dev0]
338 |
339 | if pieces["closest-tag"]:
340 | rendered = pieces["closest-tag"]
341 | if pieces["distance"] or pieces["dirty"]:
342 | rendered += ".post%d" % pieces["distance"]
343 | if pieces["dirty"]:
344 | rendered += ".dev0"
345 | else:
346 | # exception #1
347 | rendered = "0.post%d" % pieces["distance"]
348 | if pieces["dirty"]:
349 | rendered += ".dev0"
350 | return rendered
351 |
352 |
353 | def render_git_describe(pieces):
354 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty
355 | # --always'
356 |
357 | # exceptions:
358 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix)
359 |
360 | if pieces["closest-tag"]:
361 | rendered = pieces["closest-tag"]
362 | if pieces["distance"]:
363 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
364 | else:
365 | # exception #1
366 | rendered = pieces["short"]
367 | if pieces["dirty"]:
368 | rendered += "-dirty"
369 | return rendered
370 |
371 |
372 | def render_git_describe_long(pieces):
373 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty
374 | # --always -long'. The distance/hash is unconditional.
375 |
376 | # exceptions:
377 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix)
378 |
379 | if pieces["closest-tag"]:
380 | rendered = pieces["closest-tag"]
381 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
382 | else:
383 | # exception #1
384 | rendered = pieces["short"]
385 | if pieces["dirty"]:
386 | rendered += "-dirty"
387 | return rendered
388 |
389 |
390 | def render(pieces, style):
391 | if pieces["error"]:
392 | return {"version": "unknown",
393 | "full-revisionid": pieces.get("long"),
394 | "dirty": None,
395 | "error": pieces["error"]}
396 |
397 | if not style or style == "default":
398 | style = "pep440" # the default
399 |
400 | if style == "pep440":
401 | rendered = render_pep440(pieces)
402 | elif style == "pep440-pre":
403 | rendered = render_pep440_pre(pieces)
404 | elif style == "pep440-post":
405 | rendered = render_pep440_post(pieces)
406 | elif style == "pep440-old":
407 | rendered = render_pep440_old(pieces)
408 | elif style == "git-describe":
409 | rendered = render_git_describe(pieces)
410 | elif style == "git-describe-long":
411 | rendered = render_git_describe_long(pieces)
412 | else:
413 | raise ValueError("unknown style '%s'" % style)
414 |
415 | return {"version": rendered, "full-revisionid": pieces["long"],
416 | "dirty": pieces["dirty"], "error": None}
417 |
418 |
419 | def get_versions():
420 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
421 | # __file__, we can work backwards from there to the root. Some
422 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
423 | # case we can only use expanded keywords.
424 |
425 | cfg = get_config()
426 | verbose = cfg.verbose
427 |
428 | try:
429 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
430 | verbose)
431 | except NotThisMethod:
432 | pass
433 |
434 | try:
435 | root = os.path.realpath(__file__)
436 | # versionfile_source is the relative path from the top of the source
437 | # tree (where the .git directory might live) to this file. Invert
438 | # this to find the root from __file__.
439 | for i in cfg.versionfile_source.split('/'):
440 | root = os.path.dirname(root)
441 | except NameError:
442 | return {"version": "0+unknown", "full-revisionid": None,
443 | "dirty": None,
444 | "error": "unable to find root of source tree"}
445 |
446 | try:
447 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
448 | return render(pieces, cfg.style)
449 | except NotThisMethod:
450 | pass
451 |
452 | try:
453 | if cfg.parentdir_prefix:
454 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
455 | except NotThisMethod:
456 | pass
457 |
458 | return {"version": "0+unknown", "full-revisionid": None,
459 | "dirty": None,
460 | "error": "unable to compute version"}
461 |
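
For orientation, the pep440 renderer above can be exercised directly. The pieces dict below is hypothetical and describes a dirty working tree three commits past a tag 0.1:

from variational._version import render

pieces = {"closest-tag": "0.1", "distance": 3, "short": "491361e",
          "long": "491361e8e271df0e28b34549ab32e22546e18ce9",
          "dirty": True, "error": None}
print(render(pieces, "pep440")["version"])  # -> 0.1+3.g491361e.dirty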
--------------------------------------------------------------------------------
/variational/estimators/tests/test_moments.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import unittest
3 | import numpy as np
4 | from .. import moments
5 |
6 | __author__ = 'noe'
7 |
8 | class TestMoments(unittest.TestCase):
9 |
10 | @classmethod
11 | def setUpClass(cls):
12 | cls.X_2 = np.random.rand(10000, 2)
13 | cls.Y_2 = np.random.rand(10000, 2)
14 | # dense data
15 | cls.X_10 = np.random.rand(10000, 10)
16 | cls.Y_10 = np.random.rand(10000, 10)
17 | cls.X_100 = np.random.rand(10000, 100)
18 | cls.Y_100 = np.random.rand(10000, 100)
19 | # sparse zero data
20 | cls.X_10_sparsezero = np.zeros((10000, 10))
21 | cls.X_10_sparsezero[:, 0] = cls.X_10[:, 0]
22 | cls.Y_10_sparsezero = np.zeros((10000, 10))
23 | cls.Y_10_sparsezero[:, 0] = cls.Y_10[:, 0]
24 | cls.X_100_sparsezero = np.zeros((10000, 100))
25 | cls.X_100_sparsezero[:, :10] = cls.X_100[:, :10]
26 | cls.Y_100_sparsezero = np.zeros((10000, 100))
27 | cls.Y_100_sparsezero[:, :10] = cls.Y_100[:, :10]
28 | # sparse const data
29 | cls.X_10_sparseconst = np.ones((10000, 10))
30 | cls.X_10_sparseconst[:, 0] = cls.X_10[:, 0]
31 | cls.Y_10_sparseconst = 2*np.ones((10000, 10))
32 | cls.Y_10_sparseconst[:, 0] = cls.Y_10[:, 0]
33 | cls.X_100_sparseconst = np.ones((10000, 100))
34 | cls.X_100_sparseconst[:, :10] = cls.X_100[:, :10]
35 | cls.Y_100_sparseconst = 2*np.ones((10000, 100))
36 | cls.Y_100_sparseconst[:, :10] = cls.Y_100[:, :10]
37 | # boolean data
38 | cls.Xb_2 = np.random.randint(0, 2, size=(10000, 2))
39 | cls.Xb_2 = cls.Xb_2.astype(np.bool)
40 | cls.Xb_10 = np.random.randint(0, 2, size=(10000, 10))
41 | cls.Xb_10 = cls.Xb_10.astype(np.bool)
42 | cls.Xb_10_sparsezero = np.zeros((10000, 10), dtype=np.bool)
43 | cls.Xb_10_sparsezero[:, 0] = cls.Xb_10[:, 0]
44 | # generate weights:
45 | cls.weights = np.random.rand(10000)
46 | # Set the lag time for time-lagged tests:
47 | cls.lag = 50
48 |
49 | return cls
50 |
51 | def _test_moments_X(self, X, remove_mean=False, sparse_mode='auto', weights=None):
52 | # proposed solution
53 | w, s_X, C_XX = moments.moments_XX(X, remove_mean=remove_mean, modify_data=False,
54 | sparse_mode=sparse_mode, weights=weights)
55 | # reference
56 | X = X.astype(np.float64)
57 | if weights is not None:
58 | X1 = weights[:, None] * X
59 | w = weights.sum()
60 | else:
61 | X1 = X
62 | w = X.shape[0]
63 | s_X_ref = X1.sum(axis=0)
64 | if remove_mean:
65 | X = X - (1.0 / w) * s_X_ref
66 | if weights is not None:
67 | X1 = weights[:, None] * X
68 | else:
69 | X1 = X
70 | C_XX_ref = np.dot(X1.T, X)
71 | # test
72 | assert np.allclose(s_X, s_X_ref)
73 | assert np.allclose(C_XX, C_XX_ref)
74 |
75 | def test_moments_X(self):
76 | # simple test, dense
77 | self._test_moments_X(self.X_10, remove_mean=False, sparse_mode='dense')
78 | self._test_moments_X(self.X_100, remove_mean=False, sparse_mode='dense')
79 | # mean-free, dense
80 | self._test_moments_X(self.X_10, remove_mean=True, sparse_mode='dense')
81 | self._test_moments_X(self.X_100, remove_mean=True, sparse_mode='dense')
82 | # weighted test, simple, dense:
83 | self._test_moments_X(self.X_10, remove_mean=False, sparse_mode='dense', weights=self.weights)
84 | self._test_moments_X(self.X_100, remove_mean=False, sparse_mode='dense', weights=self.weights)
85 | # weighted test, mean-free, dense:
86 | self._test_moments_X(self.X_10, remove_mean=True, sparse_mode='dense', weights=self.weights)
87 | self._test_moments_X(self.X_100, remove_mean=True, sparse_mode='dense', weights=self.weights)
88 |
89 | def test_moments_X_sparsezero(self):
90 | # simple test, sparse
91 | self._test_moments_X(self.X_10_sparsezero, remove_mean=False, sparse_mode='sparse')
92 | self._test_moments_X(self.X_100_sparsezero, remove_mean=False, sparse_mode='sparse')
93 | # mean-free, sparse
94 | self._test_moments_X(self.X_10_sparsezero, remove_mean=True, sparse_mode='sparse')
95 | self._test_moments_X(self.X_100_sparsezero, remove_mean=True, sparse_mode='sparse')
96 | # weighted, sparse
97 | self._test_moments_X(self.X_10_sparsezero, remove_mean=False, sparse_mode='sparse', weights=self.weights)
98 | self._test_moments_X(self.X_100_sparsezero, remove_mean=False, sparse_mode='sparse', weights=self.weights)
99 | # weighted, mean-free, sparse
100 | self._test_moments_X(self.X_10_sparsezero, remove_mean=True, sparse_mode='sparse', weights=self.weights)
101 | self._test_moments_X(self.X_100_sparsezero, remove_mean=True, sparse_mode='sparse', weights=self.weights)
102 |
103 | def test_moments_X_sparseconst(self):
104 | # simple test, sparse
105 | self._test_moments_X(self.X_10_sparseconst, remove_mean=False, sparse_mode='sparse')
106 | self._test_moments_X(self.X_100_sparseconst, remove_mean=False, sparse_mode='sparse')
107 | # mean-free, sparse
108 | self._test_moments_X(self.X_10_sparseconst, remove_mean=True, sparse_mode='sparse')
109 | self._test_moments_X(self.X_100_sparseconst, remove_mean=True, sparse_mode='sparse')
110 |         # weighted, sparse:
111 |         self._test_moments_X(self.X_10_sparseconst, remove_mean=False, sparse_mode='sparse', weights=self.weights)
112 |         self._test_moments_X(self.X_100_sparseconst, remove_mean=False, sparse_mode='sparse', weights=self.weights)
113 |         # weighted, mean-free, sparse:
114 |         self._test_moments_X(self.X_10_sparseconst, remove_mean=True, sparse_mode='sparse', weights=self.weights)
115 |         self._test_moments_X(self.X_100_sparseconst, remove_mean=True, sparse_mode='sparse', weights=self.weights)
116 |
117 | def test_boolean_moments(self):
118 | # standard tests
119 | self._test_moments_X(self.Xb_10, remove_mean=False, sparse_mode='dense')
120 | self._test_moments_X(self.Xb_10, remove_mean=True, sparse_mode='dense')
121 | self._test_moments_X(self.Xb_10_sparsezero, remove_mean=False, sparse_mode='sparse')
122 | self._test_moments_X(self.Xb_10_sparsezero, remove_mean=True, sparse_mode='sparse')
123 | # test integer recovery
124 | Cxx_ref = np.dot(self.Xb_10.astype(np.int64).T, self.Xb_10.astype(np.int64)) # integer
125 | s_X_ref = np.sum(self.Xb_10, axis=0)
126 | w, s_X, Cxx = moments.moments_XX(self.Xb_10, remove_mean=False, modify_data=False, sparse_mode='dense')
127 | s_X = np.round(s_X).astype(np.int64)
128 | Cxx = np.round(Cxx).astype(np.int64)
129 | assert np.array_equal(s_X, s_X_ref)
130 | assert np.array_equal(Cxx, Cxx_ref)
131 |
132 |
133 | def _test_moments_XY(self, X, Y, symmetrize=False, remove_mean=False, sparse_mode='auto', weights=None):
134 | w1, s_X, s_Y, C_XX, C_XY = moments.moments_XXXY(X, Y, remove_mean=remove_mean, modify_data=False,
135 | symmetrize=symmetrize, sparse_mode=sparse_mode,
136 | weights=weights)
137 | # reference
138 | T = X.shape[0]
139 | if weights is not None:
140 | X1 = weights[:, None] * X
141 | Y1 = weights[:, None] * Y
142 | else:
143 | X1 = X
144 | Y1 = Y
145 | s_X_ref = X1.sum(axis=0)
146 | s_Y_ref = Y1.sum(axis=0)
147 | if symmetrize:
148 | s_X_ref = s_X_ref + s_Y_ref
149 | s_Y_ref = s_X_ref
150 | if weights is not None:
151 | w = 2 * np.sum(weights)
152 | else:
153 | w = 2 * T
154 | else:
155 | if weights is not None:
156 | w = np.sum(weights)
157 | else:
158 | w = T
159 | if remove_mean:
160 | X = X - s_X_ref/float(w)
161 | Y = Y - s_Y_ref/float(w)
162 | if weights is not None:
163 | X1 = weights[:, None] * X
164 | Y1 = weights[:, None] * Y
165 | else:
166 | X1 = X
167 | Y1 = Y
168 | if symmetrize:
169 | C_XX_ref = np.dot(X1.T, X) + np.dot(Y1.T, Y)
170 | C_XY_ref = np.dot(X1.T, Y) + np.dot(Y1.T, X)
171 | else:
172 | C_XX_ref = np.dot(X1.T, X)
173 | C_XY_ref = np.dot(X1.T, Y)
174 | # test
175 | assert np.allclose(w1, w)
176 | assert np.allclose(s_X, s_X_ref)
177 | assert np.allclose(s_Y, s_Y_ref)
178 | assert np.allclose(C_XX, C_XX_ref)
179 | assert np.allclose(C_XY, C_XY_ref)
180 |
181 | def test_moments_XY(self):
182 | # simple test, dense
183 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=False, remove_mean=False, sparse_mode='dense')
184 | self._test_moments_XY(self.X_100, self.Y_10, symmetrize=False, remove_mean=False, sparse_mode='dense')
185 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=False, remove_mean=False, sparse_mode='dense')
186 | # mean-free, dense
187 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=False, remove_mean=True, sparse_mode='dense')
188 | self._test_moments_XY(self.X_100, self.Y_10, symmetrize=False, remove_mean=True, sparse_mode='dense')
189 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=False, remove_mean=True, sparse_mode='dense')
190 |
191 | def test_moments_XY_weighted(self):
192 | # weighted test, dense
193 | self._test_moments_XY(self.X_10, self.X_10, symmetrize=False, remove_mean=False,
194 | sparse_mode='dense', weights=self.weights)
195 | self._test_moments_XY(self.X_100, self.X_100, symmetrize=False, remove_mean=False,
196 | sparse_mode='dense', weights=self.weights)
197 | # weighted test, mean-free, dense
198 | self._test_moments_XY(self.X_10, self.X_10, symmetrize=False, remove_mean=True,
199 | sparse_mode='dense', weights=self.weights)
200 | self._test_moments_XY(self.X_100, self.X_100, symmetrize=False, remove_mean=True,
201 | sparse_mode='dense', weights=self.weights)
202 |
203 | def test_moments_XY_sym(self):
204 | # simple test, dense, symmetric
205 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=False, sparse_mode='dense')
206 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=False, sparse_mode='dense')
207 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=False, sparse_mode='dense')
208 | # mean-free, dense, symmetric
209 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=True, sparse_mode='dense')
210 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=True, sparse_mode='dense')
211 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=True, sparse_mode='dense')
212 |
213 | def test_moments_XY_weighted_sym(self):
214 | # simple test, dense, symmetric
215 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=False, sparse_mode='dense',
216 | weights=self.weights)
217 |         self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=False, sparse_mode='dense',
218 |                               weights=self.weights)
219 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=False, sparse_mode='dense',
220 | weights=self.weights)
221 | # mean-free, dense, symmetric
222 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=True, sparse_mode='dense',
223 | weights=self.weights)
224 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=True, sparse_mode='dense',
225 | weights=self.weights)
226 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=True, sparse_mode='dense',
227 | weights=self.weights)
228 |
229 | def test_moments_XY_sparsezero(self):
230 |         # simple test, sparse
231 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=False,
232 | sparse_mode='sparse')
233 | self._test_moments_XY(self.X_100_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=False,
234 | sparse_mode='sparse')
235 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=False, remove_mean=False,
236 | sparse_mode='sparse')
237 |         # mean-free, sparse
238 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=True,
239 | sparse_mode='sparse')
240 | self._test_moments_XY(self.X_100_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=True,
241 | sparse_mode='sparse')
242 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=False, remove_mean=True,
243 |                               sparse_mode='sparse')
244 |
245 | def test_moments_XY_weighted_sparsezero(self):
246 | # weighted test, sparse
247 | self._test_moments_XY(self.X_10_sparsezero, self.X_10_sparsezero, symmetrize=False, remove_mean=False,
248 | sparse_mode='sparse', weights=self.weights)
249 | self._test_moments_XY(self.X_100_sparsezero, self.X_100_sparsezero, symmetrize=False, remove_mean=False,
250 | sparse_mode='sparse', weights=self.weights)
251 | # weighted test, mean-free, sparse
252 | self._test_moments_XY(self.X_10_sparsezero, self.X_10_sparsezero, symmetrize=False, remove_mean=True,
253 | sparse_mode='sparse', weights=self.weights)
254 | self._test_moments_XY(self.X_100_sparsezero, self.X_100_sparsezero, symmetrize=False, remove_mean=True,
255 | sparse_mode='sparse', weights=self.weights)
256 |
257 | def test_moments_XY_sym_sparsezero(self):
258 | # simple test, sparse, symmetric
259 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=False,
260 | sparse_mode='sparse')
261 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=False,
262 | sparse_mode='sparse')
263 | # mean-free, sparse, symmetric
264 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=True,
265 | sparse_mode='sparse')
266 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=True,
267 | sparse_mode='sparse')
268 |
269 | def test_moments_XY_weighted_sym_sparsezero(self):
270 | # simple test, sparse, symmetric
271 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=False,
272 | sparse_mode='sparse', weights=self.weights)
273 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=False,
274 | sparse_mode='sparse', weights=self.weights)
275 | # mean-free, sparse, symmetric
276 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=True,
277 | sparse_mode='sparse', weights=self.weights)
278 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=True,
279 | sparse_mode='sparse', weights=self.weights)
280 |
281 | def test_moments_XY_sparseconst(self):
282 |         # simple test, sparse
283 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=False,
284 | sparse_mode='sparse')
285 | self._test_moments_XY(self.X_100_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=False,
286 | sparse_mode='sparse')
287 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=False, remove_mean=False,
288 | sparse_mode='sparse')
289 |         # mean-free, sparse
290 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=True,
291 | sparse_mode='sparse')
292 | self._test_moments_XY(self.X_100_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=True,
293 | sparse_mode='sparse')
294 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=False, remove_mean=True,
295 |                               sparse_mode='sparse')
296 |
297 | def test_moments_XY_weighted_sparseconst(self):
298 | # weighted test, sparse
299 | self._test_moments_XY(self.X_10_sparseconst, self.X_10_sparseconst, symmetrize=False, remove_mean=False,
300 | sparse_mode='sparse', weights=self.weights)
301 | self._test_moments_XY(self.X_100_sparseconst, self.X_100_sparseconst, symmetrize=False, remove_mean=False,
302 | sparse_mode='sparse', weights=self.weights)
303 | # weighted test, mean-free, sparse
304 | self._test_moments_XY(self.X_10_sparseconst, self.X_10_sparseconst, symmetrize=False, remove_mean=True,
305 | sparse_mode='sparse', weights=self.weights)
306 | self._test_moments_XY(self.X_100_sparseconst, self.X_100_sparseconst, symmetrize=False, remove_mean=True,
307 | sparse_mode='sparse', weights=self.weights)
308 |
309 | def test_moments_XY_sym_sparseconst(self):
310 | # simple test, sparse, symmetric
311 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=False,
312 | sparse_mode='sparse')
313 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=False,
314 | sparse_mode='sparse')
315 | # mean-free, sparse, symmetric
316 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=True,
317 | sparse_mode='sparse')
318 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=True,
319 | sparse_mode='sparse')
320 |
321 | def test_moments_XY_weighted_sym_sparseconst(self):
322 | # simple test, sparse, symmetric
323 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=False,
324 | sparse_mode='sparse', weights=self.weights)
325 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=False,
326 | sparse_mode='sparse', weights=self.weights)
327 | # mean-free, sparse, symmetric
328 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=True,
329 | sparse_mode='sparse', weights=self.weights)
330 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=True,
331 | sparse_mode='sparse', weights=self.weights)
332 |
333 |
334 | if __name__ == "__main__":
335 | unittest.main()
--------------------------------------------------------------------------------
/docs/Updating_Formulas.lyx:
--------------------------------------------------------------------------------
1 | #LyX 2.1 created this file. For more info see http://www.lyx.org/
2 | \lyxformat 474
3 | \begin_document
4 | \begin_header
5 | \textclass article
6 | \use_default_options true
7 | \begin_modules
8 | theorems-ams
9 | eqs-within-sections
10 | figs-within-sections
11 | \end_modules
12 | \maintain_unincluded_children false
13 | \language english
14 | \language_package default
15 | \inputencoding auto
16 | \fontencoding global
17 | \font_roman default
18 | \font_sans default
19 | \font_typewriter default
20 | \font_math auto
21 | \font_default_family default
22 | \use_non_tex_fonts false
23 | \font_sc false
24 | \font_osf false
25 | \font_sf_scale 100
26 | \font_tt_scale 100
27 | \graphics default
28 | \default_output_format default
29 | \output_sync 0
30 | \bibtex_command default
31 | \index_command default
32 | \paperfontsize default
33 | \spacing single
34 | \use_hyperref false
35 | \papersize default
36 | \use_geometry false
37 | \use_package amsmath 1
38 | \use_package amssymb 1
39 | \use_package cancel 1
40 | \use_package esint 1
41 | \use_package mathdots 1
42 | \use_package mathtools 1
43 | \use_package mhchem 1
44 | \use_package stackrel 1
45 | \use_package stmaryrd 1
46 | \use_package undertilde 1
47 | \cite_engine basic
48 | \cite_engine_type default
49 | \biblio_style plain
50 | \use_bibtopic false
51 | \use_indices false
52 | \paperorientation portrait
53 | \suppress_date false
54 | \justification true
55 | \use_refstyle 1
56 | \index Index
57 | \shortcut idx
58 | \color #008000
59 | \end_index
60 | \secnumdepth 3
61 | \tocdepth 3
62 | \paragraph_separation indent
63 | \paragraph_indentation default
64 | \quotes_language english
65 | \papercolumns 1
66 | \papersides 1
67 | \paperpagestyle default
68 | \tracking_changes false
69 | \output_changes false
70 | \html_math_output 0
71 | \html_css_as_file 0
72 | \html_be_strict false
73 | \end_header
74 |
75 | \begin_body
76 |
77 | \begin_layout Title
78 | Updating Formulas for Correlations
79 | \end_layout
80 |
81 | \begin_layout Standard
82 | Here, we collect updating formulas for correlations between time series:
83 | \end_layout
84 |
85 | \begin_layout Section
86 | General Time Series
87 | \end_layout
88 |
89 | \begin_layout Standard
90 | The standard case is to compute the correlation between two time series
91 |
92 | \begin_inset Formula $x_{t}(i),\, t=1,\ldots,T,\, i=1,\ldots,N$
93 | \end_inset
94 |
95 | , and
96 | \begin_inset Formula $y_{t}(i),\, t=1,\ldots,T,\, i=1,\ldots,N$
97 | \end_inset
98 |
99 | .
100 | Additionally, it is possible that weights are given for each time step,
101 | i.e.
102 | there are non-negative numbers
103 | \begin_inset Formula $w_{t},\, t=1,\ldots,T$
104 | \end_inset
105 |
106 | .
107 | Our goal then is to compute the (unnormalized) correlation
108 | \end_layout
109 |
110 | \begin_layout Standard
111 | \begin_inset Formula
112 | \begin{eqnarray*}
113 | C(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right),
114 | \end{eqnarray*}
115 |
116 | \end_inset
117 |
118 | where
119 | \begin_inset Formula $\bar{x}(i),\,\bar{y}(j)$
120 | \end_inset
121 |
122 | denote the weighted mean values of the time series, i.e.
123 | \end_layout
124 |
125 | \begin_layout Standard
126 | \begin_inset Formula
127 | \begin{eqnarray*}
128 | \overline{x}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}x_{t}(i),\\
129 | W_{T} & = & \sum_{t=1}^{T}w_{t}.
130 | \end{eqnarray*}
131 |
132 | \end_inset
133 |
134 | We are interested in computing the correlation
135 | \begin_inset Formula $C(i,j)$
136 | \end_inset
137 |
138 | in chunks.
139 | That means we split the data into, say, two blocks
140 | \begin_inset Formula $x_{t}(i),\, t=1,\ldots,T_{1}$
141 | \end_inset
142 |
143 | , and
144 | \begin_inset Formula $x_{t}(i),\, t=T_{1}+1,\ldots,T_{2}=T,$
145 | \end_inset
146 |
147 | and the same for
148 | \begin_inset Formula $y_{t}$
149 | \end_inset
150 |
151 | .
152 | We would then like to compute the correlation of each chunk separately,
153 | sum them up and add a correction term.
154 | Let us introduce the following notation
155 | \end_layout
156 |
157 | \begin_layout Standard
158 | \begin_inset Formula
159 | \begin{eqnarray}
160 | \overline{x_{T_{1}}}(i) & = & \frac{1}{W_{T_{1}}}\sum_{t=1}^{T_{1}}w_{t}x_{t}(i),\label{eq:chunk_definitions_0}\\
161 | \overline{x_{T_{2}}}(i) & = & \frac{1}{W_{T_{2}}}\sum_{t=T_{1}+1}^{T_{2}}w_{t}x_{t}(i)\label{eq:chunk_definitions_1}\\
162 | W_{T_{1}} & = & \sum_{t=1}^{T_{1}}w_{t}\label{eq:chunk_definitions_2}\\
163 | W_{T_{2}} & = & \sum_{t=T_{1}+1}^{T_{2}}w_{t}\label{eq:chunk_definitions_3}\\
164 | S_{T_{1}}(i,j) & = & \sum_{t=1}^{T_{1}}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:chunk_definitions_4}\\
165 | S_{T_{2}}(i,j) & = & \sum_{t=T_{1}+1}^{T_{2}}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right).\label{eq:chunk_definitions_5}
166 | \end{eqnarray}
167 |
168 | \end_inset
169 |
170 | Now, the calculations from section
171 | \begin_inset CommandInset ref
172 | LatexCommand ref
173 | reference "sec:Proofs"
174 |
175 | \end_inset
176 |
177 | show that the full correlation
178 | \begin_inset Formula $C(i,j)$
179 | \end_inset
180 |
181 | can be computed as
182 | \end_layout
183 |
184 | \begin_layout Standard
185 | \begin_inset Formula
186 | \begin{eqnarray}
187 | C(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{y_{T_{2}}}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:Update_Standard}
188 | \end{eqnarray}
189 |
190 | \end_inset
191 |
192 |
193 | \end_layout
194 |
195 | \begin_layout Section
196 | Symmetrization
197 | \end_layout
198 |
199 | \begin_layout Standard
200 | In some cases, a symmetric correlation matrix is desired, for example if
201 |
202 | \begin_inset Formula $y_{t}$
203 | \end_inset
204 |
205 | is a time-lagged version of
206 | \begin_inset Formula $x_{t}$
207 | \end_inset
208 |
209 | .
210 | This can be achieved by redefining the means
211 | \end_layout
212 |
213 | \begin_layout Standard
214 | \begin_inset Formula
215 | \begin{eqnarray*}
216 | \overline{x}(i) & = & \frac{1}{2W_{T}}\left[\sum_{t=1}^{T}w_{t}x_{t}(i)+\sum_{t=1}^{T}w_{t}y_{t}(i)\right],
217 | \end{eqnarray*}
218 |
219 | \end_inset
220 |
221 | and defining the symmetrized correlation by
222 | \end_layout
223 |
224 | \begin_layout Standard
225 | \begin_inset Formula
226 | \begin{eqnarray*}
227 | C_{s}(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{x}(j)\right)\\
228 | & & +\sum_{t=1}^{T}w_{t}\left(y_{t}(i)-\overline{x}(i)\right)\left(x_{t}(j)-\overline{x}(j)\right).
229 | \end{eqnarray*}
230 |
231 | \end_inset
232 |
233 | Using the analogues of Eqs.
234 |
235 | \begin_inset CommandInset ref
236 | LatexCommand eqref
237 | reference "eq:chunk_definitions_0"
238 |
239 | \end_inset
240 |
241 | -
242 | \begin_inset CommandInset ref
243 | LatexCommand eqref
244 | reference "eq:chunk_definitions_5"
245 |
246 | \end_inset
247 |
248 | , we arrive at the updating formula
249 | \end_layout
250 |
251 | \begin_layout Standard
252 | \begin_inset Formula
253 | \begin{eqnarray}
254 | C_{s}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{2W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{x_{T_{2}}}(j)-\overline{x_{T_{1}}}(j)\right),\label{eq:Update_Sym}
255 | \end{eqnarray}
256 |
257 | \end_inset
258 |
259 | see again section
260 | \begin_inset CommandInset ref
261 | LatexCommand ref
262 | reference "sec:Proofs"
263 |
264 | \end_inset
265 |
266 | .
267 | Please note that for time-lagged data,
268 | \begin_inset Formula $T_{1}$
269 | \end_inset
270 |
271 | and
272 | \begin_inset Formula $T_{2}$
273 | \end_inset
274 |
275 | must be changed to
276 | \begin_inset Formula $T_{1}-\tau$
277 | \end_inset
278 |
279 | and
280 | \begin_inset Formula $T_{2}-\tau$
281 | \end_inset
282 |
283 | , such that the first
284 | \begin_inset Formula $\tau$
285 | \end_inset
286 |
287 | steps of every chunk only appear in
288 | \begin_inset Formula $x_{t}$
289 | \end_inset
290 |
291 | , while the last
292 | \begin_inset Formula $\tau$
293 | \end_inset
294 |
295 | steps only appear in
296 | \begin_inset Formula $y_{t}$
297 | \end_inset
298 |
299 | .
300 | \end_layout
301 |
302 | \begin_layout Section
303 | Time-lagged Data without Symmetrization
304 | \end_layout
305 |
306 | \begin_layout Standard
307 | Suppose we are given a time series
308 | \begin_inset Formula $\tilde{x}_{t}(i),\, t=1,\ldots,T+\tau$
309 | \end_inset
310 |
311 | , and define the time-lagged time-series
312 | \begin_inset Formula $x_{t}(i)=\tilde{x}_{t}(i),\, t=1,\ldots,T$
313 | \end_inset
314 |
315 | and
316 | \begin_inset Formula $y_{t}(i)=\tilde{x}_{t+\tau}(i),\, t=1,\ldots,T$
317 | \end_inset
318 |
319 | .
320 | If we do not wish to symmetrize the correlations, it seems most consistent
321 | to use the weights of the first
322 | \begin_inset Formula $T$
323 | \end_inset
324 |
325 | steps,
326 | \begin_inset Formula $w_{t},\, t=1,\ldots,T$
327 | \end_inset
328 |
329 | , only.
330 | The means are thus defined by
331 | \end_layout
332 |
333 | \begin_layout Standard
334 | \begin_inset Formula
335 | \begin{eqnarray*}
336 | \overline{x}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}x_{t}(i)\\
337 | \overline{y}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}y_{t}(i)\\
338 | & = & \frac{1}{W_{T}}\sum_{t=\tau}^{T+\tau}w_{t-\tau}\tilde{x}_{t}\\
339 | W_{T} & = & \sum_{t=1}^{T}w_{t}.
340 | \end{eqnarray*}
341 |
342 | \end_inset
343 |
344 | The asymmetric correlation then becomes
345 | \end_layout
346 |
347 | \begin_layout Standard
348 | \begin_inset Formula
349 | \begin{eqnarray*}
350 | C_{a}(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right).
351 | \end{eqnarray*}
352 |
353 | \end_inset
354 |
355 | Using the analogues of Eqs.
356 |
357 | \begin_inset CommandInset ref
358 | LatexCommand eqref
359 | reference "eq:chunk_definitions_0"
360 |
361 | \end_inset
362 |
363 | -
364 | \begin_inset CommandInset ref
365 | LatexCommand eqref
366 | reference "eq:chunk_definitions_5"
367 |
368 | \end_inset
369 |
370 | , we find the updating formula for time-lagged data to be the same as Eq.
371 |
372 | \begin_inset CommandInset ref
373 | LatexCommand eqref
374 | reference "eq:Update_Standard"
375 |
376 | \end_inset
377 |
378 | :
379 | \end_layout
380 |
381 | \begin_layout Standard
382 | \begin_inset Formula
383 | \begin{eqnarray}
384 | C_{a}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{y_{T_{2}}}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:Update_Asym}
385 | \end{eqnarray}
386 |
387 | \end_inset
388 |
389 |
390 | \end_layout
391 |
392 | \begin_layout Section
393 | Conclusions
394 | \end_layout
395 |
396 | \begin_layout Standard
397 | We have shown that mean-free correlations can be easily computed in chunks
398 | for arbitrary time series
399 | \begin_inset Formula $x_{t}$
400 | \end_inset
401 |
402 | ,
403 | \begin_inset Formula $y_{t}$
404 | \end_inset
405 |
406 | , including time-dependent weights.
407 | Moreover, symmetrized mean-free correlations can be computed for arbitrary
408 | time-series, which can also be time-lagged copies.
409 | Finally, we found that for time-lagged time series which are not supposed
410 | to be symmetrized, it seems to make sense to compute the means using the
411 | weights of the first
412 | \begin_inset Formula $T$
413 | \end_inset
414 |
415 | steps.
416 | \end_layout
417 |
418 | \begin_layout Section
419 | Proofs
420 | \end_layout
421 |
422 | \begin_layout Standard
423 | \begin_inset CommandInset label
424 | LatexCommand label
425 | name "sec:Proofs"
426 |
427 | \end_inset
428 |
429 |
430 | \end_layout
431 |
432 | \begin_layout Standard
433 | First, we determine an expression for the full correlation in terms of the
434 | partial sums
435 | \begin_inset Formula $S_{T_{1}},\, S_{T_{2}}$
436 | \end_inset
437 |
438 | and a correction term for all cases considered here.
439 | We will see then that the correction term can be expressed in the forms
440 | given in Eqs.
441 |
442 | \begin_inset CommandInset ref
443 | LatexCommand eqref
444 | reference "eq:Update_Standard"
445 |
446 | \end_inset
447 |
448 | ,
449 | \begin_inset CommandInset ref
450 | LatexCommand eqref
451 | reference "eq:Update_Sym"
452 |
453 | \end_inset
454 |
455 | and
456 | \begin_inset CommandInset ref
457 | LatexCommand eqref
458 | reference "eq:Update_Asym"
459 |
460 | \end_inset
461 |
462 | .
463 | Let us consider the standard case:
464 | \end_layout
465 |
466 | \begin_layout Standard
467 | \begin_inset Formula
468 | \begin{eqnarray}
469 | C(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\label{eq:Splitting_Cij_0}\\
470 | & = & \sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\nonumber \\
471 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\label{eq:Splitting_Cij_1}\\
472 | & = & \sum_{t=1}^{T_{1}}w_{t}\left(\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)-\gamma_{1}^{x}(i)\right)\left(\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{y}(j)\right)\nonumber \\
473 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)-\gamma_{2}^{x}(i)\right)\left(\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{y}(j)\right),\label{eq:Splitting_Cij_2}
474 | \end{eqnarray}
475 |
476 | \end_inset
477 |
478 | where
479 | \begin_inset Formula $\gamma_{k}^{x}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i)$
480 | \end_inset
481 |
482 | and
483 | \begin_inset Formula $\gamma_{k}^{y}(i)=\overline{y}(i)-\overline{y_{T_{k}}}(i)$
484 | \end_inset
485 |
486 | .
487 | We proceed to find
488 | \end_layout
489 |
490 | \begin_layout Standard
491 | \begin_inset Formula
492 | \begin{eqnarray}
493 | C(i,j) & = & \sum_{t=1}^{T_{1}}w_{t}\left[\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{x}(i)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)\right.\nonumber \\
494 |  & & \left.-\gamma_{1}^{y}(j)\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)+\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)\right]\nonumber \\
495 |  & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left[\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{x}(i)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)\right.\nonumber \\
496 |  & & \left.-\gamma_{2}^{y}(j)\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)+\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)\right]\label{eq:Splitting_Cij_3}\\
497 |  & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j).\label{eq:Splitting_Cij_4}
498 | \end{eqnarray}
499 |
500 | \end_inset
501 |
502 | Here, the mixed terms vanish, because the weighted sums of the centered data over each chunk are zero by definition of the chunk means. It remains to deal with the term:
503 | \end_layout
504 |
505 | \begin_layout Standard
506 | \begin_inset Formula
507 | \begin{eqnarray}
508 | W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j) & = & W_{T_{1}}\left(\overline{x}(i)\overline{y}(j)-\overline{x}(i)\overline{y_{T_{1}}}(j)-\overline{x_{T_{1}}}(i)\overline{y}(j)+\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)\right)\nonumber \\
509 | & & +W_{T_{2}}\left(\overline{x}(i)\overline{y}(j)-\overline{x}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{2}}}(i)\overline{y}(j)+\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\right)\label{eq:Gamma_Reformulation_0}\\
510 | & = & \left(W_{T_{1}}+W_{T_{2}}\right)\overline{x}(i)\overline{y}(j)+W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)\nonumber \\
511 | & & +W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)-\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\nonumber \\
512 | & & -\overline{y}(j)\left(W_{T_{1}}\overline{x_{T_{1}}}(i)+W_{T_{2}}\overline{x_{T_{2}}}(i)\right).\label{eq:Gamma_Reformulation_1}
513 | \end{eqnarray}
514 |
515 | \end_inset
516 |
517 | Now, we use that
518 | \begin_inset Formula $W_{T_{1}}\overline{x_{T_{1}}}(i)+W_{T_{2}}\overline{x_{T_{2}}}(i)=W_{T}\overline{x}(i)$
519 | \end_inset
520 |
521 | to find:
522 | \end_layout
523 |
524 | \begin_layout Standard
525 | \begin_inset Formula
526 | \begin{eqnarray}
527 |  & = & W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\nonumber \\
528 |  & & -\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\label{eq:Gamma_Reformulation_2}\\
529 |  & = & \frac{1}{W_{T}}\left[W_{T}\left(W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\right)\right]\nonumber \\
530 | & & -\frac{1}{W_{T}}\left[W_{T}\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\right]\label{eq:Gamma_Reformulation_3}\\
531 | & = & \frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left[\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{1}}}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{2}}}(i)\overline{y_{T_{1}}}(j)\right].\label{eq:Gamma_Reformulation_4}
532 | \end{eqnarray}
533 |
534 | \end_inset
535 |
536 | This completes the proof of Eq.
537 |
538 | \begin_inset CommandInset ref
539 | LatexCommand eqref
540 | reference "eq:Update_Standard"
541 |
542 | \end_inset
543 |
544 | .
545 | For the symmetric case, the procedure from Eqs.
546 |
547 | \begin_inset CommandInset ref
548 | LatexCommand eqref
549 | reference "eq:Splitting_Cij_0"
550 |
551 | \end_inset
552 |
553 | -
554 | \begin_inset CommandInset ref
555 | LatexCommand eqref
556 | reference "eq:Splitting_Cij_4"
557 |
558 | \end_inset
559 |
560 | can be repeated to come up with the expression
561 | \end_layout
562 |
563 | \begin_layout Standard
564 | \begin_inset Formula
565 | \begin{eqnarray*}
566 | C_{s}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\left(\gamma_{1}(i)\gamma_{1}(j)+\gamma_{1}(j)\gamma_{1}(i)\right)\\
567 | & & +W_{T_{2}}\left(\gamma_{2}(i)\gamma_{2}(j)+\gamma_{2}(j)\gamma_{2}(i)\right),
568 | \end{eqnarray*}
569 |
570 | \end_inset
571 |
572 | where
573 | \begin_inset Formula $\gamma_{k}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i)$
574 | \end_inset
575 |
576 | .
577 | Then, the steps of Eqs.
578 |
579 | \begin_inset CommandInset ref
580 | LatexCommand eqref
581 | reference "eq:Gamma_Reformulation_0"
582 |
583 | \end_inset
584 |
585 | -
586 | \begin_inset CommandInset ref
587 | LatexCommand eqref
588 | reference "eq:Gamma_Reformulation_3"
589 |
590 | \end_inset
591 |
592 | can be repeated in the same way.
593 | For the asymmetric case, Eqs.
594 |
595 | \begin_inset CommandInset ref
596 | LatexCommand eqref
597 | reference "eq:Splitting_Cij_0"
598 |
599 | \end_inset
600 |
601 | -
602 | \begin_inset CommandInset ref
603 | LatexCommand eqref
604 | reference "eq:Splitting_Cij_4"
605 |
606 | \end_inset
607 |
608 | yield the expression
609 | \end_layout
610 |
611 | \begin_layout Standard
612 | \begin_inset Formula
613 | \begin{eqnarray*}
614 | C_{a}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)\\
615 | & & -\gamma_{1}^{x}(i)\sum_{t=1}^{T_{1}}w_{t}\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{y}(j)\sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\\
616 | & & -\gamma_{2}^{x}(i)\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{y}(j)\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right).
617 | \end{eqnarray*}
618 |
619 | \end_inset
620 |
621 | Here, we have used
622 | \begin_inset Formula $\gamma_{k}^{x}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i),\,\gamma_{k}^{y}(i)=\overline{y}(i)-\overline{y_{T_{k}}}(i)$
623 | \end_inset
624 |
625 | .
626 | The cross-terms cancel out and the expression
627 | \begin_inset Formula $W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)$
628 | \end_inset
629 |
630 | can be reformulated through Eqs.
631 |
632 | \begin_inset CommandInset ref
633 | LatexCommand eqref
634 | reference "eq:Gamma_Reformulation_0"
635 |
636 | \end_inset
637 |
638 | -
639 | \begin_inset CommandInset ref
640 | LatexCommand eqref
641 | reference "eq:Gamma_Reformulation_4"
642 |
643 | \end_inset
644 |
645 | to end up with Eq.
646 |
647 | \begin_inset CommandInset ref
648 | LatexCommand eqref
649 | reference "eq:Update_Asym"
650 |
651 | \end_inset
652 |
653 | .
654 | \end_layout
655 |
656 | \end_body
657 | \end_document
658 |
--------------------------------------------------------------------------------
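
The chunked updating formulas above are easy to check numerically. A minimal
sketch (assuming only numpy; chunk_stats is an ad-hoc helper, not part of the
package), verifying Eq. (eq:Update_Standard) for the weighted, non-symmetrized
case:

    import numpy as np

    T1, T, N = 400, 1000, 5
    x, y = np.random.rand(T, N), np.random.rand(T, N)
    w = np.random.rand(T)  # non-negative weights w_t

    def chunk_stats(xc, yc, wc):
        # weight sum, weighted means and mean-free correlation of one chunk
        W = wc.sum()
        xm = (wc[:, None] * xc).sum(axis=0) / W
        ym = (wc[:, None] * yc).sum(axis=0) / W
        S = np.dot((wc[:, None] * (xc - xm)).T, yc - ym)
        return W, xm, ym, S

    # reference: C(i,j) = sum_t w_t (x_t(i) - xbar(i)) (y_t(j) - ybar(j))
    W_T, xbar, ybar, C_ref = chunk_stats(x, y, w)

    # per-chunk statistics, combined via the correction term of Eq. (eq:Update_Standard)
    W1, xm1, ym1, S1 = chunk_stats(x[:T1], y[:T1], w[:T1])
    W2, xm2, ym2, S2 = chunk_stats(x[T1:], y[T1:], w[T1:])
    C = S1 + S2 + (W1 * W2 / W_T) * np.outer(xm2 - xm1, ym2 - ym1)
    assert np.allclose(C, C_ref)
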
/variational/estimators/moments.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | Data Types
4 | ----------
5 | The standard data type for covariance computations is
6 | float64, because double precision (but not single precision) is
7 | usually sufficient to compute the long sums involved in covariance
8 | matrix computations. Integer types are avoided even if the data is integer,
9 | because the BLAS matrix multiplication is very fast with floats, but very
10 | slow with integers. If X is of boolean type (0/1), the standard data type
11 | is float32, because this is sufficient to represent numbers up to 2^23
12 | without rounding error, which is usually enough, as the
13 | largest element in np.dot(X.T, X) can then be T, the number of data points.
14 |
15 | Efficient Use
16 | -------------
17 | In order to get speedup with boolean input, remove_mean=False is required.
18 | Note that you can still do TICA that way.
19 |
20 | Sparsification
21 | --------------
22 | We aim at computing covariance matrices. For large (T x N) data matrices X, Y,
23 | the bottleneck of this operation is computing the matrix product np.dot(X.T, X),
24 | or np.dot(X.T, Y), with algorithmic complexity O(N^2 T). If X, Y have zero or
25 | constant columns, we can reduce N and thus reduce the algorithmic complexity.
26 |
27 | However, the BLAS matrix product used by np.dot() is highly cache-optimized -
28 | the data is accessed in a way that most operations are done in cache, making the
29 | calculation extremely efficient. Thus, even if X, Y have zero or constant columns,
30 | it does not always pay off to interfere with this operation - on the one hand by
31 | spending compute time to determine the sparsity of the matrices, on the other
32 | hand by using slicing operations that reduce the algorithmic complexity, but may
33 | destroy the order of the data and thus produce more cache misses.
34 |
35 | In order to make an informed decision, we have compared the runtime of the following
36 | operations, using matrices of various sizes (T x N) and different degrees
37 | of sparsity (measured on an Intel Core i7 with OS X 10.10.1):
38 |
39 | 1. Compute np.dot(X.T, X)
40 | 2. Compute np.dot(X[:, sel].T, X[:, sel]) where sel selects the nonzero columns
41 | 3. Make a copy X0 = X[:, sel].copy() and then compute np.dot(X0.T, X0)
42 |
43 | It may seem that option 3 is not a good idea, because we make the extra effort of
44 | copying the matrix. However, the new copy will have its data ordered sequentially
45 | in memory, and is therefore better prepared for the subsequent matrix product,
46 | which is far more expensive than the copy but cache-optimized.
47 |
48 | We have empirically found that:
49 |
50 | * Making a copy before running np.dot (option 3) is in most cases better than
51 | using the dot product on sliced arrays (option 2). Exceptions are when the
52 | data is extremely sparse, such that only a few columns are selected.
53 | * Copying and subselecting columns (option 3) is only faster than the full
54 |   dot product (option 1) if 50% or fewer of the columns are selected. This
55 |   observation is roughly independent of N.
56 | * The observations above are valid for matrices (T x N) that are sufficiently
57 | large. We assume that "sufficiently large" means that they don't fully fit
58 | in the cache. For small matrices, the trends are less clear and different
59 | rules may apply.
60 |
61 | In order to optimize covariance calculation for large matrices, we therefore
62 | take the following actions:
63 |
64 |    1. Given the matrix size of X (and Y), determine the minimum number of columns
65 |       that need to be constant in order to use sparse computation.
66 |    2. Efficiently determine the sparsity of X (and Y). Give up as soon as the
67 |       number of constant column candidates drops below the minimum number, to
68 |       avoid wasting time on the decision.
69 |    3. Subselect the desired columns and copy the data to a new array X0 (Y0).
70 |    4. Run the operation on the new array X0 (Y0), including in-place subtraction
71 |       of the mean if needed.
72 |
73 | """
74 | from __future__ import absolute_import
75 |
76 | __author__ = 'noe'
77 |
78 | import math, sys, numbers, warnings
79 | import numpy as np
80 | from .covar_c import covartools
81 |
82 |
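# A rough timing sketch for the three products compared in the module
# docstring above (_benchmark_sparse_dot is an illustrative helper, not used
# by the estimators; absolute numbers depend on the BLAS build and cache).
def _benchmark_sparse_dot(T=10000, N=1000, frac=0.5, repeats=3):
    import timeit
    X = np.zeros((T, N))
    nsel = int(frac * N)  # number of variable (nonzero) columns
    X[:, :nsel] = np.random.rand(T, nsel)
    sel = np.arange(nsel)
    t1 = min(timeit.repeat(lambda: np.dot(X.T, X), number=1, repeat=repeats))
    t2 = min(timeit.repeat(lambda: np.dot(X[:, sel].T, X[:, sel]), number=1, repeat=repeats))

    def _copy_then_dot():
        X0 = X[:, sel].copy()  # sequential memory layout for the BLAS call
        return np.dot(X0.T, X0)

    t3 = min(timeit.repeat(_copy_then_dot, number=1, repeat=repeats))
    return t1, t2, t3  # options 1, 2 and 3 from the docstring

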
83 | def _is_zero(x):
84 | """ Returns True if x is numerically 0 or an array with 0's. """
85 | if x is None:
86 | return True
87 | if isinstance(x, numbers.Number):
88 | return x == 0.0
89 | if isinstance(x, np.ndarray):
90 | return np.all(x == 0)
91 | return False
92 |
93 |
94 | def _sparsify(X, remove_mean=False, modify_data=False, sparse_mode='auto', sparse_tol=0.0):
95 | """ Determines the sparsity of X and returns a selected sub-matrix
96 |
97 |     Only conducts sparsification if the number of constant columns reaches a
98 |     heuristic, size-dependent minimum (min_const_col_number, see the code below).
99 |
100 | Parameters
101 | ----------
102 | X : ndarray
103 | data matrix
104 | remove_mean : bool
105 | True: remove column mean from the data, False: don't remove mean.
106 | modify_data : bool
107 | If remove_mean=True, the mean will be removed in the data matrix X,
108 | without creating an independent copy. This option is faster but might
109 | lead to surprises because your input array is changed.
110 | sparse_mode : str
111 | one of:
112 | * 'dense' : always use dense mode
113 | * 'sparse' : always use sparse mode if possible
114 | * 'auto' : automatic
115 |
116 | Returns
117 | -------
118 |     X0 : ndarray
119 |         Either X itself (if not sufficiently sparse), or the sub-matrix
120 |         X[:, mask] containing only the variable columns
121 | mask : ndarray(N, dtype=bool) or None
122 | Bool selection array that indicates which columns of X were selected for
123 | X0, i.e. X0 = X[:, mask]. mask is None if no sparse selection was made.
124 |     xconst : ndarray(N-m) or None
125 |         Constant column values outside the sparse selection, i.e.
126 |         X[i, ~mask] = xconst for any row i; None if no sparse selection was made.
127 |
128 | """
129 | if sparse_mode.lower() == 'sparse':
130 | min_const_col_number = 0 # enforce sparsity. A single constant column will lead to sparse treatment
131 | elif sparse_mode.lower() == 'dense':
132 | min_const_col_number = X.shape[1] + 1 # never use sparsity
133 | else:
134 | if remove_mean and not modify_data: # in this case we have to copy the data anyway, and can be permissive
135 | min_const_col_number = max(0.1 * X.shape[1], 50)
136 | else:
137 | # This is a rough heuristic to choose a minimum column number for which sparsity may pay off.
138 | # This heuristic is good for large number of samples, i.e. it may be inadequate for small matrices X.
139 | if X.shape[1] < 250:
140 | min_const_col_number = X.shape[1] - 0.25 * X.shape[1]
141 | elif X.shape[1] < 1000:
142 | min_const_col_number = X.shape[1] - (0.5 * X.shape[1] - 100)
143 | else:
144 | min_const_col_number = X.shape[1] - (0.8 * X.shape[1] - 400)
145 |
146 | if X.shape[1] > min_const_col_number:
147 | mask = covartools.variable_cols(X, tol=sparse_tol, min_constant=min_const_col_number) # bool vector
148 | nconst = len(np.where(~mask)[0])
149 | if nconst > min_const_col_number:
150 | xconst = X[0, ~mask]
151 | X = X[:, mask] # sparsify
152 | else:
153 | xconst = None
154 | mask = None
155 | else:
156 | xconst = None
157 | mask = None
158 |
159 |     return X, mask, xconst  # mask and xconst are None if not sparse
160 |
161 |
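# A small usage sketch for the contract documented in _sparsify above
# (_demo_sparsify is illustrative only; it assumes the compiled covartools
# extension is available). With sparse_mode='sparse', a single constant
# column already triggers the sparse path, and (X0, mask, xconst) together
# represent X without loss.
def _demo_sparsify():
    X = np.random.rand(1000, 4)
    X[:, 2] = 5.0  # one constant column
    X0, mask, xconst = _sparsify(X, sparse_mode='sparse')
    assert X0.shape == (1000, 3) and not mask[2] and xconst[0] == 5.0
    # reconstruct the full matrix from the sparse representation
    X_rec = np.empty_like(X)
    X_rec[:, mask] = X0
    X_rec[:, ~mask] = xconst
    assert np.allclose(X_rec, X)

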
162 | def _sparsify_pair(X, Y, remove_mean=False, modify_data=False, symmetrize=False, sparse_mode='auto', sparse_tol=0.0):
163 |     """ Applies _sparsify to a pair of data matrices X, Y and decides whether
164 |     the sparse treatment pays off for the combined computation. """
165 | T = X.shape[0]
166 | N = math.sqrt(X.shape[1] * Y.shape[1])
167 | # check each data set separately for sparsity.
168 | X0, mask_X, xconst = _sparsify(X, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
169 | Y0, mask_Y, yconst = _sparsify(Y, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
170 | # if we have nonzero constant columns and the number of samples is too small, do not treat as
171 | # sparse, because then the const-specialized dot product function doesn't pay off.
172 | is_const = not (_is_zero(xconst) and _is_zero(yconst))
173 | if is_const and (symmetrize or not remove_mean) and 10*T < N:
174 | return X, None, None, Y, None, None
175 | else:
176 | return X0, mask_X, xconst, Y0, mask_Y, yconst
177 |
178 |
179 | def _copy_convert(X, const=None, remove_mean=False, copy=True):
180 | """ Makes a copy or converts the data type if needed
181 |
182 | Copies the data and converts the data type if unsuitable for covariance
183 | calculation. The standard data type for covariance computations is
184 |     float64, because double precision (but not single precision) is
185 |     usually sufficient to compute the long sums involved in covariance
186 |     matrix computations. Integer types are avoided even if the data is integer,
187 |     because the BLAS matrix multiplication is very fast with floats, but very
188 |     slow with integers. If X is of boolean type (0/1), the standard data type
189 |     is float32, because this is sufficient to represent numbers up to 2^23
190 |     without rounding error, which is usually enough, as the
191 |     largest element in np.dot(X.T, X) can then be T, the number of data points.
192 |
193 | Parameters
194 | ----------
195 | remove_mean : bool
196 | If True, will enforce float64 even if the input is boolean
197 | copy : bool
198 | If True, enforces a copy even if the data type doesn't require it.
199 |
200 |     Returns
201 |     -------
202 | X : ndarray
203 | copy or reference to X if no copy was needed.
204 | const : ndarray or None
205 | copy or reference to const if no copy was needed.
206 |
207 | """
208 | # determine type
209 | dtype = np.float64 # default: convert to float64 in order to avoid cancellation errors
210 | if X.dtype.kind == 'b' and X.shape[0] < 2**23 and not remove_mean:
211 | dtype = np.float32 # convert to float32 if we can represent all numbers
212 | # copy/convert if needed
213 | if X.dtype not in (np.float64, dtype): # leave as float64 (conversion is expensive), otherwise convert to dtype
214 | X = X.astype(dtype, order='C')
215 | if const is not None:
216 | const = const.astype(dtype, order='C')
217 | elif copy:
218 | X = X.copy(order='C')
219 | if const is not None:
220 | const = const.copy(order='C')
221 |
222 | return X, const
223 |
224 |
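# A quick sketch of the dtype policy implemented in _copy_convert above
# (_demo_copy_convert is illustrative only): boolean input stays in float32
# as long as T < 2**23 and no mean is removed; other inputs are promoted to
# (or kept at) float64.
def _demo_copy_convert():
    Xb = np.random.rand(1000, 3) < 0.5  # boolean data matrix
    assert _copy_convert(Xb)[0].dtype == np.float32
    assert _copy_convert(Xb, remove_mean=True)[0].dtype == np.float64
    Xi = np.arange(12).reshape(4, 3)  # integer data matrix
    assert _copy_convert(Xi)[0].dtype == np.float64

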
225 | def _sum_sparse(xsum, mask_X, xconst, T):
226 | s = np.zeros(len(mask_X))
227 | s[mask_X] = xsum
228 | s[~mask_X] = T * xconst
229 | return s
230 |
231 |
232 | def _sum(X, xmask=None, xconst=None, Y=None, ymask=None, yconst=None, symmetric=False, remove_mean=False,
233 | weights=None):
234 | """ Computes the column sums and centered column sums.
235 |
236 |     If symmetric = False, the sums will be determined as
237 |     .. math:
238 |         sx &=& \sum_t x_t
239 |         sy &=& \sum_t y_t
240 |
241 |     If symmetric = True, the sums will be determined as
242 |
243 |     .. math:
244 |         sx = sy = \sum_t (x_t + y_t)
245 |
246 | Returns
247 | -------
248 | w : float
249 | statistical weight of sx, sy
250 |     sx : ndarray
251 |         effective column sum of X (including symmetrization if requested)
252 |     sx_raw_centered : ndarray
253 |         centered raw column sum of X
254 |
255 |     optional returns (only if Y is given):
256 |
257 |     sy : ndarray
258 |         effective column sum of Y (including symmetrization if requested)
259 |     sy_raw_centered : ndarray
260 |         centered raw column sum of Y
261 |
262 | """
263 | T = X.shape[0]
264 | # Check if weights are given:
265 | if weights is not None:
266 | X = weights[:, None] * X
267 | if Y is not None:
268 | Y = weights[:, None] * Y
269 | # compute raw sums on variable data
270 |     sx_raw = X.sum(axis=0)  # raw column sum; dividing by the weight w gives the mean
271 | sy_raw = 0
272 | if Y is not None:
273 | sy_raw = Y.sum(axis=0)
274 |
275 | # expand raw sums to full data
276 | if xmask is not None:
277 | if weights is not None:
278 | sx_raw = _sum_sparse(sx_raw, xmask, xconst, weights.sum())
279 | else:
280 | sx_raw = _sum_sparse(sx_raw, xmask, xconst, T)
281 | if ymask is not None:
282 | if weights is not None:
283 | sy_raw = _sum_sparse(sy_raw, ymask, yconst, weights.sum())
284 | else:
285 | sy_raw = _sum_sparse(sy_raw, ymask, yconst, T)
286 |
287 | # compute effective sums and centered sums
288 | if Y is not None and symmetric:
289 | sx = sx_raw + sy_raw
290 | sy = sx
291 | if weights is not None:
292 | w = 2*np.sum(weights)
293 | else:
294 | w = 2 * T
295 | else:
296 | sx = sx_raw
297 | sy = sy_raw
298 | if weights is not None:
299 | w = np.sum(weights)
300 | else:
301 | w = T
302 |
303 | sx_raw_centered = sx_raw.copy()
304 | if Y is not None:
305 | sy_raw_centered = sy_raw.copy()
306 |
307 | # center mean.
308 | if remove_mean:
309 | if Y is not None and symmetric:
310 | sx_raw_centered -= 0.5 * sx
311 | sy_raw_centered -= 0.5 * sy
312 | else:
313 | sx_raw_centered = np.zeros(sx.size)
314 | if Y is not None:
315 | sy_raw_centered = np.zeros(sy.size)
316 |
317 | # return
318 | if Y is not None:
319 | return w, sx, sx_raw_centered, sy, sy_raw_centered
320 | else:
321 | return w, sx, sx_raw_centered
322 |
323 |
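# A small sketch of the summation conventions above (_demo_sum is
# illustrative only): in the symmetric case the effective sums pool X and Y
# and the statistical weight doubles to 2T.
def _demo_sum():
    X, Y = np.random.rand(100, 3), np.random.rand(100, 3)
    w, sx, sx_c, sy, sy_c = _sum(X, Y=Y, symmetric=True)
    assert w == 200  # 2 * T
    assert np.allclose(sx, X.sum(axis=0) + Y.sum(axis=0))
    assert np.allclose(sx, sy)

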
324 | def _center(X, w, s, mask=None, const=None, inplace=True):
325 | """ Centers the data.
326 |
327 |     Parameters
328 |     ----------
329 |     X : ndarray
330 |         data matrix (or only its variable columns, if a sparse mask is used)
331 |     w : float
332 |         statistical weight of s
333 |     s : ndarray
334 |         column sum; the mean subtracted from each row is s / w
335 |     mask : ndarray(N, dtype=bool) or None
336 |         sparse column selection of X within the full data matrix
337 |     const : ndarray or None
338 |         constant column values, centered along with X if mask is given
339 |     inplace : bool
340 |         center in place
341 |
342 |     Returns
343 |     -------
344 |     X : ndarray
345 |         centered data matrix (a reference to the input if inplace=True)
346 |     const : ndarray or None
347 |         centered constant column values (unchanged if mask is None)
348 | """
349 | xmean = s / float(w)
350 | if mask is None:
351 | X = covartools.subtract_row(X, xmean, inplace=inplace)
352 | else:
353 | X = covartools.subtract_row(X, xmean[mask], inplace=inplace)
354 | if inplace:
355 | const = np.subtract(const, xmean[~mask], const)
356 | else:
357 | const = np.subtract(const, xmean[~mask])
358 |
359 | return X, const
360 |
361 |
362 | # ====================================================================================
363 | # SECOND MOMENT MATRICES / COVARIANCES
364 | # ====================================================================================
365 |
366 | def _M2_dense(X, Y, weights=None):
367 | """ 2nd moment matrix using dense matrix computations.
368 |
369 | This function is encapsulated such that we can make easy modifications of the basic algorithms
370 |
371 | """
372 | if weights is not None:
373 | return np.dot((weights[:, None] * X).T, Y)
374 | else:
375 | return np.dot(X.T, Y)
376 |
377 |
378 | def _M2_const(Xvar, mask_X, xvarsum, xconst, Yvar, mask_Y, yvarsum, yconst, weights=None):
379 | """ Computes the unnormalized covariance matrix between X and Y, exploiting constant input columns
380 |
381 |     Computes the unnormalized covariance matrix :math:`C = X^\top Y`,
382 |     exploiting constant columns in X and Y. (Symmetrized moment matrices
383 |     are assembled from two such calls in _M2_symmetric.) Suppose the data
384 |     matrices can be column-permuted to have the form
385 |
386 | .. math:
387 | X &=& (X_{\mathrm{var}}, X_{\mathrm{const}})
388 | Y &=& (Y_{\mathrm{var}}, Y_{\mathrm{const}})
389 |
390 | with rows:
391 |
392 | .. math:
393 | x_t &=& (x_{\mathrm{var},t}, x_{\mathrm{const}})
394 | y_t &=& (y_{\mathrm{var},t}, y_{\mathrm{const}})
395 |
396 | where :math:`x_{\mathrm{const}},\:y_{\mathrm{const}}` are constant vectors.
397 | The resulting matrix has the general form:
398 |
399 | .. math:
400 |         C &=& \begin{pmatrix} X_{\mathrm{var}}^\top Y_{\mathrm{var}} & x_{sum} y_{\mathrm{const}}^\top \\
401 |               x_{\mathrm{const}} y_{sum}^\top & T x_{\mathrm{const}} y_{\mathrm{const}}^\top \end{pmatrix}
402 |
403 | where :math:`x_{sum} = \sum_t x_{\mathrm{var},t}` and
404 |     :math:`y_{sum} = \sum_t y_{\mathrm{var},t}` (with T replaced by the weight sum if weights are given).
405 |
406 | Parameters
407 | ----------
408 | Xvar : ndarray (T, m)
409 | Part of the data matrix X with :math:`m \le M` variable columns.
410 | mask_X : ndarray (M)
411 | Boolean array of size M of the full columns. False for constant column,
412 | True for variable column in X.
413 | xvarsum : ndarray (m)
414 | Column sum of variable part of data matrix X
415 | xconst : ndarray (M-m)
416 | Values of the constant part of data matrix X
417 | Yvar : ndarray (T, n)
418 | Part of the data matrix Y with :math:`n \le N` variable columns.
419 | mask_Y : ndarray (N)
420 | Boolean array of size N of the full columns. False for constant column,
421 | True for variable column in Y.
422 | yvarsum : ndarray (n)
423 | Column sum of variable part of data matrix Y
424 | yconst : ndarray (N-n)
425 | Values of the constant part of data matrix Y
426 |     weights : None or ndarray (T)
427 | weights for all time steps.
428 |
429 | Returns
430 | -------
431 | C : ndarray (M, N)
432 | Unnormalized covariance matrix.
433 |
434 | """
435 | C = np.zeros((len(mask_X), len(mask_Y)))
436 | # Block 11
437 | C[np.ix_(mask_X, mask_Y)] = _M2_dense(Xvar, Yvar, weights=weights)
438 | # other blocks
439 | xsum_is_0 = _is_zero(xvarsum)
440 | ysum_is_0 = _is_zero(yvarsum)
441 | xconst_is_0 = _is_zero(xconst)
442 | yconst_is_0 = _is_zero(yconst)
443 | # TODO: maybe we don't need the checking here, if we do the decision in the higher-level function M2
444 | # TODO: if not zero, we could still exploit the zeros in const and compute (and write!) this outer product
445 | # TODO: only to a sub-matrix
446 | # Block 12 and 21
447 | if weights is not None:
448 | wsum = np.sum(weights)
449 | xvarsum = np.sum(weights[:, None] * Xvar, axis=0)
450 | yvarsum = np.sum(weights[:, None] * Yvar, axis=0)
451 | else:
452 | wsum = Xvar.shape[0]
453 | if not (xsum_is_0 or yconst_is_0) or not (ysum_is_0 or xconst_is_0):
454 | C[np.ix_(mask_X, ~mask_Y)] = np.outer(xvarsum, yconst)
455 | C[np.ix_(~mask_X, mask_Y)] = np.outer(xconst, yvarsum)
456 | # Block 22
457 | if not (xconst_is_0 or yconst_is_0):
458 | C[np.ix_(~mask_X, ~mask_Y)] = np.outer(wsum*xconst, yconst)
459 | return C
460 |
461 |
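# A numerical check of the block structure documented in _M2_const above
# (_demo_M2_const is illustrative only, with arbitrary toy shapes):
# assembling the full matrices from variable parts and constant column
# values must reproduce the plain product np.dot(X.T, Y).
def _demo_M2_const():
    T = 500
    mask = np.array([True, True, False, False])  # columns 2 and 3 constant
    Xvar, Yvar = np.random.rand(T, 2), np.random.rand(T, 2)
    xconst, yconst = np.array([2.0, 3.0]), np.array([4.0, 5.0])
    C = _M2_const(Xvar, mask, Xvar.sum(axis=0), xconst,
                  Yvar, mask, Yvar.sum(axis=0), yconst)
    X = np.hstack([Xvar, np.tile(xconst, (T, 1))])
    Y = np.hstack([Yvar, np.tile(yconst, (T, 1))])
    assert np.allclose(C, np.dot(X.T, Y))

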
462 | def _M2_sparse(Xvar, mask_X, Yvar, mask_Y, weights=None):
463 | """ 2nd moment matrix exploiting zero input columns """
464 | C = np.zeros((len(mask_X), len(mask_Y)))
465 | C[np.ix_(mask_X, mask_Y)] = _M2_dense(Xvar, Yvar, weights=weights)
466 | return C
467 |
468 |
469 | def _M2_sparse_sym(Xvar, mask_X, Yvar, mask_Y, weights=None):
470 | """ 2nd self-symmetric moment matrix exploiting zero input columns
471 |
472 | Computes X'X + Y'Y and X'Y + Y'X
473 |
474 | """
475 | assert len(mask_X) == len(mask_Y), 'X and Y need to have equal sizes for symmetrization'
476 |
477 | Cxxyy = np.zeros((len(mask_X), len(mask_Y)))
478 | Cxxyy[np.ix_(mask_X, mask_X)] = _M2_dense(Xvar, Xvar, weights=weights)
479 | Cxxyy[np.ix_(mask_Y, mask_Y)] += _M2_dense(Yvar, Yvar, weights=weights)
480 |
481 | Cxyyx = np.zeros((len(mask_X), len(mask_Y)))
482 | Cxy = _M2_dense(Xvar, Yvar, weights=weights)
483 | Cyx = _M2_dense(Yvar, Xvar, weights=weights)
484 | Cxyyx[np.ix_(mask_X, mask_Y)] = Cxy
485 | Cxyyx[np.ix_(mask_Y, mask_X)] += Cyx
486 |
487 | return Cxxyy, Cxyyx
488 |
489 |
490 | def _M2(Xvar, Yvar, mask_X=None, mask_Y=None, xsum=0, xconst=0, ysum=0, yconst=0, weights=None):
491 | """ direct (nonsymmetric) second moment matrix. Decide if we need dense, sparse, const"""
492 | if mask_X is None and mask_Y is None:
493 | return _M2_dense(Xvar, Yvar, weights=weights)
494 | else:
495 | # Check if one of the masks is not None, modify it and also adjust the constant columns:
496 |         if mask_X is None:
497 |             mask_X = np.ones(Xvar.shape[1], dtype=bool)
498 |             xconst = np.ones(0, dtype=float)
499 |         if mask_Y is None:
500 |             mask_Y = np.ones(Yvar.shape[1], dtype=bool)
501 |             yconst = np.ones(0, dtype=float)
502 |         if (_is_zero(xsum) and _is_zero(ysum)) or (_is_zero(xconst) and _is_zero(yconst)):
503 |             return _M2_sparse(Xvar, mask_X, Yvar, mask_Y, weights=weights)
504 | else:
505 | return _M2_const(Xvar, mask_X, xsum[mask_X], xconst, Yvar, mask_Y, ysum[mask_Y], yconst, weights=weights)
506 |
507 |
508 | def _M2_symmetric(Xvar, Yvar, mask_X=None, mask_Y=None, xsum=0, xconst=0, ysum=0, yconst=0, weights=None):
509 | """ symmetric second moment matrices. Decide if we need dense, sparse, const"""
510 | if mask_X is None and mask_Y is None:
511 | Cxxyy = _M2_dense(Xvar, Xvar, weights=weights) + _M2_dense(Yvar, Yvar, weights=weights)
512 | Cxy = _M2_dense(Xvar, Yvar, weights=weights)
513 | Cyx = _M2_dense(Yvar, Xvar, weights=weights)
514 | Cxyyx = Cxy + Cyx
515 | else:
516 | # Check if one of the masks is not None, modify it and also adjust the constant columns:
517 |         if mask_X is None:
518 |             mask_X = np.ones(Xvar.shape[1], dtype=bool)
519 |             xconst = np.ones(0, dtype=float)
520 |         if mask_Y is None:
521 |             mask_Y = np.ones(Yvar.shape[1], dtype=bool)
522 |             yconst = np.ones(0, dtype=float)
523 |         if (_is_zero(xsum) and _is_zero(ysum)) or (_is_zero(xconst) and _is_zero(yconst)):
524 | Cxxyy, Cxyyx = _M2_sparse_sym(Xvar, mask_X, Yvar, mask_Y, weights=weights)
525 | else:
526 | xvarsum = xsum[mask_X] # to variable part
527 | yvarsum = ysum[mask_Y] # to variable part
528 | Cxxyy = _M2_const(Xvar, mask_X, xvarsum, xconst, Xvar, mask_X, xvarsum, xconst, weights=weights) \
529 | + _M2_const(Yvar, mask_Y, yvarsum, yconst, Yvar, mask_Y, yvarsum, yconst, weights=weights)
530 | Cxy = _M2_const(Xvar, mask_X, xvarsum, xconst, Yvar, mask_Y, yvarsum, yconst, weights=weights)
531 | Cyx = _M2_const(Yvar, mask_Y, yvarsum, yconst, Xvar, mask_X, xvarsum, xconst, weights=weights)
532 | Cxyyx = Cxy + Cyx
533 | return Cxxyy, Cxyyx
534 |
535 |
536 | # =================================================
537 | # USER API
538 | # =================================================
539 |
540 |
541 | def moments_XX(X, remove_mean=False, modify_data=False, weights=None, sparse_mode='auto', sparse_tol=0.0):
542 | """ Computes the first two unnormalized moments of X
543 |
544 | Computes :math:`s = \sum_t x_t` and :math:`C = X^\top X` while exploiting
545 | zero or constant columns in the data matrix.
546 |
547 | Parameters
548 | ----------
549 | X : ndarray (T, M)
550 | Data matrix
551 | remove_mean : bool
552 | True: remove column mean from the data, False: don't remove mean.
553 | modify_data : bool
554 | If remove_mean=True, the mean will be removed in the data matrix X,
555 | without creating an independent copy. This option is faster but might
556 | lead to surprises because your input array is changed.
557 | weights: None or ndarray(T, )
558 | weights assigned to each trajectory point. If None, all data points have weight one.
559 | If ndarray, each data point is assigned a separate weight.
560 | sparse_mode : str
561 | one of:
562 | * 'dense' : always use dense mode
563 | * 'sparse' : always use sparse mode if possible
564 | * 'auto' : automatic
565 | sparse_tol: float
566 | Threshold for considering column to be zero in order to save computing
567 | effort when the data is sparse or almost sparse.
568 |         If max(abs(X[:, i])) < sparse_tol, then both row i and column i
569 |         of the covariance matrix will be set to zero.
572 |
573 | Returns
574 | -------
575 | w : float
576 | statistical weight
577 | s : ndarray (M)
578 |         sum over all rows of X
579 | C : ndarray (M, M)
580 | unnormalized covariance matrix
581 |
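    |     Examples
    |     --------
    |     A doctest sketch (illustrative, hypothetical data): dense random data
    |     has no zero or constant columns, so C should equal the plain product
    |     X'X and s the plain column sum.
    | 
    |     >>> import numpy as np
    |     >>> X = np.random.rand(100, 3)
    |     >>> w, s, C = moments_XX(X)
    |     >>> bool(np.allclose(s, X.sum(axis=0)) and np.allclose(C, X.T.dot(X)))
    |     True
    | 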
582 | """
583 | # Check consistency of inputs:
584 | if weights is not None:
585 |         assert X.shape[0] == weights.shape[0], 'X and weights must have equal length'
586 | # sparsify
587 | X0, mask_X, xconst = _sparsify(X, remove_mean=remove_mean, modify_data=modify_data,
588 | sparse_mode=sparse_mode, sparse_tol=sparse_tol)
589 | is_sparse = mask_X is not None
590 | # copy / convert
591 | # TODO: do we need to copy xconst?
592 | X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean,
593 | copy=is_sparse or (remove_mean and not modify_data))
594 | # sum / center
595 | w, sx, sx0_centered = _sum(X0, xmask=mask_X, xconst=xconst, symmetric=False, remove_mean=remove_mean,
596 | weights=weights)
597 | if remove_mean:
598 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering
599 | # TODO: we could make a second const check here. If after summation not enough zeros have appeared in the
600 | # TODO: consts, we switch back to dense treatment here.
601 | # compute covariance matrix
602 | C = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, xsum=sx0_centered, xconst=xconst, ysum=sx0_centered, yconst=xconst,
603 | weights=weights)
604 | return w, sx, C
605 |
606 |
607 | def moments_XXXY(X, Y, remove_mean=False, symmetrize=False, weights=None,
608 | modify_data=False, sparse_mode='auto', sparse_tol=0.0):
609 |     r""" Computes the first two unnormalized moments of X and Y
610 |
611 | If symmetrize is False, computes
612 |
613 |     .. math::
614 |         s_x &= \sum_t x_t \\
615 |         s_y &= \sum_t y_t \\
616 |         C_{XX} &= X^\top X \\
617 |         C_{XY} &= X^\top Y
618 |
619 | If symmetrize is True, computes
620 |
621 |     .. math::
622 |         s_x = s_y &= \frac{1}{2} \sum_t (x_t + y_t) \\
623 |         C_{XX} &= \frac{1}{2} (X^\top X + Y^\top Y) \\
624 |         C_{XY} &= \frac{1}{2} (X^\top Y + Y^\top X)
625 |
626 | while exploiting zero or constant columns in the data matrix.
627 |
628 | Parameters
629 | ----------
630 | X : ndarray (T, M)
631 | Data matrix
632 | Y : ndarray (T, N)
633 | Second data matrix
634 | remove_mean : bool
635 | True: remove column mean from the data, False: don't remove mean.
636 | symmetrize : bool
637 | Computes symmetrized means and moments (see above)
638 | weights : None or ndarray(T, )
639 | weights assigned to each trajectory point of X. If None, all data points have weight one.
640 | If ndarray, each data point is assigned a separate weight.
643 | modify_data : bool
644 | If remove_mean=True, the mean will be removed in the data matrix X,
645 | without creating an independent copy. This option is faster but might
646 | lead to surprises because your input array is changed.
647 | sparse_mode : str
648 | one of:
649 | * 'dense' : always use dense mode
650 | * 'sparse' : always use sparse mode if possible
651 | * 'auto' : automatic
652 | sparse_tol: float
653 | Threshold for considering column to be zero in order to save computing
654 | effort when the data is sparse or almost sparse.
655 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y
656 | is not given) of the covariance matrix will be set to zero. If Y is
657 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the
658 | covariance matrix will be set to zero.
659 |
660 | Returns
661 | -------
662 | w : float
663 | statistical weight
664 | s_x : ndarray (M)
665 | x-sum
666 | s_y : ndarray (N)
667 | y-sum
668 | C_XX : ndarray (M, M)
669 | unnormalized covariance matrix of X
670 | C_XY : ndarray (M, N)
671 |         unnormalized cross-covariance matrix between X and Y
672 |
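    |     Examples
    |     --------
    |     A doctest sketch (illustrative, hypothetical data), nonsymmetric case:
    | 
    |     >>> import numpy as np
    |     >>> X = np.random.rand(100, 3)
    |     >>> Y = np.random.rand(100, 2)
    |     >>> w, sx, sy, Cxx, Cxy = moments_XXXY(X, Y)
    |     >>> bool(np.allclose(Cxx, X.T.dot(X)) and np.allclose(Cxy, X.T.dot(Y)))
    |     True
    | 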
673 | """
674 | # Check consistency of inputs:
675 | if Y is not None:
676 | assert Y.shape[0] == X.shape[0], 'X and Y must have equal length.'
677 | if weights is not None:
678 |         assert X.shape[0] == weights.shape[0], 'X and weights must have equal length'
679 | # sparsify
680 | X0, mask_X, xconst, Y0, mask_Y, yconst = _sparsify_pair(X, Y, remove_mean=remove_mean, modify_data=modify_data,
681 | symmetrize=symmetrize, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
682 | is_sparse = mask_X is not None and mask_Y is not None
683 | # copy / convert
684 | copy = is_sparse or (remove_mean and not modify_data)
685 | X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean, copy=copy)
686 | Y0, yconst = _copy_convert(Y0, const=yconst, remove_mean=remove_mean, copy=copy)
687 | # sum / center
688 | w, sx, sx_centered, sy, sy_centered = _sum(X0, xmask=mask_X, xconst=xconst, Y=Y0, ymask=mask_Y, yconst=yconst,
689 | symmetric=symmetrize, remove_mean=remove_mean, weights=weights)
690 | if remove_mean:
691 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering
692 | _center(Y0, w, sy, mask=mask_Y, const=yconst, inplace=True) # fast in-place centering
693 |
694 | if symmetrize:
695 | Cxx, Cxy = _M2_symmetric(X0, Y0, mask_X=mask_X, mask_Y=mask_Y,
696 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst, weights=weights)
697 | else:
698 | Cxx = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X,
699 | xsum=sx_centered, xconst=xconst, ysum=sx_centered, yconst=xconst, weights=weights)
700 | Cxy = _M2(X0, Y0, mask_X=mask_X, mask_Y=mask_Y,
701 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst, weights=weights)
702 |
703 | return w, sx, sy, Cxx, Cxy
704 |
705 |
706 | def moments_block(X, Y, remove_mean=False, modify_data=False,
707 | sparse_mode='auto', sparse_tol=0.0):
708 |     r""" Computes the first two unnormalized moments of X and Y
709 |
710 | Computes
711 |
712 |     .. math::
713 |         s_x &= \sum_t x_t \\
714 |         s_y &= \sum_t y_t \\
715 |         C_{XX} &= X^\top X \\
716 |         C_{XY} &= X^\top Y \\
717 |         C_{YX} &= Y^\top X \\
718 |         C_{YY} &= Y^\top Y
719 |
720 | while exploiting zero or constant columns in the data matrix.
721 |
722 | Parameters
723 | ----------
724 | X : ndarray (T, M)
725 | Data matrix
726 | Y : ndarray (T, N)
727 | Second data matrix
728 | remove_mean : bool
729 | True: remove column mean from the data, False: don't remove mean.
730 | modify_data : bool
731 | If remove_mean=True, the mean will be removed in the data matrix X,
732 | without creating an independent copy. This option is faster but might
733 | lead to surprises because your input array is changed.
734 | sparse_mode : str
735 | one of:
736 | * 'dense' : always use dense mode
737 | * 'sparse' : always use sparse mode if possible
738 | * 'auto' : automatic
739 | sparse_tol: float
740 | Threshold for considering column to be zero in order to save computing
741 | effort when the data is sparse or almost sparse.
742 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y
743 | is not given) of the covariance matrix will be set to zero. If Y is
744 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the
745 | covariance matrix will be set to zero.
746 |
747 | Returns
748 | -------
749 | w : float
750 | statistical weight of this estimation
751 |     s : [ndarray (M), ndarray (N)]
752 |         list of two elements with s[0] = sx and s[1] = sy
753 |     C : [[ndarray(M,M), ndarray(M,N)], [ndarray(N,M), ndarray(N,N)]]
754 |         list of two lists of two matrices each, with
755 |         C[0][0] = Cxx, C[0][1] = Cxy, C[1][0] = Cyx, C[1][1] = Cyy
756 |
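    |     Examples
    |     --------
    |     A doctest sketch (illustrative, hypothetical data); the lower-left
    |     block is returned as the transpose of the upper-right one:
    | 
    |     >>> import numpy as np
    |     >>> X = np.random.rand(100, 3)
    |     >>> Y = np.random.rand(100, 2)
    |     >>> w, s, C = moments_block(X, Y)
    |     >>> bool(np.allclose(C[0][1], X.T.dot(Y)) and np.allclose(C[1][0], Y.T.dot(X)))
    |     True
    | 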
757 | """
758 | # sparsify
759 |     X0, mask_X, xconst = _sparsify(X, remove_mean=remove_mean, modify_data=modify_data, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
760 |     Y0, mask_Y, yconst = _sparsify(Y, remove_mean=remove_mean, modify_data=modify_data, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
761 |     # copy / convert
762 |     copy = (mask_X is not None and mask_Y is not None) or (remove_mean and not modify_data)
763 |     X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean, copy=copy)
764 |     Y0, yconst = _copy_convert(Y0, const=yconst, remove_mean=remove_mean, copy=copy)
765 | # sum / center
766 | w, sx, sx_centered, sy, sy_centered = _sum(X0, xmask=mask_X, xconst=xconst, Y=Y0, ymask=mask_Y, yconst=yconst,
767 | symmetric=False, remove_mean=remove_mean)
768 | if remove_mean:
769 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering
770 | _center(Y0, w, sy, mask=mask_Y, const=yconst, inplace=True) # fast in-place centering
771 |
772 | Cxx = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X,
773 | xsum=sx_centered, xconst=xconst, ysum=sx_centered, yconst=xconst)
774 | Cxy = _M2(X0, Y0, mask_X=mask_X, mask_Y=mask_Y,
775 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst)
776 | Cyy = _M2(Y0, Y0, mask_X=mask_Y, mask_Y=mask_Y,
777 | xsum=sy_centered, xconst=yconst, ysum=sy_centered, yconst=yconst)
778 |
779 | return w, [sx, sy], [[Cxx, Cxy], [Cxy.T, Cyy]]
780 |
781 |
782 | def covar(X, remove_mean=False, modify_data=False, weights=None, sparse_mode='auto', sparse_tol=0.0):
783 |     r""" Computes the covariance matrix of X
784 |
785 | Computes
786 |
787 |     .. math::
788 |         C_{XX} = X^\top X
789 |
790 | while exploiting zero or constant columns in the data matrix.
791 |     WARNING: Use moments_XX directly if you can. This function additionally scales
792 |     the moment matrix by the inverse statistical weight and does not return the mean.
793 |
794 | Parameters
795 | ----------
796 | X : ndarray (T, M)
797 | Data matrix
798 | remove_mean : bool
799 | True: remove column mean from the data, False: don't remove mean.
800 | modify_data : bool
801 | If remove_mean=True, the mean will be removed in the data matrix X,
802 | without creating an independent copy. This option is faster but might
803 | lead to surprises because your input array is changed.
804 | weights : None or ndarray(T, )
805 | weights assigned to each trajectory point of X. If None, all data points have weight one.
806 | If ndarray, each data point is assigned a separate weight.
807 | sparse_mode : str
808 | one of:
809 | * 'dense' : always use dense mode
810 | * 'sparse' : always use sparse mode if possible
811 | * 'auto' : automatic
812 | sparse_tol: float
813 | Threshold for considering column to be zero in order to save computing
814 | effort when the data is sparse or almost sparse.
815 |         If max(abs(X[:, i])) < sparse_tol, then both row i and column i
816 |         of the covariance matrix will be set to zero.
819 |
820 | Returns
821 | -------
822 | C_XX : ndarray (M, M)
823 | Covariance matrix of X
824 |
825 | See also
826 | --------
827 | moments_XX
828 |
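    | 
    |     Examples
    |     --------
    |     A doctest sketch (illustrative, hypothetical data). For T unweighted
    |     frames the statistical weight is T, so without mean removal the result
    |     should equal X'X / T:
    | 
    |     >>> import numpy as np
    |     >>> X = np.random.rand(100, 3)
    |     >>> bool(np.allclose(covar(X), X.T.dot(X) / 100.0))
    |     True
    | 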
829 | """
830 | w, s, M = moments_XX(X, remove_mean=remove_mean, weights=weights, modify_data=modify_data,
831 | sparse_mode=sparse_mode, sparse_tol=sparse_tol)
832 | return M / float(w)
833 |
834 |
835 | def covars(X, Y, remove_mean=False, modify_data=False, symmetrize=False, weights=None, sparse_mode='auto',
836 | sparse_tol=0.0):
837 |     r""" Computes the covariance and cross-covariance matrices of X and Y
838 |
839 | If symmetrize is False, computes
840 |
841 |     .. math::
842 |         C_{XX} &= X^\top X \\
843 |         C_{XY} &= X^\top Y
844 |
845 | If symmetrize is True, computes
846 |
847 |     .. math::
848 |         C_{XX} &= \frac{1}{2} (X^\top X + Y^\top Y) \\
849 |         C_{XY} &= \frac{1}{2} (X^\top Y + Y^\top X)
850 |
851 | while exploiting zero or constant columns in the data matrix.
852 |     WARNING: Use moments_XXXY directly if you can. This function additionally scales
853 |     the moment matrices by the inverse statistical weight and does not return the means.
854 |
855 | Parameters
856 | ----------
857 | X : ndarray (T, M)
858 | Data matrix
859 | Y : ndarray (T, N)
860 | Second data matrix
861 | remove_mean : bool
862 | True: remove column mean from the data, False: don't remove mean.
863 | modify_data : bool
864 | If remove_mean=True, the mean will be removed in the data matrix X,
865 | without creating an independent copy. This option is faster but might
866 | lead to surprises because your input array is changed.
867 | symmetrize : bool
868 | Computes symmetrized means and moments (see above)
869 | weights : None or ndarray(T, )
870 | weights assigned to each trajectory point of X. If None, all data points have weight one.
871 | If ndarray, each data point is assigned a separate weight.
872 | sparse_mode : str
873 | one of:
874 | * 'dense' : always use dense mode
875 | * 'sparse' : always use sparse mode if possible
876 | * 'auto' : automatic
877 | sparse_tol: float
878 | Threshold for considering column to be zero in order to save computing
879 | effort when the data is sparse or almost sparse.
880 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y
881 | is not given) of the covariance matrix will be set to zero. If Y is
882 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the
883 | covariance matrix will be set to zero.
884 |
885 | Returns
886 | -------
887 | C_XX : ndarray (M, M)
888 | Covariance matrix of X
889 | C_XY : ndarray (M, N)
890 |         Cross-covariance matrix between X and Y
891 |
892 | See also
893 | --------
894 | moments_XXXY
895 |
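    | 
    |     Examples
    |     --------
    |     A doctest sketch (illustrative, hypothetical data), nonsymmetric case
    |     with T = 100 unweighted frames:
    | 
    |     >>> import numpy as np
    |     >>> X = np.random.rand(100, 3)
    |     >>> Y = np.random.rand(100, 2)
    |     >>> Cxx, Cxy = covars(X, Y)
    |     >>> bool(np.allclose(Cxx, X.T.dot(X) / 100.0) and np.allclose(Cxy, X.T.dot(Y) / 100.0))
    |     True
    | 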
896 | """
897 | w, sx, sy, Mxx, Mxy = moments_XXXY(X, Y, remove_mean=remove_mean, modify_data=modify_data, weights=weights,
898 | symmetrize=symmetrize, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
899 | return Mxx / float(w), Mxy / float(w)
900 |
--------------------------------------------------------------------------------