├── .gitattributes ├── variational ├── solvers │ ├── __init__.py │ ├── eig_qr │ │ ├── __init__.py │ │ └── eig_qr.pyx │ ├── tests │ │ ├── __init__.py │ │ └── test_direct.py │ └── direct.py ├── estimators │ ├── tests │ │ ├── __init__.py │ │ ├── benchmark_moments.py │ │ ├── test_running_moments.py │ │ └── test_moments.py │ ├── covar_c │ │ ├── __init__.py │ │ ├── _covartools.h │ │ ├── covartools.pyx │ │ └── _covartools.c │ ├── __init__.py │ ├── running_moments.py │ └── moments.py ├── __init__.py ├── util.py └── _version.py ├── devtools ├── conda-recipe │ ├── build.sh │ ├── bld.bat │ ├── meta.yaml │ └── run_test.py └── ci │ ├── travis │ └── install_miniconda.sh │ └── appveyor │ ├── appveyor │ ├── runTestsuite.ps1 │ ├── run_with_env.cmd │ └── transform_xunit_to_appveyor.xsl │ ├── process_test_results.ps1 │ ├── run_with_env.cmd │ └── transform_xunit_to_appveyor.xsl ├── docs ├── Interface.pdf ├── Updating_Formulas.pdf ├── Interface.lyx └── Updating_Formulas.lyx ├── MANIFEST.in ├── examples └── basissets_ramachandran │ ├── torsion_A.npy │ ├── torsion_FGAIL.npy │ └── Example.py ├── .gitignore ├── setup.cfg ├── .travis.yml ├── appveyor.yml ├── README.md └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | variational/_version.py export-subst 2 | -------------------------------------------------------------------------------- /variational/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | -------------------------------------------------------------------------------- /variational/solvers/eig_qr/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' -------------------------------------------------------------------------------- /variational/estimators/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | 
-------------------------------------------------------------------------------- /variational/solvers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | -------------------------------------------------------------------------------- /variational/estimators/covar_c/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | -------------------------------------------------------------------------------- /devtools/conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | $PYTHON setup.py install 3 | -------------------------------------------------------------------------------- /docs/Interface.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/docs/Interface.pdf -------------------------------------------------------------------------------- /docs/Updating_Formulas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/docs/Updating_Formulas.pdf -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # versioneer files 2 | include versioneer.py 3 | include variational/_version.py 4 | 5 | recursive-include variational *.pyx *.c *.h 6 | -------------------------------------------------------------------------------- /examples/basissets_ramachandran/torsion_A.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/examples/basissets_ramachandran/torsion_A.npy -------------------------------------------------------------------------------- 
/examples/basissets_ramachandran/torsion_FGAIL.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/examples/basissets_ramachandran/torsion_FGAIL.npy -------------------------------------------------------------------------------- /variational/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .moments import moments_XX, moments_XXXY, moments_block 4 | from .moments import covar, covars 5 | from .running_moments import RunningCovar, running_covar 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled files 2 | *.pyc 3 | *.so 4 | build 5 | variational.egg-info 6 | dist 7 | 8 | # project files 9 | .idea 10 | 11 | # generated files 12 | variational/version.py 13 | variational/estimators/covar_c/covartools.c 14 | variational/solvers/eig_qr/eig_qr.c 15 | -------------------------------------------------------------------------------- /devtools/conda-recipe/bld.bat: -------------------------------------------------------------------------------- 1 | if not defined APPVEYOR ( 2 | echo not on appveyor 3 | "%PYTHON%" setup.py install 4 | ) else ( 5 | echo on appveyor 6 | cmd /E:ON /V:ON /C %APPVEYOR_BUILD_FOLDER%\devtools\ci\appveyor\run_with_env.cmd "%PYTHON%" setup.py install 7 | ) 8 | if errorlevel 1 exit 1 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files. 
5 | 6 | [versioneer] 7 | VCS = git 8 | style = pep440 9 | versionfile_source = variational/_version.py 10 | #versionfile_build = 11 | tag_prefix = 12 | parentdir_prefix = variational- 13 | 14 | -------------------------------------------------------------------------------- /devtools/ci/travis/install_miniconda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make TARGET overrideable with env 4 | : ${TARGET:=$HOME/miniconda} 5 | 6 | function install_miniconda { 7 | echo "installing miniconda to $TARGET" 8 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O mc.sh -o /dev/null 9 | bash mc.sh -b -f -p $TARGET 10 | } 11 | 12 | install_miniconda 13 | export PATH=$TARGET/bin:$PATH 14 | -------------------------------------------------------------------------------- /variational/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | __author__ = 'noe' 3 | 4 | # import subpackages such that they are available after the main package import 5 | from . import estimators 6 | from . import solvers 7 | 8 | # direct imports of important functions/classes to-level API 9 | from .solvers.direct import eig_corr 10 | from .solvers.direct import sort_by_norm 11 | from .solvers.eig_qr.eig_qr import eig_qr 12 | 13 | from ._version import get_versions 14 | __version__ = get_versions()['version'] 15 | del get_versions 16 | -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: variational 3 | version: !!str dev 4 | source: 5 | path: ../.. 
6 | 7 | build: 8 | preserve_egg_dir: True 9 | 10 | requirements: 11 | build: 12 | - python 13 | - setuptools 14 | - cython >=0.20 15 | - numpy >=1.7 16 | - scipy 17 | - six 18 | 19 | run: 20 | - python 21 | - setuptools 22 | - numpy >=1.7 23 | - scipy 24 | - six 25 | 26 | test: 27 | requires: 28 | - nose 29 | - coverage 30 | imports: 31 | - variational 32 | 33 | -------------------------------------------------------------------------------- /variational/util.py: -------------------------------------------------------------------------------- 1 | """ Add convenience functions here if needed 2 | """ 3 | 4 | __author__ = 'noe' 5 | 6 | 7 | 8 | def features_to_basis(infiles, basisset, outfiles): 9 | """Reads input files 10 | 11 | basisset : BasisSet object 12 | basis set tob e used 13 | 14 | References 15 | --------- 16 | .. [5] Vitalini, F., Noe, F. and Keller, B. (2015): 17 | A basis set for peptides for the variational approach to conformational kinetics. (In review). 18 | 19 | """ 20 | # cycle through input files 21 | # read infile 22 | # map to basis function values 23 | # write outfile 24 | pass 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | env: 6 | global: 7 | - PATH=$HOME/miniconda/bin:$PATH 8 | - common_py_deps="pylint jinja2 conda-build" 9 | matrix: 10 | - python=2.7 CONDA_PY=27 CONDA_NPY=17 11 | - python=3.4 CONDA_PY=34 CONDA_NPY=18 12 | - python=3.4 CONDA_PY=34 CONDA_NPY=19 13 | - python=3.5 CONDA_PY=35 CONDA_NPY=19 14 | 15 | before_install: 16 | - devtools/ci/travis/install_miniconda.sh 17 | - conda config --set always_yes true 18 | - conda config --add channels omnia 19 | - conda install -q $common_py_deps 20 | 21 | script: 22 | - conda build -q devtools/conda-recipe 23 | 24 | after_success: 25 | # coverage report: needs .coverage file generated by testsuite and git src 26 | - 
pip install coveralls 27 | - coveralls 28 | 29 | -------------------------------------------------------------------------------- /variational/estimators/covar_c/_covartools.h: -------------------------------------------------------------------------------- 1 | #ifndef _covartools_h_ 2 | #define _covartools_h_ 3 | 4 | void _subtract_row_double(double* X, double* row, int M, int N); 5 | void _subtract_row_float(double* X, double* row, int M, int N); 6 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N); 7 | int* _bool_to_list(int* b, int N, int nnz); 8 | void _variable_cols_char(int* cols, char* X, int M, int N, int min_constant); 9 | void _variable_cols_int(int* cols, int* X, int M, int N, int min_constant); 10 | void _variable_cols_long(int* cols, long* X, int M, int N, int min_constant); 11 | void _variable_cols_float(int* cols, float* X, int M, int N, int min_constant); 12 | void _variable_cols_double(int* cols, double* X, int M, int N, int min_constant); 13 | void _variable_cols_float_approx(int* cols, float* X, int M, int N, float tol, int min_constant); 14 | void _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/appveyor/runTestsuite.ps1: -------------------------------------------------------------------------------- 1 | function xslt_transform($xml, $xsl, $output) 2 | { 3 | trap [Exception] 4 | { 5 | Write-Host $_.Exception 6 | } 7 | 8 | $xslt = New-Object System.Xml.Xsl.XslCompiledTransform 9 | $xslt.Load($xsl) 10 | $xslt.Transform($xml, $output) 11 | } 12 | 13 | function upload($file) { 14 | trap [Exception] 15 | { 16 | Write-Host $_.Exception 17 | } 18 | 19 | $wc = New-Object 'System.Net.WebClient' 20 | $wc.UploadFile("https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)", $file) 21 | } 22 | 23 | function run { 24 | cd 
$env:APPVEYOR_BUILD_FOLDER 25 | $stylesheet = "tools/ci/appveyor/transform_xunit_to_appveyor.xsl" 26 | $input = "nosetests.xml" 27 | $output = "transformed.xml" 28 | 29 | if ( -not Test-Path $input ) { 30 | Write-Host "$input does not exist" 31 | return 32 | } 33 | xslt_transform $input $stylesheet $output 34 | 35 | upload $output 36 | Push-AppveyorArtifact $input 37 | Push-AppveyorArtifact $output 38 | } 39 | 40 | run 41 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/process_test_results.ps1: -------------------------------------------------------------------------------- 1 | function xslt_transform($xml, $xsl, $output) 2 | { 3 | trap [Exception] 4 | { 5 | Write-Host $_.Exception 6 | } 7 | 8 | $xslt = New-Object System.Xml.Xsl.XslCompiledTransform 9 | $xslt.Load($xsl) 10 | $xslt.Transform($xml, $output) 11 | } 12 | 13 | function upload($file) { 14 | trap [Exception] 15 | { 16 | Write-Host $_.Exception 17 | } 18 | 19 | $wc = New-Object 'System.Net.WebClient' 20 | $wc.UploadFile("https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)", $file) 21 | } 22 | 23 | function run { 24 | cd $env:APPVEYOR_BUILD_FOLDER 25 | $stylesheet = "devtools/ci/appveyor/transform_xunit_to_appveyor.xsl" 26 | $input = "nosetests.xml" 27 | $output = "transformed.xml" 28 | # if ( -not Test-Path $input ) { 29 | # throw "input file missing" 30 | # } 31 | xslt_transform $input $stylesheet $output 32 | 33 | upload $output 34 | Push-AppveyorArtifact $input 35 | Push-AppveyorArtifact $output 36 | 37 | # return exit code of testsuite 38 | if ( -not $success) { 39 | throw "testsuite not successful" 40 | } 41 | } 42 | 43 | run 44 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | global: 3 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 4 | # /E:ON and /V:ON 
options are not enabled in the batch script intepreter 5 | # See: http://stackoverflow.com/a/13751649/163740 6 | # this is being set in bld.bat of conda-recipe... 7 | #CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\devtools\\ci\\appveyor\\run_with_env.cmd" 8 | PYTHONUNBUFFERED: 1 9 | 10 | matrix: 11 | - PYTHON: "C:\\Miniconda" 12 | CONDA_PY: "27" 13 | 14 | - PYTHON: "C:\\Miniconda-x64" 15 | CONDA_PY: "27" 16 | ARCH: "64" 17 | 18 | - PYTHON: "C:\\Miniconda3" 19 | CONDA_PY: "34" 20 | 21 | - PYTHON: "C:\\Miniconda3-x64" 22 | CONDA_PY: "34" 23 | ARCH: "64" 24 | 25 | install: 26 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%;" 27 | 28 | - conda config --set always_yes true 29 | - conda config --add channels omnia 30 | - conda install -q conda-build jinja2 31 | 32 | 33 | build: false # Not a C# project, build stuff at the test step instead. 34 | 35 | test_script: 36 | # run testsuite and upload test results to AppVeyor; return exit code of testsuite 37 | - conda build -q devtools/conda-recipe 38 | -------------------------------------------------------------------------------- /devtools/conda-recipe/run_test.py: -------------------------------------------------------------------------------- 1 | 2 | import subprocess 3 | import os 4 | import sys 5 | import shutil 6 | import re 7 | 8 | src_dir = os.getenv('SRC_DIR') 9 | 10 | 11 | # matplotlib headless backend 12 | with open('matplotlibrc', 'w') as fh: 13 | fh.write('backend: Agg') 14 | 15 | 16 | def coverage_report(): 17 | fn = '.coverage' 18 | assert os.path.exists(fn) 19 | build_dir = os.getenv('TRAVIS_BUILD_DIR') 20 | dest = os.path.join(build_dir, fn) 21 | print( "copying coverage report to", dest) 22 | shutil.copy(fn, dest) 23 | assert os.path.exists(dest) 24 | 25 | # fix paths in .coverage file 26 | with open(dest, 'r') as fh: 27 | data = fh.read() 28 | match= '"/home/travis/miniconda/envs/_test/lib/python.+?/site-packages/.+?/(variational/.+?)"' 29 | repl = '"%s/\\1"' % build_dir 30 | data = re.sub(match, repl, data) 31 | 
os.unlink(dest) 32 | with open(dest, 'w+') as fh: 33 | fh.write(data) 34 | 35 | nose_run = "nosetests variational -vv" \ 36 | " --with-coverage --cover-inclusive --cover-package=variational" \ 37 | " --with-doctest --doctest-options=+NORMALIZE_WHITESPACE,+ELLIPSIS" \ 38 | .split(' ') 39 | 40 | res = subprocess.call(nose_run) 41 | 42 | 43 | # move .coverage file to git clone on Travis CI 44 | if os.getenv('TRAVIS', False): 45 | coverage_report() 46 | 47 | if False: #os.getenv('APPVEYOR', False): 48 | call = ('powershell ' + os.path.join(os.getenv('APPVEYOR_BUILD_FOLDER'), 49 | 'devtools', 'ci', 'appveyor', 50 | 'process_test_results.ps1')).split(' ') 51 | res |= subprocess.call(call) 52 | 53 | sys.exit(res) 54 | 55 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds do not require specific environment configurations. 
10 | :: 11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 12 | :: cmd interpreter, at least for (SDK v7.0) 13 | :: 14 | :: More details at: 15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 16 | :: http://stackoverflow.com/a/13751649/163740 17 | :: 18 | :: Author: Olivier Grisel 19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 20 | @ECHO OFF 21 | 22 | SET COMMAND_TO_RUN=%* 23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 24 | 25 | SET MAJOR_PYTHON_VERSION="%CONDA_PY:~0,1%" 26 | IF %MAJOR_PYTHON_VERSION% == "2" ( 27 | SET WINDOWS_SDK_VERSION="v7.0" 28 | ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( 29 | SET WINDOWS_SDK_VERSION="v7.1" 30 | ) ELSE ( 31 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 32 | EXIT 1 33 | ) 34 | 35 | IF "%ARCH%"=="64" ( 36 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 37 | SET DISTUTILS_USE_SDK=1 38 | SET MSSdk=1 39 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 41 | ECHO Executing: %COMMAND_TO_RUN% 42 | call %COMMAND_TO_RUN% || EXIT 1 43 | ) ELSE ( 44 | ECHO Using default MSVC build environment for 32 bit architecture 45 | ECHO Executing: %COMMAND_TO_RUN% 46 | call %COMMAND_TO_RUN% || EXIT 1 47 | ) 48 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables 
to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds do not require specific environment configurations. 10 | :: 11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 12 | :: cmd interpreter, at least for (SDK v7.0) 13 | :: 14 | :: More details at: 15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 16 | :: http://stackoverflow.com/a/13751649/163740 17 | :: 18 | :: Author: Olivier Grisel 19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 20 | @ECHO OFF 21 | 22 | SET COMMAND_TO_RUN=%* 23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 24 | 25 | SET MAJOR_PYTHON_VERSION="%CONDA_PY:~0,1%" 26 | IF %MAJOR_PYTHON_VERSION% == "2" ( 27 | SET WINDOWS_SDK_VERSION="v7.0" 28 | ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( 29 | SET WINDOWS_SDK_VERSION="v7.1" 30 | ) ELSE ( 31 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 32 | EXIT 1 33 | ) 34 | 35 | IF "%ARCH%"=="64" ( 36 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 37 | SET DISTUTILS_USE_SDK=1 38 | SET MSSdk=1 39 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 41 | ECHO Executing: %COMMAND_TO_RUN% 42 | call %COMMAND_TO_RUN% || EXIT 1 43 | ) ELSE ( 44 | ECHO Using default MSVC build environment for 32 bit architecture 45 | ECHO Executing: %COMMAND_TO_RUN% 46 | call %COMMAND_TO_RUN% || EXIT 1 47 | ) 48 | -------------------------------------------------------------------------------- /variational/solvers/eig_qr/eig_qr.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport scipy.linalg.cython_lapack as scc 3 | 4 | def eig_qr(A): 5 | """ Compute eigenvalues and 
eigenvectors of symmetric matrix A using symmetric tridiagonal QR-algorithm 6 | with implicit shifts. The matrix is first transformed to tridiagonal shape using lapack's dsytrd routine. 7 | Then, the tridiagonal QR-iteration is performed using lapack's dsteqr routine. 8 | 9 | Parameters: 10 | ----------- 11 | A, ndarray (N, N): 12 | symmetric matrix. 13 | 14 | Returns: 15 | -------- 16 | D, ndarray(N,) 17 | array of eigenvalues of A 18 | B, ndarray(N, N) 19 | array of eigenvectors of A. 20 | """ 21 | 22 | # handle 1x1 case 23 | if np.size(A) == 1: # size can handle 1x1 arrays and numbers 24 | return A*np.ones(1), np.ones((1, 1)) 25 | 26 | # Definitions: 27 | cdef double[:,:] B = np.require(A, dtype=np.float64, requirements=["F", "A"]) 28 | cdef int n=A.shape[0], lda=A.shape[0], info, lwork=-1 29 | cdef char[:] uplo = np.zeros(1, "S1") 30 | uplo[:] = "U" 31 | cdef double[:] D = np.require(np.zeros(n), dtype=np.float64, requirements=["F", "A"]) 32 | cdef double[:] E = np.require(np.zeros(n-1), dtype=np.float64, requirements=["F", "A"]) 33 | cdef double[:] Tau = np.require(np.zeros(n-1), dtype=np.float64, requirements=["F", "A"]) 34 | cdef double[:] Work = np.require(np.zeros(1), dtype=np.float64, requirements=["F", "A"]) 35 | 36 | # Transform to tridiagonal shape: 37 | scc.dsytrd(&uplo[0], &n, &B[0, 0], &lda, &D[0], &E[0], &Tau[0], &Work[0], &lwork, &info) 38 | lwork = np.int(Work[0]) 39 | cdef double[:] Work2 = np.require(np.zeros(lwork), dtype=np.float64, requirements=["F", "A"]) 40 | scc.dsytrd(&uplo[0], &n, &B[0, 0], &lda, &D[0], &E[0], &Tau[0], &Work2[0], &lwork, &info) 41 | 42 | # Extract transformation to tridiagonal shape: 43 | lwork = -1 44 | scc.dorgtr(&uplo[0], &n, &B[0, 0], &lda, &Tau[0], &Work[0], &lwork, &info) 45 | lwork = np.int(Work[0]) 46 | cdef double[:] Work3 = np.require(np.zeros(lwork), dtype=np.float64, requirements=["F", "A"]) 47 | scc.dorgtr(&uplo[0], &n, &B[0, 0], &lda, &Tau[0], &Work3[0], &lwork, &info) 48 | 49 | # Run QR-iteration. 
50 | cdef double[:] Work4 = np.require(np.zeros(np.maximum(1,2*n-2)), dtype=np.float64, requirements=["F", "A"]) 51 | cdef char[:] compz = np.zeros(1, "S1") 52 | compz[:] = "V" 53 | scc.dsteqr(&compz[0], &n, &D[0], &E[0], &B[0, 0], &n, &Work4[0], &info) 54 | 55 | return np.asarray(D), np.asarray(B) 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /variational/solvers/tests/test_direct.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | import numpy as np 4 | from .. import direct 5 | 6 | __author__ = 'noe' 7 | 8 | 9 | def sort_by_norm_and_imag_sign(evals, evecs): 10 | arr = np.zeros((len(evals),), dtype=[('mag', np.float64), ('sign', np.float64)]) 11 | arr['mag'] = np.abs(evals) 12 | arr['sign'] = np.sign((np.imag(evals))) 13 | I = np.argsort(arr, order=['mag', 'sign'])[::-1] 14 | return evals[I], evecs[:, I] 15 | 16 | 17 | class TestDirect(unittest.TestCase): 18 | 19 | @classmethod 20 | def setUpClass(cls): 21 | pass 22 | 23 | def test_spd_inv_split(self): 24 | W = np.array([[1.0, 0.3, 0.2], 25 | [0.3, 0.8, 0.5], 26 | [0.2, 0.5, 0.9]]) 27 | for method in ['QR', 'schur']: 28 | L = direct.spd_inv_split(W, method=method) 29 | # Test if decomposition is correct: inv(W) == L L.T 30 | assert np.allclose(np.dot(L, L.T), np.linalg.inv(W)) 31 | # Test if matrices are orthogonal 32 | C = np.dot(L.T, L) 33 | assert np.max(np.abs(C - np.diag(np.diag(C)))) < 1e-12 34 | 35 | # Test if fails when given a nonsymmetric matrix 36 | W = np.array([[1.0, 0.2], 37 | [0.3, 0.8]]) 38 | with self.assertRaises(AssertionError): 39 | direct.spd_inv_split(W) 40 | 41 | def test_eig_corr(self): 42 | C0 = np.array([[1.0, 0.3, 0.2], 43 | [0.3, 0.8, 0.5], 44 | [0.2, 0.5, 0.9]]) 45 | Ct_sym = np.array([[0.5, 0.1, 0.0], 46 | [0.1, 0.3, 0.3], 47 | [0.0, 0.3, 0.2]]) 48 | Ct_nonsym = np.array([[0.5, 0.1, 0.3], 49 | [0.1, 0.3, 0.3], 50 | [0.0, 0.3, 
0.2]]) 51 | # reference solution 52 | import scipy 53 | for Ct in [Ct_sym, Ct_nonsym]: 54 | v0, R0 = scipy.linalg.eig(Ct, C0) 55 | v0, R0 = sort_by_norm_and_imag_sign(v0, R0) 56 | for method in ['QR', 'schur']: 57 | # Test correctness 58 | v, R = direct.eig_corr(C0, Ct, method=method) 59 | v, R = sort_by_norm_and_imag_sign(v, R) 60 | assert np.allclose(v0, v) # eigenvalues equal? 61 | # eigenvectors equivalent? 62 | for i in range(R0.shape[1]): 63 | assert np.allclose(R0[:, i] / R0[0, i], R[:, i] / R[0, i]) 64 | # Test if eigenpair diagonalizes the Koopman matrix 65 | K = np.dot(np.linalg.inv(C0), Ct) 66 | assert np.allclose(K, R.dot(np.diag(v)).dot(np.linalg.inv(R))) 67 | 68 | 69 | if __name__ == "__main__": 70 | unittest.main() 71 | -------------------------------------------------------------------------------- /examples/basissets_ramachandran/Example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Jul 31 10:55:24 2015 4 | 5 | @author: fvitalini 6 | """ 7 | 8 | """ 9 | This script contains examples of usage for the classes: 10 | RamachandranBasis 11 | RamachandranProductBasis 12 | which are contained in the variational package. 13 | """ 14 | 15 | 16 | import variational 17 | import numpy as np 18 | 19 | #Use of the function RamachandranBasis 20 | 21 | from variational.basissets.ramachandran import RamachandranBasis 22 | alabasis = RamachandranBasis('A', radians=False) #load the residue centered basis 23 | #function for residue Alanine and 24 | #default force field (ff_AMBER99SB_ILDN) 25 | #three eigenvectors are considered (order=2) 26 | #expects the timeseries in degrees. 
27 | atraj = np.load('torsion_A.npy') #the file contains the phi/psi timeseries for residue A 28 | print atraj[0:10,:] #first 10 timesteps only 29 | ala_basis_traj=alabasis.map(atraj) # projects the trajectory onto the residue basis function 30 | print ala_basis_traj[0:10, :] #first 10 timesteps only 31 | 32 | 33 | #Use of the function RamachandranProductBasis 34 | 35 | # 1: Different number excitations 36 | from variational.basissets.ramachandran import RamachandranProductBasis 37 | FGAILbasis=RamachandranProductBasis('FGAIL', n_excite=3, radians=False) #load the residue centered basis 38 | #functions for residues F-G-A-I-L and 39 | #default force field (ff_AMBER99SB_ILDN) 40 | #three eigenvectors are considered (order=2) 41 | #up to 3 excited residue per basis function (n_excite=3) 42 | #expects the timeseries in degrees. 43 | FGAIL_traj = np.load('torsion_FGAIL.npy') #the file contains the phi/psi timeseries for residues FGAIL 44 | print FGAIL_traj[0:10,:] #first 10 timesteps only 45 | FGAIL_basis_set_traj, FGAIL_basis_set_list=FGAILbasis.map(FGAIL_traj) #projects the trajectory onto the residue basis functions 46 | print FGAIL_basis_set_list 47 | print FGAIL_basis_set_traj[0:10,:] #first 10 timesteps only 48 | 49 | # 2: Select only residues FG 50 | FGbasis=RamachandranProductBasis('FGAIL',include_res=[True,True,False,False,False], radians=False) #load the residue centered basis 51 | #functions for residues F-G and 52 | #default force field (ff_AMBER99SB_ILDN) 53 | #three eigenvectors are considered (order=2) 54 | #2 excited residue per basis function (n_excite=2) 55 | #expects the timeseries in degrees. 
56 | FG_basis_set_traj, FG_basis_set_list=FGbasis.map(FGAIL_traj) #projects the trajectory onto the residue basis functions 57 | print FG_basis_set_list 58 | print FG_basis_set_traj[0:10,:] #first 10 timesteps only 59 | print FG_basis_set_traj[0:10,0] #first 10 timesteps of basis function 00 60 | print FG_basis_set_traj[0:10,1] #first 10 timesteps of basis function 01 61 | print FG_basis_set_traj[0:10,8] #first 10 timesteps of basis function 22 -------------------------------------------------------------------------------- /devtools/ci/appveyor/transform_xunit_to_appveyor.xsl: -------------------------------------------------------------------------------- 1 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | Fail 69 | Skip 70 | Pass 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/appveyor/transform_xunit_to_appveyor.xsl: -------------------------------------------------------------------------------- 1 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | Fail 69 | Skip 70 | Pass 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Variational 
Approach for conformation dynamics (VAC) 2 | ==================================================== 3 | 4 | This package contains basis sets, estimators and solvers for the variational approach for 5 | conformation dynamics, a theory that has been proposed in [1] and was further developed in 6 | [2] and [3]. The variational approach is analogous to the Ritz method [4] that is 7 | employed in computational quantum chemistry. It differs in how the involved 8 | matrices are computed and in the meaning of the involved operators, eigenfunctions and 9 | eigenvalues - see [3] for a comparison. 10 | 11 | Roughly, the idea of the VAC is as follows: Given a (classical) 12 | molecular dynamics trajectory with configurations {x_1, ..., x_T}, and a 13 | set of basis functions defined on the space of configurations {chi_1(x), ..., chi_n(x)}, 14 | we compute the two correlation matrices: 15 | 16 | c_ij (0) = < chi_i(x_t) chi_j(x_t) >_t 17 | c_ij (tau) = < chi_i(x_t) chi_j(x_t+tau) >_t 18 | 19 | where < . >_t is average over time t. Of course this can be generalized to many trajectories. 20 | Then we solve the generalized eigenvalue problem 21 | 22 | C(tau) r = C(0) r l(tau) 23 | 24 | where the eigenvalues l(tau) approximate the dominant eigenvalues of the Markov propagator 25 | or Markov backward propagator of the underlying dynamics. The corresponding eigenfunction 26 | of the backward propagator is approximated by 27 | 28 | psi(x) = sum_i r_i chi_i(x) 29 | 30 | Package functionalities 31 | ----------------------- 32 | 33 | This package aims at providing code to help addressing a number of key problems: 34 | 35 | 1. Basis sets for molecular dynamics (MD), and in particular protein dynamics. See [5] for a 36 | first approach in this direction. 37 | 38 | 2. Estimators for the correlation matrices C(0), C(tau).
The trivial time-average that is usually 39 | employed has a number of problems especially for many short simulation trajectories that are 40 | initiated far from the equilibrium distribution (the usual case!). 41 | 42 | 3. Solvers for accurately solving the eigenvalue problem above, even for huge basis sets. 43 | 44 | At this time only a few of the above functionalities are implemented and we will go step by step. 45 | This package will undergo heavy development and there is currently no date for an official 46 | release, so don't be surprised if the API (the look + feel of functions and classes) change. 47 | At the moment this package is purely intended for development purposes, so use it at your own 48 | risk. 49 | 50 | Applications 51 | ------------ 52 | 1. The time-lagged independent component analysis (TICA) method originally developed in [6] and 53 | proposed as an optimal data transformation method for building Markov state models of MD 54 | in [3,7] is a VAC with mean-free basis functions. Therefore you can easily implement TICA with 55 | this package. 56 | 57 | 2. By transforming the internal coordinates such as torsion angles or interatomic distances into 58 | suitable basis functions, you can approximate experimentally-measurable relaxation timescales 59 | and determine the corresponding structural rearrangements for peptides and proteins [2,5] 60 | 61 | 3. ... more will follow. 62 | 63 | References 64 | ---------- 65 | [1] Noe, F. and Nueske, F. (2013): A variational approach to modeling slow processes in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655. 66 | 67 | [2] Nueske, F., Keller, B., Perez-Hernandez, G., Mey, A.S.J.S. and Noe, F. (2014) Variational Approach to Molecular Kinetics. J. Chem. Theory Comput. 10, 1739-1752. 68 | 69 | [3] Perez-Hernandez, G., Paul, F., Giorgino, T., De Fabritiis, G. and Noe, F. (2013) Identification of slow molecular order parameters for Markov model construction. J. Chem. Phys. 139, 015102. 
70 | 71 | [4] Ritz, W. (1909): Ueber eine neue Methode zur Loesung gewisser Variationsprobleme der mathematischen Physik. J. Reine Angew. Math., 135, 1–61. 72 | 73 | [5] Vitalini, F., Noé, F. and Keller, B. (2015): A basis set for peptides for the variational approach to conformational kinetics. (In review). 74 | 75 | [6] Molgedey, L. and Schuster H. G. (1994): Phys. Rev. Lett. 72, 3634. 76 | 77 | [7] Schwantes, C. R. and Pande, V. S. : J. Chem. Theory Comput. 9, (2013) 78 | -------------------------------------------------------------------------------- /variational/solvers/direct.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as _np 3 | __author__ = 'noe' 4 | 5 | 6 | def sort_by_norm(evals, evecs): 7 | """ 8 | Sorts the eigenvalues and eigenvectors by descending norm of the eigenvalues 9 | 10 | Parameters 11 | ---------- 12 | evals: ndarray(n) 13 | eigenvalues 14 | evecs: ndarray(n,n) 15 | eigenvectors in a column matrix 16 | 17 | Returns 18 | ------- 19 | (evals, evecs) : ndarray(m), ndarray(n,m) 20 | the sorted eigenvalues and eigenvectors 21 | 22 | """ 23 | # norms 24 | evnorms = _np.abs(evals) 25 | # sort 26 | I = _np.argsort(evnorms)[::-1] 27 | # permute 28 | evals2 = evals[I] 29 | evecs2 = evecs[:, I] 30 | # done 31 | return evals2, evecs2 32 | 33 | 34 | def spd_inv_split(W, epsilon=1e-10, method='QR', canonical_signs=False): 35 | """ 36 | Compute :math:`W^{-1} = L L^T` of the symmetric positive-definite matrix :math:`W`. 37 | 38 | by first reducing W to a low-rank approximation that is truly spd. 39 | 40 | Parameters 41 | ---------- 42 | W : ndarray((m,m), dtype=float) 43 | Symmetric positive-definite (spd) matrix. 44 | epsilon : float 45 | Truncation parameter. Eigenvalues with norms smaller than this cutoff will 46 | be removed. 47 | method : str 48 | Method to perform the decomposition of :math:`W` before inverting. 
Options are: 49 | 50 | * 'QR': QR-based robust eigenvalue decomposition of W 51 | * 'schur': Schur decomposition of W 52 | 53 | canonical_signs : boolean, default = False 54 | Fix signs in L, s. t. the largest element of in every column of L is positive. 55 | 56 | Returns 57 | ------- 58 | L : ndarray((n, r)) 59 | Matrix :math:`L` from the decomposition :math:`W^{-1} = L L^T`. 60 | 61 | """ 62 | # check input 63 | assert _np.allclose(W.T, W), 'C0 is not a symmetric matrix' 64 | 65 | if (_np.shape(W)[0] == 1): 66 | L = 1./_np.sqrt(W[0,0]) 67 | else: 68 | if method.lower() == 'qr': 69 | from .eig_qr.eig_qr import eig_qr 70 | s, V = eig_qr(W) 71 | # compute the Eigenvalues of C0 using Schur factorization 72 | elif method.lower() == 'schur': 73 | from scipy.linalg import schur 74 | S, V = schur(W) 75 | s = _np.diag(S) 76 | else: 77 | raise ValueError('method not implemented: ' + method) 78 | 79 | s, V = sort_by_norm(s, V) # sort them 80 | 81 | # determine the cutoff. We know that C0 is an spd matrix, 82 | # so we select the truncation threshold such that everything that is negative vanishes 83 | evmin = _np.min(s) 84 | if evmin < 0: 85 | epsilon = max(epsilon, -evmin + 1e-16) 86 | 87 | # determine effective rank m and perform low-rank approximations. 88 | evnorms = _np.abs(s) 89 | n = _np.shape(evnorms)[0] 90 | m = n - _np.searchsorted(evnorms[::-1], epsilon) 91 | Vm = V[:, 0:m] 92 | sm = s[0:m] 93 | 94 | if canonical_signs: 95 | # enforce canonical eigenvector signs 96 | for j in range(m): 97 | jj = _np.argmax(_np.abs(Vm[:, j])) 98 | Vm[:, j] *= _np.sign(Vm[jj, j]) 99 | 100 | L = _np.dot(Vm, _np.diag(1.0/_np.sqrt(sm))) 101 | 102 | # return split 103 | return L 104 | 105 | 106 | def eig_corr(C0, Ct, epsilon=1e-10, method='QR', sign_maxelement=False): 107 | r""" Solve generalized eigenvalue problem with correlation matrices C0 and Ct 108 | 109 | Numerically robust solution of a generalized Hermitian (symmetric) eigenvalue 110 | problem of the form 111 | 112 | .. 
math:: 113 | \mathbf{C}_t \mathbf{r}_i = \mathbf{C}_0 \mathbf{r}_i l_i 114 | 115 | Computes :math:`m` dominant eigenvalues :math:`l_i` and eigenvectors 116 | :math:`\mathbf{r}_i`, where :math:`m` is the numerical rank of the problem. 117 | This is done by first conducting a Schur decomposition of the symmetric 118 | positive matrix :math:`\mathbf{C}_0`, then truncating its spectrum to 119 | retain only eigenvalues that are numerically greater than zero, then using 120 | this decomposition to define an ordinary eigenvalue Problem for 121 | :math:`\mathbf{C}_t` of size :math:`m`, and then solving this eigenvalue 122 | problem. 123 | 124 | Parameters 125 | ---------- 126 | C0 : ndarray (n,n) 127 | time-instantaneous correlation matrix. Must be symmetric positive definite 128 | Ct : ndarray (n,n) 129 | time-lagged correlation matrix. Must be symmetric 130 | epsilon : float 131 | eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon will be 132 | cut off. The remaining number of Eigenvalues define the size of 133 | the output. 134 | method : str 135 | Method to perform the decomposition of :math:`W` before inverting. Options are: 136 | 137 | * 'QR': QR-based robust eigenvalue decomposition of W 138 | * 'schur': Schur decomposition of W 139 | sign_maxelement : bool 140 | If True, re-scale each eigenvector such that its entry with maximal absolute value 141 | is positive. 142 | 143 | 144 | Returns 145 | ------- 146 | l : ndarray (m) 147 | The first m generalized eigenvalues, sorted by descending norm 148 | R : ndarray (n,m) 149 | The first m generalized eigenvectors, as a column matrix. 
150 | 151 | """ 152 | L = spd_inv_split(C0, epsilon=epsilon, method=method) 153 | Ct_trans = _np.dot(_np.dot(L.T, Ct), L) 154 | 155 | # solve the symmetric eigenvalue problem in the new basis 156 | if _np.allclose(Ct.T, Ct): 157 | from scipy.linalg import eigh 158 | l, R_trans = eigh(Ct_trans) 159 | else: 160 | from scipy.linalg import eig 161 | l, R_trans = eig(Ct_trans) 162 | 163 | # sort eigenpairs 164 | l, R_trans = sort_by_norm(l, R_trans) 165 | 166 | # transform the eigenvectors back to the old basis 167 | R = _np.dot(L, R_trans) 168 | 169 | # Change signs of eigenvectors: 170 | if sign_maxelement: 171 | for j in range(R.shape[1]): 172 | imax = _np.argmax(_np.abs(R[:, j])) 173 | R[:, j] *= _np.sign(R[imax, j]) 174 | 175 | # return result 176 | return l, R 177 | -------------------------------------------------------------------------------- /variational/estimators/tests/benchmark_moments.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | __author__ = 'noe' 4 | 5 | import time 6 | import numpy as np 7 | from .. 
def genS(N):
    """ Generates sparsities given N (number of cols) """
    candidates = [10, 90, 100, 500, 900, 1000, 2000, 5000, 7500, 9000, 10000, 20000, 50000, 75000, 90000]  # non-zero
    return [s for s in candidates if s <= N]


def genX(L, N, n_var=None, const=False):
    """ Random (L, N) data; if n_var is set, only the first n_var columns vary.

    The remaining columns are constant: all ones if const=True, zeros otherwise.
    """
    data = np.random.rand(L, N)  # random data
    if n_var is None:
        return data
    padded = np.ones((L, N)) if const else np.zeros((L, N))
    padded[:, :n_var] = data[:, :n_var]
    return padded


def genY(L, N, n_var=None, const=False):
    """ Like genX, but constant columns are filled with -1 when const=True. """
    data = np.random.rand(L, N)  # random data
    if n_var is None:
        return data
    padded = -np.ones((L, N)) if const else np.zeros((L, N))
    padded[:, :n_var] = data[:, :n_var]
    return padded


def reftime_momentsXX(X, remove_mean=False, nrep=3):
    """ Mean wall-clock time of the naive dense computation of sum and X^T X. """
    begin = time.time()
    for _ in range(nrep):
        col_sum = X.sum(axis=0)  # computation of mean
        if remove_mean:
            X = X - col_sum / float(X.shape[0])
        C_XX_ref = np.dot(X.T, X)  # covariance matrix
    return (time.time() - begin) / float(nrep)


def mytime_momentsXX(X, remove_mean=False, nrep=3):
    """ Mean wall-clock time of moments.moments_XX on the same data. """
    begin = time.time()
    for _ in range(nrep):
        w, s, C_XX = moments.moments_XX(X, remove_mean=remove_mean)
    return (time.time() - begin) / float(nrep)


def reftime_momentsXXXY(X, Y, remove_mean=False, symmetrize=False, nrep=3):
    """ Mean wall-clock time of the naive dense computation of C_XX and C_XY. """
    begin = time.time()
    for _ in range(nrep):
        sum_x = X.sum(axis=0)  # computation of mean
        sum_y = Y.sum(axis=0)  # computation of mean
        if symmetrize:
            sum_x = 0.5 * (sum_x + sum_y)
            sum_y = sum_x
        if remove_mean:
            X = X - sum_x / float(X.shape[0])
            Y = Y - sum_y / float(Y.shape[0])
        if symmetrize:
            C_XX_ref = np.dot(X.T, X) + np.dot(Y.T, Y)
            C_XY = np.dot(X.T, Y)
            C_XY_ref = C_XY + C_XY.T
        else:
            C_XX_ref = np.dot(X.T, X)
            C_XY_ref = np.dot(X.T, Y)
    return (time.time() - begin) / float(nrep)


def mytime_momentsXXXY(X, Y, remove_mean=False, symmetrize=False, nrep=3):
    """ Mean wall-clock time of moments.moments_XXXY on the same data. """
    begin = time.time()
    for _ in range(nrep):
        w, sx, sy, C_XX, C_XY = moments.moments_XXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize)
    return (time.time() - begin) / float(nrep)


def benchmark_moments(L=10000, N=10000, nrep=5, xy=False, remove_mean=False, symmetrize=False, const=False):
    """ Benchmark moments_XX / moments_XXXY against the naive dense reference
    for a range of sparsities and print the resulting timing table. """
    S = genS(N)

    # time for the dense reference calculation
    X = genX(L, N)
    if xy:
        Y = genY(L, N)
        reftime = reftime_momentsXXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize, nrep=nrep)
    else:
        reftime = reftime_momentsXX(X, remove_mean=remove_mean, nrep=nrep)

    # time of the sparse-aware implementation, one measurement per sparsity
    times = np.zeros(len(S))
    for k, s in enumerate(S):
        X = genX(L, N, n_var=s, const=const)
        if xy:
            Y = genY(L, N, n_var=s, const=const)
            times[k] = mytime_momentsXXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize, nrep=nrep)
        else:
            times[k] = mytime_momentsXX(X, remove_mean=remove_mean, nrep=nrep)

    # assemble report
    rows = ['L, data points', 'N, dimensions', 'S, nonzeros', 'time trivial', 'time moments_XX', 'speed-up']
    table = np.zeros((6, len(S)))
    table[0, :] = L
    table[1, :] = N
    table[2, :] = S
    table[3, :] = reftime
    table[4, :] = times
    table[5, :] = reftime / times

    # print table
    fname = 'moments_XXXY' if xy else 'moments_XX'
    print(fname + '\tremove_mean = ' + str(remove_mean) + '\tsym = ' + str(symmetrize) + '\tconst = ' + str(const))
    int_row = '\t%i' * table.shape[1]
    flt_row = '\t%.3f' * table.shape[1]
    print(rows[0] + int_row % tuple(table[0]))
    print(rows[1] + int_row % tuple(table[1]))
    print(rows[2] + int_row % tuple(table[2]))
    print(rows[3] + flt_row % tuple(table[3]))
    print(rows[4] + flt_row % tuple(table[4]))
    print(rows[5] + flt_row % tuple(table[5]))
    print()


def main():
    # (xy, remove_mean, symmetrize); every combination runs with const=False and const=True
    cases = [(False, False, False),
             (False, True, False),
             (True, False, False),
             (True, False, True),
             (True, True, False),
             (True, True, True)]
    for L, N, nrep in [(100000, 100, 10), (10000, 1000, 7), (1000, 2000, 5), (250, 5000, 5), (100, 10000, 5)]:
        for xy, remove_mean, symmetrize in cases:
            for const in (False, True):
                benchmark_moments(L=L, N=N, nrep=nrep, xy=xy, remove_mean=remove_mean,
                                  symmetrize=symmetrize, const=const)


if __name__ == "__main__":
    main()
import numpy
import ctypes
cimport numpy

# C kernels from _covartools.c. The _variable_cols_* kernels return a
# completion flag (0 = aborted early because fewer than min_constant constant
# columns remained) — their return value is used below, so they are declared
# int here. NOTE(review): confirm the int return types against _covartools.h;
# they were declared void, which contradicts the `completed == 0` logic.
cdef extern from "_covartools.h":
    int _variable_cols_char(int* cols, char* X, int M, int N, int min_constant)
    int _variable_cols_int(int* cols, int* X, int M, int N, int min_constant)
    int _variable_cols_long(int* cols, long* X, int M, int N, int min_constant)
    int _variable_cols_float(int* cols, float* X, int M, int N, int min_constant)
    int _variable_cols_double(int* cols, double* X, int M, int N, int min_constant)
    int _variable_cols_float_approx(int* cols, float* X, int M, int N, float tol, int min_constant)
    int _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant)
    void _subtract_row_double(double* X, double* row, int M, int N)
    void _subtract_row_float(float* X, float* row, int M, int N)
    void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N)
    void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N)


# ================================================
# Check for constant columns
# ================================================

def variable_cols_char(cols, X, M, N, min_constant=0):
    # thin wrapper: hand raw data pointers to the char (bool) kernel
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    return _variable_cols_char(pcols, pX, M, N, min_constant)

def variable_cols_int(cols, X, M, N, min_constant=0):
    # thin wrapper: hand raw data pointers to the int32 kernel
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    return _variable_cols_int(pcols, pX, M, N, min_constant)

def variable_cols_long(cols, X, M, N, min_constant=0):
    # thin wrapper: hand raw data pointers to the int64 kernel
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    return _variable_cols_long(pcols, pX, M, N, min_constant)

def variable_cols_float(cols, X, M, N, tol=0.0, min_constant=0):
    # exact comparison for tol == 0, tolerance-based comparison otherwise
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    if tol == 0.0:
        return _variable_cols_float(pcols, pX, M, N, min_constant)
    else:
        return _variable_cols_float_approx(pcols, pX, M, N, numpy.float32(tol), min_constant)

def variable_cols_double(cols, X, M, N, tol=0.0, min_constant=0):
    # exact comparison for tol == 0, tolerance-based comparison otherwise
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    if tol == 0.0:
        return _variable_cols_double(pcols, pX, M, N, min_constant)
    else:
        return _variable_cols_double_approx(pcols, pX, M, N, tol, min_constant)

def variable_cols(X, tol=0, min_constant=0):
    """ Evaluates which columns are constant (0) or variable (1)

    Parameters
    ----------
    X : ndarray
        Matrix whose columns will be checked for constant or variable.
    tol : float
        Tolerance for float-matrices. When set to 0 only columns with exactly
        equal values will be considered constant. When set to a positive value,
        columns where all elements have absolute differences to the first
        element of that column smaller than tol are considered constant.
    min_constant : int
        Minimal number of constant columns to resume operation. If at one
        point the number of constant columns drops below min_constant, the
        computation will stop and all columns will be assumed to be variable.
        In this case, an all-True array will be returned.

    Returns
    -------
    variable : bool-array
        Array with number of elements equal to the columns. True: column is
        variable / nonconstant. False: column is constant.

    """
    if X is None:
        return None
    M, N = X.shape

    # prepare column array
    cols = numpy.zeros( (N), dtype=ctypes.c_int, order='C' )

    # numpy.bool_ (not the removed numpy.bool alias) keeps this working on numpy >= 1.24
    if X.dtype == numpy.float64:
        completed = variable_cols_double(cols, X, M, N, tol=tol, min_constant=min_constant)
    elif X.dtype == numpy.float32:
        completed = variable_cols_float(cols, X, M, N, tol=tol, min_constant=min_constant)
    elif X.dtype == numpy.int32:
        completed = variable_cols_int(cols, X, M, N, min_constant=min_constant)
    elif X.dtype == numpy.int64:
        completed = variable_cols_long(cols, X, M, N, min_constant=min_constant)
    elif X.dtype == numpy.bool_:
        completed = variable_cols_char(cols, X, M, N, min_constant=min_constant)
    else:
        raise TypeError('unsupported type of X: '+str(X.dtype))

    # if interrupted, return all ones. Otherwise return the variable columns as bool array
    if completed == 0:
        # BUG FIX: was numpy.ones(cols, ...), which used the int array itself
        # as the *shape* argument; we want a length-N all-True array.
        return numpy.ones(N, dtype=numpy.bool_)
    else:
        return numpy.array(cols, dtype=numpy.bool_)

# ================================================
# Row subtraction
# ================================================

def subtract_row_float(X, row, M, N):
    # in-place: X[i, :] -= row for float32 data
    prow = numpy.PyArray_DATA(row)
    pX = numpy.PyArray_DATA(X)
    _subtract_row_float(pX, prow, M, N)

def subtract_row_double(X, row, M, N):
    # in-place: X[i, :] -= row for float64 data
    prow = numpy.PyArray_DATA(row)
    pX = numpy.PyArray_DATA(X)
    _subtract_row_double(pX, prow, M, N)

def subtract_row_double_copy(X, row, M, N):
    # out-of-place: returns X - row as a new float64 array, X is untouched
    X0 = numpy.zeros( X.shape, dtype=ctypes.c_double, order='C' )
    pX0 = numpy.PyArray_DATA(X0)
    pX = numpy.PyArray_DATA(X)
    prow = numpy.PyArray_DATA(row)
    _subtract_row_double_copy(pX0, pX, prow, M, N)
    return X0

def subtract_row_float_copy(X, row, M, N):
    # out-of-place: returns X - row as a new float32 array, X is untouched.
    # BUG FIX: the output buffer was allocated as c_double, which does not
    # match the float* signature of _subtract_row_float_copy.
    X0 = numpy.zeros( X.shape, dtype=ctypes.c_float, order='C' )
    pX0 = numpy.PyArray_DATA(X0)
    pX = numpy.PyArray_DATA(X)
    prow = numpy.PyArray_DATA(row)
    _subtract_row_float_copy(pX0, pX, prow, M, N)
    return X0


def subtract_row(X, row, inplace=False):
    """ Subtracts given row from each row of array

    Parameters
    ----------
    X : ndarray (M, N)
        Matrix whose rows will be shifted.
    row : ndarray (N)
        Row vector that will be subtracted from each row of X.
    inplace : bool
        True: X will be changed. False: A copy of X will be created and X will remain unchanged.

    Returns
    -------
    X0 : ndarray (M, N)
        The row-shifted data

    """
    M, N = X.shape

    if X.dtype == numpy.float64 and row.dtype == numpy.float64:
        if inplace:
            subtract_row_double(X, row, M, N)
        else:
            X = subtract_row_double_copy(X, row, M, N)
    elif X.dtype == numpy.float32 and row.dtype == numpy.float32:
        if inplace:
            subtract_row_float(X, row, M, N)
        else:
            X = subtract_row_float_copy(X, row, M, N)
    else:
        raise TypeError('unsupported or inconsistent types: '+str(X.dtype)+' '+str(row.dtype))

    return X
9 | 10 | Roughly, the idea of the VAC is as follows: Given a (classical) 11 | molecular dynamics trajectory with configurations {x_1, ..., x_T}, and a 12 | set of basis functions defined on the space of configurations {chi_1(x), ..., chi_n(x)}, 13 | we compute the two correlation matrices: 14 | 15 | c_ij (0) = < chi_i(x_t) chi_j(x_t) >_t 16 | c_ij (tau) = < chi_i(x_t) chi_j(x_t+tau) >_t 17 | 18 | where < . >_t is average over time t. Of course this can be generalized to many trajectories. 19 | Then we solve the generalized eigenvalue problem 20 | 21 | C(tau) r = C(0) r l(tau) 22 | 23 | where the eigenvalues l(tau) approximate the dominant eigenvalues of the Markov propagator 24 | or Markov backward propagator of the underlying dynamics. The corresponding eigenfunction 25 | of the backward propagator is approximated by 26 | 27 | psi(x) = sum_i r_i chi_i(x) 28 | 29 | Package functionalities 30 | ----------------------- 31 | 32 | This package aims at providing code to help addressing a number of key problems: 33 | 34 | 1. Basis sets for molecular dynamics (MD), and in particular protein dynamics. See [5] for a 35 | first approach in this direction. 36 | 37 | 2. Estimators for the corration matrices C(0), C(tau). The trivial time-average that is usually 38 | employed has a number of problems especially for many short simulation trajectories that are 39 | initiated far from the equilibrium distribution (the usual case!). 40 | 41 | 3. Solvers for accurately solving the eigenvalue problem above, even for huge basis sets. 42 | 43 | At this time only a few of the above functionalities are implemented and we will go step by step. 44 | This package will undergo heavy development and there is currently no date for an official 45 | release, so don't be surprised if the API (the look + feel of functions and classes) change. 46 | At the moment this package is purely intended for development purposes, so use it at your own 47 | risk. 
48 | 49 | Applications 50 | ------------ 51 | 1. The time-lagged independent component analysis (TICA) method originally developed in [6] and 52 | proposed as an optimal data transformation method for building Markov state models of MD 53 | in [3,7] is a VAC with mean-free basis functions. Therefore you can easily implement TICA with 54 | this package. 55 | 56 | 2. By transforming the internal coordinates such as torsion angles or interatomic distances into 57 | suitable basis functions, you can approximate experimentally-measurable relaxation timescales 58 | and determine the corresponding structural rearrangements for peptides and proteins [2,5] 59 | 60 | 3. ... more will follow. 61 | 62 | References 63 | ---------- 64 | [1] Noe, F. and Nueske, F. (2013): A variational approach to modeling slow processes 65 | in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655. 66 | 67 | [2] Nueske, F., Keller, B., Perez-Hernandez, G., Mey, A.S.J.S. and Noe, F. (2014) 68 | Variational Approach to Molecular Kinetics. J. Chem. Theory Comput. 10, 1739-1752. 69 | 70 | [3] Perez-Hernandez, G., Paul, F., Giorgino, T., De Fabritiis, G. and Noe, F. (2013) 71 | Identification of slow molecular order parameters for Markov model construction. 72 | J. Chem. Phys. 139, 015102. 73 | 74 | [4] Ritz, W. (1909): Ueber eine neue Methode zur Loesung gewisser 75 | Variationsprobleme der mathematischen Physik. J. Reine Angew. Math., 135, 1-61. 76 | 77 | [5] Vitalini, F., Noe, F. and Keller, B. (2015): A basis set for peptides for the 78 | variational approach to conformational kinetics. (In review). 79 | 80 | [6] Molgedey, L. and Schuster H. G. (1994): Phys. Rev. Lett. 72, 3634. 81 | 82 | [7] Schwantes, C. R. and Pande, V. S. (2000): J. Chem. Theory Comput. 
9, 2000 83 | 84 | """ 85 | from __future__ import print_function 86 | import os 87 | import versioneer 88 | from setuptools import setup, Extension, find_packages 89 | from os.path import relpath, join 90 | 91 | DOCLINES = __doc__.split("\n") 92 | 93 | CLASSIFIERS = """\ 94 | Development Status :: 3 - Alpha 95 | Intended Audience :: Science/Research 96 | Intended Audience :: Developers 97 | License :: OSI Approved :: Open BSD clause 2 (OpenBSD) 98 | Programming Language :: Python 99 | Topic :: Scientific/Engineering :: Bio-Informatics 100 | Topic :: Scientific/Engineering :: Chemistry 101 | Topic :: Scientific/Engineering :: Physics 102 | Operating System :: Microsoft :: Windows 103 | Operating System :: POSIX 104 | Operating System :: Unix 105 | Operating System :: MacOS 106 | """ 107 | 108 | ################################################################################ 109 | # USEFUL SUBROUTINES 110 | ################################################################################ 111 | 112 | def find_package_data(data_root, package_root): 113 | files = [] 114 | for root, dirnames, filenames in os.walk(data_root): 115 | for fn in filenames: 116 | files.append(relpath(join(root, fn), package_root)) 117 | return files 118 | 119 | ################################################################################ 120 | # EXTENSIONS 121 | ################################################################################ 122 | 123 | def extensions(): 124 | from numpy import get_include as np_inc 125 | from scipy import get_include as sc_inc 126 | np_inc = np_inc() 127 | sc_inc = sc_inc() 128 | from Cython.Build import cythonize 129 | exts = [Extension('variational.estimators.covar_c.covartools', 130 | sources = ['./variational/estimators/covar_c/covartools.pyx', 131 | './variational/estimators/covar_c/_covartools.c'], 132 | include_dirs = ['./variational/estimators/covar_c/', np_inc], 133 | extra_compile_args=['-std=c99','-O3']), 134 | 
Extension('variational.solvers.eig_qr.eig_qr', 135 | sources=['./variational/solvers/eig_qr/eig_qr.pyx'], 136 | include_dirs=['./variational/solvers/eig_qr/', np_inc, sc_inc], 137 | extra_compile_args=['-std=c99','-O3']) 138 | ] 139 | return cythonize(exts) 140 | 141 | 142 | class lazy_cythonize(list): 143 | """evaluates extension list lazyly. 144 | pattern taken from http://tinyurl.com/qb8478q""" 145 | def __init__(self, callback): 146 | self._list, self.callback = None, callback 147 | def c_list(self): 148 | if self._list is None: self._list = self.callback() 149 | return self._list 150 | def __iter__(self): 151 | for e in self.c_list(): yield e 152 | def __getitem__(self, ii): return self.c_list()[ii] 153 | def __len__(self): return len(self.c_list()) 154 | 155 | ################################################################################ 156 | # SETUP 157 | ################################################################################ 158 | 159 | metadata=dict( 160 | name = 'variational', 161 | author = 'Frank Noe, Fabian Paul and Feliks Nueske', 162 | author_email = 'frank.noe@fu-berlin.de', 163 | description = DOCLINES[0], 164 | long_description = "\n".join(DOCLINES[2:]), 165 | version=versioneer.get_version(), 166 | cmdclass=versioneer.get_cmdclass(), 167 | license='OpenBSD', 168 | url='https://github.com/markovmodel/variational', 169 | platforms=['Linux', 'Mac OS-X', 'Unix', 'Windows'], 170 | classifiers=CLASSIFIERS.splitlines(), 171 | #package_dir={'variational': 'variational'}, 172 | packages=find_packages(), 173 | package_data={'variational.basisset':['ResiduesEigenvectors/*'] 174 | }, 175 | zip_safe=False, 176 | install_requires=[ 177 | 'numpy', 178 | 'scipy', 179 | 'six', 180 | ], 181 | setup_requires=[ 182 | 'cython>=0.24', 183 | 'numpy', 184 | ], 185 | ext_modules=lazy_cythonize(extensions), 186 | ) 187 | 188 | setup(**metadata) 189 | -------------------------------------------------------------------------------- /docs/Interface.lyx: 
-------------------------------------------------------------------------------- 1 | #LyX 2.1 created this file. For more info see http://www.lyx.org/ 2 | \lyxformat 474 3 | \begin_document 4 | \begin_header 5 | \textclass article 6 | \use_default_options true 7 | \maintain_unincluded_children false 8 | \language english 9 | \language_package default 10 | \inputencoding auto 11 | \fontencoding global 12 | \font_roman default 13 | \font_sans default 14 | \font_typewriter default 15 | \font_math auto 16 | \font_default_family default 17 | \use_non_tex_fonts false 18 | \font_sc false 19 | \font_osf false 20 | \font_sf_scale 100 21 | \font_tt_scale 100 22 | \graphics default 23 | \default_output_format default 24 | \output_sync 0 25 | \bibtex_command default 26 | \index_command default 27 | \paperfontsize default 28 | \use_hyperref false 29 | \papersize default 30 | \use_geometry false 31 | \use_package amsmath 1 32 | \use_package amssymb 1 33 | \use_package cancel 1 34 | \use_package esint 1 35 | \use_package mathdots 1 36 | \use_package mathtools 1 37 | \use_package mhchem 1 38 | \use_package stackrel 1 39 | \use_package stmaryrd 1 40 | \use_package undertilde 1 41 | \cite_engine basic 42 | \cite_engine_type default 43 | \biblio_style plain 44 | \use_bibtopic false 45 | \use_indices false 46 | \paperorientation portrait 47 | \suppress_date false 48 | \justification true 49 | \use_refstyle 1 50 | \index Index 51 | \shortcut idx 52 | \color #008000 53 | \end_index 54 | \secnumdepth 3 55 | \tocdepth 3 56 | \paragraph_separation indent 57 | \paragraph_indentation default 58 | \quotes_language english 59 | \papercolumns 1 60 | \papersides 1 61 | \paperpagestyle default 62 | \tracking_changes false 63 | \output_changes false 64 | \html_math_output 0 65 | \html_css_as_file 0 66 | \html_be_strict false 67 | \end_header 68 | 69 | \begin_body 70 | 71 | \begin_layout Title 72 | Interface for Variational Package 73 | \end_layout 74 | 75 | \begin_layout Standard 76 | Here, we 
briefly sketch the interface for all functions to appear in the 77 | variational package. 78 | The package consists of three main modules: A library of basis sets, estimators 79 | for the correlation matrices, and a solver for the resulting generalized 80 | eigenvalue problem. 81 | \end_layout 82 | 83 | \begin_layout Enumerate 84 | The basis sets library contains functions to evaluate specific classes of 85 | basis functions. 86 | Examples for these classes are Gaussian basis functions, Fourier waves 87 | defined on angles, or the MSM-eigenvector based functions and their products. 88 | The general interface is given by the function SomeBasisSet below. 89 | 90 | \end_layout 91 | 92 | \begin_layout Enumerate 93 | The estimator module contains a function that generates the correlation-matrices 94 | 95 | \begin_inset Formula $\mathbf{C}^{\tau},\,\mathbf{C}^{0}$ 96 | \end_inset 97 | 98 | from the basis function trajectories generated in the first step. 99 | \end_layout 100 | 101 | \begin_layout Enumerate 102 | The solver module contains a function the solve the generalized eigenvalue 103 | problem for the correlation matrices generated before. 104 | We will just use the function eig_corr implemented in pyemma.util.linalg. 105 | \end_layout 106 | 107 | \begin_layout Standard 108 | Below we describe the interfaces for these three modules. 
109 | \end_layout 110 | 111 | \begin_layout Standard 112 | \begin_inset listings 113 | lstparams "language=Python,float,breaklines=true,tabsize=4" 114 | inline false 115 | status open 116 | 117 | \begin_layout Plain Layout 118 | 119 | def SomeBasisSet(list_of_trajectories, prefix, parameters): 120 | \end_layout 121 | 122 | \begin_layout Plain Layout 123 | 124 | """ 125 | \end_layout 126 | 127 | \begin_layout Plain Layout 128 | 129 | Parameters 130 | \end_layout 131 | 132 | \begin_layout Plain Layout 133 | 134 | ---------- 135 | \end_layout 136 | 137 | \begin_layout Plain Layout 138 | 139 | list_of_trajectories: list 140 | \end_layout 141 | 142 | \begin_layout Plain Layout 143 | 144 | List of .npy-files. 145 | Each file contains a feature trajectory, represented as an np-array of 146 | shape (T,N), where T is the number of time-steps in this trajectory and 147 | N is the number of features (distances, angles,...) on which the basis set 148 | is defined. 149 | \end_layout 150 | 151 | \begin_layout Plain Layout 152 | 153 | prefix: string 154 | \end_layout 155 | 156 | \begin_layout Plain Layout 157 | 158 | Common prefix for all files to be produced (see Output). 159 | \end_layout 160 | 161 | \begin_layout Plain Layout 162 | 163 | parameters: 164 | \end_layout 165 | 166 | \begin_layout Plain Layout 167 | 168 | Additional parameters needed for this basis set. 169 | \end_layout 170 | 171 | \begin_layout Plain Layout 172 | 173 | \end_layout 174 | 175 | \begin_layout Plain Layout 176 | 177 | Returns 178 | \end_layout 179 | 180 | \begin_layout Plain Layout 181 | 182 | ------- 183 | \end_layout 184 | 185 | \begin_layout Plain Layout 186 | 187 | Returns a list of lists of filenames where the evaluations of all requested 188 | basis functions can be found. 189 | The files will be called "prefix_trajnum_fctnum.npy", where trajnum is the 190 | trajectory number and ftcnum is the number of the basis function. 191 | Each sublist contains the files for one trajectory. 
192 | \end_layout 193 | 194 | \begin_layout Plain Layout 195 | 196 | ''' 197 | \end_layout 198 | 199 | \end_inset 200 | 201 | 202 | \end_layout 203 | 204 | \begin_layout Standard 205 | \begin_inset listings 206 | lstparams "language=Python,float,breaklines=true,tabsize=4" 207 | inline false 208 | status open 209 | 210 | \begin_layout Plain Layout 211 | 212 | def Estimator(list_of_trajectories, list_of_taus): 213 | \end_layout 214 | 215 | \begin_layout Plain Layout 216 | 217 | """ 218 | \end_layout 219 | 220 | \begin_layout Plain Layout 221 | 222 | Parameters: 223 | \end_layout 224 | 225 | \begin_layout Plain Layout 226 | 227 | ----------- 228 | \end_layout 229 | 230 | \begin_layout Plain Layout 231 | 232 | list_of_trajectories: list 233 | \end_layout 234 | 235 | \begin_layout Plain Layout 236 | 237 | List of list of .npy-files, organized the same way as the output of a basis 238 | set function. 239 | Each sublist contains the files for all basis functions for one specific 240 | trajectory. 241 | \end_layout 242 | 243 | \begin_layout Plain Layout 244 | 245 | list_of_taus: ndarray (ntau,) 246 | \end_layout 247 | 248 | \begin_layout Plain Layout 249 | 250 | The lag-times for which the correlation matrices will be computed. 251 | \end_layout 252 | 253 | \begin_layout Plain Layout 254 | 255 | \end_layout 256 | 257 | \begin_layout Plain Layout 258 | 259 | Returns 260 | \end_layout 261 | 262 | \begin_layout Plain Layout 263 | 264 | ------- 265 | \end_layout 266 | 267 | \begin_layout Plain Layout 268 | 269 | list of correlation matrices. 
270 | \end_layout 271 | 272 | \begin_layout Plain Layout 273 | 274 | """ 275 | \end_layout 276 | 277 | \end_inset 278 | 279 | 280 | \end_layout 281 | 282 | \begin_layout Standard 283 | \begin_inset listings 284 | lstparams "language=Python,float,breaklines=true,tabsize=4" 285 | inline false 286 | status open 287 | 288 | \begin_layout Plain Layout 289 | 290 | def eig_corr(C0, Ct, epsilon=1e-6): 291 | \end_layout 292 | 293 | \begin_layout Plain Layout 294 | 295 | """ Solve the generalized eigenvalues problem with correlation matrices 296 | C0 and Ct 297 | \end_layout 298 | 299 | \begin_layout Plain Layout 300 | 301 | Parameters 302 | \end_layout 303 | 304 | \begin_layout Plain Layout 305 | 306 | ---------- 307 | \end_layout 308 | 309 | \begin_layout Plain Layout 310 | 311 | C0 : ndarray (n,n) 312 | \end_layout 313 | 314 | \begin_layout Plain Layout 315 | 316 | time-instantaneous correlation matrix. 317 | Must be symmetric positive definite 318 | \end_layout 319 | 320 | \begin_layout Plain Layout 321 | 322 | Ct : ndarray (n,n) 323 | \end_layout 324 | 325 | \begin_layout Plain Layout 326 | 327 | time-lagged correlation matrix. 328 | Must be symmetric 329 | \end_layout 330 | 331 | \begin_layout Plain Layout 332 | 333 | epsilon : float 334 | \end_layout 335 | 336 | \begin_layout Plain Layout 337 | 338 | eigenvalue norm cutoff. 339 | Eigenvalues of C0 with norms <= epsilon will be cut off. 340 | The remaining number of Eigenvalues define the size of the output. 
341 | \end_layout 342 | 343 | \begin_layout Plain Layout 344 | 345 | Returns 346 | \end_layout 347 | 348 | \begin_layout Plain Layout 349 | 350 | ------- 351 | \end_layout 352 | 353 | \begin_layout Plain Layout 354 | 355 | l : ndarray (m) 356 | \end_layout 357 | 358 | \begin_layout Plain Layout 359 | 360 | The first m generalized eigenvalues, sorted by descending norm 361 | \end_layout 362 | 363 | \begin_layout Plain Layout 364 | 365 | R : ndarray (n,m) 366 | \end_layout 367 | 368 | \begin_layout Plain Layout 369 | 370 | The first m generalized eigenvectors, as a column matrix. 371 | \end_layout 372 | 373 | \begin_layout Plain Layout 374 | 375 | """ 376 | \end_layout 377 | 378 | \end_inset 379 | 380 | 381 | \end_layout 382 | 383 | \end_body 384 | \end_document 385 | -------------------------------------------------------------------------------- /variational/estimators/tests/test_running_moments.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | import numpy as np 4 | from .. 
import running_moments

__author__ = 'noe'


class TestRunningMoments(unittest.TestCase):
    """Compare RunningCovar's chunked, incremental moment estimates against
    direct single-pass computations on the full data arrays (plain, mean-free,
    symmetrized and weighted variants)."""

    @classmethod
    def setUpClass(cls):
        # Two correlated-by-construction (T, 2) time series.
        cls.X = np.random.rand(10000, 2)
        cls.Y = np.random.rand(10000, 2)
        cls.T = cls.X.shape[0]
        # Chunk size:
        cls.L = 1000
        # Number of chunks. NOTE: integer division — under Python 3,
        # cls.T / cls.L is a float and np.random.rand(cls.nchunks) below
        # would raise TypeError.
        cls.nchunks = cls.T // cls.L
        # Set a lag time for time-lagged tests:
        #cls.lag = 50
        # Weights references:
        cls.weights = np.random.rand(10000)
        # Trajectory weights:
        cls.trajweights = 3*np.random.rand(cls.nchunks)
        # bias the first part so means are clearly nonzero
        cls.X[:2000] += 1.0
        cls.Y[:2000] -= 1.0
        # direct calculation, moments of X and Y
        cls.w = np.shape(cls.X)[0]
        cls.wsym = 2*np.shape(cls.X)[0]
        cls.sx = cls.X.sum(axis=0)
        cls.sy = cls.Y.sum(axis=0)
        cls.Mxx = np.dot(cls.X.T, cls.X)
        cls.Mxy = np.dot(cls.X.T, cls.Y)
        cls.Myy = np.dot(cls.Y.T, cls.Y)
        cls.mx = cls.sx / float(cls.w)
        cls.my = cls.sy / float(cls.w)
        cls.X0 = cls.X - cls.mx
        cls.Y0 = cls.Y - cls.my
        cls.Mxx0 = np.dot(cls.X0.T, cls.X0)
        cls.Mxy0 = np.dot(cls.X0.T, cls.Y0)
        cls.Myy0 = np.dot(cls.Y0.T, cls.Y0)

        # direct calculation, symmetric moments
        cls.s_sym = cls.sx + cls.sy
        cls.Mxx_sym = np.dot(cls.X.T, cls.X) + np.dot(cls.Y.T, cls.Y)
        cls.Mxy_sym = np.dot(cls.X.T, cls.Y) + np.dot(cls.Y.T, cls.X)
        cls.m_sym = cls.s_sym / float(cls.wsym)
        cls.X0_sym = cls.X - cls.m_sym
        cls.Y0_sym = cls.Y - cls.m_sym
        cls.Mxx0_sym = np.dot(cls.X0_sym.T, cls.X0_sym) + np.dot(cls.Y0_sym.T, cls.Y0_sym)
        cls.Mxy0_sym = np.dot(cls.X0_sym.T, cls.Y0_sym) + np.dot(cls.Y0_sym.T, cls.X0_sym)

        # direct calculation, weighted moments:
        cls.wesum = np.sum(cls.weights)
        cls.sx_w = (cls.weights[:, None] * cls.X).sum(axis=0)
        cls.sy_w = (cls.weights[:, None] * cls.Y).sum(axis=0)
        cls.Mxx_w = np.dot((cls.weights[:, None] * cls.X).T, cls.X)
        cls.Mxy_w = np.dot((cls.weights[:, None] * cls.X).T, cls.Y)
        cls.mx_w = cls.sx_w / float(cls.wesum)
        cls.my_w = cls.sy_w / float(cls.wesum)
        cls.X0_w = cls.X - cls.mx_w
        cls.Y0_w = cls.Y - cls.my_w
        cls.Mxx0_w = np.dot((cls.weights[:, None] * cls.X0_w).T, cls.X0_w)
        cls.Mxy0_w = np.dot((cls.weights[:, None] * cls.X0_w).T, cls.Y0_w)
        # direct calculation, weighted symmetric moments
        cls.s_sym_w = cls.sx_w + cls.sy_w
        cls.Mxx_sym_w = np.dot((cls.weights[:, None] * cls.X).T, cls.X) + np.dot((cls.weights[:, None] * cls.Y).T, cls.Y)
        cls.Mxy_sym_w = np.dot((cls.weights[:, None] * cls.X).T, cls.Y) + np.dot((cls.weights[:, None] * cls.Y).T, cls.X)
        cls.m_sym_w = cls.s_sym_w / float(2 * cls.wesum)
        cls.X0_sym_w = cls.X - cls.m_sym_w
        cls.Y0_sym_w = cls.Y - cls.m_sym_w
        cls.Mxx0_sym_w = np.dot((cls.weights[:, None] * cls.X0_sym_w).T, cls.X0_sym_w) + np.dot((cls.weights[:, None] * cls.Y0_sym_w).T, cls.Y0_sym_w)
        cls.Mxy0_sym_w = np.dot((cls.weights[:, None] * cls.X0_sym_w).T, cls.Y0_sym_w) + np.dot((cls.weights[:, None] * cls.Y0_sym_w).T, cls.X0_sym_w)
        # NOTE: no return value — unittest ignores what setUpClass returns;
        # the previous 'return cls' was dead code.

    def test_XX_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(remove_mean=False)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L])
        assert np.allclose(cc.weight_XX(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx)

    def test_XX_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(remove_mean=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L])
        assert np.allclose(cc.weight_XX(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx0)

    def test_XXXY_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx)
        assert np.allclose(cc.moments_XY(), self.Mxy)

    def test_XXXY_meanfree(self):
        # many passes (use self.T / self.L like every other test here,
        # instead of a redundant local chunk size)
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx0)
        assert np.allclose(cc.moments_XY(), self.Mxy0)

    def test_XXXY_weighted_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False)
        for i in range(0, self.T, self.L):
            iX = self.X[i:i+self.L, :]
            iY = self.Y[i:i+self.L, :]
            iwe = self.weights[i:i+self.L]
            cc.add(iX, iY, weights=iwe)
        assert np.allclose(cc.weight_XY(), self.wesum)
        assert np.allclose(cc.sum_X(), self.sx_w)
        assert np.allclose(cc.moments_XX(), self.Mxx_w)
        assert np.allclose(cc.moments_XY(), self.Mxy_w)

    def test_XXXY_weighted_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True)
        for i in range(0, self.T, self.L):
            iX = self.X[i:i+self.L, :]
            iY = self.Y[i:i+self.L, :]
            iwe = self.weights[i:i+self.L]
            cc.add(iX, iY, weights=iwe)
        assert np.allclose(cc.weight_XY(), self.wesum)
        assert np.allclose(cc.sum_X(), self.sx_w)
        assert np.allclose(cc.moments_XX(), self.Mxx0_w)
        assert np.allclose(cc.moments_XY(), self.Mxy0_w)

    def test_XXXY_sym_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False, symmetrize=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), 2*self.T)
        assert np.allclose(cc.sum_X(), self.s_sym)
        assert np.allclose(cc.moments_XX(), self.Mxx_sym)
        assert np.allclose(cc.moments_XY(), self.Mxy_sym)

    def test_XXXY_sym_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True, symmetrize=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), 2*self.T)
        assert np.allclose(cc.sum_X(), self.s_sym)
        assert np.allclose(cc.moments_XX(), self.Mxx0_sym)
        assert np.allclose(cc.moments_XY(), self.Mxy0_sym)

    def test_XXXY_weighted_sym_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False, symmetrize=True)
        for i in range(0, self.T, self.L):
            iwe = self.weights[i:i+self.L]
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L], weights=iwe)
        assert np.allclose(cc.weight_XY(), 2 * self.wesum)
        assert np.allclose(cc.sum_X(), self.s_sym_w)
        assert np.allclose(cc.moments_XX(), self.Mxx_sym_w)
        assert np.allclose(cc.moments_XY(), self.Mxy_sym_w)

    def test_XXXY_weighted_sym_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True, symmetrize=True)
        for i in range(0, self.T, self.L):
            iwe = self.weights[i:i+self.L]
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L], weights=iwe)
        assert np.allclose(cc.weight_XY(), 2*self.wesum)
        assert np.allclose(cc.sum_X(), self.s_sym_w)
        assert np.allclose(cc.moments_XX(), self.Mxx0_sym_w)
        assert np.allclose(cc.moments_XY(), self.Mxy0_sym_w)

if __name__ == "__main__":
    unittest.main()
-------------------------------------------------------------------------------- /variational/estimators/covar_c/_covartools.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /** Subtracts given row vector from each row of the matrix X 5 | 6 | @param X : (M, N) array 7 | @param row : (N) array 8 | @param M : int 9 | @param N : int 10 | 11 | */ 12 | void _subtract_row_double(double* X, double* row, int M, int N) 13 | { 14 | int i, j, ro; 15 | for (i=0; i!=M; ++i) 16 | { 17 | ro = i*N; 18 | for (j=0; j!=N; ++j) 19 | { 20 | X[ro + j] -= row[j]; 21 | } 22 | } 23 | } 24 | 25 | /** see above */ 26 | void _subtract_row_float(float* X, float* row, int M, int N) 27 | { 28 | int i, j, ro; 29 | for (i=0; i!=M; ++i) 30 | { 31 | ro = i*N; 32 | for (j=0; j!=N; ++j) 33 | { 34 | X[ro + j] -= row[j]; 35 | } 36 | } 37 | } 38 | 39 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N) 40 | { 41 | int i, j, ro; 42 | for (i=0; i!=M; ++i) 43 | { 44 | ro = i*N; 45 | for (j=0; j!=N; ++j) 46 | { 47 | X0[ro + j] = X[ro + j] - row[j]; 48 | } 49 | } 50 | } 51 | 52 | void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N) 53 | { 54 | int i, j, ro; 55 | for (i=0; i!=M; ++i) 56 | { 57 | ro = i*N; 58 | for (j=0; j!=N; ++j) 59 | { 60 | X0[ro + j] = X[ro + j] - row[j]; 61 | } 62 | } 63 | } 64 | 65 | 66 | int* _bool_to_list(int* b, int N, int nnz) 67 | { 68 | int i; 69 | int k=0; 70 | int* list = (int*)malloc(nnz*sizeof(int)); 71 | for (i=0; i tol || -diff > tol) 303 | { 304 | if (cols[j] == 0) 305 | { 306 | cols[j] = 1; 307 | nconstant--; 308 | // are constant columns below threshold? Then interrupt. 309 | if (nconstant < min_constant) 310 | return 0; 311 | // do we have 0 constant columns? Then we can stop regularly. 
312 | if (nconstant == 0) 313 | return 1; 314 | } 315 | } 316 | } 317 | } 318 | 319 | return 1; 320 | } 321 | 322 | /** see above */ 323 | int _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant) 324 | { 325 | // compare first and last row to get constant candidates 326 | int i,j; 327 | int ro = (M-1)*N; 328 | double diff; 329 | int nconstant = N; // current number of constant columns 330 | 331 | // by default all 0 (constant) 332 | for (j=0; j tol || -diff > tol) 343 | { 344 | if (cols[j] == 0) 345 | { 346 | cols[j] = 1; 347 | nconstant--; 348 | // are constant columns below threshold? Then interrupt. 349 | if (nconstant < min_constant) 350 | return 0; 351 | // do we have 0 constant columns? Then we can stop regularly. 352 | if (nconstant == 0) 353 | return 1; 354 | } 355 | } 356 | } 357 | } 358 | 359 | return 1; 360 | } 361 | -------------------------------------------------------------------------------- /variational/estimators/running_moments.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | 3 | import warnings 4 | import numbers 5 | import numpy as np 6 | from .moments import moments_XX, moments_XXXY, moments_block 7 | 8 | 9 | class Moments(object): 10 | 11 | def __init__(self, w, sx, sy, Mxy): 12 | """ 13 | Parameters 14 | ---------- 15 | w : float 16 | statistical weight. 17 | w = \sum_t w_t 18 | In most cases, :math:`w_t=1`, and then w is just the number of samples that went into s1, S2. 19 | s : ndarray(n,) 20 | sum over samples: 21 | .. math: 22 | s = \sum_t w_t x_t 23 | M : ndarray(n, n) 24 | .. 
math: 25 | M = (X-s)^T (X-s) 26 | """ 27 | self.w = float(w) 28 | self.sx = sx 29 | self.sy = sy 30 | self.Mxy = Mxy 31 | 32 | def copy(self): 33 | return Moments(self.w, self.sx.copy(), self.sy.copy(), self.Mxy.copy()) 34 | 35 | def combine(self, other, mean_free=False): 36 | """ 37 | References 38 | ---------- 39 | [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf 40 | """ 41 | w1 = self.w 42 | w2 = other.w 43 | w = w1 + w2 44 | dsx = (w2/w1) * self.sx - other.sx 45 | dsy = (w2/w1) * self.sy - other.sy 46 | # update 47 | self.w = w1 + w2 48 | self.sx = self.sx + other.sx 49 | self.sy = self.sy + other.sy 50 | # 51 | if mean_free: 52 | self.Mxy += other.Mxy + (w1 / (w2 * w)) * np.outer(dsx, dsy) 53 | else: 54 | self.Mxy += other.Mxy 55 | return self 56 | 57 | @property 58 | def mean_x(self): 59 | return self.sx / self.w 60 | 61 | @property 62 | def mean_y(self): 63 | return self.sy / self.w 64 | 65 | def covar(self, bessels_correction): 66 | """ Returns M / (w-1) 67 | 68 | Careful: The normalization w-1 assumes that we have counts as weights. 69 | 70 | """ 71 | if bessels_correction: 72 | return self.Mxy/ (self.w-1) 73 | else: 74 | return self.Mxy/self.w 75 | 76 | 77 | class MomentsStorage(object): 78 | """ 79 | """ 80 | 81 | def __init__(self, nsave, remove_mean=False, rtol=1.5): 82 | """ 83 | Parameters 84 | ---------- 85 | rtol : float 86 | To decide when to merge two Moments. Ideally I'd like to merge two 87 | Moments when they have equal weights (i.e. equally many data points 88 | went into them). If we always add data chunks with equal weights, 89 | this can be achieved by using a binary tree, i.e. let M1 be the 90 | moment estimates from one chunk. Two of them are added to M2, Two 91 | M2 are added to M4, and so on. This way you need to store log2 92 | (n_chunks) number of Moment estimates. 93 | In practice you might get data in chunks of unequal length or weight. 
94 | Therefore we need some heuristic when two Moment estimates should get 95 | merged. This is the role of rtol. 96 | 97 | """ 98 | self.nsave = nsave 99 | self.storage = [] 100 | self.rtol = rtol 101 | self.remove_mean = remove_mean 102 | 103 | def _can_merge_tail(self): 104 | """ Checks if the two last list elements can be merged 105 | """ 106 | if len(self.storage) < 2: 107 | return False 108 | return self.storage[-2].w <= self.storage[-1].w * self.rtol 109 | 110 | def store(self, moments): 111 | """ Store object X with weight w 112 | """ 113 | if len(self.storage) == self.nsave: # merge if we must 114 | # print 'must merge' 115 | self.storage[-1].combine(moments, mean_free=self.remove_mean) 116 | else: # append otherwise 117 | # print 'append' 118 | self.storage.append(moments) 119 | # merge if possible 120 | while self._can_merge_tail(): 121 | # print 'merge: ',self.storage 122 | M = self.storage.pop() 123 | # print 'pop last: ',self.storage 124 | self.storage[-1].combine(M, mean_free=self.remove_mean) 125 | # print 'merged: ',self.storage 126 | 127 | @property 128 | def moments(self): 129 | """ 130 | """ 131 | # collapse storage if necessary 132 | while len(self.storage) > 1: 133 | # print 'collapse' 134 | M = self.storage.pop() 135 | self.storage[-1].combine(M, mean_free=self.remove_mean) 136 | # print 'return first element' 137 | return self.storage[0] 138 | 139 | 140 | class RunningCovar(object): 141 | """ Running covariance estimator 142 | 143 | Estimator object that can be fed chunks of X and Y data, and 144 | that can generate on-the-fly estimates of mean, covariance, running sum 145 | and second moment matrix. 
146 | 147 | Parameters 148 | ---------- 149 | compute_XX : bool 150 | Estimate the covariance of X 151 | compute_XY : bool 152 | Estimate the cross-covariance of X and Y 153 | compute_YY : bool 154 | Estimate the covariance of Y 155 | remove_mean : bool 156 | Remove the data mean in the covariance estimation 157 | symmetrize : bool 158 | Use symmetric estimates with sum defined by sum_t x_t + y_t and 159 | second moment matrices defined by X'X + Y'Y and Y'X + X'Y. 160 | modify_data : bool 161 | If remove_mean=True, the mean will be removed in the input data, 162 | without creating an independent copy. This option is faster but should 163 | only be selected if the input data is not used elsewhere. 164 | sparse_mode : str 165 | one of: 166 | * 'dense' : always use dense mode 167 | * 'sparse' : always use sparse mode if possible 168 | * 'auto' : automatic 169 | nsave : int 170 | Depth of Moment storage. Moments computed from each chunk will be 171 | combined with Moments of similar statistical weight using the pairwise 172 | combination algorithm described in [1]_. 173 | 174 | References 175 | ---------- 176 | .. [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf 177 | 178 | """ 179 | 180 | # to get the Y mean, but this is currently not stored. 
181 | def __init__(self, compute_XX=True, compute_XY=False, compute_YY=False, 182 | remove_mean=False, symmetrize=False, sparse_mode='auto', modify_data=False, nsave=5): 183 | # check input 184 | if not compute_XX and not compute_XY: 185 | raise ValueError('One of compute_XX or compute_XY must be True.') 186 | if symmetrize and compute_YY: 187 | raise ValueError('Combining compute_YY and symmetrize=True is meaningless.') 188 | if symmetrize and not compute_XY: 189 | warnings.warn('symmetrize=True has no effect with compute_XY=False.') 190 | # storage 191 | self.compute_XX = compute_XX 192 | if compute_XX: 193 | self.storage_XX = MomentsStorage(nsave, remove_mean=remove_mean) 194 | self.compute_XY = compute_XY 195 | if compute_XY: 196 | self.storage_XY = MomentsStorage(nsave, remove_mean=remove_mean) 197 | self.compute_YY = compute_YY 198 | if compute_YY: 199 | self.storage_YY = MomentsStorage(nsave, remove_mean=remove_mean) 200 | # symmetry 201 | self.remove_mean = remove_mean 202 | self.symmetrize = symmetrize 203 | # flags 204 | self.sparse_mode = sparse_mode 205 | self.modify_data = modify_data 206 | 207 | def add(self, X, Y=None, weights=None): 208 | """ 209 | Add trajectory to estimate. 210 | 211 | Parameters 212 | ---------- 213 | X : ndarray(T, N) 214 | array of N time series. 215 | Y : ndarray(T, N) 216 | array of N time series, usually time shifted version of X. 217 | weights : None or float or ndarray(T, ): 218 | weights assigned to each trajectory point. If None, all data points have weight one. If float, 219 | the same weight will be given to all data points. If ndarray, each data point is assigned a separate 220 | weight. 
221 | 222 | """ 223 | 224 | # check input 225 | T = X.shape[0] 226 | if Y is not None: 227 | assert Y.shape[0] == T, 'X and Y must have equal length' 228 | # Weights cannot be used for compute_YY: 229 | if weights is not None and self.compute_YY: 230 | raise ValueError('Use of weights is not implemented for compute_YY==True') 231 | if weights is not None: 232 | # Convert to array of length T if weights is a single number: 233 | if isinstance(weights, numbers.Real): 234 | weights = weights * np.ones(T, dtype=float) 235 | # Check appropriate length if weights is an array: 236 | elif isinstance(weights, np.ndarray): 237 | assert weights.shape[0] == T, 'weights and X must have equal length' 238 | else: 239 | raise TypeError('weights is of type %s, must be a number or ndarray'%(type(weights))) 240 | # estimate and add to storage 241 | if self.compute_XX and not self.compute_XY: 242 | w, s_X, C_XX = moments_XX(X, remove_mean=self.remove_mean, weights=weights, sparse_mode=self.sparse_mode, modify_data=self.modify_data) 243 | self.storage_XX.store(Moments(w, s_X, s_X, C_XX)) 244 | elif self.compute_XX and self.compute_XY: 245 | assert Y is not None 246 | w, s_X, s_Y, C_XX, C_XY = moments_XXXY(X, Y, remove_mean=self.remove_mean, symmetrize=self.symmetrize, 247 | weights=weights, sparse_mode=self.sparse_mode, modify_data=self.modify_data) 248 | # make copy in order to get independently mergeable moments 249 | self.storage_XX.store(Moments(w, s_X, s_X, C_XX)) 250 | self.storage_XY.store(Moments(w, s_X, s_Y, C_XY)) 251 | else: # compute block 252 | assert Y is not None 253 | assert not self.symmetrize 254 | w, s, C = moments_block(X, Y, remove_mean=self.remove_mean, 255 | sparse_mode=self.sparse_mode, modify_data=self.modify_data) 256 | # make copy in order to get independently mergeable moments 257 | self.storage_XX.store(Moments(w, s[0], s[0], C[0, 0])) 258 | self.storage_XY.store(Moments(w, s[0], s[1], C[0, 1])) 259 | self.storage_YY.store(Moments(w, s[1], s[1], C[1, 1])) 
260 | 261 | def sum_X(self): 262 | if self.compute_XX: 263 | return self.storage_XX.moments.sx 264 | elif self.compute_XY: 265 | return self.storage_XY.moments.sx 266 | else: 267 | raise RuntimeError('sum_X is not available') 268 | 269 | def sum_Y(self): 270 | if self.compute_XY: 271 | return self.storage_XY.moments.sy 272 | elif self.compute_YY: 273 | return self.storage_YY.moments.sy 274 | else: 275 | raise RuntimeError('sum_Y is not available') 276 | 277 | def mean_X(self): 278 | if self.compute_XX: 279 | return self.storage_XX.moments.mean_x 280 | elif self.compute_XY: 281 | return self.storage_XY.moments.mean_y 282 | else: 283 | raise RuntimeError('mean_X is not available') 284 | 285 | def mean_Y(self): 286 | if self.compute_XY: 287 | return self.storage_XY.moments.mean_y 288 | elif self.compute_YY: 289 | return self.storage_YY.moments.mean_y 290 | else: 291 | raise RuntimeError('mean_Y is not available') 292 | 293 | def weight_XX(self): 294 | return self.storage_XX.moments.w 295 | 296 | def weight_XY(self): 297 | return self.storage_XY.moments.w 298 | 299 | def weight_YY(self): 300 | return self.storage_YY.moments.w 301 | 302 | def moments_XX(self): 303 | return self.storage_XX.moments.Mxy 304 | 305 | def moments_XY(self): 306 | return self.storage_XY.moments.Mxy 307 | 308 | def moments_YY(self): 309 | return self.storage_YY.moments.Mxy 310 | 311 | def cov_XX(self, bessels_correction): 312 | return self.storage_XX.moments.covar(bessels_correction=bessels_correction) 313 | 314 | def cov_XY(self, bessels_correction): 315 | return self.storage_XY.moments.covar(bessels_correction=bessels_correction) 316 | 317 | def cov_YY(self, bessels_correction): 318 | return self.storage_YY.moments.covar(bessels_correction=bessels_correction) 319 | 320 | 321 | def running_covar(xx=True, xy=False, yy=False, remove_mean=False, symmetrize=False, sparse_mode='auto', 322 | modify_data=False, nsave=5): 323 | """ Returns a running covariance estimator 324 | 325 | Returns an estimator 
object that can be fed chunks of X and Y data, and 326 | that can generate on-the-fly estimates of mean, covariance, running sum 327 | and second moment matrix. 328 | 329 | Parameters 330 | ---------- 331 | xx : bool 332 | Estimate the covariance of X 333 | xy : bool 334 | Estimate the cross-covariance of X and Y 335 | yy : bool 336 | Estimate the covariance of Y 337 | remove_mean : bool 338 | Remove the data mean in the covariance estimation 339 | symmetrize : bool 340 | Use symmetric estimates with sum defined by sum_t x_t + y_t and 341 | second moment matrices defined by X'X + Y'Y and Y'X + X'Y. 342 | modify_data : bool 343 | If remove_mean=True, the mean will be removed in the input data, 344 | without creating an independent copy. This option is faster but should 345 | only be selected if the input data is not used elsewhere. 346 | sparse_mode : str 347 | one of: 348 | * 'dense' : always use dense mode 349 | * 'sparse' : always use sparse mode if possible 350 | * 'auto' : automatic 351 | nsave : int 352 | Depth of Moment storage. Moments computed from each chunk will be 353 | combined with Moments of similar statistical weight using the pairwise 354 | combination algorithm described in [1]_. 355 | 356 | References 357 | ---------- 358 | .. [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf 359 | 360 | """ 361 | return RunningCovar(compute_XX=xx, compute_XY=xy, compute_YY=yy, sparse_mode=sparse_mode, modify_data=modify_data, 362 | remove_mean=remove_mean, symmetrize=symmetrize, nsave=nsave) 363 | -------------------------------------------------------------------------------- /variational/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). 
Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.15 (https://github.com/warner/python-versioneer) 10 | 11 | import errno 12 | import os 13 | import re 14 | import subprocess 15 | import sys 16 | 17 | 18 | def get_keywords(): 19 | # these strings will be replaced by git during git-archive. 20 | # setup.py/versioneer.py will grep for the variable names, so they must 21 | # each be defined on a line of their own. _version.py will just call 22 | # get_keywords(). 23 | git_refnames = " (HEAD -> master)" 24 | git_full = "491361e8e271df0e28b34549ab32e22546e18ce9" 25 | keywords = {"refnames": git_refnames, "full": git_full} 26 | return keywords 27 | 28 | 29 | class VersioneerConfig: 30 | pass 31 | 32 | 33 | def get_config(): 34 | # these strings are filled in when 'setup.py versioneer' creates 35 | # _version.py 36 | cfg = VersioneerConfig() 37 | cfg.VCS = "git" 38 | cfg.style = "pep440" 39 | cfg.tag_prefix = "" 40 | cfg.parentdir_prefix = "variational-" 41 | cfg.versionfile_source = "variational/_version.py" 42 | cfg.verbose = False 43 | return cfg 44 | 45 | 46 | class NotThisMethod(Exception): 47 | pass 48 | 49 | 50 | LONG_VERSION_PY = {} 51 | HANDLERS = {} 52 | 53 | 54 | def register_vcs_handler(vcs, method): # decorator 55 | def decorate(f): 56 | if vcs not in HANDLERS: 57 | HANDLERS[vcs] = {} 58 | HANDLERS[vcs][method] = f 59 | return f 60 | return decorate 61 | 62 | 63 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 64 | assert isinstance(commands, list) 65 | p = None 66 | for c in commands: 67 | try: 68 | dispcmd = str([c] + args) 69 | # remember shell=False, so use git.cmd on windows, not just git 70 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 71 | stderr=(subprocess.PIPE if 
hide_stderr 72 | else None)) 73 | break 74 | except EnvironmentError: 75 | e = sys.exc_info()[1] 76 | if e.errno == errno.ENOENT: 77 | continue 78 | if verbose: 79 | print("unable to run %s" % dispcmd) 80 | print(e) 81 | return None 82 | else: 83 | if verbose: 84 | print("unable to find command, tried %s" % (commands,)) 85 | return None 86 | stdout = p.communicate()[0].strip() 87 | if sys.version_info[0] >= 3: 88 | stdout = stdout.decode() 89 | if p.returncode != 0: 90 | if verbose: 91 | print("unable to run %s (error)" % dispcmd) 92 | return None 93 | return stdout 94 | 95 | 96 | def versions_from_parentdir(parentdir_prefix, root, verbose): 97 | # Source tarballs conventionally unpack into a directory that includes 98 | # both the project name and a version string. 99 | dirname = os.path.basename(root) 100 | if not dirname.startswith(parentdir_prefix): 101 | if verbose: 102 | print("guessing rootdir is '%s', but '%s' doesn't start with " 103 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 104 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 105 | return {"version": dirname[len(parentdir_prefix):], 106 | "full-revisionid": None, 107 | "dirty": False, "error": None} 108 | 109 | 110 | @register_vcs_handler("git", "get_keywords") 111 | def git_get_keywords(versionfile_abs): 112 | # the code embedded in _version.py can just fetch the value of these 113 | # keywords. When used from setup.py, we don't want to import _version.py, 114 | # so we do it with a regexp instead. This function is not used from 115 | # _version.py. 
116 | keywords = {} 117 | try: 118 | f = open(versionfile_abs, "r") 119 | for line in f.readlines(): 120 | if line.strip().startswith("git_refnames ="): 121 | mo = re.search(r'=\s*"(.*)"', line) 122 | if mo: 123 | keywords["refnames"] = mo.group(1) 124 | if line.strip().startswith("git_full ="): 125 | mo = re.search(r'=\s*"(.*)"', line) 126 | if mo: 127 | keywords["full"] = mo.group(1) 128 | f.close() 129 | except EnvironmentError: 130 | pass 131 | return keywords 132 | 133 | 134 | @register_vcs_handler("git", "keywords") 135 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 136 | if not keywords: 137 | raise NotThisMethod("no keywords at all, weird") 138 | refnames = keywords["refnames"].strip() 139 | if refnames.startswith("$Format"): 140 | if verbose: 141 | print("keywords are unexpanded, not using") 142 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 143 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 144 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 145 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 146 | TAG = "tag: " 147 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 148 | if not tags: 149 | # Either we're using git < 1.8.3, or there really are no tags. We use 150 | # a heuristic: assume all version tags have a digit. The old git %d 151 | # expansion behaves like git log --decorate=short and strips out the 152 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 153 | # between branches and tags. By ignoring refnames without digits, we 154 | # filter out many common branch names like "release" and 155 | # "stabilization", as well as "HEAD" and "master". 156 | tags = set([r for r in refs if re.search(r'\d', r)]) 157 | if verbose: 158 | print("discarding '%s', no digits" % ",".join(refs-tags)) 159 | if verbose: 160 | print("likely tags: %s" % ",".join(sorted(tags))) 161 | for ref in sorted(tags): 162 | # sorting will prefer e.g. 
"2.0" over "2.0rc1" 163 | if ref.startswith(tag_prefix): 164 | r = ref[len(tag_prefix):] 165 | if verbose: 166 | print("picking %s" % r) 167 | return {"version": r, 168 | "full-revisionid": keywords["full"].strip(), 169 | "dirty": False, "error": None 170 | } 171 | # no suitable tags, so version is "0+unknown", but full hex is still there 172 | if verbose: 173 | print("no suitable tags, using unknown + full revision id") 174 | return {"version": "0+unknown", 175 | "full-revisionid": keywords["full"].strip(), 176 | "dirty": False, "error": "no suitable tags"} 177 | 178 | 179 | @register_vcs_handler("git", "pieces_from_vcs") 180 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 181 | # this runs 'git' from the root of the source tree. This only gets called 182 | # if the git-archive 'subst' keywords were *not* expanded, and 183 | # _version.py hasn't already been rewritten with a short version string, 184 | # meaning we're inside a checked out source tree. 185 | 186 | if not os.path.exists(os.path.join(root, ".git")): 187 | if verbose: 188 | print("no .git in %s" % root) 189 | raise NotThisMethod("no .git directory") 190 | 191 | GITS = ["git"] 192 | if sys.platform == "win32": 193 | GITS = ["git.cmd", "git.exe"] 194 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 195 | # if there are no tags, this yields HEX[-dirty] (no NUM) 196 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 197 | "--always", "--long"], 198 | cwd=root) 199 | # --long was added in git-1.5.5 200 | if describe_out is None: 201 | raise NotThisMethod("'git describe' failed") 202 | describe_out = describe_out.strip() 203 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 204 | if full_out is None: 205 | raise NotThisMethod("'git rev-parse' failed") 206 | full_out = full_out.strip() 207 | 208 | pieces = {} 209 | pieces["long"] = full_out 210 | pieces["short"] = full_out[:7] # maybe improved later 211 | pieces["error"] = None 212 | 213 | 
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 214 | # TAG might have hyphens. 215 | git_describe = describe_out 216 | 217 | # look for -dirty suffix 218 | dirty = git_describe.endswith("-dirty") 219 | pieces["dirty"] = dirty 220 | if dirty: 221 | git_describe = git_describe[:git_describe.rindex("-dirty")] 222 | 223 | # now we have TAG-NUM-gHEX or HEX 224 | 225 | if "-" in git_describe: 226 | # TAG-NUM-gHEX 227 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 228 | if not mo: 229 | # unparseable. Maybe git-describe is misbehaving? 230 | pieces["error"] = ("unable to parse git-describe output: '%s'" 231 | % describe_out) 232 | return pieces 233 | 234 | # tag 235 | full_tag = mo.group(1) 236 | if not full_tag.startswith(tag_prefix): 237 | if verbose: 238 | fmt = "tag '%s' doesn't start with prefix '%s'" 239 | print(fmt % (full_tag, tag_prefix)) 240 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 241 | % (full_tag, tag_prefix)) 242 | return pieces 243 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 244 | 245 | # distance: number of commits since tag 246 | pieces["distance"] = int(mo.group(2)) 247 | 248 | # commit: short hex revision ID 249 | pieces["short"] = mo.group(3) 250 | 251 | else: 252 | # HEX: no tags 253 | pieces["closest-tag"] = None 254 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 255 | cwd=root) 256 | pieces["distance"] = int(count_out) # total number of commits 257 | 258 | return pieces 259 | 260 | 261 | def plus_or_dot(pieces): 262 | if "+" in pieces.get("closest-tag", ""): 263 | return "." 264 | return "+" 265 | 266 | 267 | def render_pep440(pieces): 268 | # now build up version string, with post-release "local version 269 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 270 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 271 | 272 | # exceptions: 273 | # 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 274 | 275 | if pieces["closest-tag"]: 276 | rendered = pieces["closest-tag"] 277 | if pieces["distance"] or pieces["dirty"]: 278 | rendered += plus_or_dot(pieces) 279 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 280 | if pieces["dirty"]: 281 | rendered += ".dirty" 282 | else: 283 | # exception #1 284 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 285 | pieces["short"]) 286 | if pieces["dirty"]: 287 | rendered += ".dirty" 288 | return rendered 289 | 290 | 291 | def render_pep440_pre(pieces): 292 | # TAG[.post.devDISTANCE] . No -dirty 293 | 294 | # exceptions: 295 | # 1: no tags. 0.post.devDISTANCE 296 | 297 | if pieces["closest-tag"]: 298 | rendered = pieces["closest-tag"] 299 | if pieces["distance"]: 300 | rendered += ".post.dev%d" % pieces["distance"] 301 | else: 302 | # exception #1 303 | rendered = "0.post.dev%d" % pieces["distance"] 304 | return rendered 305 | 306 | 307 | def render_pep440_post(pieces): 308 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that 309 | # .dev0 sorts backwards (a dirty tree will appear "older" than the 310 | # corresponding clean one), but you shouldn't be releasing software with 311 | # -dirty anyways. 312 | 313 | # exceptions: 314 | # 1: no tags. 0.postDISTANCE[.dev0] 315 | 316 | if pieces["closest-tag"]: 317 | rendered = pieces["closest-tag"] 318 | if pieces["distance"] or pieces["dirty"]: 319 | rendered += ".post%d" % pieces["distance"] 320 | if pieces["dirty"]: 321 | rendered += ".dev0" 322 | rendered += plus_or_dot(pieces) 323 | rendered += "g%s" % pieces["short"] 324 | else: 325 | # exception #1 326 | rendered = "0.post%d" % pieces["distance"] 327 | if pieces["dirty"]: 328 | rendered += ".dev0" 329 | rendered += "+g%s" % pieces["short"] 330 | return rendered 331 | 332 | 333 | def render_pep440_old(pieces): 334 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. 335 | 336 | # exceptions: 337 | # 1: no tags. 
0.postDISTANCE[.dev0] 338 | 339 | if pieces["closest-tag"]: 340 | rendered = pieces["closest-tag"] 341 | if pieces["distance"] or pieces["dirty"]: 342 | rendered += ".post%d" % pieces["distance"] 343 | if pieces["dirty"]: 344 | rendered += ".dev0" 345 | else: 346 | # exception #1 347 | rendered = "0.post%d" % pieces["distance"] 348 | if pieces["dirty"]: 349 | rendered += ".dev0" 350 | return rendered 351 | 352 | 353 | def render_git_describe(pieces): 354 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty 355 | # --always' 356 | 357 | # exceptions: 358 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 359 | 360 | if pieces["closest-tag"]: 361 | rendered = pieces["closest-tag"] 362 | if pieces["distance"]: 363 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 364 | else: 365 | # exception #1 366 | rendered = pieces["short"] 367 | if pieces["dirty"]: 368 | rendered += "-dirty" 369 | return rendered 370 | 371 | 372 | def render_git_describe_long(pieces): 373 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty 374 | # --always -long'. The distance/hash is unconditional. 375 | 376 | # exceptions: 377 | # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 378 | 379 | if pieces["closest-tag"]: 380 | rendered = pieces["closest-tag"] 381 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 382 | else: 383 | # exception #1 384 | rendered = pieces["short"] 385 | if pieces["dirty"]: 386 | rendered += "-dirty" 387 | return rendered 388 | 389 | 390 | def render(pieces, style): 391 | if pieces["error"]: 392 | return {"version": "unknown", 393 | "full-revisionid": pieces.get("long"), 394 | "dirty": None, 395 | "error": pieces["error"]} 396 | 397 | if not style or style == "default": 398 | style = "pep440" # the default 399 | 400 | if style == "pep440": 401 | rendered = render_pep440(pieces) 402 | elif style == "pep440-pre": 403 | rendered = render_pep440_pre(pieces) 404 | elif style == "pep440-post": 405 | rendered = render_pep440_post(pieces) 406 | elif style == "pep440-old": 407 | rendered = render_pep440_old(pieces) 408 | elif style == "git-describe": 409 | rendered = render_git_describe(pieces) 410 | elif style == "git-describe-long": 411 | rendered = render_git_describe_long(pieces) 412 | else: 413 | raise ValueError("unknown style '%s'" % style) 414 | 415 | return {"version": rendered, "full-revisionid": pieces["long"], 416 | "dirty": pieces["dirty"], "error": None} 417 | 418 | 419 | def get_versions(): 420 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 421 | # __file__, we can work backwards from there to the root. Some 422 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 423 | # case we can only use expanded keywords. 424 | 425 | cfg = get_config() 426 | verbose = cfg.verbose 427 | 428 | try: 429 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 430 | verbose) 431 | except NotThisMethod: 432 | pass 433 | 434 | try: 435 | root = os.path.realpath(__file__) 436 | # versionfile_source is the relative path from the top of the source 437 | # tree (where the .git directory might live) to this file. 
Invert 438 | # this to find the root from __file__. 439 | for i in cfg.versionfile_source.split('/'): 440 | root = os.path.dirname(root) 441 | except NameError: 442 | return {"version": "0+unknown", "full-revisionid": None, 443 | "dirty": None, 444 | "error": "unable to find root of source tree"} 445 | 446 | try: 447 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 448 | return render(pieces, cfg.style) 449 | except NotThisMethod: 450 | pass 451 | 452 | try: 453 | if cfg.parentdir_prefix: 454 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 455 | except NotThisMethod: 456 | pass 457 | 458 | return {"version": "0+unknown", "full-revisionid": None, 459 | "dirty": None, 460 | "error": "unable to compute version"} 461 | -------------------------------------------------------------------------------- /variational/estimators/tests/test_moments.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | import numpy as np 4 | from .. 
import moments 5 | 6 | __author__ = 'noe' 7 | 8 | class TestMoments(unittest.TestCase): 9 | 10 | @classmethod 11 | def setUpClass(cls): 12 | cls.X_2 = np.random.rand(10000, 2) 13 | cls.Y_2 = np.random.rand(10000, 2) 14 | # dense data 15 | cls.X_10 = np.random.rand(10000, 10) 16 | cls.Y_10 = np.random.rand(10000, 10) 17 | cls.X_100 = np.random.rand(10000, 100) 18 | cls.Y_100 = np.random.rand(10000, 100) 19 | # sparse zero data 20 | cls.X_10_sparsezero = np.zeros((10000, 10)) 21 | cls.X_10_sparsezero[:, 0] = cls.X_10[:, 0] 22 | cls.Y_10_sparsezero = np.zeros((10000, 10)) 23 | cls.Y_10_sparsezero[:, 0] = cls.Y_10[:, 0] 24 | cls.X_100_sparsezero = np.zeros((10000, 100)) 25 | cls.X_100_sparsezero[:, :10] = cls.X_100[:, :10] 26 | cls.Y_100_sparsezero = np.zeros((10000, 100)) 27 | cls.Y_100_sparsezero[:, :10] = cls.Y_100[:, :10] 28 | # sparse const data 29 | cls.X_10_sparseconst = np.ones((10000, 10)) 30 | cls.X_10_sparseconst[:, 0] = cls.X_10[:, 0] 31 | cls.Y_10_sparseconst = 2*np.ones((10000, 10)) 32 | cls.Y_10_sparseconst[:, 0] = cls.Y_10[:, 0] 33 | cls.X_100_sparseconst = np.ones((10000, 100)) 34 | cls.X_100_sparseconst[:, :10] = cls.X_100[:, :10] 35 | cls.Y_100_sparseconst = 2*np.zeros((10000, 100)) 36 | cls.Y_100_sparseconst[:, :10] = cls.Y_100[:, :10] 37 | # boolean data 38 | cls.Xb_2 = np.random.randint(0, 2, size=(10000, 2)) 39 | cls.Xb_2 = cls.Xb_2.astype(np.bool) 40 | cls.Xb_10 = np.random.randint(0, 2, size=(10000, 10)) 41 | cls.Xb_10 = cls.Xb_10.astype(np.bool) 42 | cls.Xb_10_sparsezero = np.zeros((10000, 10), dtype=np.bool) 43 | cls.Xb_10_sparsezero[:, 0] = cls.Xb_10[:, 0] 44 | # generate weights: 45 | cls.weights = np.random.rand(10000) 46 | # Set the lag time for time-lagged tests: 47 | cls.lag = 50 48 | 49 | return cls 50 | 51 | def _test_moments_X(self, X, remove_mean=False, sparse_mode='auto', weights=None): 52 | # proposed solution 53 | w, s_X, C_XX = moments.moments_XX(X, remove_mean=remove_mean, modify_data=False, 54 | sparse_mode=sparse_mode, 
weights=weights) 55 | # reference 56 | X = X.astype(np.float64) 57 | if weights is not None: 58 | X1 = weights[:, None] * X 59 | w = weights.sum() 60 | else: 61 | X1 = X 62 | w = X.shape[0] 63 | s_X_ref = X1.sum(axis=0) 64 | if remove_mean: 65 | X = X - (1.0 / w) * s_X_ref 66 | if weights is not None: 67 | X1 = weights[:, None] * X 68 | else: 69 | X1 = X 70 | C_XX_ref = np.dot(X1.T, X) 71 | # test 72 | assert np.allclose(s_X, s_X_ref) 73 | assert np.allclose(C_XX, C_XX_ref) 74 | 75 | def test_moments_X(self): 76 | # simple test, dense 77 | self._test_moments_X(self.X_10, remove_mean=False, sparse_mode='dense') 78 | self._test_moments_X(self.X_100, remove_mean=False, sparse_mode='dense') 79 | # mean-free, dense 80 | self._test_moments_X(self.X_10, remove_mean=True, sparse_mode='dense') 81 | self._test_moments_X(self.X_100, remove_mean=True, sparse_mode='dense') 82 | # weighted test, simple, dense: 83 | self._test_moments_X(self.X_10, remove_mean=False, sparse_mode='dense', weights=self.weights) 84 | self._test_moments_X(self.X_100, remove_mean=False, sparse_mode='dense', weights=self.weights) 85 | # weighted test, mean-free, dense: 86 | self._test_moments_X(self.X_10, remove_mean=True, sparse_mode='dense', weights=self.weights) 87 | self._test_moments_X(self.X_100, remove_mean=True, sparse_mode='dense', weights=self.weights) 88 | 89 | def test_moments_X_sparsezero(self): 90 | # simple test, sparse 91 | self._test_moments_X(self.X_10_sparsezero, remove_mean=False, sparse_mode='sparse') 92 | self._test_moments_X(self.X_100_sparsezero, remove_mean=False, sparse_mode='sparse') 93 | # mean-free, sparse 94 | self._test_moments_X(self.X_10_sparsezero, remove_mean=True, sparse_mode='sparse') 95 | self._test_moments_X(self.X_100_sparsezero, remove_mean=True, sparse_mode='sparse') 96 | # weighted, sparse 97 | self._test_moments_X(self.X_10_sparsezero, remove_mean=False, sparse_mode='sparse', weights=self.weights) 98 | self._test_moments_X(self.X_100_sparsezero, 
remove_mean=False, sparse_mode='sparse', weights=self.weights) 99 | # weighted, mean-free, sparse 100 | self._test_moments_X(self.X_10_sparsezero, remove_mean=True, sparse_mode='sparse', weights=self.weights) 101 | self._test_moments_X(self.X_100_sparsezero, remove_mean=True, sparse_mode='sparse', weights=self.weights) 102 | 103 | def test_moments_X_sparseconst(self): 104 | # simple test, sparse 105 | self._test_moments_X(self.X_10_sparseconst, remove_mean=False, sparse_mode='sparse') 106 | self._test_moments_X(self.X_100_sparseconst, remove_mean=False, sparse_mode='sparse') 107 | # mean-free, sparse 108 | self._test_moments_X(self.X_10_sparseconst, remove_mean=True, sparse_mode='sparse') 109 | self._test_moments_X(self.X_100_sparseconst, remove_mean=True, sparse_mode='sparse') 110 | # weighted, sparse: 111 | self._test_moments_X(self.X_10_sparseconst, remove_mean=False, sparse_mode='dense', weights=self.weights) 112 | self._test_moments_X(self.X_100_sparseconst, remove_mean=False, sparse_mode='dense', weights=self.weights) 113 | # weighted, mean-free, sparse: 114 | self._test_moments_X(self.X_10_sparseconst, remove_mean=True, sparse_mode='dense', weights=self.weights) 115 | self._test_moments_X(self.X_100_sparseconst, remove_mean=True, sparse_mode='dense', weights=self.weights) 116 | 117 | def test_boolean_moments(self): 118 | # standard tests 119 | self._test_moments_X(self.Xb_10, remove_mean=False, sparse_mode='dense') 120 | self._test_moments_X(self.Xb_10, remove_mean=True, sparse_mode='dense') 121 | self._test_moments_X(self.Xb_10_sparsezero, remove_mean=False, sparse_mode='sparse') 122 | self._test_moments_X(self.Xb_10_sparsezero, remove_mean=True, sparse_mode='sparse') 123 | # test integer recovery 124 | Cxx_ref = np.dot(self.Xb_10.astype(np.int64).T, self.Xb_10.astype(np.int64)) # integer 125 | s_X_ref = np.sum(self.Xb_10, axis=0) 126 | w, s_X, Cxx = moments.moments_XX(self.Xb_10, remove_mean=False, modify_data=False, sparse_mode='dense') 127 | s_X = 
np.round(s_X).astype(np.int64) 128 | Cxx = np.round(Cxx).astype(np.int64) 129 | assert np.array_equal(s_X, s_X_ref) 130 | assert np.array_equal(Cxx, Cxx_ref) 131 | 132 | 133 | def _test_moments_XY(self, X, Y, symmetrize=False, remove_mean=False, sparse_mode='auto', weights=None): 134 | w1, s_X, s_Y, C_XX, C_XY = moments.moments_XXXY(X, Y, remove_mean=remove_mean, modify_data=False, 135 | symmetrize=symmetrize, sparse_mode=sparse_mode, 136 | weights=weights) 137 | # reference 138 | T = X.shape[0] 139 | if weights is not None: 140 | X1 = weights[:, None] * X 141 | Y1 = weights[:, None] * Y 142 | else: 143 | X1 = X 144 | Y1 = Y 145 | s_X_ref = X1.sum(axis=0) 146 | s_Y_ref = Y1.sum(axis=0) 147 | if symmetrize: 148 | s_X_ref = s_X_ref + s_Y_ref 149 | s_Y_ref = s_X_ref 150 | if weights is not None: 151 | w = 2 * np.sum(weights) 152 | else: 153 | w = 2 * T 154 | else: 155 | if weights is not None: 156 | w = np.sum(weights) 157 | else: 158 | w = T 159 | if remove_mean: 160 | X = X - s_X_ref/float(w) 161 | Y = Y - s_Y_ref/float(w) 162 | if weights is not None: 163 | X1 = weights[:, None] * X 164 | Y1 = weights[:, None] * Y 165 | else: 166 | X1 = X 167 | Y1 = Y 168 | if symmetrize: 169 | C_XX_ref = np.dot(X1.T, X) + np.dot(Y1.T, Y) 170 | C_XY_ref = np.dot(X1.T, Y) + np.dot(Y1.T, X) 171 | else: 172 | C_XX_ref = np.dot(X1.T, X) 173 | C_XY_ref = np.dot(X1.T, Y) 174 | # test 175 | assert np.allclose(w1, w) 176 | assert np.allclose(s_X, s_X_ref) 177 | assert np.allclose(s_Y, s_Y_ref) 178 | assert np.allclose(C_XX, C_XX_ref) 179 | assert np.allclose(C_XY, C_XY_ref) 180 | 181 | def test_moments_XY(self): 182 | # simple test, dense 183 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=False, remove_mean=False, sparse_mode='dense') 184 | self._test_moments_XY(self.X_100, self.Y_10, symmetrize=False, remove_mean=False, sparse_mode='dense') 185 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=False, remove_mean=False, sparse_mode='dense') 186 | # mean-free, dense 187 | 
self._test_moments_XY(self.X_10, self.Y_10, symmetrize=False, remove_mean=True, sparse_mode='dense') 188 | self._test_moments_XY(self.X_100, self.Y_10, symmetrize=False, remove_mean=True, sparse_mode='dense') 189 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=False, remove_mean=True, sparse_mode='dense') 190 | 191 | def test_moments_XY_weighted(self): 192 | # weighted test, dense 193 | self._test_moments_XY(self.X_10, self.X_10, symmetrize=False, remove_mean=False, 194 | sparse_mode='dense', weights=self.weights) 195 | self._test_moments_XY(self.X_100, self.X_100, symmetrize=False, remove_mean=False, 196 | sparse_mode='dense', weights=self.weights) 197 | # weighted test, mean-free, dense 198 | self._test_moments_XY(self.X_10, self.X_10, symmetrize=False, remove_mean=True, 199 | sparse_mode='dense', weights=self.weights) 200 | self._test_moments_XY(self.X_100, self.X_100, symmetrize=False, remove_mean=True, 201 | sparse_mode='dense', weights=self.weights) 202 | 203 | def test_moments_XY_sym(self): 204 | # simple test, dense, symmetric 205 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=False, sparse_mode='dense') 206 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=False, sparse_mode='dense') 207 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=False, sparse_mode='dense') 208 | # mean-free, dense, symmetric 209 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=True, sparse_mode='dense') 210 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=True, sparse_mode='dense') 211 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=True, sparse_mode='dense') 212 | 213 | def test_moments_XY_weighted_sym(self): 214 | # simple test, dense, symmetric 215 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=False, sparse_mode='dense', 216 | weights=self.weights) 217 | self._test_moments_XY(self.X_10, 
self.Y_10, symmetrize=True, remove_mean=False, sparse_mode='dense' 218 | , weights=self.weights) 219 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=False, sparse_mode='dense', 220 | weights=self.weights) 221 | # mean-free, dense, symmetric 222 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=True, sparse_mode='dense', 223 | weights=self.weights) 224 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=True, sparse_mode='dense', 225 | weights=self.weights) 226 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=True, sparse_mode='dense', 227 | weights=self.weights) 228 | 229 | def test_moments_XY_sparsezero(self): 230 | # simple test, dense 231 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=False, 232 | sparse_mode='sparse') 233 | self._test_moments_XY(self.X_100_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=False, 234 | sparse_mode='sparse') 235 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=False, remove_mean=False, 236 | sparse_mode='sparse') 237 | # mean-free, dense 238 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=True, 239 | sparse_mode='sparse') 240 | self._test_moments_XY(self.X_100_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=True, 241 | sparse_mode='sparse') 242 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=False, remove_mean=True, 243 | sparse_mode='dense') 244 | 245 | def test_moments_XY_weighted_sparsezero(self): 246 | # weighted test, sparse 247 | self._test_moments_XY(self.X_10_sparsezero, self.X_10_sparsezero, symmetrize=False, remove_mean=False, 248 | sparse_mode='sparse', weights=self.weights) 249 | self._test_moments_XY(self.X_100_sparsezero, self.X_100_sparsezero, symmetrize=False, remove_mean=False, 250 | sparse_mode='sparse', 
weights=self.weights) 251 | # weighted test, mean-free, sparse 252 | self._test_moments_XY(self.X_10_sparsezero, self.X_10_sparsezero, symmetrize=False, remove_mean=True, 253 | sparse_mode='sparse', weights=self.weights) 254 | self._test_moments_XY(self.X_100_sparsezero, self.X_100_sparsezero, symmetrize=False, remove_mean=True, 255 | sparse_mode='sparse', weights=self.weights) 256 | 257 | def test_moments_XY_sym_sparsezero(self): 258 | # simple test, sparse, symmetric 259 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=False, 260 | sparse_mode='sparse') 261 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=False, 262 | sparse_mode='sparse') 263 | # mean-free, sparse, symmetric 264 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=True, 265 | sparse_mode='sparse') 266 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=True, 267 | sparse_mode='sparse') 268 | 269 | def test_moments_XY_weighted_sym_sparsezero(self): 270 | # simple test, sparse, symmetric 271 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=False, 272 | sparse_mode='sparse', weights=self.weights) 273 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=False, 274 | sparse_mode='sparse', weights=self.weights) 275 | # mean-free, sparse, symmetric 276 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=True, 277 | sparse_mode='sparse', weights=self.weights) 278 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=True, 279 | sparse_mode='sparse', weights=self.weights) 280 | 281 | def test_moments_XY_sparseconst(self): 282 | # simple test, dense 283 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=False, 
remove_mean=False, 284 | sparse_mode='sparse') 285 | self._test_moments_XY(self.X_100_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=False, 286 | sparse_mode='sparse') 287 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=False, remove_mean=False, 288 | sparse_mode='sparse') 289 | # mean-free, dense 290 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=True, 291 | sparse_mode='sparse') 292 | self._test_moments_XY(self.X_100_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=True, 293 | sparse_mode='sparse') 294 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=False, remove_mean=True, 295 | sparse_mode='dense') 296 | 297 | def test_moments_XY_weighted_sparseconst(self): 298 | # weighted test, sparse 299 | self._test_moments_XY(self.X_10_sparseconst, self.X_10_sparseconst, symmetrize=False, remove_mean=False, 300 | sparse_mode='sparse', weights=self.weights) 301 | self._test_moments_XY(self.X_100_sparseconst, self.X_100_sparseconst, symmetrize=False, remove_mean=False, 302 | sparse_mode='sparse', weights=self.weights) 303 | # weighted test, mean-free, sparse 304 | self._test_moments_XY(self.X_10_sparseconst, self.X_10_sparseconst, symmetrize=False, remove_mean=True, 305 | sparse_mode='sparse', weights=self.weights) 306 | self._test_moments_XY(self.X_100_sparseconst, self.X_100_sparseconst, symmetrize=False, remove_mean=True, 307 | sparse_mode='sparse', weights=self.weights) 308 | 309 | def test_moments_XY_sym_sparseconst(self): 310 | # simple test, sparse, symmetric 311 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=False, 312 | sparse_mode='sparse') 313 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=False, 314 | sparse_mode='sparse') 315 | # mean-free, sparse, symmetric 316 | 
self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=True, 317 | sparse_mode='sparse') 318 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=True, 319 | sparse_mode='sparse') 320 | 321 | def test_moments_XY_weighted_sym_sparseconst(self): 322 | # simple test, sparse, symmetric 323 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=False, 324 | sparse_mode='sparse', weights=self.weights) 325 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=False, 326 | sparse_mode='sparse', weights=self.weights) 327 | # mean-free, sparse, symmetric 328 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=True, 329 | sparse_mode='sparse', weights=self.weights) 330 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=True, 331 | sparse_mode='sparse', weights=self.weights) 332 | 333 | 334 | if __name__ == "__main__": 335 | unittest.main() -------------------------------------------------------------------------------- /docs/Updating_Formulas.lyx: -------------------------------------------------------------------------------- 1 | #LyX 2.1 created this file. 
For more info see http://www.lyx.org/ 2 | \lyxformat 474 3 | \begin_document 4 | \begin_header 5 | \textclass article 6 | \use_default_options true 7 | \begin_modules 8 | theorems-ams 9 | eqs-within-sections 10 | figs-within-sections 11 | \end_modules 12 | \maintain_unincluded_children false 13 | \language english 14 | \language_package default 15 | \inputencoding auto 16 | \fontencoding global 17 | \font_roman default 18 | \font_sans default 19 | \font_typewriter default 20 | \font_math auto 21 | \font_default_family default 22 | \use_non_tex_fonts false 23 | \font_sc false 24 | \font_osf false 25 | \font_sf_scale 100 26 | \font_tt_scale 100 27 | \graphics default 28 | \default_output_format default 29 | \output_sync 0 30 | \bibtex_command default 31 | \index_command default 32 | \paperfontsize default 33 | \spacing single 34 | \use_hyperref false 35 | \papersize default 36 | \use_geometry false 37 | \use_package amsmath 1 38 | \use_package amssymb 1 39 | \use_package cancel 1 40 | \use_package esint 1 41 | \use_package mathdots 1 42 | \use_package mathtools 1 43 | \use_package mhchem 1 44 | \use_package stackrel 1 45 | \use_package stmaryrd 1 46 | \use_package undertilde 1 47 | \cite_engine basic 48 | \cite_engine_type default 49 | \biblio_style plain 50 | \use_bibtopic false 51 | \use_indices false 52 | \paperorientation portrait 53 | \suppress_date false 54 | \justification true 55 | \use_refstyle 1 56 | \index Index 57 | \shortcut idx 58 | \color #008000 59 | \end_index 60 | \secnumdepth 3 61 | \tocdepth 3 62 | \paragraph_separation indent 63 | \paragraph_indentation default 64 | \quotes_language english 65 | \papercolumns 1 66 | \papersides 1 67 | \paperpagestyle default 68 | \tracking_changes false 69 | \output_changes false 70 | \html_math_output 0 71 | \html_css_as_file 0 72 | \html_be_strict false 73 | \end_header 74 | 75 | \begin_body 76 | 77 | \begin_layout Title 78 | Updating Formulas for Correlations 79 | \end_layout 80 | 81 | \begin_layout Standard 
82 | Here, we collect updating formulas for correlations between time series: 83 | \end_layout 84 | 85 | \begin_layout Section 86 | General Time Series 87 | \end_layout 88 | 89 | \begin_layout Standard 90 | The standard case is to compute the correlation between two time series 91 | 92 | \begin_inset Formula $x_{t}(i),\, t=1,\ldots,T,\, i=1,\ldots,N$ 93 | \end_inset 94 | 95 | , and 96 | \begin_inset Formula $y_{t}(i),\, t=1,\ldots,T,\, i=1,\ldots,N$ 97 | \end_inset 98 | 99 | . 100 | Additionally, it is possible that weights are given for each time step, 101 | i.e. 102 | there are non-negative numbers 103 | \begin_inset Formula $w_{t},\, t=1,\ldots,T$ 104 | \end_inset 105 | 106 | . 107 | Our goal then is to compute the (unnormalized) correlation 108 | \end_layout 109 | 110 | \begin_layout Standard 111 | \begin_inset Formula 112 | \begin{eqnarray*} 113 | C(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right), 114 | \end{eqnarray*} 115 | 116 | \end_inset 117 | 118 | where 119 | \begin_inset Formula $\bar{x}(i),\,\bar{y}(j)$ 120 | \end_inset 121 | 122 | denote the weighted mean values of the time series, i.e. 123 | \end_layout 124 | 125 | \begin_layout Standard 126 | \begin_inset Formula 127 | \begin{eqnarray*} 128 | \overline{x}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}x_{t}(i),\\ 129 | W_{T} & = & \sum_{t=1}^{T}w_{t}. 130 | \end{eqnarray*} 131 | 132 | \end_inset 133 | 134 | We are interested in computing the correlation 135 | \begin_inset Formula $C(i,j)$ 136 | \end_inset 137 | 138 | in chunks. 139 | That means we split the data into, say, two blocks 140 | \begin_inset Formula $x_{t}(i),\, t=1,\ldots,T_{1}$ 141 | \end_inset 142 | 143 | , and 144 | \begin_inset Formula $x_{t}(i),\, t=T_{1}+1,\ldots,T_{2}=T,$ 145 | \end_inset 146 | 147 | and the same for 148 | \begin_inset Formula $y_{t}$ 149 | \end_inset 150 | 151 | .
152 | We would then like to compute the correlation of each chunk separately, 153 | sum them up and add a correction term. 154 | Let us introduce the following notation 155 | \end_layout 156 | 157 | \begin_layout Standard 158 | \begin_inset Formula 159 | \begin{eqnarray} 160 | \overline{x_{T_{1}}}(i) & = & \frac{1}{W_{T_{1}}}\sum_{t=1}^{T_{1}}w_{t}x_{t}(i),\label{eq:chunk_definitions_0}\\ 161 | \overline{x_{T_{2}}}(i) & = & \frac{1}{W_{T_{2}}}\sum_{t=T_{1}+1}^{T_{2}}w_{t}x_{t}(i)\label{eq:chunk_definitions_1}\\ 162 | W_{T_{1}} & = & \sum_{t=1}^{T_{1}}w_{t}\label{eq:chunk_definitions_2}\\ 163 | W_{T_{2}} & = & \sum_{t=T_{1}+1}^{T_{2}}w_{t}\label{eq:chunk_definitions_3}\\ 164 | S_{T_{1}}(i,j) & = & \sum_{t=1}^{T_{1}}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:chunk_definitions_4}\\ 165 | S_{T_{2}}(i,j) & = & \sum_{t=T_{1}+1}^{T_{2}}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right).\label{eq:chunk_definitions_5} 166 | \end{eqnarray} 167 | 168 | \end_inset 169 | 170 | Now, the calculations from section 171 | \begin_inset CommandInset ref 172 | LatexCommand ref 173 | reference "sec:Proofs" 174 | 175 | \end_inset 176 | 177 | show that the full correlation 178 | \begin_inset Formula $C(i,j)$ 179 | \end_inset 180 | 181 | can be computed as 182 | \end_layout 183 | 184 | \begin_layout Standard 185 | \begin_inset Formula 186 | \begin{eqnarray} 187 | C(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{y_{T_{2}}}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:Update_Standard} 188 | \end{eqnarray} 189 | 190 | \end_inset 191 | 192 | 193 | \end_layout 194 | 195 | \begin_layout Section 196 | Symmetrization 197 | \end_layout 198 | 199 | \begin_layout Standard 200 | In some cases, a symmetric correlation matrix is desired, for example if 201 | 202 | \begin_inset Formula $y_{t}$ 203 | \end_inset 204 | 
205 | is a time-lagged version of 206 | \begin_inset Formula $x_{t}$ 207 | \end_inset 208 | 209 | . 210 | This can be achieved by redefining the means 211 | \end_layout 212 | 213 | \begin_layout Standard 214 | \begin_inset Formula 215 | \begin{eqnarray*} 216 | \overline{x}(i) & = & \frac{1}{2W_{T}}\left[\sum_{t=1}^{T}w_{t}x_{t}(i)+\sum_{t=1}^{T}w_{t}y_{t}(i)\right], 217 | \end{eqnarray*} 218 | 219 | \end_inset 220 | 221 | and defining the symmetrized correlation by 222 | \end_layout 223 | 224 | \begin_layout Standard 225 | \begin_inset Formula 226 | \begin{eqnarray*} 227 | C_{s}(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{x}(j)\right)\\ 228 | & & +\sum_{t=1}^{T}w_{t}\left(y_{t}(i)-\overline{x}(i)\right)\left(x_{t}(j)-\overline{x}(j)\right). 229 | \end{eqnarray*} 230 | 231 | \end_inset 232 | 233 | Using the analogues of Eqs. 234 | 235 | \begin_inset CommandInset ref 236 | LatexCommand eqref 237 | reference "eq:chunk_definitions_0" 238 | 239 | \end_inset 240 | 241 | - 242 | \begin_inset CommandInset ref 243 | LatexCommand eqref 244 | reference "eq:chunk_definitions_5" 245 | 246 | \end_inset 247 | 248 | , we arrive at the updating formula 249 | \end_layout 250 | 251 | \begin_layout Standard 252 | \begin_inset Formula 253 | \begin{eqnarray} 254 | C_{s}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{2W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{x_{T_{2}}}(j)-\overline{x_{T_{1}}}(j)\right),\label{eq:Updata_Sym} 255 | \end{eqnarray} 256 | 257 | \end_inset 258 | 259 | see again section 260 | \begin_inset CommandInset ref 261 | LatexCommand ref 262 | reference "sec:Proofs" 263 | 264 | \end_inset 265 | 266 | . 
267 | Please note that for time-lagged data, 268 | \begin_inset Formula $T_{1}$ 269 | \end_inset 270 | 271 | and 272 | \begin_inset Formula $T_{2}$ 273 | \end_inset 274 | 275 | must be changed to 276 | \begin_inset Formula $T_{1}-\tau$ 277 | \end_inset 278 | 279 | and 280 | \begin_inset Formula $T_{2}-\tau$ 281 | \end_inset 282 | 283 | , such that the first 284 | \begin_inset Formula $\tau$ 285 | \end_inset 286 | 287 | steps of every chunk only appear in 288 | \begin_inset Formula $x_{t}$ 289 | \end_inset 290 | 291 | , while the last 292 | \begin_inset Formula $\tau$ 293 | \end_inset 294 | 295 | steps only appear in 296 | \begin_inset Formula $y_{t}$ 297 | \end_inset 298 | 299 | . 300 | \end_layout 301 | 302 | \begin_layout Section 303 | Time-lagged Data without Symmetrization 304 | \end_layout 305 | 306 | \begin_layout Standard 307 | If we assume to be given a time-series 308 | \begin_inset Formula $\tilde{x_{t}}(i),\, t=1,\ldots,T+\tau$ 309 | \end_inset 310 | 311 | , and define the time-lagged time-series 312 | \begin_inset Formula $x_{t}(i)=\tilde{x}_{t}(i),\, t=1,\ldots T$ 313 | \end_inset 314 | 315 | and 316 | \begin_inset Formula $y_{t}(i)=\tilde{x}_{t+\tau},\, t=1,\ldots T$ 317 | \end_inset 318 | 319 | . 320 | If we do not wish to symmetrize the correlations, it seems most consistent 321 | to use the weights of the first 322 | \begin_inset Formula $T$ 323 | \end_inset 324 | 325 | steps, 326 | \begin_inset Formula $w_{t},\, t=1,\ldots,T$ 327 | \end_inset 328 | 329 | , only. 330 | The means are thus defined by 331 | \end_layout 332 | 333 | \begin_layout Standard 334 | \begin_inset Formula 335 | \begin{eqnarray*} 336 | \overline{x}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}x_{t}(i)\\ 337 | \overline{y}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}y_{t}(i)\\ 338 | & = & \frac{1}{W_{T}}\sum_{t=\tau}^{T+\tau}w_{t-\tau}\tilde{x}_{t}\\ 339 | W_{T} & = & \sum_{t=1}^{T}w_{t}. 
340 | \end{eqnarray*} 341 | 342 | \end_inset 343 | 344 | The asymmetric correlation then becomes 345 | \end_layout 346 | 347 | \begin_layout Standard 348 | \begin_inset Formula 349 | \begin{eqnarray*} 350 | C_{a}(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right). 351 | \end{eqnarray*} 352 | 353 | \end_inset 354 | 355 | Using the analogues of Eqs. 356 | 357 | \begin_inset CommandInset ref 358 | LatexCommand eqref 359 | reference "eq:chunk_definitions_0" 360 | 361 | \end_inset 362 | 363 | - 364 | \begin_inset CommandInset ref 365 | LatexCommand eqref 366 | reference "eq:chunk_definitions_5" 367 | 368 | \end_inset 369 | 370 | , we find the updating formula for time-lagged data to be the same as Eq. 371 | 372 | \begin_inset CommandInset ref 373 | LatexCommand eqref 374 | reference "eq:Update_Standard" 375 | 376 | \end_inset 377 | 378 | : 379 | \end_layout 380 | 381 | \begin_layout Standard 382 | \begin_inset Formula 383 | \begin{eqnarray} 384 | C_{a}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{y_{T_{2}}}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:Update_Asym} 385 | \end{eqnarray} 386 | 387 | \end_inset 388 | 389 | 390 | \end_layout 391 | 392 | \begin_layout Section 393 | Conclusions 394 | \end_layout 395 | 396 | \begin_layout Standard 397 | We have shown that mean-free correlations can be easily computed in chunks 398 | for arbitrary time series 399 | \begin_inset Formula $x_{t}$ 400 | \end_inset 401 | 402 | , 403 | \begin_inset Formula $y_{t}$ 404 | \end_inset 405 | 406 | , including time-dependent weights. 407 | Moreover, symmetrized mean-free correlations can be computed for arbitrary 408 | time-series, which can also be time-lagged copies. 
409 | Finally, we found that for time-lagged time series which are not supposed 410 | to be symmetrized, it seems to make sense to compute the means using the 411 | weights of the first 412 | \begin_inset Formula $T$ 413 | \end_inset 414 | 415 | steps. 416 | \end_layout 417 | 418 | \begin_layout Section 419 | Proofs 420 | \end_layout 421 | 422 | \begin_layout Standard 423 | \begin_inset CommandInset label 424 | LatexCommand label 425 | name "sec:Proofs" 426 | 427 | \end_inset 428 | 429 | 430 | \end_layout 431 | 432 | \begin_layout Standard 433 | First, we determine an expression for the full correlation in terms of the 434 | partial sums 435 | \begin_inset Formula $S_{T_{1}},\, S_{T_{2}}$ 436 | \end_inset 437 | 438 | and a correction term for all cases considered here. 439 | We will see then that the correction term can be expressed in the forms 440 | given in Eqs. 441 | 442 | \begin_inset CommandInset ref 443 | LatexCommand eqref 444 | reference "eq:Update_Standard" 445 | 446 | \end_inset 447 | 448 | , 449 | \begin_inset CommandInset ref 450 | LatexCommand eqref 451 | reference "eq:Updata_Sym" 452 | 453 | \end_inset 454 | 455 | and 456 | \begin_inset CommandInset ref 457 | LatexCommand eqref 458 | reference "eq:Update_Asym" 459 | 460 | \end_inset 461 | 462 | . 
463 | Let us consider the standard case: 464 | \end_layout 465 | 466 | \begin_layout Standard 467 | \begin_inset Formula 468 | \begin{eqnarray} 469 | C(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\label{eq:Splitting_Cij_0}\\ 470 | & = & \sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\nonumber \\ 471 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\label{eq:Splitting_Cij_1}\\ 472 | & = & \sum_{t=1}^{T_{1}}w_{t}\left(\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)-\gamma_{1}^{x}(i)\right)\left(\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{y}(j)\right)\nonumber \\ 473 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)-\gamma_{2}^{x}(i)\right)\left(\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{y}(j)\right),\label{eq:Splitting_Cij_2} 474 | \end{eqnarray} 475 | 476 | \end_inset 477 | 478 | where 479 | \begin_inset Formula $\gamma_{k}^{x}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i)$ 480 | \end_inset 481 | 482 | and 483 | \begin_inset Formula $\gamma_{k}^{y}(i)=\overline{y}(i)-\overline{y_{T_{k}}}(i)$ 484 | \end_inset 485 | 486 | . 
487 | We proceed to find 488 | \end_layout 489 | 490 | \begin_layout Standard 491 | \begin_inset Formula 492 | \begin{eqnarray} 493 | C(i,j) & = & \sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{x}(i)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)\nonumber \\ 494 | & & -\gamma_{1}^{y}(j)\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)+\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)\nonumber \\ 495 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{x}(i)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)\nonumber \\ 496 | & & -\gamma_{2}^{y}(j)\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)+\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)\label{eq:Splitting_Cij_3}\\ 497 | & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j).\label{eq:Splitting_Cij_4} 498 | \end{eqnarray} 499 | 500 | \end_inset 501 | 502 | It remains to deal with the term: 503 | \end_layout 504 | 505 | \begin_layout Standard 506 | \begin_inset Formula 507 | \begin{eqnarray} 508 | W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j) & = & W_{T_{1}}\left(\overline{x}(i)\overline{y}(j)-\overline{x}(i)\overline{y_{T_{1}}}(j)-\overline{x_{T_{1}}}(i)\overline{y}(j)+\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)\right)\nonumber \\ 509 | & & +W_{T_{2}}\left(\overline{x}(i)\overline{y}(j)-\overline{x}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{2}}}(i)\overline{y}(j)+\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\right)\label{eq:Gamma_Reformulation_0}\\ 510 | & = & \left(W_{T_{1}}+W_{T_{2}}\right)\overline{x}(i)\overline{y}(j)+W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)\nonumber \\ 511 | & & +W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)-\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\nonumber \\ 512 | & & 
-\overline{y}(j)\left(W_{T_{1}}\overline{x_{T_{1}}}(i)+W_{T_{2}}\overline{x_{T_{2}}}(i)\right).\label{eq:Gamma_Reformulation_1} 513 | \end{eqnarray} 514 | 515 | \end_inset 516 | 517 | Now, we use that 518 | \begin_inset Formula $W_{T_{1}}\overline{x_{T_{1}}}(i)+W_{T_{2}}\overline{x_{T_{2}}}(i)=W_{T}\overline{x}(i)$ 519 | \end_inset 520 | 521 | to find: 522 | \end_layout 523 | 524 | \begin_layout Standard 525 | \begin_inset Formula 526 | \begin{eqnarray} 527 | & = & W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\nonumber \\ 528 | & & -\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\label{eq:Gamma_Reformulation_2}\\ 529 | & = & \frac{1}{W_{T}}\left[W_{T}\left(W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\right)\right]\nonumber \\ 530 | & & -\frac{1}{W_{T}}\left[W_{T}\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\right]\label{eq:Gamma_Reformulation_3}\\ 531 | & = & \frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left[\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{1}}}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{2}}}(i)\overline{y_{T_{1}}}(j)\right].\label{eq:Gamma_Reformulation_4} 532 | \end{eqnarray} 533 | 534 | \end_inset 535 | 536 | This completes the proof of Eq. 537 | 538 | \begin_inset CommandInset ref 539 | LatexCommand eqref 540 | reference "eq:Update_Standard" 541 | 542 | \end_inset 543 | 544 | . 545 | For the symmetric case, the procedure from Eqs.
546 | 547 | \begin_inset CommandInset ref 548 | LatexCommand eqref 549 | reference "eq:Splitting_Cij_0" 550 | 551 | \end_inset 552 | 553 | - 554 | \begin_inset CommandInset ref 555 | LatexCommand eqref 556 | reference "eq:Splitting_Cij_4" 557 | 558 | \end_inset 559 | 560 | can be repeated to come up with the expression 561 | \end_layout 562 | 563 | \begin_layout Standard 564 | \begin_inset Formula 565 | \begin{eqnarray*} 566 | C_{s}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\left(\gamma_{1}(i)\gamma_{1}(j)+\gamma_{1}(j)\gamma_{1}(i)\right)\\ 567 | & & +W_{T_{2}}\left(\gamma_{2}(i)\gamma_{2}(j)+\gamma_{2}(j)\gamma_{2}(i)\right), 568 | \end{eqnarray*} 569 | 570 | \end_inset 571 | 572 | where 573 | \begin_inset Formula $\gamma_{k}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i)$ 574 | \end_inset 575 | 576 | . 577 | Then, the steps of Eqs. 578 | 579 | \begin_inset CommandInset ref 580 | LatexCommand eqref 581 | reference "eq:Gamma_Reformulation_0" 582 | 583 | \end_inset 584 | 585 | - 586 | \begin_inset CommandInset ref 587 | LatexCommand eqref 588 | reference "eq:Gamma_Reformulation_3" 589 | 590 | \end_inset 591 | 592 | can be repeated in the same way. 593 | For the asymmetric case, Eqs. 
594 | 595 | \begin_inset CommandInset ref 596 | LatexCommand eqref 597 | reference "eq:Splitting_Cij_0" 598 | 599 | \end_inset 600 | 601 | - 602 | \begin_inset CommandInset ref 603 | LatexCommand eqref 604 | reference "eq:Splitting_Cij_4" 605 | 606 | \end_inset 607 | 608 | yield the expression 609 | \end_layout 610 | 611 | \begin_layout Standard 612 | \begin_inset Formula 613 | \begin{eqnarray*} 614 | C_{a}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)\\ 615 | & & -\gamma_{1}^{x}(i)\sum_{t=1}^{T_{1}}w_{t}\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{y}(j)\sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\\ 616 | & & -\gamma_{2}^{x}(i)\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{y}(j)\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right). 617 | \end{eqnarray*} 618 | 619 | \end_inset 620 | 621 | Here, we have used 622 | \begin_inset Formula $\gamma_{k}^{x}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i),\,\gamma_{k}^{y}(i)=\overline{y}(i)-\overline{y_{T_{k}}}(i)$ 623 | \end_inset 624 | 625 | . 626 | The cross-terms cancel out and the expression 627 | \begin_inset Formula $W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)$ 628 | \end_inset 629 | 630 | can be reformulated through Eqs. 631 | 632 | \begin_inset CommandInset ref 633 | LatexCommand eqref 634 | reference "eq:Gamma_Reformulation_0" 635 | 636 | \end_inset 637 | 638 | - 639 | \begin_inset CommandInset ref 640 | LatexCommand eqref 641 | reference "eq:Gamma_Reformulation_4" 642 | 643 | \end_inset 644 | 645 | to end up with Eq. 646 | 647 | \begin_inset CommandInset ref 648 | LatexCommand eqref 649 | reference "eq:Update_Asym" 650 | 651 | \end_inset 652 | 653 | . 
654 | \end_layout 655 | 656 | \end_body 657 | \end_document 658 | -------------------------------------------------------------------------------- /variational/estimators/moments.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Data Types 4 | ---------- 5 | The standard data type for covariance computations is 6 | float64, because the double precision (but not single precision) is 7 | usually sufficient to compute the long sums involved in covariance 8 | matrix computations. Integer types are avoided even if the data is integer, 9 | because the BLAS matrix multiplication is very fast with floats, but very 10 | slow with integers. If X is of boolean type (0/1), the standard data type 11 | is float32, because this will be sufficient to represent numbers up to 2^23 12 | without rounding error, which is usually sufficient sufficient as the 13 | largest element in np.dot(X.T, X) can then be T, the number of data points. 14 | 15 | Efficient Use 16 | ------------- 17 | In order to get speedup with boolean input, remove_mean=False is required. 18 | Note that you can still do TICA that way. 19 | 20 | Sparsification 21 | -------------- 22 | We aim at computing covariance matrices. For large (T x N) data matrices X, Y, 23 | the bottleneck of this operation is computing the matrix product np.dot(X.T, X), 24 | or np.dot(X.T, Y), with algorithmic complexity O(N^2 T). If X, Y have zero or 25 | constant columns, we can reduce N and thus reduce the algorithmic complexity. 26 | 27 | However, the BLAS matrix product used by np.dot() is highly Cache optimized - 28 | the data is accessed in a way that most operations are done in cache, making the 29 | calculation extremely efficient. 
Thus, even if X, Y have zero or constant columns, 30 | it does not always pay off to interfere with this operation - on the one hand by 31 | spending compute time to determine the sparsity of the matrices, on the other 32 | hand by using slicing operations that reduce the algorithmic complexity, but may 33 | destroy the order of the data and thus produce more cache failures. 34 | 35 | In order to make an informed decision, we have compared the runtime of the following 36 | operations using matrices of various different sizes (T x N) and different degrees 37 | of sparsity. (using an Intel Core i7 with OS/X 10.10.1): 38 | 39 | 1. Compute np.dot(X.T, X) 40 | 2. Compute np.dot(X[:, sel].T, X[:, sel]) where sel selects the nonzero columns 41 | 3. Make a copy X0 = X[:, sel].copy() and then compute np.dot(X0.T, X0) 42 | 43 | It may seem that step 3 is not a good idea because we make the extra effort of 44 | copying the matrix. However, the new copy will have data ordered sequentially in 45 | memory, and therefore better prepared for the algorithmically more expensive but 46 | cache-optimized matrix product. 47 | 48 | We have empirically found that: 49 | 50 | * Making a copy before running np.dot (option 3) is in most cases better than 51 | using the dot product on sliced arrays (option 2). Exceptions are when the 52 | data is extremely sparse, such that only a few columns are selected. 53 | * Copying and subselecting columns (option 3) is only faster than the full 54 | dot product (option 1), if 50% or less columns are selected. This observation 55 | is roughly independent of N. 56 | * The observations above are valid for matrices (T x N) that are sufficiently 57 | large. We assume that "sufficiently large" means that they don't fully fit 58 | in the cache. For small matrices, the trends are less clear and different 59 | rules may apply. 60 | 61 | In order to optimize covariance calculation for large matrices, we therefore 62 | take the following actions: 63 | 64 | 1. 
Given matrix size of X (and Y), determine the minimum number of columns 65 | that need to be constant in order to use sparse computation. 66 | 2. Efficiently determine sparsity of X (and Y). Give up as soon as the 67 | number of constant column candidates drops below the minimum number, to 68 | avoid wasting time on the decision. 69 | 3. Subselect the desired columns and copy the data to a new array X0 (Y0). 70 | 4. Run operation on the new array X0 (Y0), including in-place substraction 71 | of the mean if needed. 72 | 73 | """ 74 | from __future__ import absolute_import 75 | 76 | __author__ = 'noe' 77 | 78 | import math, sys, numbers, warnings 79 | import numpy as np 80 | from .covar_c import covartools 81 | 82 | 83 | def _is_zero(x): 84 | """ Returns True if x is numerically 0 or an array with 0's. """ 85 | if x is None: 86 | return True 87 | if isinstance(x, numbers.Number): 88 | return x == 0.0 89 | if isinstance(x, np.ndarray): 90 | return np.all(x == 0) 91 | return False 92 | 93 | 94 | def _sparsify(X, remove_mean=False, modify_data=False, sparse_mode='auto', sparse_tol=0.0): 95 | """ Determines the sparsity of X and returns a selected sub-matrix 96 | 97 | Only conducts sparsification if the number of constant columns is at least 98 | max(a N - b, min_const_col_number), 99 | 100 | Parameters 101 | ---------- 102 | X : ndarray 103 | data matrix 104 | remove_mean : bool 105 | True: remove column mean from the data, False: don't remove mean. 106 | modify_data : bool 107 | If remove_mean=True, the mean will be removed in the data matrix X, 108 | without creating an independent copy. This option is faster but might 109 | lead to surprises because your input array is changed. 
110 | sparse_mode : str 111 | one of: 112 | * 'dense' : always use dense mode 113 | * 'sparse' : always use sparse mode if possible 114 | * 'auto' : automatic 115 | 116 | Returns 117 | ------- 118 | X0 : ndarray (view of X) 119 | Either X itself (if not sufficiently sparse), or a sliced view of X, 120 | containing only the variable columns 121 | mask : ndarray(N, dtype=bool) or None 122 | Bool selection array that indicates which columns of X were selected for 123 | X0, i.e. X0 = X[:, mask]. mask is None if no sparse selection was made. 124 | xconst : ndarray(N) 125 | Constant column values that are outside the sparse selection, i.e. 126 | X[i, ~mask] = xconst for any row i. xconst=0 if no sparse selection was made. 127 | 128 | """ 129 | if sparse_mode.lower() == 'sparse': 130 | min_const_col_number = 0 # enforce sparsity. A single constant column will lead to sparse treatment 131 | elif sparse_mode.lower() == 'dense': 132 | min_const_col_number = X.shape[1] + 1 # never use sparsity 133 | else: 134 | if remove_mean and not modify_data: # in this case we have to copy the data anyway, and can be permissive 135 | min_const_col_number = max(0.1 * X.shape[1], 50) 136 | else: 137 | # This is a rough heuristic to choose a minimum column number for which sparsity may pay off. 138 | # This heuristic is good for large number of samples, i.e. it may be inadequate for small matrices X. 
139 | if X.shape[1] < 250: 140 | min_const_col_number = X.shape[1] - 0.25 * X.shape[1] 141 | elif X.shape[1] < 1000: 142 | min_const_col_number = X.shape[1] - (0.5 * X.shape[1] - 100) 143 | else: 144 | min_const_col_number = X.shape[1] - (0.8 * X.shape[1] - 400) 145 | 146 | if X.shape[1] > min_const_col_number: 147 | mask = covartools.variable_cols(X, tol=sparse_tol, min_constant=min_const_col_number) # bool vector 148 | nconst = len(np.where(~mask)[0]) 149 | if nconst > min_const_col_number: 150 | xconst = X[0, ~mask] 151 | X = X[:, mask] # sparsify 152 | else: 153 | xconst = None 154 | mask = None 155 | else: 156 | xconst = None 157 | mask = None 158 | 159 | return X, mask, xconst # None, 0 if not sparse 160 | 161 | 162 | def _sparsify_pair(X, Y, remove_mean=False, modify_data=False, symmetrize=False, sparse_mode='auto', sparse_tol=0.0): 163 | """ 164 | """ 165 | T = X.shape[0] 166 | N = math.sqrt(X.shape[1] * Y.shape[1]) 167 | # check each data set separately for sparsity. 168 | X0, mask_X, xconst = _sparsify(X, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 169 | Y0, mask_Y, yconst = _sparsify(Y, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 170 | # if we have nonzero constant columns and the number of samples is too small, do not treat as 171 | # sparse, because then the const-specialized dot product function doesn't pay off. 172 | is_const = not (_is_zero(xconst) and _is_zero(yconst)) 173 | if is_const and (symmetrize or not remove_mean) and 10*T < N: 174 | return X, None, None, Y, None, None 175 | else: 176 | return X0, mask_X, xconst, Y0, mask_Y, yconst 177 | 178 | 179 | def _copy_convert(X, const=None, remove_mean=False, copy=True): 180 | """ Makes a copy or converts the data type if needed 181 | 182 | Copies the data and converts the data type if unsuitable for covariance 183 | calculation. 
The standard data type for covariance computations is 184 | float64, because the double precision (but not single precision) is 185 | usually sufficient to compute the long sums involved in covariance 186 | matrix computations. Integer types are avoided even if the data is integer, 187 | because the BLAS matrix multiplication is very fast with floats, but very 188 | slow with integers. If X is of boolean type (0/1), the standard data type 189 | is float32, because this will be sufficient to represent numbers up to 2^23 190 | without rounding error, which is usually sufficient sufficient as the 191 | largest element in np.dot(X.T, X) can then be T, the number of data points. 192 | 193 | Parameters 194 | ---------- 195 | remove_mean : bool 196 | If True, will enforce float64 even if the input is boolean 197 | copy : bool 198 | If True, enforces a copy even if the data type doesn't require it. 199 | 200 | Return 201 | ------ 202 | X : ndarray 203 | copy or reference to X if no copy was needed. 204 | const : ndarray or None 205 | copy or reference to const if no copy was needed. 
206 | 207 | """ 208 | # determine type 209 | dtype = np.float64 # default: convert to float64 in order to avoid cancellation errors 210 | if X.dtype.kind == 'b' and X.shape[0] < 2**23 and not remove_mean: 211 | dtype = np.float32 # convert to float32 if we can represent all numbers 212 | # copy/convert if needed 213 | if X.dtype not in (np.float64, dtype): # leave as float64 (conversion is expensive), otherwise convert to dtype 214 | X = X.astype(dtype, order='C') 215 | if const is not None: 216 | const = const.astype(dtype, order='C') 217 | elif copy: 218 | X = X.copy(order='C') 219 | if const is not None: 220 | const = const.copy(order='C') 221 | 222 | return X, const 223 | 224 | 225 | def _sum_sparse(xsum, mask_X, xconst, T): 226 | s = np.zeros(len(mask_X)) 227 | s[mask_X] = xsum 228 | s[~mask_X] = T * xconst 229 | return s 230 | 231 | 232 | def _sum(X, xmask=None, xconst=None, Y=None, ymask=None, yconst=None, symmetric=False, remove_mean=False, 233 | weights=None): 234 | """ Computes the column sums and centered column sums. 235 | 236 | If symmetric = False, the sums will be determined as 237 | .. math: 238 | sx &=& \frac{1}{2} \sum_t x_t 239 | sy &=& \frac{1}{2} \sum_t y_t 240 | 241 | If symmetric, the sums will be determined as 242 | 243 | .. 
math: 244 | sx = sy = \frac{1}{2T} \sum_t x_t + y_t 245 | 246 | Returns 247 | ------- 248 | w : float 249 | statistical weight of sx, sy 250 | sx : ndarray 251 | effective row sum of X (including symmetrization if requested) 252 | sx_raw_centered : ndarray 253 | centered raw row sum of X 254 | 255 | optional returns (only if Y is given): 256 | 257 | sy : ndarray 258 | effective row sum of X (including symmetrization if requested) 259 | sy_raw_centered : ndarray 260 | centered raw row sum of Y 261 | 262 | """ 263 | T = X.shape[0] 264 | # Check if weights are given: 265 | if weights is not None: 266 | X = weights[:, None] * X 267 | if Y is not None: 268 | Y = weights[:, None] * Y 269 | # compute raw sums on variable data 270 | sx_raw = X.sum(axis=0) # this is the mean before subtracting it. 271 | sy_raw = 0 272 | if Y is not None: 273 | sy_raw = Y.sum(axis=0) 274 | 275 | # expand raw sums to full data 276 | if xmask is not None: 277 | if weights is not None: 278 | sx_raw = _sum_sparse(sx_raw, xmask, xconst, weights.sum()) 279 | else: 280 | sx_raw = _sum_sparse(sx_raw, xmask, xconst, T) 281 | if ymask is not None: 282 | if weights is not None: 283 | sy_raw = _sum_sparse(sy_raw, ymask, yconst, weights.sum()) 284 | else: 285 | sy_raw = _sum_sparse(sy_raw, ymask, yconst, T) 286 | 287 | # compute effective sums and centered sums 288 | if Y is not None and symmetric: 289 | sx = sx_raw + sy_raw 290 | sy = sx 291 | if weights is not None: 292 | w = 2*np.sum(weights) 293 | else: 294 | w = 2 * T 295 | else: 296 | sx = sx_raw 297 | sy = sy_raw 298 | if weights is not None: 299 | w = np.sum(weights) 300 | else: 301 | w = T 302 | 303 | sx_raw_centered = sx_raw.copy() 304 | if Y is not None: 305 | sy_raw_centered = sy_raw.copy() 306 | 307 | # center mean. 
308 | if remove_mean: 309 | if Y is not None and symmetric: 310 | sx_raw_centered -= 0.5 * sx 311 | sy_raw_centered -= 0.5 * sy 312 | else: 313 | sx_raw_centered = np.zeros(sx.size) 314 | if Y is not None: 315 | sy_raw_centered = np.zeros(sy.size) 316 | 317 | # return 318 | if Y is not None: 319 | return w, sx, sx_raw_centered, sy, sy_raw_centered 320 | else: 321 | return w, sx, sx_raw_centered 322 | 323 | 324 | def _center(X, w, s, mask=None, const=None, inplace=True): 325 | """ Centers the data. 326 | 327 | Parameters 328 | ---------- 329 | w : float 330 | statistical weight of s 331 | inplace : bool 332 | center in place 333 | 334 | Returns 335 | ------- 336 | sx : ndarray 337 | uncentered row sum of X 338 | sx_centered : ndarray 339 | row sum of X after centering 340 | 341 | optional returns (only if Y is given): 342 | 343 | sy_raw : ndarray 344 | uncentered row sum of Y 345 | sy_centered : ndarray 346 | row sum of Y after centering 347 | 348 | """ 349 | xmean = s / float(w) 350 | if mask is None: 351 | X = covartools.subtract_row(X, xmean, inplace=inplace) 352 | else: 353 | X = covartools.subtract_row(X, xmean[mask], inplace=inplace) 354 | if inplace: 355 | const = np.subtract(const, xmean[~mask], const) 356 | else: 357 | const = np.subtract(const, xmean[~mask]) 358 | 359 | return X, const 360 | 361 | 362 | # ==================================================================================== 363 | # SECOND MOMENT MATRICES / COVARIANCES 364 | # ==================================================================================== 365 | 366 | def _M2_dense(X, Y, weights=None): 367 | """ 2nd moment matrix using dense matrix computations. 
368 | 369 | This function is encapsulated such that we can make easy modifications of the basic algorithms 370 | 371 | """ 372 | if weights is not None: 373 | return np.dot((weights[:, None] * X).T, Y) 374 | else: 375 | return np.dot(X.T, Y) 376 | 377 | 378 | def _M2_const(Xvar, mask_X, xvarsum, xconst, Yvar, mask_Y, yvarsum, yconst, weights=None): 379 | """ Computes the unnormalized covariance matrix between X and Y, exploiting constant input columns 380 | 381 | Computes the unnormalized covariance matrix :math:`C = X^\top Y` 382 | (for symmetric=False) or :math:`C = \frac{1}{2} (X^\top Y + Y^\top X)` 383 | (for symmetric=True). Suppose the data matrices can be column-permuted 384 | to have the form 385 | 386 | .. math: 387 | X &=& (X_{\mathrm{var}}, X_{\mathrm{const}}) 388 | Y &=& (Y_{\mathrm{var}}, Y_{\mathrm{const}}) 389 | 390 | with rows: 391 | 392 | .. math: 393 | x_t &=& (x_{\mathrm{var},t}, x_{\mathrm{const}}) 394 | y_t &=& (y_{\mathrm{var},t}, y_{\mathrm{const}}) 395 | 396 | where :math:`x_{\mathrm{const}},\:y_{\mathrm{const}}` are constant vectors. 397 | The resulting matrix has the general form: 398 | 399 | .. math: 400 | C &=& [X_{\mathrm{var}}^\top Y_{\mathrm{var}} x_{sum} y_{\mathrm{const}}^\top ] 401 | & & [x_{\mathrm{const}}^\top y_{sum}^\top x_{sum} x_{sum}^\top ] 402 | 403 | where :math:`x_{sum} = \sum_t x_{\mathrm{var},t}` and 404 | :math:`y_{sum} = \sum_t y_{\mathrm{var},t}`. 405 | 406 | Parameters 407 | ---------- 408 | Xvar : ndarray (T, m) 409 | Part of the data matrix X with :math:`m \le M` variable columns. 410 | mask_X : ndarray (M) 411 | Boolean array of size M of the full columns. False for constant column, 412 | True for variable column in X. 413 | xvarsum : ndarray (m) 414 | Column sum of variable part of data matrix X 415 | xconst : ndarray (M-m) 416 | Values of the constant part of data matrix X 417 | Yvar : ndarray (T, n) 418 | Part of the data matrix Y with :math:`n \le N` variable columns. 
419 | mask_Y : ndarray (N) 420 | Boolean array of size N of the full columns. False for constant column, 421 | True for variable column in Y. 422 | yvarsum : ndarray (n) 423 | Column sum of variable part of data matrix Y 424 | yconst : ndarray (N-n) 425 | Values of the constant part of data matrix Y 426 | weights : None or ndarray (N) 427 | weights for all time steps. 428 | 429 | Returns 430 | ------- 431 | C : ndarray (M, N) 432 | Unnormalized covariance matrix. 433 | 434 | """ 435 | C = np.zeros((len(mask_X), len(mask_Y))) 436 | # Block 11 437 | C[np.ix_(mask_X, mask_Y)] = _M2_dense(Xvar, Yvar, weights=weights) 438 | # other blocks 439 | xsum_is_0 = _is_zero(xvarsum) 440 | ysum_is_0 = _is_zero(yvarsum) 441 | xconst_is_0 = _is_zero(xconst) 442 | yconst_is_0 = _is_zero(yconst) 443 | # TODO: maybe we don't need the checking here, if we do the decision in the higher-level function M2 444 | # TODO: if not zero, we could still exploit the zeros in const and compute (and write!) this outer product 445 | # TODO: only to a sub-matrix 446 | # Block 12 and 21 447 | if weights is not None: 448 | wsum = np.sum(weights) 449 | xvarsum = np.sum(weights[:, None] * Xvar, axis=0) 450 | yvarsum = np.sum(weights[:, None] * Yvar, axis=0) 451 | else: 452 | wsum = Xvar.shape[0] 453 | if not (xsum_is_0 or yconst_is_0) or not (ysum_is_0 or xconst_is_0): 454 | C[np.ix_(mask_X, ~mask_Y)] = np.outer(xvarsum, yconst) 455 | C[np.ix_(~mask_X, mask_Y)] = np.outer(xconst, yvarsum) 456 | # Block 22 457 | if not (xconst_is_0 or yconst_is_0): 458 | C[np.ix_(~mask_X, ~mask_Y)] = np.outer(wsum*xconst, yconst) 459 | return C 460 | 461 | 462 | def _M2_sparse(Xvar, mask_X, Yvar, mask_Y, weights=None): 463 | """ 2nd moment matrix exploiting zero input columns """ 464 | C = np.zeros((len(mask_X), len(mask_Y))) 465 | C[np.ix_(mask_X, mask_Y)] = _M2_dense(Xvar, Yvar, weights=weights) 466 | return C 467 | 468 | 469 | def _M2_sparse_sym(Xvar, mask_X, Yvar, mask_Y, weights=None): 470 | """ 2nd self-symmetric 
moment matrix exploiting zero input columns 471 | 472 | Computes X'X + Y'Y and X'Y + Y'X 473 | 474 | """ 475 | assert len(mask_X) == len(mask_Y), 'X and Y need to have equal sizes for symmetrization' 476 | 477 | Cxxyy = np.zeros((len(mask_X), len(mask_Y))) 478 | Cxxyy[np.ix_(mask_X, mask_X)] = _M2_dense(Xvar, Xvar, weights=weights) 479 | Cxxyy[np.ix_(mask_Y, mask_Y)] += _M2_dense(Yvar, Yvar, weights=weights) 480 | 481 | Cxyyx = np.zeros((len(mask_X), len(mask_Y))) 482 | Cxy = _M2_dense(Xvar, Yvar, weights=weights) 483 | Cyx = _M2_dense(Yvar, Xvar, weights=weights) 484 | Cxyyx[np.ix_(mask_X, mask_Y)] = Cxy 485 | Cxyyx[np.ix_(mask_Y, mask_X)] += Cyx 486 | 487 | return Cxxyy, Cxyyx 488 | 489 | 490 | def _M2(Xvar, Yvar, mask_X=None, mask_Y=None, xsum=0, xconst=0, ysum=0, yconst=0, weights=None): 491 | """ direct (nonsymmetric) second moment matrix. Decide if we need dense, sparse, const""" 492 | if mask_X is None and mask_Y is None: 493 | return _M2_dense(Xvar, Yvar, weights=weights) 494 | else: 495 | # Check if one of the masks is not None, modify it and also adjust the constant columns: 496 | if mask_X is None: 497 | mask_X = np.ones(Xvar.shape[1], dtype=np.bool) 498 | xconst = np.ones(0, dtype=float) 499 | if mask_Y is None: 500 | mask_Y = np.ones(Yvar.shape[1], dtype=np.bool) 501 | yconst = np.ones(0, dtype=float) 502 | if _is_zero(xsum) and _is_zero(ysum) or _is_zero(xconst) and _is_zero(yconst): 503 | return _M2_sparse(Xvar, mask_X, Yvar, mask_Y, weights=weights) 504 | else: 505 | return _M2_const(Xvar, mask_X, xsum[mask_X], xconst, Yvar, mask_Y, ysum[mask_Y], yconst, weights=weights) 506 | 507 | 508 | def _M2_symmetric(Xvar, Yvar, mask_X=None, mask_Y=None, xsum=0, xconst=0, ysum=0, yconst=0, weights=None): 509 | """ symmetric second moment matrices. 
Decide if we need dense, sparse, const""" 510 | if mask_X is None and mask_Y is None: 511 | Cxxyy = _M2_dense(Xvar, Xvar, weights=weights) + _M2_dense(Yvar, Yvar, weights=weights) 512 | Cxy = _M2_dense(Xvar, Yvar, weights=weights) 513 | Cyx = _M2_dense(Yvar, Xvar, weights=weights) 514 | Cxyyx = Cxy + Cyx 515 | else: 516 | # Check if one of the masks is not None, modify it and also adjust the constant columns: 517 | if mask_X is None: 518 | mask_X = np.ones(Xvar.shape[1], dtype=np.bool) 519 | xconst = np.ones(0, dtype=float) 520 | if mask_Y is None: 521 | mask_Y = np.ones(Yvar.shape[1], dtype=np.bool) 522 | yconst = np.ones(0, dtype=float) 523 | if _is_zero(xsum) and _is_zero(ysum) or _is_zero(xconst) and _is_zero(yconst): 524 | Cxxyy, Cxyyx = _M2_sparse_sym(Xvar, mask_X, Yvar, mask_Y, weights=weights) 525 | else: 526 | xvarsum = xsum[mask_X] # to variable part 527 | yvarsum = ysum[mask_Y] # to variable part 528 | Cxxyy = _M2_const(Xvar, mask_X, xvarsum, xconst, Xvar, mask_X, xvarsum, xconst, weights=weights) \ 529 | + _M2_const(Yvar, mask_Y, yvarsum, yconst, Yvar, mask_Y, yvarsum, yconst, weights=weights) 530 | Cxy = _M2_const(Xvar, mask_X, xvarsum, xconst, Yvar, mask_Y, yvarsum, yconst, weights=weights) 531 | Cyx = _M2_const(Yvar, mask_Y, yvarsum, yconst, Xvar, mask_X, xvarsum, xconst, weights=weights) 532 | Cxyyx = Cxy + Cyx 533 | return Cxxyy, Cxyyx 534 | 535 | 536 | # ================================================= 537 | # USER API 538 | # ================================================= 539 | 540 | 541 | def moments_XX(X, remove_mean=False, modify_data=False, weights=None, sparse_mode='auto', sparse_tol=0.0): 542 | """ Computes the first two unnormalized moments of X 543 | 544 | Computes :math:`s = \sum_t x_t` and :math:`C = X^\top X` while exploiting 545 | zero or constant columns in the data matrix. 
546 | 547 | Parameters 548 | ---------- 549 | X : ndarray (T, M) 550 | Data matrix 551 | remove_mean : bool 552 | True: remove column mean from the data, False: don't remove mean. 553 | modify_data : bool 554 | If remove_mean=True, the mean will be removed in the data matrix X, 555 | without creating an independent copy. This option is faster but might 556 | lead to surprises because your input array is changed. 557 | weights: None or ndarray(T, ) 558 | weights assigned to each trajectory point. If None, all data points have weight one. 559 | If ndarray, each data point is assigned a separate weight. 560 | sparse_mode : str 561 | one of: 562 | * 'dense' : always use dense mode 563 | * 'sparse' : always use sparse mode if possible 564 | * 'auto' : automatic 565 | sparse_tol: float 566 | Threshold for considering column to be zero in order to save computing 567 | effort when the data is sparse or almost sparse. 568 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 569 | is not given) of the covariance matrix will be set to zero. If Y is 570 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 571 | covariance matrix will be set to zero. 572 | 573 | Returns 574 | ------- 575 | w : float 576 | statistical weight 577 | s : ndarray (M) 578 | sum 579 | C : ndarray (M, M) 580 | unnormalized covariance matrix 581 | 582 | """ 583 | # Check consistency of inputs: 584 | if weights is not None: 585 | assert X.shape[0] == weights.shape[0], 'X and weights_x must have equal length' 586 | # sparsify 587 | X0, mask_X, xconst = _sparsify(X, remove_mean=remove_mean, modify_data=modify_data, 588 | sparse_mode=sparse_mode, sparse_tol=sparse_tol) 589 | is_sparse = mask_X is not None 590 | # copy / convert 591 | # TODO: do we need to copy xconst? 
592 | X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean, 593 | copy=is_sparse or (remove_mean and not modify_data)) 594 | # sum / center 595 | w, sx, sx0_centered = _sum(X0, xmask=mask_X, xconst=xconst, symmetric=False, remove_mean=remove_mean, 596 | weights=weights) 597 | if remove_mean: 598 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering 599 | # TODO: we could make a second const check here. If after summation not enough zeros have appeared in the 600 | # TODO: consts, we switch back to dense treatment here. 601 | # compute covariance matrix 602 | C = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, xsum=sx0_centered, xconst=xconst, ysum=sx0_centered, yconst=xconst, 603 | weights=weights) 604 | return w, sx, C 605 | 606 | 607 | def moments_XXXY(X, Y, remove_mean=False, symmetrize=False, weights=None, 608 | modify_data=False, sparse_mode='auto', sparse_tol=0.0): 609 | """ Computes the first two unnormalized moments of X and Y 610 | 611 | If symmetrize is False, computes 612 | 613 | .. math: 614 | s_x &=& \sum_t x_t 615 | s_y &=& \sum_t y_t 616 | C_XX &=& X^\top X 617 | C_XY &=& X^\top Y 618 | 619 | If symmetrize is True, computes 620 | 621 | .. math: 622 | s_x = s_y &=& \frac{1}{2} \sum_t(x_t + y_t) 623 | C_XX &=& \frac{1}{2} (X^\top X + Y^\top Y) 624 | C_XY &=& \frac{1}{2} (X^\top Y + Y^\top X) 625 | 626 | while exploiting zero or constant columns in the data matrix. 627 | 628 | Parameters 629 | ---------- 630 | X : ndarray (T, M) 631 | Data matrix 632 | Y : ndarray (T, N) 633 | Second data matrix 634 | remove_mean : bool 635 | True: remove column mean from the data, False: don't remove mean. 636 | symmetrize : bool 637 | Computes symmetrized means and moments (see above) 638 | weights : None or ndarray(T, ) 639 | weights assigned to each trajectory point of X. If None, all data points have weight one. 640 | If ndarray, each data point is assigned a separate weight. 
641 | time_lagged : bool, 642 | indicates that Y is a time-lagged version of X. 643 | modify_data : bool 644 | If remove_mean=True, the mean will be removed in the data matrix X, 645 | without creating an independent copy. This option is faster but might 646 | lead to surprises because your input array is changed. 647 | sparse_mode : str 648 | one of: 649 | * 'dense' : always use dense mode 650 | * 'sparse' : always use sparse mode if possible 651 | * 'auto' : automatic 652 | sparse_tol: float 653 | Threshold for considering column to be zero in order to save computing 654 | effort when the data is sparse or almost sparse. 655 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 656 | is not given) of the covariance matrix will be set to zero. If Y is 657 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 658 | covariance matrix will be set to zero. 659 | 660 | Returns 661 | ------- 662 | w : float 663 | statistical weight 664 | s_x : ndarray (M) 665 | x-sum 666 | s_y : ndarray (N) 667 | y-sum 668 | C_XX : ndarray (M, M) 669 | unnormalized covariance matrix of X 670 | C_XY : ndarray (M, N) 671 | unnormalized covariance matrix of XY 672 | 673 | """ 674 | # Check consistency of inputs: 675 | if Y is not None: 676 | assert Y.shape[0] == X.shape[0], 'X and Y must have equal length.' 
677 | if weights is not None: 678 | assert X.shape[0] == weights.shape[0], 'X and weights_x must have equal length' 679 | # sparsify 680 | X0, mask_X, xconst, Y0, mask_Y, yconst = _sparsify_pair(X, Y, remove_mean=remove_mean, modify_data=modify_data, 681 | symmetrize=symmetrize, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 682 | is_sparse = mask_X is not None and mask_Y is not None 683 | # copy / convert 684 | copy = is_sparse or (remove_mean and not modify_data) 685 | X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean, copy=copy) 686 | Y0, yconst = _copy_convert(Y0, const=yconst, remove_mean=remove_mean, copy=copy) 687 | # sum / center 688 | w, sx, sx_centered, sy, sy_centered = _sum(X0, xmask=mask_X, xconst=xconst, Y=Y0, ymask=mask_Y, yconst=yconst, 689 | symmetric=symmetrize, remove_mean=remove_mean, weights=weights) 690 | if remove_mean: 691 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering 692 | _center(Y0, w, sy, mask=mask_Y, const=yconst, inplace=True) # fast in-place centering 693 | 694 | if symmetrize: 695 | Cxx, Cxy = _M2_symmetric(X0, Y0, mask_X=mask_X, mask_Y=mask_Y, 696 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst, weights=weights) 697 | else: 698 | Cxx = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, 699 | xsum=sx_centered, xconst=xconst, ysum=sx_centered, yconst=xconst, weights=weights) 700 | Cxy = _M2(X0, Y0, mask_X=mask_X, mask_Y=mask_Y, 701 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst, weights=weights) 702 | 703 | return w, sx, sy, Cxx, Cxy 704 | 705 | 706 | def moments_block(X, Y, remove_mean=False, modify_data=False, 707 | sparse_mode='auto', sparse_tol=0.0): 708 | """ Computes the first two unnormalized moments of X and Y 709 | 710 | Computes 711 | 712 | .. 
math: 713 | s_x &=& \sum_t x_t 714 | s_y &=& \sum_t y_t 715 | C_XX &=& X^\top X 716 | C_XY &=& X^\top Y 717 | C_YX &=& Y^\top X 718 | C_YY &=& Y^\top Y 719 | 720 | while exploiting zero or constant columns in the data matrix. 721 | 722 | Parameters 723 | ---------- 724 | X : ndarray (T, M) 725 | Data matrix 726 | Y : ndarray (T, N) 727 | Second data matrix 728 | remove_mean : bool 729 | True: remove column mean from the data, False: don't remove mean. 730 | modify_data : bool 731 | If remove_mean=True, the mean will be removed in the data matrix X, 732 | without creating an independent copy. This option is faster but might 733 | lead to surprises because your input array is changed. 734 | sparse_mode : str 735 | one of: 736 | * 'dense' : always use dense mode 737 | * 'sparse' : always use sparse mode if possible 738 | * 'auto' : automatic 739 | sparse_tol: float 740 | Threshold for considering column to be zero in order to save computing 741 | effort when the data is sparse or almost sparse. 742 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 743 | is not given) of the covariance matrix will be set to zero. If Y is 744 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 745 | covariance matrix will be set to zero. 746 | 747 | Returns 748 | ------- 749 | w : float 750 | statistical weight of this estimation 751 | s : [ndarray (M), ndarray (M)] 752 | list of two elements with s[0]=sx and s[0]=sy 753 | C : [[ndarray(M,M), ndarray(M,N)], [ndarray(N,M),ndarray(N,N)]] 754 | list of two lists with two elements. 
755 | C[0,0] = Cxx, C[0,1] = Cxy, C[1,0] = Cyx, C[1,1] = Cyy 756 | 757 | """ 758 | # sparsify 759 | X0, mask_X, xconst = _sparsify(X, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 760 | Y0, mask_Y, yconst = _sparsify(Y, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 761 | # copy / convert 762 | copy = sparse_mode or (remove_mean and not modify_data) 763 | X0, xconst = _copy_convert(X0, const=xconst, copy=copy) 764 | Y0, yconst = _copy_convert(Y0, const=yconst, copy=copy) 765 | # sum / center 766 | w, sx, sx_centered, sy, sy_centered = _sum(X0, xmask=mask_X, xconst=xconst, Y=Y0, ymask=mask_Y, yconst=yconst, 767 | symmetric=False, remove_mean=remove_mean) 768 | if remove_mean: 769 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering 770 | _center(Y0, w, sy, mask=mask_Y, const=yconst, inplace=True) # fast in-place centering 771 | 772 | Cxx = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, 773 | xsum=sx_centered, xconst=xconst, ysum=sx_centered, yconst=xconst) 774 | Cxy = _M2(X0, Y0, mask_X=mask_X, mask_Y=mask_Y, 775 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst) 776 | Cyy = _M2(Y0, Y0, mask_X=mask_Y, mask_Y=mask_Y, 777 | xsum=sy_centered, xconst=yconst, ysum=sy_centered, yconst=yconst) 778 | 779 | return w, [sx, sy], [[Cxx, Cxy], [Cxy.T, Cyy]] 780 | 781 | 782 | def covar(X, remove_mean=False, modify_data=False, weights=None, sparse_mode='auto', sparse_tol=0.0): 783 | """ Computes the covariance matrix of X 784 | 785 | Computes 786 | 787 | .. math: 788 | C_XX &=& X^\top X 789 | 790 | while exploiting zero or constant columns in the data matrix. 791 | WARNING: Directly use moments_XX if you can. This function does an additional 792 | constant-matrix multiplication and does not return the mean. 793 | 794 | Parameters 795 | ---------- 796 | X : ndarray (T, M) 797 | Data matrix 798 | remove_mean : bool 799 | True: remove column mean from the data, False: don't remove mean. 
800 | modify_data : bool 801 | If remove_mean=True, the mean will be removed in the data matrix X, 802 | without creating an independent copy. This option is faster but might 803 | lead to surprises because your input array is changed. 804 | weights : None or ndarray(T, ) 805 | weights assigned to each trajectory point of X. If None, all data points have weight one. 806 | If ndarray, each data point is assigned a separate weight. 807 | sparse_mode : str 808 | one of: 809 | * 'dense' : always use dense mode 810 | * 'sparse' : always use sparse mode if possible 811 | * 'auto' : automatic 812 | sparse_tol: float 813 | Threshold for considering column to be zero in order to save computing 814 | effort when the data is sparse or almost sparse. 815 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 816 | is not given) of the covariance matrix will be set to zero. If Y is 817 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 818 | covariance matrix will be set to zero. 819 | 820 | Returns 821 | ------- 822 | C_XX : ndarray (M, M) 823 | Covariance matrix of X 824 | 825 | See also 826 | -------- 827 | moments_XX 828 | 829 | """ 830 | w, s, M = moments_XX(X, remove_mean=remove_mean, weights=weights, modify_data=modify_data, 831 | sparse_mode=sparse_mode, sparse_tol=sparse_tol) 832 | return M / float(w) 833 | 834 | 835 | def covars(X, Y, remove_mean=False, modify_data=False, symmetrize=False, weights=None, sparse_mode='auto', 836 | sparse_tol=0.0): 837 | """ Computes the covariance and cross-covariance matrix of X and Y 838 | 839 | If symmetrize is False, computes 840 | 841 | .. math: 842 | C_XX &=& X^\top X 843 | C_XY &=& X^\top Y 844 | 845 | If symmetrize is True, computes 846 | 847 | .. math: 848 | C_XX &=& \frac{1}{2} (X^\top X + Y^\top Y) 849 | C_XY &=& \frac{1}{2} (X^\top Y + Y^\top X) 850 | 851 | while exploiting zero or constant columns in the data matrix. 852 | WARNING: Directly use moments_XXXY if you can. 
This function does an additional 853 | constant-matrix multiplication and does not return the mean. 854 | 855 | Parameters 856 | ---------- 857 | X : ndarray (T, M) 858 | Data matrix 859 | Y : ndarray (T, N) 860 | Second data matrix 861 | remove_mean : bool 862 | True: remove column mean from the data, False: don't remove mean. 863 | modify_data : bool 864 | If remove_mean=True, the mean will be removed in the data matrix X, 865 | without creating an independent copy. This option is faster but might 866 | lead to surprises because your input array is changed. 867 | symmetrize : bool 868 | Computes symmetrized means and moments (see above) 869 | weights : None or ndarray(T, ) 870 | weights assigned to each trajectory point of X. If None, all data points have weight one. 871 | If ndarray, each data point is assigned a separate weight. 872 | sparse_mode : str 873 | one of: 874 | * 'dense' : always use dense mode 875 | * 'sparse' : always use sparse mode if possible 876 | * 'auto' : automatic 877 | sparse_tol: float 878 | Threshold for considering column to be zero in order to save computing 879 | effort when the data is sparse or almost sparse. 880 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 881 | is not given) of the covariance matrix will be set to zero. If Y is 882 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 883 | covariance matrix will be set to zero. 884 | 885 | Returns 886 | ------- 887 | C_XX : ndarray (M, M) 888 | Covariance matrix of X 889 | C_XY : ndarray (M, N) 890 | Covariance matrix of XY 891 | 892 | See also 893 | -------- 894 | moments_XXXY 895 | 896 | """ 897 | w, sx, sy, Mxx, Mxy = moments_XXXY(X, Y, remove_mean=remove_mean, modify_data=modify_data, weights=weights, 898 | symmetrize=symmetrize, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 899 | return Mxx / float(w), Mxy / float(w) 900 | --------------------------------------------------------------------------------