├── .gitattributes ├── variational ├── solvers │ ├── __init__.py │ ├── eig_qr │ │ ├── __init__.py │ │ └── eig_qr.pyx │ ├── tests │ │ ├── __init__.py │ │ └── test_direct.py │ └── direct.py ├── estimators │ ├── tests │ │ ├── __init__.py │ │ ├── benchmark_moments.py │ │ ├── test_running_moments.py │ │ └── test_moments.py │ ├── covar_c │ │ ├── __init__.py │ │ ├── _covartools.h │ │ ├── covartools.pyx │ │ └── _covartools.c │ ├── __init__.py │ ├── running_moments.py │ └── moments.py ├── __init__.py ├── util.py └── _version.py ├── devtools ├── conda-recipe │ ├── build.sh │ ├── bld.bat │ ├── meta.yaml │ └── run_test.py └── ci │ ├── travis │ └── install_miniconda.sh │ └── appveyor │ ├── appveyor │ ├── runTestsuite.ps1 │ ├── run_with_env.cmd │ └── transform_xunit_to_appveyor.xsl │ ├── process_test_results.ps1 │ ├── run_with_env.cmd │ └── transform_xunit_to_appveyor.xsl ├── docs ├── Interface.pdf ├── Updating_Formulas.pdf ├── Interface.lyx └── Updating_Formulas.lyx ├── MANIFEST.in ├── examples └── basissets_ramachandran │ ├── torsion_A.npy │ ├── torsion_FGAIL.npy │ └── Example.py ├── .gitignore ├── setup.cfg ├── .travis.yml ├── appveyor.yml ├── README.md └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | variational/_version.py export-subst 2 | -------------------------------------------------------------------------------- /variational/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | -------------------------------------------------------------------------------- /variational/solvers/eig_qr/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' -------------------------------------------------------------------------------- /variational/estimators/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | 
-------------------------------------------------------------------------------- /variational/solvers/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | -------------------------------------------------------------------------------- /variational/estimators/covar_c/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | -------------------------------------------------------------------------------- /devtools/conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | $PYTHON setup.py install 3 | -------------------------------------------------------------------------------- /docs/Interface.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/docs/Interface.pdf -------------------------------------------------------------------------------- /docs/Updating_Formulas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/docs/Updating_Formulas.pdf -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # versioneer files 2 | include versioneer.py 3 | include variational/_version.py 4 | 5 | recursive-include variational *.pyx *.c *.h 6 | -------------------------------------------------------------------------------- /examples/basissets_ramachandran/torsion_A.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/examples/basissets_ramachandran/torsion_A.npy -------------------------------------------------------------------------------- 
/examples/basissets_ramachandran/torsion_FGAIL.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/variational/master/examples/basissets_ramachandran/torsion_FGAIL.npy -------------------------------------------------------------------------------- /variational/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .moments import moments_XX, moments_XXXY, moments_block 4 | from .moments import covar, covars 5 | from .running_moments import RunningCovar, running_covar 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled files 2 | *.pyc 3 | *.so 4 | build 5 | variational.egg-info 6 | dist 7 | 8 | # project files 9 | .idea 10 | 11 | # generated files 12 | variational/version.py 13 | variational/estimators/covar_c/covartools.c 14 | variational/solvers/eig_qr/eig_qr.c 15 | -------------------------------------------------------------------------------- /devtools/conda-recipe/bld.bat: -------------------------------------------------------------------------------- 1 | if not defined APPVEYOR ( 2 | echo not on appveyor 3 | "%PYTHON%" setup.py install 4 | ) else ( 5 | echo on appveyor 6 | cmd /E:ON /V:ON /C %APPVEYOR_BUILD_FOLDER%\devtools\ci\appveyor\run_with_env.cmd "%PYTHON%" setup.py install 7 | ) 8 | if errorlevel 1 exit 1 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files. 
5 | 6 | [versioneer] 7 | VCS = git 8 | style = pep440 9 | versionfile_source = variational/_version.py 10 | #versionfile_build = 11 | tag_prefix = 12 | parentdir_prefix = variational- 13 | 14 | -------------------------------------------------------------------------------- /devtools/ci/travis/install_miniconda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make TARGET overrideable with env 4 | : ${TARGET:=$HOME/miniconda} 5 | 6 | function install_miniconda { 7 | echo "installing miniconda to $TARGET" 8 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O mc.sh -o /dev/null 9 | bash mc.sh -b -f -p $TARGET 10 | } 11 | 12 | install_miniconda 13 | export PATH=$TARGET/bin:$PATH 14 | -------------------------------------------------------------------------------- /variational/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | __author__ = 'noe' 3 | 4 | # import subpackages such that they are available after the main package import 5 | from . import estimators 6 | from . import solvers 7 | 8 | # direct imports of important functions/classes to-level API 9 | from .solvers.direct import eig_corr 10 | from .solvers.direct import sort_by_norm 11 | from .solvers.eig_qr.eig_qr import eig_qr 12 | 13 | from ._version import get_versions 14 | __version__ = get_versions()['version'] 15 | del get_versions 16 | -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: variational 3 | version: !!str dev 4 | source: 5 | path: ../.. 
6 | 7 | build: 8 | preserve_egg_dir: True 9 | 10 | requirements: 11 | build: 12 | - python 13 | - setuptools 14 | - cython >=0.20 15 | - numpy >=1.7 16 | - scipy 17 | - six 18 | 19 | run: 20 | - python 21 | - setuptools 22 | - numpy >=1.7 23 | - scipy 24 | - six 25 | 26 | test: 27 | requires: 28 | - nose 29 | - coverage 30 | imports: 31 | - variational 32 | 33 | -------------------------------------------------------------------------------- /variational/util.py: -------------------------------------------------------------------------------- 1 | """ Add convenience functions here if needed 2 | """ 3 | 4 | __author__ = 'noe' 5 | 6 | 7 | 8 | def features_to_basis(infiles, basisset, outfiles): 9 | """Reads input files 10 | 11 | basisset : BasisSet object 12 | basis set tob e used 13 | 14 | References 15 | --------- 16 | .. [5] Vitalini, F., Noe, F. and Keller, B. (2015): 17 | A basis set for peptides for the variational approach to conformational kinetics. (In review). 18 | 19 | """ 20 | # cycle through input files 21 | # read infile 22 | # map to basis function values 23 | # write outfile 24 | pass 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | env: 6 | global: 7 | - PATH=$HOME/miniconda/bin:$PATH 8 | - common_py_deps="pylint jinja2 conda-build" 9 | matrix: 10 | - python=2.7 CONDA_PY=27 CONDA_NPY=17 11 | - python=3.4 CONDA_PY=34 CONDA_NPY=18 12 | - python=3.4 CONDA_PY=34 CONDA_NPY=19 13 | - python=3.5 CONDA_PY=35 CONDA_NPY=19 14 | 15 | before_install: 16 | - devtools/ci/travis/install_miniconda.sh 17 | - conda config --set always_yes true 18 | - conda config --add channels omnia 19 | - conda install -q $common_py_deps 20 | 21 | script: 22 | - conda build -q devtools/conda-recipe 23 | 24 | after_success: 25 | # coverage report: needs .coverage file generated by testsuite and git src 26 | - 
pip install coveralls 27 | - coveralls 28 | 29 | -------------------------------------------------------------------------------- /variational/estimators/covar_c/_covartools.h: -------------------------------------------------------------------------------- 1 | #ifndef _covartools_h_ 2 | #define _covartools_h_ 3 | 4 | void _subtract_row_double(double* X, double* row, int M, int N); 5 | void _subtract_row_float(double* X, double* row, int M, int N); 6 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N); 7 | int* _bool_to_list(int* b, int N, int nnz); 8 | void _variable_cols_char(int* cols, char* X, int M, int N, int min_constant); 9 | void _variable_cols_int(int* cols, int* X, int M, int N, int min_constant); 10 | void _variable_cols_long(int* cols, long* X, int M, int N, int min_constant); 11 | void _variable_cols_float(int* cols, float* X, int M, int N, int min_constant); 12 | void _variable_cols_double(int* cols, double* X, int M, int N, int min_constant); 13 | void _variable_cols_float_approx(int* cols, float* X, int M, int N, float tol, int min_constant); 14 | void _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/appveyor/runTestsuite.ps1: -------------------------------------------------------------------------------- 1 | function xslt_transform($xml, $xsl, $output) 2 | { 3 | trap [Exception] 4 | { 5 | Write-Host $_.Exception 6 | } 7 | 8 | $xslt = New-Object System.Xml.Xsl.XslCompiledTransform 9 | $xslt.Load($xsl) 10 | $xslt.Transform($xml, $output) 11 | } 12 | 13 | function upload($file) { 14 | trap [Exception] 15 | { 16 | Write-Host $_.Exception 17 | } 18 | 19 | $wc = New-Object 'System.Net.WebClient' 20 | $wc.UploadFile("https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)", $file) 21 | } 22 | 23 | function run { 24 | cd 
$env:APPVEYOR_BUILD_FOLDER 25 | $stylesheet = "tools/ci/appveyor/transform_xunit_to_appveyor.xsl" 26 | $input = "nosetests.xml" 27 | $output = "transformed.xml" 28 | 29 | if ( -not Test-Path $input ) { 30 | Write-Host "$input does not exist" 31 | return 32 | } 33 | xslt_transform $input $stylesheet $output 34 | 35 | upload $output 36 | Push-AppveyorArtifact $input 37 | Push-AppveyorArtifact $output 38 | } 39 | 40 | run 41 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/process_test_results.ps1: -------------------------------------------------------------------------------- 1 | function xslt_transform($xml, $xsl, $output) 2 | { 3 | trap [Exception] 4 | { 5 | Write-Host $_.Exception 6 | } 7 | 8 | $xslt = New-Object System.Xml.Xsl.XslCompiledTransform 9 | $xslt.Load($xsl) 10 | $xslt.Transform($xml, $output) 11 | } 12 | 13 | function upload($file) { 14 | trap [Exception] 15 | { 16 | Write-Host $_.Exception 17 | } 18 | 19 | $wc = New-Object 'System.Net.WebClient' 20 | $wc.UploadFile("https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)", $file) 21 | } 22 | 23 | function run { 24 | cd $env:APPVEYOR_BUILD_FOLDER 25 | $stylesheet = "devtools/ci/appveyor/transform_xunit_to_appveyor.xsl" 26 | $input = "nosetests.xml" 27 | $output = "transformed.xml" 28 | # if ( -not Test-Path $input ) { 29 | # throw "input file missing" 30 | # } 31 | xslt_transform $input $stylesheet $output 32 | 33 | upload $output 34 | Push-AppveyorArtifact $input 35 | Push-AppveyorArtifact $output 36 | 37 | # return exit code of testsuite 38 | if ( -not $success) { 39 | throw "testsuite not successful" 40 | } 41 | } 42 | 43 | run 44 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | global: 3 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 4 | # /E:ON and /V:ON 
options are not enabled in the batch script intepreter 5 | # See: http://stackoverflow.com/a/13751649/163740 6 | # this is being set in bld.bat of conda-recipe... 7 | #CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\devtools\\ci\\appveyor\\run_with_env.cmd" 8 | PYTHONUNBUFFERED: 1 9 | 10 | matrix: 11 | - PYTHON: "C:\\Miniconda" 12 | CONDA_PY: "27" 13 | 14 | - PYTHON: "C:\\Miniconda-x64" 15 | CONDA_PY: "27" 16 | ARCH: "64" 17 | 18 | - PYTHON: "C:\\Miniconda3" 19 | CONDA_PY: "34" 20 | 21 | - PYTHON: "C:\\Miniconda3-x64" 22 | CONDA_PY: "34" 23 | ARCH: "64" 24 | 25 | install: 26 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%;" 27 | 28 | - conda config --set always_yes true 29 | - conda config --add channels omnia 30 | - conda install -q conda-build jinja2 31 | 32 | 33 | build: false # Not a C# project, build stuff at the test step instead. 34 | 35 | test_script: 36 | # run testsuite and upload test results to AppVeyor; return exit code of testsuite 37 | - conda build -q devtools/conda-recipe 38 | -------------------------------------------------------------------------------- /devtools/conda-recipe/run_test.py: -------------------------------------------------------------------------------- 1 | 2 | import subprocess 3 | import os 4 | import sys 5 | import shutil 6 | import re 7 | 8 | src_dir = os.getenv('SRC_DIR') 9 | 10 | 11 | # matplotlib headless backend 12 | with open('matplotlibrc', 'w') as fh: 13 | fh.write('backend: Agg') 14 | 15 | 16 | def coverage_report(): 17 | fn = '.coverage' 18 | assert os.path.exists(fn) 19 | build_dir = os.getenv('TRAVIS_BUILD_DIR') 20 | dest = os.path.join(build_dir, fn) 21 | print( "copying coverage report to", dest) 22 | shutil.copy(fn, dest) 23 | assert os.path.exists(dest) 24 | 25 | # fix paths in .coverage file 26 | with open(dest, 'r') as fh: 27 | data = fh.read() 28 | match= '"/home/travis/miniconda/envs/_test/lib/python.+?/site-packages/.+?/(variational/.+?)"' 29 | repl = '"%s/\\1"' % build_dir 30 | data = re.sub(match, repl, data) 31 | 
os.unlink(dest) 32 | with open(dest, 'w+') as fh: 33 | fh.write(data) 34 | 35 | nose_run = "nosetests variational -vv" \ 36 | " --with-coverage --cover-inclusive --cover-package=variational" \ 37 | " --with-doctest --doctest-options=+NORMALIZE_WHITESPACE,+ELLIPSIS" \ 38 | .split(' ') 39 | 40 | res = subprocess.call(nose_run) 41 | 42 | 43 | # move .coverage file to git clone on Travis CI 44 | if os.getenv('TRAVIS', False): 45 | coverage_report() 46 | 47 | if False: #os.getenv('APPVEYOR', False): 48 | call = ('powershell ' + os.path.join(os.getenv('APPVEYOR_BUILD_FOLDER'), 49 | 'devtools', 'ci', 'appveyor', 50 | 'process_test_results.ps1')).split(' ') 51 | res |= subprocess.call(call) 52 | 53 | sys.exit(res) 54 | 55 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds do not require specific environment configurations. 
10 | :: 11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 12 | :: cmd interpreter, at least for (SDK v7.0) 13 | :: 14 | :: More details at: 15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 16 | :: http://stackoverflow.com/a/13751649/163740 17 | :: 18 | :: Author: Olivier Grisel 19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 20 | @ECHO OFF 21 | 22 | SET COMMAND_TO_RUN=%* 23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 24 | 25 | SET MAJOR_PYTHON_VERSION="%CONDA_PY:~0,1%" 26 | IF %MAJOR_PYTHON_VERSION% == "2" ( 27 | SET WINDOWS_SDK_VERSION="v7.0" 28 | ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( 29 | SET WINDOWS_SDK_VERSION="v7.1" 30 | ) ELSE ( 31 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 32 | EXIT 1 33 | ) 34 | 35 | IF "%ARCH%"=="64" ( 36 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 37 | SET DISTUTILS_USE_SDK=1 38 | SET MSSdk=1 39 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 41 | ECHO Executing: %COMMAND_TO_RUN% 42 | call %COMMAND_TO_RUN% || EXIT 1 43 | ) ELSE ( 44 | ECHO Using default MSVC build environment for 32 bit architecture 45 | ECHO Executing: %COMMAND_TO_RUN% 46 | call %COMMAND_TO_RUN% || EXIT 1 47 | ) 48 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/appveyor/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables 
to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds do not require specific environment configurations. 10 | :: 11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 12 | :: cmd interpreter, at least for (SDK v7.0) 13 | :: 14 | :: More details at: 15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 16 | :: http://stackoverflow.com/a/13751649/163740 17 | :: 18 | :: Author: Olivier Grisel 19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 20 | @ECHO OFF 21 | 22 | SET COMMAND_TO_RUN=%* 23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 24 | 25 | SET MAJOR_PYTHON_VERSION="%CONDA_PY:~0,1%" 26 | IF %MAJOR_PYTHON_VERSION% == "2" ( 27 | SET WINDOWS_SDK_VERSION="v7.0" 28 | ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( 29 | SET WINDOWS_SDK_VERSION="v7.1" 30 | ) ELSE ( 31 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 32 | EXIT 1 33 | ) 34 | 35 | IF "%ARCH%"=="64" ( 36 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 37 | SET DISTUTILS_USE_SDK=1 38 | SET MSSdk=1 39 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 40 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 41 | ECHO Executing: %COMMAND_TO_RUN% 42 | call %COMMAND_TO_RUN% || EXIT 1 43 | ) ELSE ( 44 | ECHO Using default MSVC build environment for 32 bit architecture 45 | ECHO Executing: %COMMAND_TO_RUN% 46 | call %COMMAND_TO_RUN% || EXIT 1 47 | ) 48 | -------------------------------------------------------------------------------- /variational/solvers/eig_qr/eig_qr.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport scipy.linalg.cython_lapack as scc 3 | 4 | def eig_qr(A): 5 | """ Compute eigenvalues and 
eigenvectors of symmetric matrix A using symmetric tridiagonal QR-algorithm 6 | with implicit shifts. The matrix is first transformed to tridiagonal shape using lapack's dsytrd routine. 7 | Then, the tridiagonal QR-iteration is performed using lapack's dsteqr routine. 8 | 9 | Parameters: 10 | ----------- 11 | A, ndarray (N, N): 12 | symmetric matrix. 13 | 14 | Returns: 15 | -------- 16 | D, ndarray(N,) 17 | array of eigenvalues of A 18 | B, ndarray(N, N) 19 | array of eigenvectors of A. 20 | """ 21 | 22 | # handle 1x1 case 23 | if np.size(A) == 1: # size can handle 1x1 arrays and numbers 24 | return A*np.ones(1), np.ones((1, 1)) 25 | 26 | # Definitions: 27 | cdef double[:,:] B = np.require(A, dtype=np.float64, requirements=["F", "A"]) 28 | cdef int n=A.shape[0], lda=A.shape[0], info, lwork=-1 29 | cdef char[:] uplo = np.zeros(1, "S1") 30 | uplo[:] = "U" 31 | cdef double[:] D = np.require(np.zeros(n), dtype=np.float64, requirements=["F", "A"]) 32 | cdef double[:] E = np.require(np.zeros(n-1), dtype=np.float64, requirements=["F", "A"]) 33 | cdef double[:] Tau = np.require(np.zeros(n-1), dtype=np.float64, requirements=["F", "A"]) 34 | cdef double[:] Work = np.require(np.zeros(1), dtype=np.float64, requirements=["F", "A"]) 35 | 36 | # Transform to tridiagonal shape: 37 | scc.dsytrd(&uplo[0], &n, &B[0, 0], &lda, &D[0], &E[0], &Tau[0], &Work[0], &lwork, &info) 38 | lwork = np.int(Work[0]) 39 | cdef double[:] Work2 = np.require(np.zeros(lwork), dtype=np.float64, requirements=["F", "A"]) 40 | scc.dsytrd(&uplo[0], &n, &B[0, 0], &lda, &D[0], &E[0], &Tau[0], &Work2[0], &lwork, &info) 41 | 42 | # Extract transformation to tridiagonal shape: 43 | lwork = -1 44 | scc.dorgtr(&uplo[0], &n, &B[0, 0], &lda, &Tau[0], &Work[0], &lwork, &info) 45 | lwork = np.int(Work[0]) 46 | cdef double[:] Work3 = np.require(np.zeros(lwork), dtype=np.float64, requirements=["F", "A"]) 47 | scc.dorgtr(&uplo[0], &n, &B[0, 0], &lda, &Tau[0], &Work3[0], &lwork, &info) 48 | 49 | # Run QR-iteration. 
50 | cdef double[:] Work4 = np.require(np.zeros(np.maximum(1,2*n-2)), dtype=np.float64, requirements=["F", "A"]) 51 | cdef char[:] compz = np.zeros(1, "S1") 52 | compz[:] = "V" 53 | scc.dsteqr(&compz[0], &n, &D[0], &E[0], &B[0, 0], &n, &Work4[0], &info) 54 | 55 | return np.asarray(D), np.asarray(B) 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /variational/solvers/tests/test_direct.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | import numpy as np 4 | from .. import direct 5 | 6 | __author__ = 'noe' 7 | 8 | 9 | def sort_by_norm_and_imag_sign(evals, evecs): 10 | arr = np.zeros((len(evals),), dtype=[('mag', np.float64), ('sign', np.float64)]) 11 | arr['mag'] = np.abs(evals) 12 | arr['sign'] = np.sign((np.imag(evals))) 13 | I = np.argsort(arr, order=['mag', 'sign'])[::-1] 14 | return evals[I], evecs[:, I] 15 | 16 | 17 | class TestDirect(unittest.TestCase): 18 | 19 | @classmethod 20 | def setUpClass(cls): 21 | pass 22 | 23 | def test_spd_inv_split(self): 24 | W = np.array([[1.0, 0.3, 0.2], 25 | [0.3, 0.8, 0.5], 26 | [0.2, 0.5, 0.9]]) 27 | for method in ['QR', 'schur']: 28 | L = direct.spd_inv_split(W, method=method) 29 | # Test if decomposition is correct: inv(W) == L L.T 30 | assert np.allclose(np.dot(L, L.T), np.linalg.inv(W)) 31 | # Test if matrices are orthogonal 32 | C = np.dot(L.T, L) 33 | assert np.max(np.abs(C - np.diag(np.diag(C)))) < 1e-12 34 | 35 | # Test if fails when given a nonsymmetric matrix 36 | W = np.array([[1.0, 0.2], 37 | [0.3, 0.8]]) 38 | with self.assertRaises(AssertionError): 39 | direct.spd_inv_split(W) 40 | 41 | def test_eig_corr(self): 42 | C0 = np.array([[1.0, 0.3, 0.2], 43 | [0.3, 0.8, 0.5], 44 | [0.2, 0.5, 0.9]]) 45 | Ct_sym = np.array([[0.5, 0.1, 0.0], 46 | [0.1, 0.3, 0.3], 47 | [0.0, 0.3, 0.2]]) 48 | Ct_nonsym = np.array([[0.5, 0.1, 0.3], 49 | [0.1, 0.3, 0.3], 50 | [0.0, 0.3, 
0.2]]) 51 | # reference solution 52 | import scipy 53 | for Ct in [Ct_sym, Ct_nonsym]: 54 | v0, R0 = scipy.linalg.eig(Ct, C0) 55 | v0, R0 = sort_by_norm_and_imag_sign(v0, R0) 56 | for method in ['QR', 'schur']: 57 | # Test correctness 58 | v, R = direct.eig_corr(C0, Ct, method=method) 59 | v, R = sort_by_norm_and_imag_sign(v, R) 60 | assert np.allclose(v0, v) # eigenvalues equal? 61 | # eigenvectors equivalent? 62 | for i in range(R0.shape[1]): 63 | assert np.allclose(R0[:, i] / R0[0, i], R[:, i] / R[0, i]) 64 | # Test if eigenpair diagonalizes the Koopman matrix 65 | K = np.dot(np.linalg.inv(C0), Ct) 66 | assert np.allclose(K, R.dot(np.diag(v)).dot(np.linalg.inv(R))) 67 | 68 | 69 | if __name__ == "__main__": 70 | unittest.main() 71 | -------------------------------------------------------------------------------- /examples/basissets_ramachandran/Example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Jul 31 10:55:24 2015 4 | 5 | @author: fvitalini 6 | """ 7 | 8 | """ 9 | This script contains examples of usage for the classes: 10 | RamachandranBasis 11 | RamachandranProductBasis 12 | which are contained in the variational package. 13 | """ 14 | 15 | 16 | import variational 17 | import numpy as np 18 | 19 | #Use of the function RamachandranBasis 20 | 21 | from variational.basissets.ramachandran import RamachandranBasis 22 | alabasis = RamachandranBasis('A', radians=False) #load the residue centered basis 23 | #function for residue Alanine and 24 | #default force field (ff_AMBER99SB_ILDN) 25 | #three eigenvectors are considered (order=2) 26 | #expects the timeseries in degrees. 
27 | atraj = np.load('torsion_A.npy') #the file contains the phi/psi timeseries for residue A 28 | print atraj[0:10,:] #first 10 timesteps only 29 | ala_basis_traj=alabasis.map(atraj) # projects the trajectory onto the residue basis function 30 | print ala_basis_traj[0:10, :] #first 10 timesteps only 31 | 32 | 33 | #Use of the function RamachandranProductBasis 34 | 35 | # 1: Different number excitations 36 | from variational.basissets.ramachandran import RamachandranProductBasis 37 | FGAILbasis=RamachandranProductBasis('FGAIL', n_excite=3, radians=False) #load the residue centered basis 38 | #functions for residues F-G-A-I-L and 39 | #default force field (ff_AMBER99SB_ILDN) 40 | #three eigenvectors are considered (order=2) 41 | #up to 3 excited residue per basis function (n_excite=3) 42 | #expects the timeseries in degrees. 43 | FGAIL_traj = np.load('torsion_FGAIL.npy') #the file contains the phi/psi timeseries for residues FGAIL 44 | print FGAIL_traj[0:10,:] #first 10 timesteps only 45 | FGAIL_basis_set_traj, FGAIL_basis_set_list=FGAILbasis.map(FGAIL_traj) #projects the trajectory onto the residue basis functions 46 | print FGAIL_basis_set_list 47 | print FGAIL_basis_set_traj[0:10,:] #first 10 timesteps only 48 | 49 | # 2: Select only residues FG 50 | FGbasis=RamachandranProductBasis('FGAIL',include_res=[True,True,False,False,False], radians=False) #load the residue centered basis 51 | #functions for residues F-G and 52 | #default force field (ff_AMBER99SB_ILDN) 53 | #three eigenvectors are considered (order=2) 54 | #2 excited residue per basis function (n_excite=2) 55 | #expects the timeseries in degrees. 
56 | FG_basis_set_traj, FG_basis_set_list=FGbasis.map(FGAIL_traj) #projects the trajectory onto the residue basis functions 57 | print FG_basis_set_list 58 | print FG_basis_set_traj[0:10,:] #first 10 timesteps only 59 | print FG_basis_set_traj[0:10,0] #first 10 timesteps of basis function 00 60 | print FG_basis_set_traj[0:10,1] #first 10 timesteps of basis function 01 61 | print FG_basis_set_traj[0:10,8] #first 10 timesteps of basis function 22 -------------------------------------------------------------------------------- /devtools/ci/appveyor/transform_xunit_to_appveyor.xsl: -------------------------------------------------------------------------------- 1 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | Fail 69 | Skip 70 | Pass 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /devtools/ci/appveyor/appveyor/transform_xunit_to_appveyor.xsl: -------------------------------------------------------------------------------- 1 | 12 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | Fail 69 | Skip 70 | Pass 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Variational 
Approach for conformation dynamics (VAC) 2 | ==================================================== 3 | 4 | This package contains basis sets, estimators and solvers for the variational approach for 5 | conformation dynamics, a theory that has been proposed in [1] and was further developed in 6 | [2] and [3]. The variational approach is analogous to the Ritz method [4] that is 7 | employed in computational quantum chemistry. It differs in how the involved 8 | matrices are computed and in the meaning of the involved operators, eigenfunctions and 9 | eigenvalues - see [3] for a comparison. 10 | 11 | Roughly, the idea of the VAC is as follows: Given a (classical) 12 | molecular dynamics trajectory with configurations {x_1, ..., x_T}, and a 13 | set of basis functions defined on the space of configurations {chi_1(x), ..., chi_n(x)}, 14 | we compute the two correlation matrices: 15 | 16 | c_ij (0) = < chi_i(x_t) chi_j(x_t) >_t 17 | c_ij (tau) = < chi_i(x_t) chi_j(x_t+tau) >_t 18 | 19 | where < . >_t is average over time t. Of course this can be generalized to many trajectories. 20 | Then we solve the generalized eigenvalue problem 21 | 22 | C(tau) r = C(0) r l(tau) 23 | 24 | where the eigenvalues l(tau) approximate the dominant eigenvalues of the Markov propagator 25 | or Markov backward propagator of the underlying dynamics. The corresponding eigenfunction 26 | of the backward propagator is approximated by 27 | 28 | psi(x) = sum_i r_i chi_i(x) 29 | 30 | Package functionalities 31 | ----------------------- 32 | 33 | This package aims at providing code to help addressing a number of key problems: 34 | 35 | 1. Basis sets for molecular dynamics (MD), and in particular protein dynamics. See [5] for a 36 | first approach in this direction. 37 | 38 | 2. Estimators for the correlation matrices C(0), C(tau).
The trivial time-average that is usually 39 | employed has a number of problems especially for many short simulation trajectories that are 40 | initiated far from the equilibrium distribution (the usual case!). 41 | 42 | 3. Solvers for accurately solving the eigenvalue problem above, even for huge basis sets. 43 | 44 | At this time only a few of the above functionalities are implemented and we will go step by step. 45 | This package will undergo heavy development and there is currently no date for an official 46 | release, so don't be surprised if the API (the look + feel of functions and classes) change. 47 | At the moment this package is purely intended for development purposes, so use it at your own 48 | risk. 49 | 50 | Applications 51 | ------------ 52 | 1. The time-lagged independent component analysis (TICA) method originally developed in [6] and 53 | proposed as an optimal data transformation method for building Markov state models of MD 54 | in [3,7] is a VAC with mean-free basis functions. Therefore you can easily implement TICA with 55 | this package. 56 | 57 | 2. By transforming the internal coordinates such as torsion angles or interatomic distances into 58 | suitable basis functions, you can approximate experimentally-measurable relaxation timescales 59 | and determine the corresponding structural rearrangements for peptides and proteins [2,5] 60 | 61 | 3. ... more will follow. 62 | 63 | References 64 | ---------- 65 | [1] Noe, F. and Nueske, F. (2013): A variational approach to modeling slow processes in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655. 66 | 67 | [2] Nueske, F., Keller, B., Perez-Hernandez, G., Mey, A.S.J.S. and Noe, F. (2014) Variational Approach to Molecular Kinetics. J. Chem. Theory Comput. 10, 1739-1752. 68 | 69 | [3] Perez-Hernandez, G., Paul, F., Giorgino, T., De Fabritiis, G. and Noe, F. (2013) Identification of slow molecular order parameters for Markov model construction. J. Chem. Phys. 139, 015102. 
70 | 71 | [4] Ritz, W. (1909): Ueber eine neue Methode zur Loesung gewisser Variationsprobleme der mathematischen Physik. J. Reine Angew. Math., 135, 1–61. 72 | 73 | [5] Vitalini, F., Noé, F. and Keller, B. (2015): A basis set for peptides for the variational approach to conformational kinetics. (In review). 74 | 75 | [6] Molgedey, L. and Schuster H. G. (1994): Phys. Rev. Lett. 72, 3634. 76 | 77 | [7] Schwantes, C. R. and Pande, V. S. : J. Chem. Theory Comput. 9, (2013) 78 | -------------------------------------------------------------------------------- /variational/solvers/direct.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as _np 3 | __author__ = 'noe' 4 | 5 | 6 | def sort_by_norm(evals, evecs): 7 | """ 8 | Sorts the eigenvalues and eigenvectors by descending norm of the eigenvalues 9 | 10 | Parameters 11 | ---------- 12 | evals: ndarray(n) 13 | eigenvalues 14 | evecs: ndarray(n,n) 15 | eigenvectors in a column matrix 16 | 17 | Returns 18 | ------- 19 | (evals, evecs) : ndarray(m), ndarray(n,m) 20 | the sorted eigenvalues and eigenvectors 21 | 22 | """ 23 | # norms 24 | evnorms = _np.abs(evals) 25 | # sort 26 | I = _np.argsort(evnorms)[::-1] 27 | # permute 28 | evals2 = evals[I] 29 | evecs2 = evecs[:, I] 30 | # done 31 | return evals2, evecs2 32 | 33 | 34 | def spd_inv_split(W, epsilon=1e-10, method='QR', canonical_signs=False): 35 | """ 36 | Compute :math:`W^{-1} = L L^T` of the symmetric positive-definite matrix :math:`W`. 37 | 38 | by first reducing W to a low-rank approximation that is truly spd. 39 | 40 | Parameters 41 | ---------- 42 | W : ndarray((m,m), dtype=float) 43 | Symmetric positive-definite (spd) matrix. 44 | epsilon : float 45 | Truncation parameter. Eigenvalues with norms smaller than this cutoff will 46 | be removed. 47 | method : str 48 | Method to perform the decomposition of :math:`W` before inverting. 
Options are: 49 | 50 | * 'QR': QR-based robust eigenvalue decomposition of W 51 | * 'schur': Schur decomposition of W 52 | 53 | canonical_signs : boolean, default = False 54 | Fix signs in L, s. t. the largest element of in every column of L is positive. 55 | 56 | Returns 57 | ------- 58 | L : ndarray((n, r)) 59 | Matrix :math:`L` from the decomposition :math:`W^{-1} = L L^T`. 60 | 61 | """ 62 | # check input 63 | assert _np.allclose(W.T, W), 'C0 is not a symmetric matrix' 64 | 65 | if (_np.shape(W)[0] == 1): 66 | L = 1./_np.sqrt(W[0,0]) 67 | else: 68 | if method.lower() == 'qr': 69 | from .eig_qr.eig_qr import eig_qr 70 | s, V = eig_qr(W) 71 | # compute the Eigenvalues of C0 using Schur factorization 72 | elif method.lower() == 'schur': 73 | from scipy.linalg import schur 74 | S, V = schur(W) 75 | s = _np.diag(S) 76 | else: 77 | raise ValueError('method not implemented: ' + method) 78 | 79 | s, V = sort_by_norm(s, V) # sort them 80 | 81 | # determine the cutoff. We know that C0 is an spd matrix, 82 | # so we select the truncation threshold such that everything that is negative vanishes 83 | evmin = _np.min(s) 84 | if evmin < 0: 85 | epsilon = max(epsilon, -evmin + 1e-16) 86 | 87 | # determine effective rank m and perform low-rank approximations. 88 | evnorms = _np.abs(s) 89 | n = _np.shape(evnorms)[0] 90 | m = n - _np.searchsorted(evnorms[::-1], epsilon) 91 | Vm = V[:, 0:m] 92 | sm = s[0:m] 93 | 94 | if canonical_signs: 95 | # enforce canonical eigenvector signs 96 | for j in range(m): 97 | jj = _np.argmax(_np.abs(Vm[:, j])) 98 | Vm[:, j] *= _np.sign(Vm[jj, j]) 99 | 100 | L = _np.dot(Vm, _np.diag(1.0/_np.sqrt(sm))) 101 | 102 | # return split 103 | return L 104 | 105 | 106 | def eig_corr(C0, Ct, epsilon=1e-10, method='QR', sign_maxelement=False): 107 | r""" Solve generalized eigenvalue problem with correlation matrices C0 and Ct 108 | 109 | Numerically robust solution of a generalized Hermitian (symmetric) eigenvalue 110 | problem of the form 111 | 112 | .. 
math:: 113 | \mathbf{C}_t \mathbf{r}_i = \mathbf{C}_0 \mathbf{r}_i l_i 114 | 115 | Computes :math:`m` dominant eigenvalues :math:`l_i` and eigenvectors 116 | :math:`\mathbf{r}_i`, where :math:`m` is the numerical rank of the problem. 117 | This is done by first conducting a Schur decomposition of the symmetric 118 | positive matrix :math:`\mathbf{C}_0`, then truncating its spectrum to 119 | retain only eigenvalues that are numerically greater than zero, then using 120 | this decomposition to define an ordinary eigenvalue Problem for 121 | :math:`\mathbf{C}_t` of size :math:`m`, and then solving this eigenvalue 122 | problem. 123 | 124 | Parameters 125 | ---------- 126 | C0 : ndarray (n,n) 127 | time-instantaneous correlation matrix. Must be symmetric positive definite 128 | Ct : ndarray (n,n) 129 | time-lagged correlation matrix. Must be symmetric 130 | epsilon : float 131 | eigenvalue norm cutoff. Eigenvalues of C0 with norms <= epsilon will be 132 | cut off. The remaining number of Eigenvalues define the size of 133 | the output. 134 | method : str 135 | Method to perform the decomposition of :math:`W` before inverting. Options are: 136 | 137 | * 'QR': QR-based robust eigenvalue decomposition of W 138 | * 'schur': Schur decomposition of W 139 | sign_maxelement : bool 140 | If True, re-scale each eigenvector such that its entry with maximal absolute value 141 | is positive. 142 | 143 | 144 | Returns 145 | ------- 146 | l : ndarray (m) 147 | The first m generalized eigenvalues, sorted by descending norm 148 | R : ndarray (n,m) 149 | The first m generalized eigenvectors, as a column matrix. 
150 | 151 | """ 152 | L = spd_inv_split(C0, epsilon=epsilon, method=method) 153 | Ct_trans = _np.dot(_np.dot(L.T, Ct), L) 154 | 155 | # solve the symmetric eigenvalue problem in the new basis 156 | if _np.allclose(Ct.T, Ct): 157 | from scipy.linalg import eigh 158 | l, R_trans = eigh(Ct_trans) 159 | else: 160 | from scipy.linalg import eig 161 | l, R_trans = eig(Ct_trans) 162 | 163 | # sort eigenpairs 164 | l, R_trans = sort_by_norm(l, R_trans) 165 | 166 | # transform the eigenvectors back to the old basis 167 | R = _np.dot(L, R_trans) 168 | 169 | # Change signs of eigenvectors: 170 | if sign_maxelement: 171 | for j in range(R.shape[1]): 172 | imax = _np.argmax(_np.abs(R[:, j])) 173 | R[:, j] *= _np.sign(R[imax, j]) 174 | 175 | # return result 176 | return l, R 177 | -------------------------------------------------------------------------------- /variational/estimators/tests/benchmark_moments.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | __author__ = 'noe' 4 | 5 | import time 6 | import numpy as np 7 | from .. 
def genS(N):
    """ Generates sparsities given N (number of cols) """
    candidates = [10, 90, 100, 500, 900, 1000, 2000, 5000, 7500, 9000, 10000, 20000, 50000, 75000, 90000]  # non-zero
    return [s for s in candidates if s <= N]


def genX(L, N, n_var=None, const=False):
    """ Random (L, N) data; if n_var is set, only the first n_var columns vary.

    The remaining columns are constant: all ones if const=True, zeros otherwise.
    """
    data = np.random.rand(L, N)  # random data
    if n_var is None:
        return data
    padded = np.ones((L, N)) if const else np.zeros((L, N))
    padded[:, :n_var] = data[:, :n_var]
    return padded


def genY(L, N, n_var=None, const=False):
    """ Like genX, but constant columns are filled with -1 when const=True. """
    data = np.random.rand(L, N)  # random data
    if n_var is None:
        return data
    padded = -np.ones((L, N)) if const else np.zeros((L, N))
    padded[:, :n_var] = data[:, :n_var]
    return padded


def reftime_momentsXX(X, remove_mean=False, nrep=3):
    """ Mean wall-clock time of the naive dense computation of sum and X^T X. """
    begin = time.time()
    for _ in range(nrep):
        col_sum = X.sum(axis=0)  # computation of mean
        if remove_mean:
            X = X - col_sum / float(X.shape[0])
        C_XX_ref = np.dot(X.T, X)  # covariance matrix
    return (time.time() - begin) / float(nrep)


def mytime_momentsXX(X, remove_mean=False, nrep=3):
    """ Mean wall-clock time of moments.moments_XX on the same data. """
    begin = time.time()
    for _ in range(nrep):
        w, s, C_XX = moments.moments_XX(X, remove_mean=remove_mean)
    return (time.time() - begin) / float(nrep)


def reftime_momentsXXXY(X, Y, remove_mean=False, symmetrize=False, nrep=3):
    """ Mean wall-clock time of the naive dense computation of C_XX and C_XY. """
    begin = time.time()
    for _ in range(nrep):
        sum_x = X.sum(axis=0)  # computation of mean
        sum_y = Y.sum(axis=0)  # computation of mean
        if symmetrize:
            sum_x = 0.5 * (sum_x + sum_y)
            sum_y = sum_x
        if remove_mean:
            X = X - sum_x / float(X.shape[0])
            Y = Y - sum_y / float(Y.shape[0])
        if symmetrize:
            C_XX_ref = np.dot(X.T, X) + np.dot(Y.T, Y)
            C_XY = np.dot(X.T, Y)
            C_XY_ref = C_XY + C_XY.T
        else:
            C_XX_ref = np.dot(X.T, X)
            C_XY_ref = np.dot(X.T, Y)
    return (time.time() - begin) / float(nrep)


def mytime_momentsXXXY(X, Y, remove_mean=False, symmetrize=False, nrep=3):
    """ Mean wall-clock time of moments.moments_XXXY on the same data. """
    begin = time.time()
    for _ in range(nrep):
        w, sx, sy, C_XX, C_XY = moments.moments_XXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize)
    return (time.time() - begin) / float(nrep)


def benchmark_moments(L=10000, N=10000, nrep=5, xy=False, remove_mean=False, symmetrize=False, const=False):
    """ Benchmark moments_XX / moments_XXXY against the naive dense reference
    for a range of sparsities and print the resulting timing table. """
    S = genS(N)

    # time for the dense reference calculation
    X = genX(L, N)
    if xy:
        Y = genY(L, N)
        reftime = reftime_momentsXXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize, nrep=nrep)
    else:
        reftime = reftime_momentsXX(X, remove_mean=remove_mean, nrep=nrep)

    # time of the sparse-aware implementation, one measurement per sparsity
    times = np.zeros(len(S))
    for k, s in enumerate(S):
        X = genX(L, N, n_var=s, const=const)
        if xy:
            Y = genY(L, N, n_var=s, const=const)
            times[k] = mytime_momentsXXXY(X, Y, remove_mean=remove_mean, symmetrize=symmetrize, nrep=nrep)
        else:
            times[k] = mytime_momentsXX(X, remove_mean=remove_mean, nrep=nrep)

    # assemble report
    rows = ['L, data points', 'N, dimensions', 'S, nonzeros', 'time trivial', 'time moments_XX', 'speed-up']
    table = np.zeros((6, len(S)))
    table[0, :] = L
    table[1, :] = N
    table[2, :] = S
    table[3, :] = reftime
    table[4, :] = times
    table[5, :] = reftime / times

    # print table
    fname = 'moments_XXXY' if xy else 'moments_XX'
    print(fname + '\tremove_mean = ' + str(remove_mean) + '\tsym = ' + str(symmetrize) + '\tconst = ' + str(const))
    int_row = '\t%i' * table.shape[1]
    flt_row = '\t%.3f' * table.shape[1]
    print(rows[0] + int_row % tuple(table[0]))
    print(rows[1] + int_row % tuple(table[1]))
    print(rows[2] + int_row % tuple(table[2]))
    print(rows[3] + flt_row % tuple(table[3]))
    print(rows[4] + flt_row % tuple(table[4]))
    print(rows[5] + flt_row % tuple(table[5]))
    print()


def main():
    # (xy, remove_mean, symmetrize); every combination runs with const=False and const=True
    cases = [(False, False, False),
             (False, True, False),
             (True, False, False),
             (True, False, True),
             (True, True, False),
             (True, True, True)]
    for L, N, nrep in [(100000, 100, 10), (10000, 1000, 7), (1000, 2000, 5), (250, 5000, 5), (100, 10000, 5)]:
        for xy, remove_mean, symmetrize in cases:
            for const in (False, True):
                benchmark_moments(L=L, N=N, nrep=nrep, xy=xy, remove_mean=remove_mean,
                                  symmetrize=symmetrize, const=const)


if __name__ == "__main__":
    main()
import numpy
import ctypes
cimport numpy

# C kernels from _covartools.c. The _variable_cols_* kernels return a
# completion flag (0 = aborted early because fewer than min_constant constant
# columns remained) — their return value is used below, so they are declared
# int here. NOTE(review): confirm the int return types against _covartools.h;
# they were declared void, which contradicts the `completed == 0` logic.
cdef extern from "_covartools.h":
    int _variable_cols_char(int* cols, char* X, int M, int N, int min_constant)
    int _variable_cols_int(int* cols, int* X, int M, int N, int min_constant)
    int _variable_cols_long(int* cols, long* X, int M, int N, int min_constant)
    int _variable_cols_float(int* cols, float* X, int M, int N, int min_constant)
    int _variable_cols_double(int* cols, double* X, int M, int N, int min_constant)
    int _variable_cols_float_approx(int* cols, float* X, int M, int N, float tol, int min_constant)
    int _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant)
    void _subtract_row_double(double* X, double* row, int M, int N)
    void _subtract_row_float(float* X, float* row, int M, int N)
    void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N)
    void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N)


# ================================================
# Check for constant columns
# ================================================

def variable_cols_char(cols, X, M, N, min_constant=0):
    # thin wrapper: hand raw data pointers to the char (bool) kernel
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    return _variable_cols_char(pcols, pX, M, N, min_constant)

def variable_cols_int(cols, X, M, N, min_constant=0):
    # thin wrapper: hand raw data pointers to the int32 kernel
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    return _variable_cols_int(pcols, pX, M, N, min_constant)

def variable_cols_long(cols, X, M, N, min_constant=0):
    # thin wrapper: hand raw data pointers to the int64 kernel
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    return _variable_cols_long(pcols, pX, M, N, min_constant)

def variable_cols_float(cols, X, M, N, tol=0.0, min_constant=0):
    # exact comparison for tol == 0, tolerance-based comparison otherwise
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    if tol == 0.0:
        return _variable_cols_float(pcols, pX, M, N, min_constant)
    else:
        return _variable_cols_float_approx(pcols, pX, M, N, numpy.float32(tol), min_constant)

def variable_cols_double(cols, X, M, N, tol=0.0, min_constant=0):
    # exact comparison for tol == 0, tolerance-based comparison otherwise
    pcols = numpy.PyArray_DATA(cols)
    pX = numpy.PyArray_DATA(X)
    if tol == 0.0:
        return _variable_cols_double(pcols, pX, M, N, min_constant)
    else:
        return _variable_cols_double_approx(pcols, pX, M, N, tol, min_constant)

def variable_cols(X, tol=0, min_constant=0):
    """ Evaluates which columns are constant (0) or variable (1)

    Parameters
    ----------
    X : ndarray
        Matrix whose columns will be checked for constant or variable.
    tol : float
        Tolerance for float-matrices. When set to 0 only columns with exactly
        equal values will be considered constant. When set to a positive value,
        columns where all elements have absolute differences to the first
        element of that column smaller than tol are considered constant.
    min_constant : int
        Minimal number of constant columns to resume operation. If at one
        point the number of constant columns drops below min_constant, the
        computation will stop and all columns will be assumed to be variable.
        In this case, an all-True array will be returned.

    Returns
    -------
    variable : bool-array
        Array with number of elements equal to the columns. True: column is
        variable / nonconstant. False: column is constant.

    """
    if X is None:
        return None
    M, N = X.shape

    # prepare column array
    cols = numpy.zeros( (N), dtype=ctypes.c_int, order='C' )

    # numpy.bool_ (not the removed numpy.bool alias) keeps this working on numpy >= 1.24
    if X.dtype == numpy.float64:
        completed = variable_cols_double(cols, X, M, N, tol=tol, min_constant=min_constant)
    elif X.dtype == numpy.float32:
        completed = variable_cols_float(cols, X, M, N, tol=tol, min_constant=min_constant)
    elif X.dtype == numpy.int32:
        completed = variable_cols_int(cols, X, M, N, min_constant=min_constant)
    elif X.dtype == numpy.int64:
        completed = variable_cols_long(cols, X, M, N, min_constant=min_constant)
    elif X.dtype == numpy.bool_:
        completed = variable_cols_char(cols, X, M, N, min_constant=min_constant)
    else:
        raise TypeError('unsupported type of X: '+str(X.dtype))

    # if interrupted, return all ones. Otherwise return the variable columns as bool array
    if completed == 0:
        # BUG FIX: was numpy.ones(cols, ...), which used the int array itself
        # as the *shape* argument; we want a length-N all-True array.
        return numpy.ones(N, dtype=numpy.bool_)
    else:
        return numpy.array(cols, dtype=numpy.bool_)

# ================================================
# Row subtraction
# ================================================

def subtract_row_float(X, row, M, N):
    # in-place: X[i, :] -= row for float32 data
    prow = numpy.PyArray_DATA(row)
    pX = numpy.PyArray_DATA(X)
    _subtract_row_float(pX, prow, M, N)

def subtract_row_double(X, row, M, N):
    # in-place: X[i, :] -= row for float64 data
    prow = numpy.PyArray_DATA(row)
    pX = numpy.PyArray_DATA(X)
    _subtract_row_double(pX, prow, M, N)

def subtract_row_double_copy(X, row, M, N):
    # out-of-place: returns X - row as a new float64 array, X is untouched
    X0 = numpy.zeros( X.shape, dtype=ctypes.c_double, order='C' )
    pX0 = numpy.PyArray_DATA(X0)
    pX = numpy.PyArray_DATA(X)
    prow = numpy.PyArray_DATA(row)
    _subtract_row_double_copy(pX0, pX, prow, M, N)
    return X0

def subtract_row_float_copy(X, row, M, N):
    # out-of-place: returns X - row as a new float32 array, X is untouched.
    # BUG FIX: the output buffer was allocated as c_double, which does not
    # match the float* signature of _subtract_row_float_copy.
    X0 = numpy.zeros( X.shape, dtype=ctypes.c_float, order='C' )
    pX0 = numpy.PyArray_DATA(X0)
    pX = numpy.PyArray_DATA(X)
    prow = numpy.PyArray_DATA(row)
    _subtract_row_float_copy(pX0, pX, prow, M, N)
    return X0


def subtract_row(X, row, inplace=False):
    """ Subtracts given row from each row of array

    Parameters
    ----------
    X : ndarray (M, N)
        Matrix whose rows will be shifted.
    row : ndarray (N)
        Row vector that will be subtracted from each row of X.
    inplace : bool
        True: X will be changed. False: A copy of X will be created and X will remain unchanged.

    Returns
    -------
    X0 : ndarray (M, N)
        The row-shifted data

    """
    M, N = X.shape

    if X.dtype == numpy.float64 and row.dtype == numpy.float64:
        if inplace:
            subtract_row_double(X, row, M, N)
        else:
            X = subtract_row_double_copy(X, row, M, N)
    elif X.dtype == numpy.float32 and row.dtype == numpy.float32:
        if inplace:
            subtract_row_float(X, row, M, N)
        else:
            X = subtract_row_float_copy(X, row, M, N)
    else:
        raise TypeError('unsupported or inconsistent types: '+str(X.dtype)+' '+str(row.dtype))

    return X
9 | 10 | Roughly, the idea of the VAC is as follows: Given a (classical) 11 | molecular dynamics trajectory with configurations {x_1, ..., x_T}, and a 12 | set of basis functions defined on the space of configurations {chi_1(x), ..., chi_n(x)}, 13 | we compute the two correlation matrices: 14 | 15 | c_ij (0) = < chi_i(x_t) chi_j(x_t) >_t 16 | c_ij (tau) = < chi_i(x_t) chi_j(x_t+tau) >_t 17 | 18 | where < . >_t is average over time t. Of course this can be generalized to many trajectories. 19 | Then we solve the generalized eigenvalue problem 20 | 21 | C(tau) r = C(0) r l(tau) 22 | 23 | where the eigenvalues l(tau) approximate the dominant eigenvalues of the Markov propagator 24 | or Markov backward propagator of the underlying dynamics. The corresponding eigenfunction 25 | of the backward propagator is approximated by 26 | 27 | psi(x) = sum_i r_i chi_i(x) 28 | 29 | Package functionalities 30 | ----------------------- 31 | 32 | This package aims at providing code to help addressing a number of key problems: 33 | 34 | 1. Basis sets for molecular dynamics (MD), and in particular protein dynamics. See [5] for a 35 | first approach in this direction. 36 | 37 | 2. Estimators for the corration matrices C(0), C(tau). The trivial time-average that is usually 38 | employed has a number of problems especially for many short simulation trajectories that are 39 | initiated far from the equilibrium distribution (the usual case!). 40 | 41 | 3. Solvers for accurately solving the eigenvalue problem above, even for huge basis sets. 42 | 43 | At this time only a few of the above functionalities are implemented and we will go step by step. 44 | This package will undergo heavy development and there is currently no date for an official 45 | release, so don't be surprised if the API (the look + feel of functions and classes) change. 46 | At the moment this package is purely intended for development purposes, so use it at your own 47 | risk. 
48 | 49 | Applications 50 | ------------ 51 | 1. The time-lagged independent component analysis (TICA) method originally developed in [6] and 52 | proposed as an optimal data transformation method for building Markov state models of MD 53 | in [3,7] is a VAC with mean-free basis functions. Therefore you can easily implement TICA with 54 | this package. 55 | 56 | 2. By transforming the internal coordinates such as torsion angles or interatomic distances into 57 | suitable basis functions, you can approximate experimentally-measurable relaxation timescales 58 | and determine the corresponding structural rearrangements for peptides and proteins [2,5] 59 | 60 | 3. ... more will follow. 61 | 62 | References 63 | ---------- 64 | [1] Noe, F. and Nueske, F. (2013): A variational approach to modeling slow processes 65 | in stochastic dynamical systems. SIAM Multiscale Model. Simul. 11, 635-655. 66 | 67 | [2] Nueske, F., Keller, B., Perez-Hernandez, G., Mey, A.S.J.S. and Noe, F. (2014) 68 | Variational Approach to Molecular Kinetics. J. Chem. Theory Comput. 10, 1739-1752. 69 | 70 | [3] Perez-Hernandez, G., Paul, F., Giorgino, T., De Fabritiis, G. and Noe, F. (2013) 71 | Identification of slow molecular order parameters for Markov model construction. 72 | J. Chem. Phys. 139, 015102. 73 | 74 | [4] Ritz, W. (1909): Ueber eine neue Methode zur Loesung gewisser 75 | Variationsprobleme der mathematischen Physik. J. Reine Angew. Math., 135, 1-61. 76 | 77 | [5] Vitalini, F., Noe, F. and Keller, B. (2015): A basis set for peptides for the 78 | variational approach to conformational kinetics. (In review). 79 | 80 | [6] Molgedey, L. and Schuster H. G. (1994): Phys. Rev. Lett. 72, 3634. 81 | 82 | [7] Schwantes, C. R. and Pande, V. S. (2000): J. Chem. Theory Comput. 
9, 2000 83 | 84 | """ 85 | from __future__ import print_function 86 | import os 87 | import versioneer 88 | from setuptools import setup, Extension, find_packages 89 | from os.path import relpath, join 90 | 91 | DOCLINES = __doc__.split("\n") 92 | 93 | CLASSIFIERS = """\ 94 | Development Status :: 3 - Alpha 95 | Intended Audience :: Science/Research 96 | Intended Audience :: Developers 97 | License :: OSI Approved :: Open BSD clause 2 (OpenBSD) 98 | Programming Language :: Python 99 | Topic :: Scientific/Engineering :: Bio-Informatics 100 | Topic :: Scientific/Engineering :: Chemistry 101 | Topic :: Scientific/Engineering :: Physics 102 | Operating System :: Microsoft :: Windows 103 | Operating System :: POSIX 104 | Operating System :: Unix 105 | Operating System :: MacOS 106 | """ 107 | 108 | ################################################################################ 109 | # USEFUL SUBROUTINES 110 | ################################################################################ 111 | 112 | def find_package_data(data_root, package_root): 113 | files = [] 114 | for root, dirnames, filenames in os.walk(data_root): 115 | for fn in filenames: 116 | files.append(relpath(join(root, fn), package_root)) 117 | return files 118 | 119 | ################################################################################ 120 | # EXTENSIONS 121 | ################################################################################ 122 | 123 | def extensions(): 124 | from numpy import get_include as np_inc 125 | from scipy import get_include as sc_inc 126 | np_inc = np_inc() 127 | sc_inc = sc_inc() 128 | from Cython.Build import cythonize 129 | exts = [Extension('variational.estimators.covar_c.covartools', 130 | sources = ['./variational/estimators/covar_c/covartools.pyx', 131 | './variational/estimators/covar_c/_covartools.c'], 132 | include_dirs = ['./variational/estimators/covar_c/', np_inc], 133 | extra_compile_args=['-std=c99','-O3']), 134 | 
Extension('variational.solvers.eig_qr.eig_qr', 135 | sources=['./variational/solvers/eig_qr/eig_qr.pyx'], 136 | include_dirs=['./variational/solvers/eig_qr/', np_inc, sc_inc], 137 | extra_compile_args=['-std=c99','-O3']) 138 | ] 139 | return cythonize(exts) 140 | 141 | 142 | class lazy_cythonize(list): 143 | """evaluates extension list lazyly. 144 | pattern taken from http://tinyurl.com/qb8478q""" 145 | def __init__(self, callback): 146 | self._list, self.callback = None, callback 147 | def c_list(self): 148 | if self._list is None: self._list = self.callback() 149 | return self._list 150 | def __iter__(self): 151 | for e in self.c_list(): yield e 152 | def __getitem__(self, ii): return self.c_list()[ii] 153 | def __len__(self): return len(self.c_list()) 154 | 155 | ################################################################################ 156 | # SETUP 157 | ################################################################################ 158 | 159 | metadata=dict( 160 | name = 'variational', 161 | author = 'Frank Noe, Fabian Paul and Feliks Nueske', 162 | author_email = 'frank.noe@fu-berlin.de', 163 | description = DOCLINES[0], 164 | long_description = "\n".join(DOCLINES[2:]), 165 | version=versioneer.get_version(), 166 | cmdclass=versioneer.get_cmdclass(), 167 | license='OpenBSD', 168 | url='https://github.com/markovmodel/variational', 169 | platforms=['Linux', 'Mac OS-X', 'Unix', 'Windows'], 170 | classifiers=CLASSIFIERS.splitlines(), 171 | #package_dir={'variational': 'variational'}, 172 | packages=find_packages(), 173 | package_data={'variational.basisset':['ResiduesEigenvectors/*'] 174 | }, 175 | zip_safe=False, 176 | install_requires=[ 177 | 'numpy', 178 | 'scipy', 179 | 'six', 180 | ], 181 | setup_requires=[ 182 | 'cython>=0.24', 183 | 'numpy', 184 | ], 185 | ext_modules=lazy_cythonize(extensions), 186 | ) 187 | 188 | setup(**metadata) 189 | -------------------------------------------------------------------------------- /docs/Interface.lyx: 
-------------------------------------------------------------------------------- 1 | #LyX 2.1 created this file. For more info see http://www.lyx.org/ 2 | \lyxformat 474 3 | \begin_document 4 | \begin_header 5 | \textclass article 6 | \use_default_options true 7 | \maintain_unincluded_children false 8 | \language english 9 | \language_package default 10 | \inputencoding auto 11 | \fontencoding global 12 | \font_roman default 13 | \font_sans default 14 | \font_typewriter default 15 | \font_math auto 16 | \font_default_family default 17 | \use_non_tex_fonts false 18 | \font_sc false 19 | \font_osf false 20 | \font_sf_scale 100 21 | \font_tt_scale 100 22 | \graphics default 23 | \default_output_format default 24 | \output_sync 0 25 | \bibtex_command default 26 | \index_command default 27 | \paperfontsize default 28 | \use_hyperref false 29 | \papersize default 30 | \use_geometry false 31 | \use_package amsmath 1 32 | \use_package amssymb 1 33 | \use_package cancel 1 34 | \use_package esint 1 35 | \use_package mathdots 1 36 | \use_package mathtools 1 37 | \use_package mhchem 1 38 | \use_package stackrel 1 39 | \use_package stmaryrd 1 40 | \use_package undertilde 1 41 | \cite_engine basic 42 | \cite_engine_type default 43 | \biblio_style plain 44 | \use_bibtopic false 45 | \use_indices false 46 | \paperorientation portrait 47 | \suppress_date false 48 | \justification true 49 | \use_refstyle 1 50 | \index Index 51 | \shortcut idx 52 | \color #008000 53 | \end_index 54 | \secnumdepth 3 55 | \tocdepth 3 56 | \paragraph_separation indent 57 | \paragraph_indentation default 58 | \quotes_language english 59 | \papercolumns 1 60 | \papersides 1 61 | \paperpagestyle default 62 | \tracking_changes false 63 | \output_changes false 64 | \html_math_output 0 65 | \html_css_as_file 0 66 | \html_be_strict false 67 | \end_header 68 | 69 | \begin_body 70 | 71 | \begin_layout Title 72 | Interface for Variational Package 73 | \end_layout 74 | 75 | \begin_layout Standard 76 | Here, we 
briefly sketch the interface for all functions to appear in the 77 | variational package. 78 | The package consists of three main modules: A library of basis sets, estimators 79 | for the correlation matrices, and a solver for the resulting generalized 80 | eigenvalue problem. 81 | \end_layout 82 | 83 | \begin_layout Enumerate 84 | The basis sets library contains functions to evaluate specific classes of 85 | basis functions. 86 | Examples for these classes are Gaussian basis functions, Fourier waves 87 | defined on angles, or the MSM-eigenvector based functions and their products. 88 | The general interface is given by the function SomeBasisSet below. 89 | 90 | \end_layout 91 | 92 | \begin_layout Enumerate 93 | The estimator module contains a function that generates the correlation-matrices 94 | 95 | \begin_inset Formula $\mathbf{C}^{\tau},\,\mathbf{C}^{0}$ 96 | \end_inset 97 | 98 | from the basis function trajectories generated in the first step. 99 | \end_layout 100 | 101 | \begin_layout Enumerate 102 | The solver module contains a function the solve the generalized eigenvalue 103 | problem for the correlation matrices generated before. 104 | We will just use the function eig_corr implemented in pyemma.util.linalg. 105 | \end_layout 106 | 107 | \begin_layout Standard 108 | Below we describe the interfaces for these three modules. 
109 | \end_layout 110 | 111 | \begin_layout Standard 112 | \begin_inset listings 113 | lstparams "language=Python,float,breaklines=true,tabsize=4" 114 | inline false 115 | status open 116 | 117 | \begin_layout Plain Layout 118 | 119 | def SomeBasisSet(list_of_trajectories, prefix, parameters): 120 | \end_layout 121 | 122 | \begin_layout Plain Layout 123 | 124 | """ 125 | \end_layout 126 | 127 | \begin_layout Plain Layout 128 | 129 | Parameters 130 | \end_layout 131 | 132 | \begin_layout Plain Layout 133 | 134 | ---------- 135 | \end_layout 136 | 137 | \begin_layout Plain Layout 138 | 139 | list_of_trajectories: list 140 | \end_layout 141 | 142 | \begin_layout Plain Layout 143 | 144 | List of .npy-files. 145 | Each file contains a feature trajectory, represented as an np-array of 146 | shape (T,N), where T is the number of time-steps in this trajectory and 147 | N is the number of features (distances, angles,...) on which the basis set 148 | is defined. 149 | \end_layout 150 | 151 | \begin_layout Plain Layout 152 | 153 | prefix: string 154 | \end_layout 155 | 156 | \begin_layout Plain Layout 157 | 158 | Common prefix for all files to be produced (see Output). 159 | \end_layout 160 | 161 | \begin_layout Plain Layout 162 | 163 | parameters: 164 | \end_layout 165 | 166 | \begin_layout Plain Layout 167 | 168 | Additional parameters needed for this basis set. 169 | \end_layout 170 | 171 | \begin_layout Plain Layout 172 | 173 | \end_layout 174 | 175 | \begin_layout Plain Layout 176 | 177 | Returns 178 | \end_layout 179 | 180 | \begin_layout Plain Layout 181 | 182 | ------- 183 | \end_layout 184 | 185 | \begin_layout Plain Layout 186 | 187 | Returns a list of lists of filenames where the evaluations of all requested 188 | basis functions can be found. 189 | The files will be called "prefix_trajnum_fctnum.npy", where trajnum is the 190 | trajectory number and ftcnum is the number of the basis function. 191 | Each sublist contains the files for one trajectory. 
192 | \end_layout 193 | 194 | \begin_layout Plain Layout 195 | 196 | ''' 197 | \end_layout 198 | 199 | \end_inset 200 | 201 | 202 | \end_layout 203 | 204 | \begin_layout Standard 205 | \begin_inset listings 206 | lstparams "language=Python,float,breaklines=true,tabsize=4" 207 | inline false 208 | status open 209 | 210 | \begin_layout Plain Layout 211 | 212 | def Estimator(list_of_trajectories, list_of_taus): 213 | \end_layout 214 | 215 | \begin_layout Plain Layout 216 | 217 | """ 218 | \end_layout 219 | 220 | \begin_layout Plain Layout 221 | 222 | Parameters: 223 | \end_layout 224 | 225 | \begin_layout Plain Layout 226 | 227 | ----------- 228 | \end_layout 229 | 230 | \begin_layout Plain Layout 231 | 232 | list_of_trajectories: list 233 | \end_layout 234 | 235 | \begin_layout Plain Layout 236 | 237 | List of list of .npy-files, organized the same way as the output of a basis 238 | set function. 239 | Each sublist contains the files for all basis functions for one specific 240 | trajectory. 241 | \end_layout 242 | 243 | \begin_layout Plain Layout 244 | 245 | list_of_taus: ndarray (ntau,) 246 | \end_layout 247 | 248 | \begin_layout Plain Layout 249 | 250 | The lag-times for which the correlation matrices will be computed. 251 | \end_layout 252 | 253 | \begin_layout Plain Layout 254 | 255 | \end_layout 256 | 257 | \begin_layout Plain Layout 258 | 259 | Returns 260 | \end_layout 261 | 262 | \begin_layout Plain Layout 263 | 264 | ------- 265 | \end_layout 266 | 267 | \begin_layout Plain Layout 268 | 269 | list of correlation matrices. 
270 | \end_layout 271 | 272 | \begin_layout Plain Layout 273 | 274 | """ 275 | \end_layout 276 | 277 | \end_inset 278 | 279 | 280 | \end_layout 281 | 282 | \begin_layout Standard 283 | \begin_inset listings 284 | lstparams "language=Python,float,breaklines=true,tabsize=4" 285 | inline false 286 | status open 287 | 288 | \begin_layout Plain Layout 289 | 290 | def eig_corr(C0, Ct, epsilon=1e-6): 291 | \end_layout 292 | 293 | \begin_layout Plain Layout 294 | 295 | """ Solve the generalized eigenvalues problem with correlation matrices 296 | C0 and Ct 297 | \end_layout 298 | 299 | \begin_layout Plain Layout 300 | 301 | Parameters 302 | \end_layout 303 | 304 | \begin_layout Plain Layout 305 | 306 | ---------- 307 | \end_layout 308 | 309 | \begin_layout Plain Layout 310 | 311 | C0 : ndarray (n,n) 312 | \end_layout 313 | 314 | \begin_layout Plain Layout 315 | 316 | time-instantaneous correlation matrix. 317 | Must be symmetric positive definite 318 | \end_layout 319 | 320 | \begin_layout Plain Layout 321 | 322 | Ct : ndarray (n,n) 323 | \end_layout 324 | 325 | \begin_layout Plain Layout 326 | 327 | time-lagged correlation matrix. 328 | Must be symmetric 329 | \end_layout 330 | 331 | \begin_layout Plain Layout 332 | 333 | epsilon : float 334 | \end_layout 335 | 336 | \begin_layout Plain Layout 337 | 338 | eigenvalue norm cutoff. 339 | Eigenvalues of C0 with norms <= epsilon will be cut off. 340 | The remaining number of Eigenvalues define the size of the output. 
341 | \end_layout 342 | 343 | \begin_layout Plain Layout 344 | 345 | Returns 346 | \end_layout 347 | 348 | \begin_layout Plain Layout 349 | 350 | ------- 351 | \end_layout 352 | 353 | \begin_layout Plain Layout 354 | 355 | l : ndarray (m) 356 | \end_layout 357 | 358 | \begin_layout Plain Layout 359 | 360 | The first m generalized eigenvalues, sorted by descending norm 361 | \end_layout 362 | 363 | \begin_layout Plain Layout 364 | 365 | R : ndarray (n,m) 366 | \end_layout 367 | 368 | \begin_layout Plain Layout 369 | 370 | The first m generalized eigenvectors, as a column matrix. 371 | \end_layout 372 | 373 | \begin_layout Plain Layout 374 | 375 | """ 376 | \end_layout 377 | 378 | \end_inset 379 | 380 | 381 | \end_layout 382 | 383 | \end_body 384 | \end_document 385 | -------------------------------------------------------------------------------- /variational/estimators/tests/test_running_moments.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | import numpy as np 4 | from .. 
import running_moments

__author__ = 'noe'


class TestRunningMoments(unittest.TestCase):
    """Compare RunningCovar's chunked, incremental moment estimates against
    direct single-pass computations on the full data arrays (plain, mean-free,
    symmetrized and weighted variants)."""

    @classmethod
    def setUpClass(cls):
        # Two correlated-by-construction (T, 2) time series.
        cls.X = np.random.rand(10000, 2)
        cls.Y = np.random.rand(10000, 2)
        cls.T = cls.X.shape[0]
        # Chunk size:
        cls.L = 1000
        # Number of chunks. NOTE: integer division — under Python 3,
        # cls.T / cls.L is a float and np.random.rand(cls.nchunks) below
        # would raise TypeError.
        cls.nchunks = cls.T // cls.L
        # Set a lag time for time-lagged tests:
        #cls.lag = 50
        # Weights references:
        cls.weights = np.random.rand(10000)
        # Trajectory weights:
        cls.trajweights = 3*np.random.rand(cls.nchunks)
        # bias the first part so means are clearly nonzero
        cls.X[:2000] += 1.0
        cls.Y[:2000] -= 1.0
        # direct calculation, moments of X and Y
        cls.w = np.shape(cls.X)[0]
        cls.wsym = 2*np.shape(cls.X)[0]
        cls.sx = cls.X.sum(axis=0)
        cls.sy = cls.Y.sum(axis=0)
        cls.Mxx = np.dot(cls.X.T, cls.X)
        cls.Mxy = np.dot(cls.X.T, cls.Y)
        cls.Myy = np.dot(cls.Y.T, cls.Y)
        cls.mx = cls.sx / float(cls.w)
        cls.my = cls.sy / float(cls.w)
        cls.X0 = cls.X - cls.mx
        cls.Y0 = cls.Y - cls.my
        cls.Mxx0 = np.dot(cls.X0.T, cls.X0)
        cls.Mxy0 = np.dot(cls.X0.T, cls.Y0)
        cls.Myy0 = np.dot(cls.Y0.T, cls.Y0)

        # direct calculation, symmetric moments
        cls.s_sym = cls.sx + cls.sy
        cls.Mxx_sym = np.dot(cls.X.T, cls.X) + np.dot(cls.Y.T, cls.Y)
        cls.Mxy_sym = np.dot(cls.X.T, cls.Y) + np.dot(cls.Y.T, cls.X)
        cls.m_sym = cls.s_sym / float(cls.wsym)
        cls.X0_sym = cls.X - cls.m_sym
        cls.Y0_sym = cls.Y - cls.m_sym
        cls.Mxx0_sym = np.dot(cls.X0_sym.T, cls.X0_sym) + np.dot(cls.Y0_sym.T, cls.Y0_sym)
        cls.Mxy0_sym = np.dot(cls.X0_sym.T, cls.Y0_sym) + np.dot(cls.Y0_sym.T, cls.X0_sym)

        # direct calculation, weighted moments:
        cls.wesum = np.sum(cls.weights)
        cls.sx_w = (cls.weights[:, None] * cls.X).sum(axis=0)
        cls.sy_w = (cls.weights[:, None] * cls.Y).sum(axis=0)
        cls.Mxx_w = np.dot((cls.weights[:, None] * cls.X).T, cls.X)
        cls.Mxy_w = np.dot((cls.weights[:, None] * cls.X).T, cls.Y)
        cls.mx_w = cls.sx_w / float(cls.wesum)
        cls.my_w = cls.sy_w / float(cls.wesum)
        cls.X0_w = cls.X - cls.mx_w
        cls.Y0_w = cls.Y - cls.my_w
        cls.Mxx0_w = np.dot((cls.weights[:, None] * cls.X0_w).T, cls.X0_w)
        cls.Mxy0_w = np.dot((cls.weights[:, None] * cls.X0_w).T, cls.Y0_w)
        # direct calculation, weighted symmetric moments
        cls.s_sym_w = cls.sx_w + cls.sy_w
        cls.Mxx_sym_w = np.dot((cls.weights[:, None] * cls.X).T, cls.X) + np.dot((cls.weights[:, None] * cls.Y).T, cls.Y)
        cls.Mxy_sym_w = np.dot((cls.weights[:, None] * cls.X).T, cls.Y) + np.dot((cls.weights[:, None] * cls.Y).T, cls.X)
        cls.m_sym_w = cls.s_sym_w / float(2 * cls.wesum)
        cls.X0_sym_w = cls.X - cls.m_sym_w
        cls.Y0_sym_w = cls.Y - cls.m_sym_w
        cls.Mxx0_sym_w = np.dot((cls.weights[:, None] * cls.X0_sym_w).T, cls.X0_sym_w) + np.dot((cls.weights[:, None] * cls.Y0_sym_w).T, cls.Y0_sym_w)
        cls.Mxy0_sym_w = np.dot((cls.weights[:, None] * cls.X0_sym_w).T, cls.Y0_sym_w) + np.dot((cls.weights[:, None] * cls.Y0_sym_w).T, cls.X0_sym_w)
        # NOTE: no return value — unittest ignores what setUpClass returns;
        # the previous 'return cls' was dead code.

    def test_XX_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(remove_mean=False)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L])
        assert np.allclose(cc.weight_XX(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx)

    def test_XX_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(remove_mean=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L])
        assert np.allclose(cc.weight_XX(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx0)

    def test_XXXY_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx)
        assert np.allclose(cc.moments_XY(), self.Mxy)

    def test_XXXY_meanfree(self):
        # many passes (use self.T / self.L like every other test here,
        # instead of a redundant local chunk size)
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), self.T)
        assert np.allclose(cc.sum_X(), self.sx)
        assert np.allclose(cc.moments_XX(), self.Mxx0)
        assert np.allclose(cc.moments_XY(), self.Mxy0)

    def test_XXXY_weighted_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False)
        for i in range(0, self.T, self.L):
            iX = self.X[i:i+self.L, :]
            iY = self.Y[i:i+self.L, :]
            iwe = self.weights[i:i+self.L]
            cc.add(iX, iY, weights=iwe)
        assert np.allclose(cc.weight_XY(), self.wesum)
        assert np.allclose(cc.sum_X(), self.sx_w)
        assert np.allclose(cc.moments_XX(), self.Mxx_w)
        assert np.allclose(cc.moments_XY(), self.Mxy_w)

    def test_XXXY_weighted_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True)
        for i in range(0, self.T, self.L):
            iX = self.X[i:i+self.L, :]
            iY = self.Y[i:i+self.L, :]
            iwe = self.weights[i:i+self.L]
            cc.add(iX, iY, weights=iwe)
        assert np.allclose(cc.weight_XY(), self.wesum)
        assert np.allclose(cc.sum_X(), self.sx_w)
        assert np.allclose(cc.moments_XX(), self.Mxx0_w)
        assert np.allclose(cc.moments_XY(), self.Mxy0_w)

    def test_XXXY_sym_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False, symmetrize=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), 2*self.T)
        assert np.allclose(cc.sum_X(), self.s_sym)
        assert np.allclose(cc.moments_XX(), self.Mxx_sym)
        assert np.allclose(cc.moments_XY(), self.Mxy_sym)

    def test_XXXY_sym_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True, symmetrize=True)
        for i in range(0, self.T, self.L):
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L])
        assert np.allclose(cc.weight_XY(), 2*self.T)
        assert np.allclose(cc.sum_X(), self.s_sym)
        assert np.allclose(cc.moments_XX(), self.Mxx0_sym)
        assert np.allclose(cc.moments_XY(), self.Mxy0_sym)

    def test_XXXY_weighted_sym_withmean(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=False, symmetrize=True)
        for i in range(0, self.T, self.L):
            iwe = self.weights[i:i+self.L]
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L], weights=iwe)
        assert np.allclose(cc.weight_XY(), 2 * self.wesum)
        assert np.allclose(cc.sum_X(), self.s_sym_w)
        assert np.allclose(cc.moments_XX(), self.Mxx_sym_w)
        assert np.allclose(cc.moments_XY(), self.Mxy_sym_w)

    def test_XXXY_weighted_sym_meanfree(self):
        # many passes
        cc = running_moments.RunningCovar(compute_XX=True, compute_XY=True, remove_mean=True, symmetrize=True)
        for i in range(0, self.T, self.L):
            iwe = self.weights[i:i+self.L]
            cc.add(self.X[i:i+self.L], self.Y[i:i+self.L], weights=iwe)
        assert np.allclose(cc.weight_XY(), 2*self.wesum)
        assert np.allclose(cc.sum_X(), self.s_sym_w)
        assert np.allclose(cc.moments_XX(), self.Mxx0_sym_w)
        assert np.allclose(cc.moments_XY(), self.Mxy0_sym_w)

if __name__ == "__main__":
    unittest.main()
-------------------------------------------------------------------------------- /variational/estimators/covar_c/_covartools.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /** Subtracts given row vector from each row of the matrix X 5 | 6 | @param X : (M, N) array 7 | @param row : (N) array 8 | @param M : int 9 | @param N : int 10 | 11 | */ 12 | void _subtract_row_double(double* X, double* row, int M, int N) 13 | { 14 | int i, j, ro; 15 | for (i=0; i!=M; ++i) 16 | { 17 | ro = i*N; 18 | for (j=0; j!=N; ++j) 19 | { 20 | X[ro + j] -= row[j]; 21 | } 22 | } 23 | } 24 | 25 | /** see above */ 26 | void _subtract_row_float(float* X, float* row, int M, int N) 27 | { 28 | int i, j, ro; 29 | for (i=0; i!=M; ++i) 30 | { 31 | ro = i*N; 32 | for (j=0; j!=N; ++j) 33 | { 34 | X[ro + j] -= row[j]; 35 | } 36 | } 37 | } 38 | 39 | void _subtract_row_double_copy(double* X0, double* X, double* row, int M, int N) 40 | { 41 | int i, j, ro; 42 | for (i=0; i!=M; ++i) 43 | { 44 | ro = i*N; 45 | for (j=0; j!=N; ++j) 46 | { 47 | X0[ro + j] = X[ro + j] - row[j]; 48 | } 49 | } 50 | } 51 | 52 | void _subtract_row_float_copy(float* X0, float* X, float* row, int M, int N) 53 | { 54 | int i, j, ro; 55 | for (i=0; i!=M; ++i) 56 | { 57 | ro = i*N; 58 | for (j=0; j!=N; ++j) 59 | { 60 | X0[ro + j] = X[ro + j] - row[j]; 61 | } 62 | } 63 | } 64 | 65 | 66 | int* _bool_to_list(int* b, int N, int nnz) 67 | { 68 | int i; 69 | int k=0; 70 | int* list = (int*)malloc(nnz*sizeof(int)); 71 | for (i=0; i tol || -diff > tol) 303 | { 304 | if (cols[j] == 0) 305 | { 306 | cols[j] = 1; 307 | nconstant--; 308 | // are constant columns below threshold? Then interrupt. 309 | if (nconstant < min_constant) 310 | return 0; 311 | // do we have 0 constant columns? Then we can stop regularly. 
312 | if (nconstant == 0) 313 | return 1; 314 | } 315 | } 316 | } 317 | } 318 | 319 | return 1; 320 | } 321 | 322 | /** see above */ 323 | int _variable_cols_double_approx(int* cols, double* X, int M, int N, double tol, int min_constant) 324 | { 325 | // compare first and last row to get constant candidates 326 | int i,j; 327 | int ro = (M-1)*N; 328 | double diff; 329 | int nconstant = N; // current number of constant columns 330 | 331 | // by default all 0 (constant) 332 | for (j=0; j tol || -diff > tol) 343 | { 344 | if (cols[j] == 0) 345 | { 346 | cols[j] = 1; 347 | nconstant--; 348 | // are constant columns below threshold? Then interrupt. 349 | if (nconstant < min_constant) 350 | return 0; 351 | // do we have 0 constant columns? Then we can stop regularly. 352 | if (nconstant == 0) 353 | return 1; 354 | } 355 | } 356 | } 357 | } 358 | 359 | return 1; 360 | } 361 | -------------------------------------------------------------------------------- /variational/estimators/running_moments.py: -------------------------------------------------------------------------------- 1 | __author__ = 'noe' 2 | 3 | import warnings 4 | import numbers 5 | import numpy as np 6 | from .moments import moments_XX, moments_XXXY, moments_block 7 | 8 | 9 | class Moments(object): 10 | 11 | def __init__(self, w, sx, sy, Mxy): 12 | """ 13 | Parameters 14 | ---------- 15 | w : float 16 | statistical weight. 17 | w = \sum_t w_t 18 | In most cases, :math:`w_t=1`, and then w is just the number of samples that went into s1, S2. 19 | s : ndarray(n,) 20 | sum over samples: 21 | .. math: 22 | s = \sum_t w_t x_t 23 | M : ndarray(n, n) 24 | .. 
math: 25 | M = (X-s)^T (X-s) 26 | """ 27 | self.w = float(w) 28 | self.sx = sx 29 | self.sy = sy 30 | self.Mxy = Mxy 31 | 32 | def copy(self): 33 | return Moments(self.w, self.sx.copy(), self.sy.copy(), self.Mxy.copy()) 34 | 35 | def combine(self, other, mean_free=False): 36 | """ 37 | References 38 | ---------- 39 | [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf 40 | """ 41 | w1 = self.w 42 | w2 = other.w 43 | w = w1 + w2 44 | dsx = (w2/w1) * self.sx - other.sx 45 | dsy = (w2/w1) * self.sy - other.sy 46 | # update 47 | self.w = w1 + w2 48 | self.sx = self.sx + other.sx 49 | self.sy = self.sy + other.sy 50 | # 51 | if mean_free: 52 | self.Mxy += other.Mxy + (w1 / (w2 * w)) * np.outer(dsx, dsy) 53 | else: 54 | self.Mxy += other.Mxy 55 | return self 56 | 57 | @property 58 | def mean_x(self): 59 | return self.sx / self.w 60 | 61 | @property 62 | def mean_y(self): 63 | return self.sy / self.w 64 | 65 | def covar(self, bessels_correction): 66 | """ Returns M / (w-1) 67 | 68 | Careful: The normalization w-1 assumes that we have counts as weights. 69 | 70 | """ 71 | if bessels_correction: 72 | return self.Mxy/ (self.w-1) 73 | else: 74 | return self.Mxy/self.w 75 | 76 | 77 | class MomentsStorage(object): 78 | """ 79 | """ 80 | 81 | def __init__(self, nsave, remove_mean=False, rtol=1.5): 82 | """ 83 | Parameters 84 | ---------- 85 | rtol : float 86 | To decide when to merge two Moments. Ideally I'd like to merge two 87 | Moments when they have equal weights (i.e. equally many data points 88 | went into them). If we always add data chunks with equal weights, 89 | this can be achieved by using a binary tree, i.e. let M1 be the 90 | moment estimates from one chunk. Two of them are added to M2, Two 91 | M2 are added to M4, and so on. This way you need to store log2 92 | (n_chunks) number of Moment estimates. 93 | In practice you might get data in chunks of unequal length or weight. 
94 | Therefore we need some heuristic when two Moment estimates should get 95 | merged. This is the role of rtol. 96 | 97 | """ 98 | self.nsave = nsave 99 | self.storage = [] 100 | self.rtol = rtol 101 | self.remove_mean = remove_mean 102 | 103 | def _can_merge_tail(self): 104 | """ Checks if the two last list elements can be merged 105 | """ 106 | if len(self.storage) < 2: 107 | return False 108 | return self.storage[-2].w <= self.storage[-1].w * self.rtol 109 | 110 | def store(self, moments): 111 | """ Store object X with weight w 112 | """ 113 | if len(self.storage) == self.nsave: # merge if we must 114 | # print 'must merge' 115 | self.storage[-1].combine(moments, mean_free=self.remove_mean) 116 | else: # append otherwise 117 | # print 'append' 118 | self.storage.append(moments) 119 | # merge if possible 120 | while self._can_merge_tail(): 121 | # print 'merge: ',self.storage 122 | M = self.storage.pop() 123 | # print 'pop last: ',self.storage 124 | self.storage[-1].combine(M, mean_free=self.remove_mean) 125 | # print 'merged: ',self.storage 126 | 127 | @property 128 | def moments(self): 129 | """ 130 | """ 131 | # collapse storage if necessary 132 | while len(self.storage) > 1: 133 | # print 'collapse' 134 | M = self.storage.pop() 135 | self.storage[-1].combine(M, mean_free=self.remove_mean) 136 | # print 'return first element' 137 | return self.storage[0] 138 | 139 | 140 | class RunningCovar(object): 141 | """ Running covariance estimator 142 | 143 | Estimator object that can be fed chunks of X and Y data, and 144 | that can generate on-the-fly estimates of mean, covariance, running sum 145 | and second moment matrix. 
146 | 147 | Parameters 148 | ---------- 149 | compute_XX : bool 150 | Estimate the covariance of X 151 | compute_XY : bool 152 | Estimate the cross-covariance of X and Y 153 | compute_YY : bool 154 | Estimate the covariance of Y 155 | remove_mean : bool 156 | Remove the data mean in the covariance estimation 157 | symmetrize : bool 158 | Use symmetric estimates with sum defined by sum_t x_t + y_t and 159 | second moment matrices defined by X'X + Y'Y and Y'X + X'Y. 160 | modify_data : bool 161 | If remove_mean=True, the mean will be removed in the input data, 162 | without creating an independent copy. This option is faster but should 163 | only be selected if the input data is not used elsewhere. 164 | sparse_mode : str 165 | one of: 166 | * 'dense' : always use dense mode 167 | * 'sparse' : always use sparse mode if possible 168 | * 'auto' : automatic 169 | nsave : int 170 | Depth of Moment storage. Moments computed from each chunk will be 171 | combined with Moments of similar statistical weight using the pairwise 172 | combination algorithm described in [1]_. 173 | 174 | References 175 | ---------- 176 | .. [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf 177 | 178 | """ 179 | 180 | # to get the Y mean, but this is currently not stored. 
181 | def __init__(self, compute_XX=True, compute_XY=False, compute_YY=False, 182 | remove_mean=False, symmetrize=False, sparse_mode='auto', modify_data=False, nsave=5): 183 | # check input 184 | if not compute_XX and not compute_XY: 185 | raise ValueError('One of compute_XX or compute_XY must be True.') 186 | if symmetrize and compute_YY: 187 | raise ValueError('Combining compute_YY and symmetrize=True is meaningless.') 188 | if symmetrize and not compute_XY: 189 | warnings.warn('symmetrize=True has no effect with compute_XY=False.') 190 | # storage 191 | self.compute_XX = compute_XX 192 | if compute_XX: 193 | self.storage_XX = MomentsStorage(nsave, remove_mean=remove_mean) 194 | self.compute_XY = compute_XY 195 | if compute_XY: 196 | self.storage_XY = MomentsStorage(nsave, remove_mean=remove_mean) 197 | self.compute_YY = compute_YY 198 | if compute_YY: 199 | self.storage_YY = MomentsStorage(nsave, remove_mean=remove_mean) 200 | # symmetry 201 | self.remove_mean = remove_mean 202 | self.symmetrize = symmetrize 203 | # flags 204 | self.sparse_mode = sparse_mode 205 | self.modify_data = modify_data 206 | 207 | def add(self, X, Y=None, weights=None): 208 | """ 209 | Add trajectory to estimate. 210 | 211 | Parameters 212 | ---------- 213 | X : ndarray(T, N) 214 | array of N time series. 215 | Y : ndarray(T, N) 216 | array of N time series, usually time shifted version of X. 217 | weights : None or float or ndarray(T, ): 218 | weights assigned to each trajectory point. If None, all data points have weight one. If float, 219 | the same weight will be given to all data points. If ndarray, each data point is assigned a separate 220 | weight. 
221 | 222 | """ 223 | 224 | # check input 225 | T = X.shape[0] 226 | if Y is not None: 227 | assert Y.shape[0] == T, 'X and Y must have equal length' 228 | # Weights cannot be used for compute_YY: 229 | if weights is not None and self.compute_YY: 230 | raise ValueError('Use of weights is not implemented for compute_YY==True') 231 | if weights is not None: 232 | # Convert to array of length T if weights is a single number: 233 | if isinstance(weights, numbers.Real): 234 | weights = weights * np.ones(T, dtype=float) 235 | # Check appropriate length if weights is an array: 236 | elif isinstance(weights, np.ndarray): 237 | assert weights.shape[0] == T, 'weights and X must have equal length' 238 | else: 239 | raise TypeError('weights is of type %s, must be a number or ndarray'%(type(weights))) 240 | # estimate and add to storage 241 | if self.compute_XX and not self.compute_XY: 242 | w, s_X, C_XX = moments_XX(X, remove_mean=self.remove_mean, weights=weights, sparse_mode=self.sparse_mode, modify_data=self.modify_data) 243 | self.storage_XX.store(Moments(w, s_X, s_X, C_XX)) 244 | elif self.compute_XX and self.compute_XY: 245 | assert Y is not None 246 | w, s_X, s_Y, C_XX, C_XY = moments_XXXY(X, Y, remove_mean=self.remove_mean, symmetrize=self.symmetrize, 247 | weights=weights, sparse_mode=self.sparse_mode, modify_data=self.modify_data) 248 | # make copy in order to get independently mergeable moments 249 | self.storage_XX.store(Moments(w, s_X, s_X, C_XX)) 250 | self.storage_XY.store(Moments(w, s_X, s_Y, C_XY)) 251 | else: # compute block 252 | assert Y is not None 253 | assert not self.symmetrize 254 | w, s, C = moments_block(X, Y, remove_mean=self.remove_mean, 255 | sparse_mode=self.sparse_mode, modify_data=self.modify_data) 256 | # make copy in order to get independently mergeable moments 257 | self.storage_XX.store(Moments(w, s[0], s[0], C[0, 0])) 258 | self.storage_XY.store(Moments(w, s[0], s[1], C[0, 1])) 259 | self.storage_YY.store(Moments(w, s[1], s[1], C[1, 1])) 
260 | 261 | def sum_X(self): 262 | if self.compute_XX: 263 | return self.storage_XX.moments.sx 264 | elif self.compute_XY: 265 | return self.storage_XY.moments.sx 266 | else: 267 | raise RuntimeError('sum_X is not available') 268 | 269 | def sum_Y(self): 270 | if self.compute_XY: 271 | return self.storage_XY.moments.sy 272 | elif self.compute_YY: 273 | return self.storage_YY.moments.sy 274 | else: 275 | raise RuntimeError('sum_Y is not available') 276 | 277 | def mean_X(self): 278 | if self.compute_XX: 279 | return self.storage_XX.moments.mean_x 280 | elif self.compute_XY: 281 | return self.storage_XY.moments.mean_y 282 | else: 283 | raise RuntimeError('mean_X is not available') 284 | 285 | def mean_Y(self): 286 | if self.compute_XY: 287 | return self.storage_XY.moments.mean_y 288 | elif self.compute_YY: 289 | return self.storage_YY.moments.mean_y 290 | else: 291 | raise RuntimeError('mean_Y is not available') 292 | 293 | def weight_XX(self): 294 | return self.storage_XX.moments.w 295 | 296 | def weight_XY(self): 297 | return self.storage_XY.moments.w 298 | 299 | def weight_YY(self): 300 | return self.storage_YY.moments.w 301 | 302 | def moments_XX(self): 303 | return self.storage_XX.moments.Mxy 304 | 305 | def moments_XY(self): 306 | return self.storage_XY.moments.Mxy 307 | 308 | def moments_YY(self): 309 | return self.storage_YY.moments.Mxy 310 | 311 | def cov_XX(self, bessels_correction): 312 | return self.storage_XX.moments.covar(bessels_correction=bessels_correction) 313 | 314 | def cov_XY(self, bessels_correction): 315 | return self.storage_XY.moments.covar(bessels_correction=bessels_correction) 316 | 317 | def cov_YY(self, bessels_correction): 318 | return self.storage_YY.moments.covar(bessels_correction=bessels_correction) 319 | 320 | 321 | def running_covar(xx=True, xy=False, yy=False, remove_mean=False, symmetrize=False, sparse_mode='auto', 322 | modify_data=False, nsave=5): 323 | """ Returns a running covariance estimator 324 | 325 | Returns an estimator 
object that can be fed chunks of X and Y data, and 326 | that can generate on-the-fly estimates of mean, covariance, running sum 327 | and second moment matrix. 328 | 329 | Parameters 330 | ---------- 331 | xx : bool 332 | Estimate the covariance of X 333 | xy : bool 334 | Estimate the cross-covariance of X and Y 335 | yy : bool 336 | Estimate the covariance of Y 337 | remove_mean : bool 338 | Remove the data mean in the covariance estimation 339 | symmetrize : bool 340 | Use symmetric estimates with sum defined by sum_t x_t + y_t and 341 | second moment matrices defined by X'X + Y'Y and Y'X + X'Y. 342 | modify_data : bool 343 | If remove_mean=True, the mean will be removed in the input data, 344 | without creating an independent copy. This option is faster but should 345 | only be selected if the input data is not used elsewhere. 346 | sparse_mode : str 347 | one of: 348 | * 'dense' : always use dense mode 349 | * 'sparse' : always use sparse mode if possible 350 | * 'auto' : automatic 351 | nsave : int 352 | Depth of Moment storage. Moments computed from each chunk will be 353 | combined with Moments of similar statistical weight using the pairwise 354 | combination algorithm described in [1]_. 355 | 356 | References 357 | ---------- 358 | .. [1] http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf 359 | 360 | """ 361 | return RunningCovar(compute_XX=xx, compute_XY=xy, compute_YY=yy, sparse_mode=sparse_mode, modify_data=modify_data, 362 | remove_mean=remove_mean, symmetrize=symmetrize, nsave=nsave) 363 | -------------------------------------------------------------------------------- /variational/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). 
Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.15 (https://github.com/warner/python-versioneer) 10 | 11 | import errno 12 | import os 13 | import re 14 | import subprocess 15 | import sys 16 | 17 | 18 | def get_keywords(): 19 | # these strings will be replaced by git during git-archive. 20 | # setup.py/versioneer.py will grep for the variable names, so they must 21 | # each be defined on a line of their own. _version.py will just call 22 | # get_keywords(). 23 | git_refnames = " (HEAD -> master)" 24 | git_full = "491361e8e271df0e28b34549ab32e22546e18ce9" 25 | keywords = {"refnames": git_refnames, "full": git_full} 26 | return keywords 27 | 28 | 29 | class VersioneerConfig: 30 | pass 31 | 32 | 33 | def get_config(): 34 | # these strings are filled in when 'setup.py versioneer' creates 35 | # _version.py 36 | cfg = VersioneerConfig() 37 | cfg.VCS = "git" 38 | cfg.style = "pep440" 39 | cfg.tag_prefix = "" 40 | cfg.parentdir_prefix = "variational-" 41 | cfg.versionfile_source = "variational/_version.py" 42 | cfg.verbose = False 43 | return cfg 44 | 45 | 46 | class NotThisMethod(Exception): 47 | pass 48 | 49 | 50 | LONG_VERSION_PY = {} 51 | HANDLERS = {} 52 | 53 | 54 | def register_vcs_handler(vcs, method): # decorator 55 | def decorate(f): 56 | if vcs not in HANDLERS: 57 | HANDLERS[vcs] = {} 58 | HANDLERS[vcs][method] = f 59 | return f 60 | return decorate 61 | 62 | 63 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 64 | assert isinstance(commands, list) 65 | p = None 66 | for c in commands: 67 | try: 68 | dispcmd = str([c] + args) 69 | # remember shell=False, so use git.cmd on windows, not just git 70 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 71 | stderr=(subprocess.PIPE if 
hide_stderr 72 | else None)) 73 | break 74 | except EnvironmentError: 75 | e = sys.exc_info()[1] 76 | if e.errno == errno.ENOENT: 77 | continue 78 | if verbose: 79 | print("unable to run %s" % dispcmd) 80 | print(e) 81 | return None 82 | else: 83 | if verbose: 84 | print("unable to find command, tried %s" % (commands,)) 85 | return None 86 | stdout = p.communicate()[0].strip() 87 | if sys.version_info[0] >= 3: 88 | stdout = stdout.decode() 89 | if p.returncode != 0: 90 | if verbose: 91 | print("unable to run %s (error)" % dispcmd) 92 | return None 93 | return stdout 94 | 95 | 96 | def versions_from_parentdir(parentdir_prefix, root, verbose): 97 | # Source tarballs conventionally unpack into a directory that includes 98 | # both the project name and a version string. 99 | dirname = os.path.basename(root) 100 | if not dirname.startswith(parentdir_prefix): 101 | if verbose: 102 | print("guessing rootdir is '%s', but '%s' doesn't start with " 103 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 104 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 105 | return {"version": dirname[len(parentdir_prefix):], 106 | "full-revisionid": None, 107 | "dirty": False, "error": None} 108 | 109 | 110 | @register_vcs_handler("git", "get_keywords") 111 | def git_get_keywords(versionfile_abs): 112 | # the code embedded in _version.py can just fetch the value of these 113 | # keywords. When used from setup.py, we don't want to import _version.py, 114 | # so we do it with a regexp instead. This function is not used from 115 | # _version.py. 
116 | keywords = {} 117 | try: 118 | f = open(versionfile_abs, "r") 119 | for line in f.readlines(): 120 | if line.strip().startswith("git_refnames ="): 121 | mo = re.search(r'=\s*"(.*)"', line) 122 | if mo: 123 | keywords["refnames"] = mo.group(1) 124 | if line.strip().startswith("git_full ="): 125 | mo = re.search(r'=\s*"(.*)"', line) 126 | if mo: 127 | keywords["full"] = mo.group(1) 128 | f.close() 129 | except EnvironmentError: 130 | pass 131 | return keywords 132 | 133 | 134 | @register_vcs_handler("git", "keywords") 135 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 136 | if not keywords: 137 | raise NotThisMethod("no keywords at all, weird") 138 | refnames = keywords["refnames"].strip() 139 | if refnames.startswith("$Format"): 140 | if verbose: 141 | print("keywords are unexpanded, not using") 142 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 143 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 144 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 145 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 146 | TAG = "tag: " 147 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 148 | if not tags: 149 | # Either we're using git < 1.8.3, or there really are no tags. We use 150 | # a heuristic: assume all version tags have a digit. The old git %d 151 | # expansion behaves like git log --decorate=short and strips out the 152 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 153 | # between branches and tags. By ignoring refnames without digits, we 154 | # filter out many common branch names like "release" and 155 | # "stabilization", as well as "HEAD" and "master". 156 | tags = set([r for r in refs if re.search(r'\d', r)]) 157 | if verbose: 158 | print("discarding '%s', no digits" % ",".join(refs-tags)) 159 | if verbose: 160 | print("likely tags: %s" % ",".join(sorted(tags))) 161 | for ref in sorted(tags): 162 | # sorting will prefer e.g. 
"2.0" over "2.0rc1" 163 | if ref.startswith(tag_prefix): 164 | r = ref[len(tag_prefix):] 165 | if verbose: 166 | print("picking %s" % r) 167 | return {"version": r, 168 | "full-revisionid": keywords["full"].strip(), 169 | "dirty": False, "error": None 170 | } 171 | # no suitable tags, so version is "0+unknown", but full hex is still there 172 | if verbose: 173 | print("no suitable tags, using unknown + full revision id") 174 | return {"version": "0+unknown", 175 | "full-revisionid": keywords["full"].strip(), 176 | "dirty": False, "error": "no suitable tags"} 177 | 178 | 179 | @register_vcs_handler("git", "pieces_from_vcs") 180 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 181 | # this runs 'git' from the root of the source tree. This only gets called 182 | # if the git-archive 'subst' keywords were *not* expanded, and 183 | # _version.py hasn't already been rewritten with a short version string, 184 | # meaning we're inside a checked out source tree. 185 | 186 | if not os.path.exists(os.path.join(root, ".git")): 187 | if verbose: 188 | print("no .git in %s" % root) 189 | raise NotThisMethod("no .git directory") 190 | 191 | GITS = ["git"] 192 | if sys.platform == "win32": 193 | GITS = ["git.cmd", "git.exe"] 194 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 195 | # if there are no tags, this yields HEX[-dirty] (no NUM) 196 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 197 | "--always", "--long"], 198 | cwd=root) 199 | # --long was added in git-1.5.5 200 | if describe_out is None: 201 | raise NotThisMethod("'git describe' failed") 202 | describe_out = describe_out.strip() 203 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 204 | if full_out is None: 205 | raise NotThisMethod("'git rev-parse' failed") 206 | full_out = full_out.strip() 207 | 208 | pieces = {} 209 | pieces["long"] = full_out 210 | pieces["short"] = full_out[:7] # maybe improved later 211 | pieces["error"] = None 212 | 213 | 
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 214 | # TAG might have hyphens. 215 | git_describe = describe_out 216 | 217 | # look for -dirty suffix 218 | dirty = git_describe.endswith("-dirty") 219 | pieces["dirty"] = dirty 220 | if dirty: 221 | git_describe = git_describe[:git_describe.rindex("-dirty")] 222 | 223 | # now we have TAG-NUM-gHEX or HEX 224 | 225 | if "-" in git_describe: 226 | # TAG-NUM-gHEX 227 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 228 | if not mo: 229 | # unparseable. Maybe git-describe is misbehaving? 230 | pieces["error"] = ("unable to parse git-describe output: '%s'" 231 | % describe_out) 232 | return pieces 233 | 234 | # tag 235 | full_tag = mo.group(1) 236 | if not full_tag.startswith(tag_prefix): 237 | if verbose: 238 | fmt = "tag '%s' doesn't start with prefix '%s'" 239 | print(fmt % (full_tag, tag_prefix)) 240 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 241 | % (full_tag, tag_prefix)) 242 | return pieces 243 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 244 | 245 | # distance: number of commits since tag 246 | pieces["distance"] = int(mo.group(2)) 247 | 248 | # commit: short hex revision ID 249 | pieces["short"] = mo.group(3) 250 | 251 | else: 252 | # HEX: no tags 253 | pieces["closest-tag"] = None 254 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 255 | cwd=root) 256 | pieces["distance"] = int(count_out) # total number of commits 257 | 258 | return pieces 259 | 260 | 261 | def plus_or_dot(pieces): 262 | if "+" in pieces.get("closest-tag", ""): 263 | return "." 264 | return "+" 265 | 266 | 267 | def render_pep440(pieces): 268 | # now build up version string, with post-release "local version 269 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 270 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 271 | 272 | # exceptions: 273 | # 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 274 | 275 | if pieces["closest-tag"]: 276 | rendered = pieces["closest-tag"] 277 | if pieces["distance"] or pieces["dirty"]: 278 | rendered += plus_or_dot(pieces) 279 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 280 | if pieces["dirty"]: 281 | rendered += ".dirty" 282 | else: 283 | # exception #1 284 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 285 | pieces["short"]) 286 | if pieces["dirty"]: 287 | rendered += ".dirty" 288 | return rendered 289 | 290 | 291 | def render_pep440_pre(pieces): 292 | # TAG[.post.devDISTANCE] . No -dirty 293 | 294 | # exceptions: 295 | # 1: no tags. 0.post.devDISTANCE 296 | 297 | if pieces["closest-tag"]: 298 | rendered = pieces["closest-tag"] 299 | if pieces["distance"]: 300 | rendered += ".post.dev%d" % pieces["distance"] 301 | else: 302 | # exception #1 303 | rendered = "0.post.dev%d" % pieces["distance"] 304 | return rendered 305 | 306 | 307 | def render_pep440_post(pieces): 308 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that 309 | # .dev0 sorts backwards (a dirty tree will appear "older" than the 310 | # corresponding clean one), but you shouldn't be releasing software with 311 | # -dirty anyways. 312 | 313 | # exceptions: 314 | # 1: no tags. 0.postDISTANCE[.dev0] 315 | 316 | if pieces["closest-tag"]: 317 | rendered = pieces["closest-tag"] 318 | if pieces["distance"] or pieces["dirty"]: 319 | rendered += ".post%d" % pieces["distance"] 320 | if pieces["dirty"]: 321 | rendered += ".dev0" 322 | rendered += plus_or_dot(pieces) 323 | rendered += "g%s" % pieces["short"] 324 | else: 325 | # exception #1 326 | rendered = "0.post%d" % pieces["distance"] 327 | if pieces["dirty"]: 328 | rendered += ".dev0" 329 | rendered += "+g%s" % pieces["short"] 330 | return rendered 331 | 332 | 333 | def render_pep440_old(pieces): 334 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. 335 | 336 | # exceptions: 337 | # 1: no tags. 
0.postDISTANCE[.dev0] 338 | 339 | if pieces["closest-tag"]: 340 | rendered = pieces["closest-tag"] 341 | if pieces["distance"] or pieces["dirty"]: 342 | rendered += ".post%d" % pieces["distance"] 343 | if pieces["dirty"]: 344 | rendered += ".dev0" 345 | else: 346 | # exception #1 347 | rendered = "0.post%d" % pieces["distance"] 348 | if pieces["dirty"]: 349 | rendered += ".dev0" 350 | return rendered 351 | 352 | 353 | def render_git_describe(pieces): 354 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty 355 | # --always' 356 | 357 | # exceptions: 358 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 359 | 360 | if pieces["closest-tag"]: 361 | rendered = pieces["closest-tag"] 362 | if pieces["distance"]: 363 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 364 | else: 365 | # exception #1 366 | rendered = pieces["short"] 367 | if pieces["dirty"]: 368 | rendered += "-dirty" 369 | return rendered 370 | 371 | 372 | def render_git_describe_long(pieces): 373 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty 374 | # --always -long'. The distance/hash is unconditional. 375 | 376 | # exceptions: 377 | # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 378 | 379 | if pieces["closest-tag"]: 380 | rendered = pieces["closest-tag"] 381 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 382 | else: 383 | # exception #1 384 | rendered = pieces["short"] 385 | if pieces["dirty"]: 386 | rendered += "-dirty" 387 | return rendered 388 | 389 | 390 | def render(pieces, style): 391 | if pieces["error"]: 392 | return {"version": "unknown", 393 | "full-revisionid": pieces.get("long"), 394 | "dirty": None, 395 | "error": pieces["error"]} 396 | 397 | if not style or style == "default": 398 | style = "pep440" # the default 399 | 400 | if style == "pep440": 401 | rendered = render_pep440(pieces) 402 | elif style == "pep440-pre": 403 | rendered = render_pep440_pre(pieces) 404 | elif style == "pep440-post": 405 | rendered = render_pep440_post(pieces) 406 | elif style == "pep440-old": 407 | rendered = render_pep440_old(pieces) 408 | elif style == "git-describe": 409 | rendered = render_git_describe(pieces) 410 | elif style == "git-describe-long": 411 | rendered = render_git_describe_long(pieces) 412 | else: 413 | raise ValueError("unknown style '%s'" % style) 414 | 415 | return {"version": rendered, "full-revisionid": pieces["long"], 416 | "dirty": pieces["dirty"], "error": None} 417 | 418 | 419 | def get_versions(): 420 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 421 | # __file__, we can work backwards from there to the root. Some 422 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 423 | # case we can only use expanded keywords. 424 | 425 | cfg = get_config() 426 | verbose = cfg.verbose 427 | 428 | try: 429 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 430 | verbose) 431 | except NotThisMethod: 432 | pass 433 | 434 | try: 435 | root = os.path.realpath(__file__) 436 | # versionfile_source is the relative path from the top of the source 437 | # tree (where the .git directory might live) to this file. 
Invert 438 | # this to find the root from __file__. 439 | for i in cfg.versionfile_source.split('/'): 440 | root = os.path.dirname(root) 441 | except NameError: 442 | return {"version": "0+unknown", "full-revisionid": None, 443 | "dirty": None, 444 | "error": "unable to find root of source tree"} 445 | 446 | try: 447 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 448 | return render(pieces, cfg.style) 449 | except NotThisMethod: 450 | pass 451 | 452 | try: 453 | if cfg.parentdir_prefix: 454 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 455 | except NotThisMethod: 456 | pass 457 | 458 | return {"version": "0+unknown", "full-revisionid": None, 459 | "dirty": None, 460 | "error": "unable to compute version"} 461 | -------------------------------------------------------------------------------- /variational/estimators/tests/test_moments.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | import numpy as np 4 | from .. 
import moments 5 | 6 | __author__ = 'noe' 7 | 8 | class TestMoments(unittest.TestCase): 9 | 10 | @classmethod 11 | def setUpClass(cls): 12 | cls.X_2 = np.random.rand(10000, 2) 13 | cls.Y_2 = np.random.rand(10000, 2) 14 | # dense data 15 | cls.X_10 = np.random.rand(10000, 10) 16 | cls.Y_10 = np.random.rand(10000, 10) 17 | cls.X_100 = np.random.rand(10000, 100) 18 | cls.Y_100 = np.random.rand(10000, 100) 19 | # sparse zero data 20 | cls.X_10_sparsezero = np.zeros((10000, 10)) 21 | cls.X_10_sparsezero[:, 0] = cls.X_10[:, 0] 22 | cls.Y_10_sparsezero = np.zeros((10000, 10)) 23 | cls.Y_10_sparsezero[:, 0] = cls.Y_10[:, 0] 24 | cls.X_100_sparsezero = np.zeros((10000, 100)) 25 | cls.X_100_sparsezero[:, :10] = cls.X_100[:, :10] 26 | cls.Y_100_sparsezero = np.zeros((10000, 100)) 27 | cls.Y_100_sparsezero[:, :10] = cls.Y_100[:, :10] 28 | # sparse const data 29 | cls.X_10_sparseconst = np.ones((10000, 10)) 30 | cls.X_10_sparseconst[:, 0] = cls.X_10[:, 0] 31 | cls.Y_10_sparseconst = 2*np.ones((10000, 10)) 32 | cls.Y_10_sparseconst[:, 0] = cls.Y_10[:, 0] 33 | cls.X_100_sparseconst = np.ones((10000, 100)) 34 | cls.X_100_sparseconst[:, :10] = cls.X_100[:, :10] 35 | cls.Y_100_sparseconst = 2*np.zeros((10000, 100)) 36 | cls.Y_100_sparseconst[:, :10] = cls.Y_100[:, :10] 37 | # boolean data 38 | cls.Xb_2 = np.random.randint(0, 2, size=(10000, 2)) 39 | cls.Xb_2 = cls.Xb_2.astype(np.bool) 40 | cls.Xb_10 = np.random.randint(0, 2, size=(10000, 10)) 41 | cls.Xb_10 = cls.Xb_10.astype(np.bool) 42 | cls.Xb_10_sparsezero = np.zeros((10000, 10), dtype=np.bool) 43 | cls.Xb_10_sparsezero[:, 0] = cls.Xb_10[:, 0] 44 | # generate weights: 45 | cls.weights = np.random.rand(10000) 46 | # Set the lag time for time-lagged tests: 47 | cls.lag = 50 48 | 49 | return cls 50 | 51 | def _test_moments_X(self, X, remove_mean=False, sparse_mode='auto', weights=None): 52 | # proposed solution 53 | w, s_X, C_XX = moments.moments_XX(X, remove_mean=remove_mean, modify_data=False, 54 | sparse_mode=sparse_mode, 
weights=weights) 55 | # reference 56 | X = X.astype(np.float64) 57 | if weights is not None: 58 | X1 = weights[:, None] * X 59 | w = weights.sum() 60 | else: 61 | X1 = X 62 | w = X.shape[0] 63 | s_X_ref = X1.sum(axis=0) 64 | if remove_mean: 65 | X = X - (1.0 / w) * s_X_ref 66 | if weights is not None: 67 | X1 = weights[:, None] * X 68 | else: 69 | X1 = X 70 | C_XX_ref = np.dot(X1.T, X) 71 | # test 72 | assert np.allclose(s_X, s_X_ref) 73 | assert np.allclose(C_XX, C_XX_ref) 74 | 75 | def test_moments_X(self): 76 | # simple test, dense 77 | self._test_moments_X(self.X_10, remove_mean=False, sparse_mode='dense') 78 | self._test_moments_X(self.X_100, remove_mean=False, sparse_mode='dense') 79 | # mean-free, dense 80 | self._test_moments_X(self.X_10, remove_mean=True, sparse_mode='dense') 81 | self._test_moments_X(self.X_100, remove_mean=True, sparse_mode='dense') 82 | # weighted test, simple, dense: 83 | self._test_moments_X(self.X_10, remove_mean=False, sparse_mode='dense', weights=self.weights) 84 | self._test_moments_X(self.X_100, remove_mean=False, sparse_mode='dense', weights=self.weights) 85 | # weighted test, mean-free, dense: 86 | self._test_moments_X(self.X_10, remove_mean=True, sparse_mode='dense', weights=self.weights) 87 | self._test_moments_X(self.X_100, remove_mean=True, sparse_mode='dense', weights=self.weights) 88 | 89 | def test_moments_X_sparsezero(self): 90 | # simple test, sparse 91 | self._test_moments_X(self.X_10_sparsezero, remove_mean=False, sparse_mode='sparse') 92 | self._test_moments_X(self.X_100_sparsezero, remove_mean=False, sparse_mode='sparse') 93 | # mean-free, sparse 94 | self._test_moments_X(self.X_10_sparsezero, remove_mean=True, sparse_mode='sparse') 95 | self._test_moments_X(self.X_100_sparsezero, remove_mean=True, sparse_mode='sparse') 96 | # weighted, sparse 97 | self._test_moments_X(self.X_10_sparsezero, remove_mean=False, sparse_mode='sparse', weights=self.weights) 98 | self._test_moments_X(self.X_100_sparsezero, 
remove_mean=False, sparse_mode='sparse', weights=self.weights) 99 | # weighted, mean-free, sparse 100 | self._test_moments_X(self.X_10_sparsezero, remove_mean=True, sparse_mode='sparse', weights=self.weights) 101 | self._test_moments_X(self.X_100_sparsezero, remove_mean=True, sparse_mode='sparse', weights=self.weights) 102 | 103 | def test_moments_X_sparseconst(self): 104 | # simple test, sparse 105 | self._test_moments_X(self.X_10_sparseconst, remove_mean=False, sparse_mode='sparse') 106 | self._test_moments_X(self.X_100_sparseconst, remove_mean=False, sparse_mode='sparse') 107 | # mean-free, sparse 108 | self._test_moments_X(self.X_10_sparseconst, remove_mean=True, sparse_mode='sparse') 109 | self._test_moments_X(self.X_100_sparseconst, remove_mean=True, sparse_mode='sparse') 110 | # weighted, sparse: 111 | self._test_moments_X(self.X_10_sparseconst, remove_mean=False, sparse_mode='dense', weights=self.weights) 112 | self._test_moments_X(self.X_100_sparseconst, remove_mean=False, sparse_mode='dense', weights=self.weights) 113 | # weighted, mean-free, sparse: 114 | self._test_moments_X(self.X_10_sparseconst, remove_mean=True, sparse_mode='dense', weights=self.weights) 115 | self._test_moments_X(self.X_100_sparseconst, remove_mean=True, sparse_mode='dense', weights=self.weights) 116 | 117 | def test_boolean_moments(self): 118 | # standard tests 119 | self._test_moments_X(self.Xb_10, remove_mean=False, sparse_mode='dense') 120 | self._test_moments_X(self.Xb_10, remove_mean=True, sparse_mode='dense') 121 | self._test_moments_X(self.Xb_10_sparsezero, remove_mean=False, sparse_mode='sparse') 122 | self._test_moments_X(self.Xb_10_sparsezero, remove_mean=True, sparse_mode='sparse') 123 | # test integer recovery 124 | Cxx_ref = np.dot(self.Xb_10.astype(np.int64).T, self.Xb_10.astype(np.int64)) # integer 125 | s_X_ref = np.sum(self.Xb_10, axis=0) 126 | w, s_X, Cxx = moments.moments_XX(self.Xb_10, remove_mean=False, modify_data=False, sparse_mode='dense') 127 | s_X = 
np.round(s_X).astype(np.int64) 128 | Cxx = np.round(Cxx).astype(np.int64) 129 | assert np.array_equal(s_X, s_X_ref) 130 | assert np.array_equal(Cxx, Cxx_ref) 131 | 132 | 133 | def _test_moments_XY(self, X, Y, symmetrize=False, remove_mean=False, sparse_mode='auto', weights=None): 134 | w1, s_X, s_Y, C_XX, C_XY = moments.moments_XXXY(X, Y, remove_mean=remove_mean, modify_data=False, 135 | symmetrize=symmetrize, sparse_mode=sparse_mode, 136 | weights=weights) 137 | # reference 138 | T = X.shape[0] 139 | if weights is not None: 140 | X1 = weights[:, None] * X 141 | Y1 = weights[:, None] * Y 142 | else: 143 | X1 = X 144 | Y1 = Y 145 | s_X_ref = X1.sum(axis=0) 146 | s_Y_ref = Y1.sum(axis=0) 147 | if symmetrize: 148 | s_X_ref = s_X_ref + s_Y_ref 149 | s_Y_ref = s_X_ref 150 | if weights is not None: 151 | w = 2 * np.sum(weights) 152 | else: 153 | w = 2 * T 154 | else: 155 | if weights is not None: 156 | w = np.sum(weights) 157 | else: 158 | w = T 159 | if remove_mean: 160 | X = X - s_X_ref/float(w) 161 | Y = Y - s_Y_ref/float(w) 162 | if weights is not None: 163 | X1 = weights[:, None] * X 164 | Y1 = weights[:, None] * Y 165 | else: 166 | X1 = X 167 | Y1 = Y 168 | if symmetrize: 169 | C_XX_ref = np.dot(X1.T, X) + np.dot(Y1.T, Y) 170 | C_XY_ref = np.dot(X1.T, Y) + np.dot(Y1.T, X) 171 | else: 172 | C_XX_ref = np.dot(X1.T, X) 173 | C_XY_ref = np.dot(X1.T, Y) 174 | # test 175 | assert np.allclose(w1, w) 176 | assert np.allclose(s_X, s_X_ref) 177 | assert np.allclose(s_Y, s_Y_ref) 178 | assert np.allclose(C_XX, C_XX_ref) 179 | assert np.allclose(C_XY, C_XY_ref) 180 | 181 | def test_moments_XY(self): 182 | # simple test, dense 183 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=False, remove_mean=False, sparse_mode='dense') 184 | self._test_moments_XY(self.X_100, self.Y_10, symmetrize=False, remove_mean=False, sparse_mode='dense') 185 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=False, remove_mean=False, sparse_mode='dense') 186 | # mean-free, dense 187 | 
self._test_moments_XY(self.X_10, self.Y_10, symmetrize=False, remove_mean=True, sparse_mode='dense') 188 | self._test_moments_XY(self.X_100, self.Y_10, symmetrize=False, remove_mean=True, sparse_mode='dense') 189 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=False, remove_mean=True, sparse_mode='dense') 190 | 191 | def test_moments_XY_weighted(self): 192 | # weighted test, dense 193 | self._test_moments_XY(self.X_10, self.X_10, symmetrize=False, remove_mean=False, 194 | sparse_mode='dense', weights=self.weights) 195 | self._test_moments_XY(self.X_100, self.X_100, symmetrize=False, remove_mean=False, 196 | sparse_mode='dense', weights=self.weights) 197 | # weighted test, mean-free, dense 198 | self._test_moments_XY(self.X_10, self.X_10, symmetrize=False, remove_mean=True, 199 | sparse_mode='dense', weights=self.weights) 200 | self._test_moments_XY(self.X_100, self.X_100, symmetrize=False, remove_mean=True, 201 | sparse_mode='dense', weights=self.weights) 202 | 203 | def test_moments_XY_sym(self): 204 | # simple test, dense, symmetric 205 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=False, sparse_mode='dense') 206 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=False, sparse_mode='dense') 207 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=False, sparse_mode='dense') 208 | # mean-free, dense, symmetric 209 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=True, sparse_mode='dense') 210 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=True, sparse_mode='dense') 211 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=True, sparse_mode='dense') 212 | 213 | def test_moments_XY_weighted_sym(self): 214 | # simple test, dense, symmetric 215 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=False, sparse_mode='dense', 216 | weights=self.weights) 217 | self._test_moments_XY(self.X_10, 
self.Y_10, symmetrize=True, remove_mean=False, sparse_mode='dense' 218 | , weights=self.weights) 219 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=False, sparse_mode='dense', 220 | weights=self.weights) 221 | # mean-free, dense, symmetric 222 | self._test_moments_XY(self.X_2, self.Y_2, symmetrize=True, remove_mean=True, sparse_mode='dense', 223 | weights=self.weights) 224 | self._test_moments_XY(self.X_10, self.Y_10, symmetrize=True, remove_mean=True, sparse_mode='dense', 225 | weights=self.weights) 226 | self._test_moments_XY(self.X_100, self.Y_100, symmetrize=True, remove_mean=True, sparse_mode='dense', 227 | weights=self.weights) 228 | 229 | def test_moments_XY_sparsezero(self): 230 | # simple test, dense 231 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=False, 232 | sparse_mode='sparse') 233 | self._test_moments_XY(self.X_100_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=False, 234 | sparse_mode='sparse') 235 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=False, remove_mean=False, 236 | sparse_mode='sparse') 237 | # mean-free, dense 238 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=True, 239 | sparse_mode='sparse') 240 | self._test_moments_XY(self.X_100_sparsezero, self.Y_10_sparsezero, symmetrize=False, remove_mean=True, 241 | sparse_mode='sparse') 242 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=False, remove_mean=True, 243 | sparse_mode='dense') 244 | 245 | def test_moments_XY_weighted_sparsezero(self): 246 | # weighted test, sparse 247 | self._test_moments_XY(self.X_10_sparsezero, self.X_10_sparsezero, symmetrize=False, remove_mean=False, 248 | sparse_mode='sparse', weights=self.weights) 249 | self._test_moments_XY(self.X_100_sparsezero, self.X_100_sparsezero, symmetrize=False, remove_mean=False, 250 | sparse_mode='sparse', 
weights=self.weights) 251 | # weighted test, mean-free, sparse 252 | self._test_moments_XY(self.X_10_sparsezero, self.X_10_sparsezero, symmetrize=False, remove_mean=True, 253 | sparse_mode='sparse', weights=self.weights) 254 | self._test_moments_XY(self.X_100_sparsezero, self.X_100_sparsezero, symmetrize=False, remove_mean=True, 255 | sparse_mode='sparse', weights=self.weights) 256 | 257 | def test_moments_XY_sym_sparsezero(self): 258 | # simple test, sparse, symmetric 259 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=False, 260 | sparse_mode='sparse') 261 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=False, 262 | sparse_mode='sparse') 263 | # mean-free, sparse, symmetric 264 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=True, 265 | sparse_mode='sparse') 266 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=True, 267 | sparse_mode='sparse') 268 | 269 | def test_moments_XY_weighted_sym_sparsezero(self): 270 | # simple test, sparse, symmetric 271 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=False, 272 | sparse_mode='sparse', weights=self.weights) 273 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=False, 274 | sparse_mode='sparse', weights=self.weights) 275 | # mean-free, sparse, symmetric 276 | self._test_moments_XY(self.X_10_sparsezero, self.Y_10_sparsezero, symmetrize=True, remove_mean=True, 277 | sparse_mode='sparse', weights=self.weights) 278 | self._test_moments_XY(self.X_100_sparsezero, self.Y_100_sparsezero, symmetrize=True, remove_mean=True, 279 | sparse_mode='sparse', weights=self.weights) 280 | 281 | def test_moments_XY_sparseconst(self): 282 | # simple test, dense 283 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=False, 
remove_mean=False, 284 | sparse_mode='sparse') 285 | self._test_moments_XY(self.X_100_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=False, 286 | sparse_mode='sparse') 287 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=False, remove_mean=False, 288 | sparse_mode='sparse') 289 | # mean-free, dense 290 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=True, 291 | sparse_mode='sparse') 292 | self._test_moments_XY(self.X_100_sparseconst, self.Y_10_sparseconst, symmetrize=False, remove_mean=True, 293 | sparse_mode='sparse') 294 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=False, remove_mean=True, 295 | sparse_mode='dense') 296 | 297 | def test_moments_XY_weighted_sparseconst(self): 298 | # weighted test, sparse 299 | self._test_moments_XY(self.X_10_sparseconst, self.X_10_sparseconst, symmetrize=False, remove_mean=False, 300 | sparse_mode='sparse', weights=self.weights) 301 | self._test_moments_XY(self.X_100_sparseconst, self.X_100_sparseconst, symmetrize=False, remove_mean=False, 302 | sparse_mode='sparse', weights=self.weights) 303 | # weighted test, mean-free, sparse 304 | self._test_moments_XY(self.X_10_sparseconst, self.X_10_sparseconst, symmetrize=False, remove_mean=True, 305 | sparse_mode='sparse', weights=self.weights) 306 | self._test_moments_XY(self.X_100_sparseconst, self.X_100_sparseconst, symmetrize=False, remove_mean=True, 307 | sparse_mode='sparse', weights=self.weights) 308 | 309 | def test_moments_XY_sym_sparseconst(self): 310 | # simple test, sparse, symmetric 311 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=False, 312 | sparse_mode='sparse') 313 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=False, 314 | sparse_mode='sparse') 315 | # mean-free, sparse, symmetric 316 | 
self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=True, 317 | sparse_mode='sparse') 318 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=True, 319 | sparse_mode='sparse') 320 | 321 | def test_moments_XY_weighted_sym_sparseconst(self): 322 | # simple test, sparse, symmetric 323 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=False, 324 | sparse_mode='sparse', weights=self.weights) 325 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=False, 326 | sparse_mode='sparse', weights=self.weights) 327 | # mean-free, sparse, symmetric 328 | self._test_moments_XY(self.X_10_sparseconst, self.Y_10_sparseconst, symmetrize=True, remove_mean=True, 329 | sparse_mode='sparse', weights=self.weights) 330 | self._test_moments_XY(self.X_100_sparseconst, self.Y_100_sparseconst, symmetrize=True, remove_mean=True, 331 | sparse_mode='sparse', weights=self.weights) 332 | 333 | 334 | if __name__ == "__main__": 335 | unittest.main() -------------------------------------------------------------------------------- /docs/Updating_Formulas.lyx: -------------------------------------------------------------------------------- 1 | #LyX 2.1 created this file. 
For more info see http://www.lyx.org/ 2 | \lyxformat 474 3 | \begin_document 4 | \begin_header 5 | \textclass article 6 | \use_default_options true 7 | \begin_modules 8 | theorems-ams 9 | eqs-within-sections 10 | figs-within-sections 11 | \end_modules 12 | \maintain_unincluded_children false 13 | \language english 14 | \language_package default 15 | \inputencoding auto 16 | \fontencoding global 17 | \font_roman default 18 | \font_sans default 19 | \font_typewriter default 20 | \font_math auto 21 | \font_default_family default 22 | \use_non_tex_fonts false 23 | \font_sc false 24 | \font_osf false 25 | \font_sf_scale 100 26 | \font_tt_scale 100 27 | \graphics default 28 | \default_output_format default 29 | \output_sync 0 30 | \bibtex_command default 31 | \index_command default 32 | \paperfontsize default 33 | \spacing single 34 | \use_hyperref false 35 | \papersize default 36 | \use_geometry false 37 | \use_package amsmath 1 38 | \use_package amssymb 1 39 | \use_package cancel 1 40 | \use_package esint 1 41 | \use_package mathdots 1 42 | \use_package mathtools 1 43 | \use_package mhchem 1 44 | \use_package stackrel 1 45 | \use_package stmaryrd 1 46 | \use_package undertilde 1 47 | \cite_engine basic 48 | \cite_engine_type default 49 | \biblio_style plain 50 | \use_bibtopic false 51 | \use_indices false 52 | \paperorientation portrait 53 | \suppress_date false 54 | \justification true 55 | \use_refstyle 1 56 | \index Index 57 | \shortcut idx 58 | \color #008000 59 | \end_index 60 | \secnumdepth 3 61 | \tocdepth 3 62 | \paragraph_separation indent 63 | \paragraph_indentation default 64 | \quotes_language english 65 | \papercolumns 1 66 | \papersides 1 67 | \paperpagestyle default 68 | \tracking_changes false 69 | \output_changes false 70 | \html_math_output 0 71 | \html_css_as_file 0 72 | \html_be_strict false 73 | \end_header 74 | 75 | \begin_body 76 | 77 | \begin_layout Title 78 | Updating Formulas for Correlations 79 | \end_layout 80 | 81 | \begin_layout Standard 
82 | Here, we collect updating formulas for correlations between time series: 83 | \end_layout 84 | 85 | \begin_layout Section 86 | General Time Series 87 | \end_layout 88 | 89 | \begin_layout Standard 90 | The standard case is to compute the correlation between two time series 91 | 92 | \begin_inset Formula $x_{t}(i),\, t=1,\ldots,T,\, i=1,\ldots,N$ 93 | \end_inset 94 | 95 | , and 96 | \begin_inset Formula $y_{t}(i),\, t=1,\ldots,T,\, i=1,\ldots,N$ 97 | \end_inset 98 | 99 | . 100 | Additionally, it is possible that weights are given for each time step, 101 | i.e. 102 | there are non-negative numbers 103 | \begin_inset Formula $w_{t},\, t=1,\ldots,T$ 104 | \end_inset 105 | 106 | . 107 | Our goal then is to compute the (unnormalized) correlation 108 | \end_layout 109 | 110 | \begin_layout Standard 111 | \begin_inset Formula 112 | \begin{eqnarray*} 113 | C(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right), 114 | \end{eqnarray*} 115 | 116 | \end_inset 117 | 118 | where 119 | \begin_inset Formula $\bar{x}(i),\,\bar{y}(j)$ 120 | \end_inset 121 | 122 | denote the weighted mean values of the time series, i.e. 123 | \end_layout 124 | 125 | \begin_layout Standard 126 | \begin_inset Formula 127 | \begin{eqnarray*} 128 | \overline{x}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}x_{t}(i),\\ 129 | W_{T} & = & \sum_{t=1}^{T}w_{t}. 130 | \end{eqnarray*} 131 | 132 | \end_inset 133 | 134 | We are interested in computing the correlation 135 | \begin_inset Formula $C(i,j)$ 136 | \end_inset 137 | 138 | in chunks. 139 | That means we split the data into, say, two blocks 140 | \begin_inset Formula $x_{t}(i),\, t=1,\ldots,T_{1}$ 141 | \end_inset 142 | 143 | , and 144 | \begin_inset Formula $x_{t}(i),\, t=T_{1}+1,\ldots,T_{2}=T,$ 145 | \end_inset 146 | 147 | and the same for 148 | \begin_inset Formula $y_{t}$ 149 | \end_inset 150 | 151 | .
152 | We would then like to compute the correlation of each chunk separately, 153 | sum them up and add a correction term. 154 | Let us introduce the following notation 155 | \end_layout 156 | 157 | \begin_layout Standard 158 | \begin_inset Formula 159 | \begin{eqnarray} 160 | \overline{x_{T_{1}}}(i) & = & \frac{1}{W_{T_{1}}}\sum_{t=1}^{T_{1}}w_{t}x_{t}(i),\label{eq:chunk_definitions_0}\\ 161 | \overline{x_{T_{2}}}(i) & = & \frac{1}{W_{T_{2}}}\sum_{t=T_{1}+1}^{T_{2}}w_{t}x_{t}(i)\label{eq:chunk_definitions_1}\\ 162 | W_{T_{1}} & = & \sum_{t=1}^{T_{1}}w_{t}\label{eq:chunk_definitions_2}\\ 163 | W_{T_{2}} & = & \sum_{t=T_{1}+1}^{T_{2}}w_{t}\label{eq:chunk_definitions_3}\\ 164 | S_{T_{1}}(i,j) & = & \sum_{t=1}^{T_{1}}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:chunk_definitions_4}\\ 165 | S_{T_{2}}(i,j) & = & \sum_{t=T_{1}+1}^{T_{2}}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right).\label{eq:chunk_definitions_5} 166 | \end{eqnarray} 167 | 168 | \end_inset 169 | 170 | Now, the calculations from section 171 | \begin_inset CommandInset ref 172 | LatexCommand ref 173 | reference "sec:Proofs" 174 | 175 | \end_inset 176 | 177 | show that the full correlation 178 | \begin_inset Formula $C(i,j)$ 179 | \end_inset 180 | 181 | can be computed as 182 | \end_layout 183 | 184 | \begin_layout Standard 185 | \begin_inset Formula 186 | \begin{eqnarray} 187 | C(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{y_{T_{2}}}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:Update_Standard} 188 | \end{eqnarray} 189 | 190 | \end_inset 191 | 192 | 193 | \end_layout 194 | 195 | \begin_layout Section 196 | Symmetrization 197 | \end_layout 198 | 199 | \begin_layout Standard 200 | In some cases, a symmetric correlation matrix is desired, for example if 201 | 202 | \begin_inset Formula $y_{t}$ 203 | \end_inset 204 | 
205 | is a time-lagged version of 206 | \begin_inset Formula $x_{t}$ 207 | \end_inset 208 | 209 | . 210 | This can be achieved by redefining the means 211 | \end_layout 212 | 213 | \begin_layout Standard 214 | \begin_inset Formula 215 | \begin{eqnarray*} 216 | \overline{x}(i) & = & \frac{1}{2W_{T}}\left[\sum_{t=1}^{T}w_{t}x_{t}(i)+\sum_{t=1}^{T}w_{t}y_{t}(i)\right], 217 | \end{eqnarray*} 218 | 219 | \end_inset 220 | 221 | and defining the symmetrized correlation by 222 | \end_layout 223 | 224 | \begin_layout Standard 225 | \begin_inset Formula 226 | \begin{eqnarray*} 227 | C_{s}(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{x}(j)\right)\\ 228 | & & +\sum_{t=1}^{T}w_{t}\left(y_{t}(i)-\overline{x}(i)\right)\left(x_{t}(j)-\overline{x}(j)\right). 229 | \end{eqnarray*} 230 | 231 | \end_inset 232 | 233 | Using the analogues of Eqs. 234 | 235 | \begin_inset CommandInset ref 236 | LatexCommand eqref 237 | reference "eq:chunk_definitions_0" 238 | 239 | \end_inset 240 | 241 | - 242 | \begin_inset CommandInset ref 243 | LatexCommand eqref 244 | reference "eq:chunk_definitions_5" 245 | 246 | \end_inset 247 | 248 | , we arrive at the updating formula 249 | \end_layout 250 | 251 | \begin_layout Standard 252 | \begin_inset Formula 253 | \begin{eqnarray} 254 | C_{s}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{2W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{x_{T_{2}}}(j)-\overline{x_{T_{1}}}(j)\right),\label{eq:Updata_Sym} 255 | \end{eqnarray} 256 | 257 | \end_inset 258 | 259 | see again section 260 | \begin_inset CommandInset ref 261 | LatexCommand ref 262 | reference "sec:Proofs" 263 | 264 | \end_inset 265 | 266 | . 
267 | Please note that for time-lagged data, 268 | \begin_inset Formula $T_{1}$ 269 | \end_inset 270 | 271 | and 272 | \begin_inset Formula $T_{2}$ 273 | \end_inset 274 | 275 | must be changed to 276 | \begin_inset Formula $T_{1}-\tau$ 277 | \end_inset 278 | 279 | and 280 | \begin_inset Formula $T_{2}-\tau$ 281 | \end_inset 282 | 283 | , such that the first 284 | \begin_inset Formula $\tau$ 285 | \end_inset 286 | 287 | steps of every chunk only appear in 288 | \begin_inset Formula $x_{t}$ 289 | \end_inset 290 | 291 | , while the last 292 | \begin_inset Formula $\tau$ 293 | \end_inset 294 | 295 | steps only appear in 296 | \begin_inset Formula $y_{t}$ 297 | \end_inset 298 | 299 | . 300 | \end_layout 301 | 302 | \begin_layout Section 303 | Time-lagged Data without Symmetrization 304 | \end_layout 305 | 306 | \begin_layout Standard 307 | If we assume to be given a time-series 308 | \begin_inset Formula $\tilde{x_{t}}(i),\, t=1,\ldots,T+\tau$ 309 | \end_inset 310 | 311 | , and define the time-lagged time-series 312 | \begin_inset Formula $x_{t}(i)=\tilde{x}_{t}(i),\, t=1,\ldots T$ 313 | \end_inset 314 | 315 | and 316 | \begin_inset Formula $y_{t}(i)=\tilde{x}_{t+\tau},\, t=1,\ldots T$ 317 | \end_inset 318 | 319 | . 320 | If we do not wish to symmetrize the correlations, it seems most consistent 321 | to use the weights of the first 322 | \begin_inset Formula $T$ 323 | \end_inset 324 | 325 | steps, 326 | \begin_inset Formula $w_{t},\, t=1,\ldots,T$ 327 | \end_inset 328 | 329 | , only. 330 | The means are thus defined by 331 | \end_layout 332 | 333 | \begin_layout Standard 334 | \begin_inset Formula 335 | \begin{eqnarray*} 336 | \overline{x}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}x_{t}(i)\\ 337 | \overline{y}(i) & = & \frac{1}{W_{T}}\sum_{t=1}^{T}w_{t}y_{t}(i)\\ 338 | & = & \frac{1}{W_{T}}\sum_{t=\tau}^{T+\tau}w_{t-\tau}\tilde{x}_{t}\\ 339 | W_{T} & = & \sum_{t=1}^{T}w_{t}. 
340 | \end{eqnarray*} 341 | 342 | \end_inset 343 | 344 | The asymmetric correlation then becomes 345 | \end_layout 346 | 347 | \begin_layout Standard 348 | \begin_inset Formula 349 | \begin{eqnarray*} 350 | C_{a}(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right). 351 | \end{eqnarray*} 352 | 353 | \end_inset 354 | 355 | Using the analogues of Eqs. 356 | 357 | \begin_inset CommandInset ref 358 | LatexCommand eqref 359 | reference "eq:chunk_definitions_0" 360 | 361 | \end_inset 362 | 363 | - 364 | \begin_inset CommandInset ref 365 | LatexCommand eqref 366 | reference "eq:chunk_definitions_5" 367 | 368 | \end_inset 369 | 370 | , we find the updating formula for time-lagged data to be the same as Eq. 371 | 372 | \begin_inset CommandInset ref 373 | LatexCommand eqref 374 | reference "eq:Update_Standard" 375 | 376 | \end_inset 377 | 378 | : 379 | \end_layout 380 | 381 | \begin_layout Standard 382 | \begin_inset Formula 383 | \begin{eqnarray} 384 | C_{a}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+\frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left(\overline{x_{T_{2}}}(i)-\overline{x_{T_{1}}}(i)\right)\left(\overline{y_{T_{2}}}(j)-\overline{y_{T_{1}}}(j)\right)\label{eq:Update_Asym} 385 | \end{eqnarray} 386 | 387 | \end_inset 388 | 389 | 390 | \end_layout 391 | 392 | \begin_layout Section 393 | Conclusions 394 | \end_layout 395 | 396 | \begin_layout Standard 397 | We have shown that mean-free correlations can be easily computed in chunks 398 | for arbitrary time series 399 | \begin_inset Formula $x_{t}$ 400 | \end_inset 401 | 402 | , 403 | \begin_inset Formula $y_{t}$ 404 | \end_inset 405 | 406 | , including time-dependent weights. 407 | Moreover, symmetrized mean-free correlations can be computed for arbitrary 408 | time-series, which can also be time-lagged copies. 
409 | Finally, we found that for time-lagged time series which are not supposed 410 | to be symmetrized, it seems to make sense to compute the means using the 411 | weights of the first 412 | \begin_inset Formula $T$ 413 | \end_inset 414 | 415 | steps. 416 | \end_layout 417 | 418 | \begin_layout Section 419 | Proofs 420 | \end_layout 421 | 422 | \begin_layout Standard 423 | \begin_inset CommandInset label 424 | LatexCommand label 425 | name "sec:Proofs" 426 | 427 | \end_inset 428 | 429 | 430 | \end_layout 431 | 432 | \begin_layout Standard 433 | First, we determine an expression for the full correlation in terms of the 434 | partial sums 435 | \begin_inset Formula $S_{T_{1}},\, S_{T_{2}}$ 436 | \end_inset 437 | 438 | and a correction term for all cases considered here. 439 | We will see then that the correction term can be expressed in the forms 440 | given in Eqs. 441 | 442 | \begin_inset CommandInset ref 443 | LatexCommand eqref 444 | reference "eq:Update_Standard" 445 | 446 | \end_inset 447 | 448 | , 449 | \begin_inset CommandInset ref 450 | LatexCommand eqref 451 | reference "eq:Updata_Sym" 452 | 453 | \end_inset 454 | 455 | and 456 | \begin_inset CommandInset ref 457 | LatexCommand eqref 458 | reference "eq:Update_Asym" 459 | 460 | \end_inset 461 | 462 | . 
463 | Let us consider the standard case: 464 | \end_layout 465 | 466 | \begin_layout Standard 467 | \begin_inset Formula 468 | \begin{eqnarray} 469 | C(i,j) & = & \sum_{t=1}^{T}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\label{eq:Splitting_Cij_0}\\ 470 | & = & \sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\nonumber \\ 471 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x}(i)\right)\left(y_{t}(j)-\overline{y}(j)\right)\label{eq:Splitting_Cij_1}\\ 472 | & = & \sum_{t=1}^{T_{1}}w_{t}\left(\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)-\gamma_{1}^{x}(i)\right)\left(\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{y}(j)\right)\nonumber \\ 473 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)-\gamma_{2}^{x}(i)\right)\left(\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{y}(j)\right),\label{eq:Splitting_Cij_2} 474 | \end{eqnarray} 475 | 476 | \end_inset 477 | 478 | where 479 | \begin_inset Formula $\gamma_{k}^{x}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i)$ 480 | \end_inset 481 | 482 | and 483 | \begin_inset Formula $\gamma_{k}^{y}(i)=\overline{y}(i)-\overline{y_{T_{k}}}(i)$ 484 | \end_inset 485 | 486 | . 
487 | We proceed to find 488 | \end_layout 489 | 490 | \begin_layout Standard 491 | \begin_inset Formula 492 | \begin{eqnarray} 493 | C(i,j) & = & \sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{x}(i)\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)\nonumber \\ 494 | & & -\gamma_{1}^{y}(j)\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)+\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)\nonumber \\ 495 | & & +\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{x}(i)\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)\nonumber \\ 496 | & & -\gamma_{2}^{y}(j)\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right)+\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)\label{eq:Splitting_Cij_3}\\ 497 | & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j).\label{eq:Splitting_Cij_4} 498 | \end{eqnarray} 499 | 500 | \end_inset 501 | 502 | It remains to deal with the term: 503 | \end_layout 504 | 505 | \begin_layout Standard 506 | \begin_inset Formula 507 | \begin{eqnarray} 508 | W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j) & = & W_{T_{1}}\left(\overline{x}(i)\overline{y}(j)-\overline{x}(i)\overline{y_{T_{1}}}(j)-\overline{x_{T_{1}}}(i)\overline{y}(j)+\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)\right)\nonumber \\ 509 | & & +W_{T_{2}}\left(\overline{x}(i)\overline{y}(j)-\overline{x}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{2}}}(i)\overline{y}(j)+\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\right)\label{eq:Gamma_Reformulation_0}\\ 510 | & = & \left(W_{T_{1}}+W_{T_{2}}\right)\overline{x}(i)\overline{y}(j)+W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)\nonumber \\ 511 | & & +W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)-\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\nonumber \\ 512 | & & 
-\overline{y}(j)\left(W_{T_{1}}\overline{x_{T_{1}}}(i)+W_{T_{2}}\overline{x_{T_{2}}}(i)\right).\label{eq:Gamma_Reformulation_1} 513 | \end{eqnarray} 514 | 515 | \end_inset 516 | 517 | Now, we use that 518 | \begin_inset Formula $W_{T_{1}}\overline{x_{T_{1}}}(i)+W_{T_{2}}\overline{x_{T_{2}}}(i)=W_{T}\overline{x}(i)$ 519 | \end_inset 520 | 521 | to find: 522 | \end_layout 523 | 524 | \begin_layout Standard 525 | \begin_inset Formula 526 | \begin{eqnarray} 527 | & = & W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\nonumber \\ 528 | & & -\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\label{eq:Gamma_Reformulation_2}\\ 529 | & = & \frac{1}{W_{T}}\left[W_{T}\left(W_{T_{1}}\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)\right)\right]\nonumber \\ 530 | & & -\frac{1}{W_{T}}\left[W_{T}\overline{x}(i)\left(W_{T_{1}}\overline{y_{T_{1}}}(j)+W_{T_{2}}\overline{y_{T_{2}}}(j)\right)\right]\label{eq:Gamma_Reformulation_3}\\ 531 | & = & \frac{W_{T_{1}}W_{T_{2}}}{W_{T}}\left[\overline{x_{T_{1}}}(i)\overline{y_{T_{1}}}(j)+\overline{x_{T_{2}}}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{1}}}(i)\overline{y_{T_{2}}}(j)-\overline{x_{T_{2}}}(i)\overline{y_{T_{1}}}(j)\right].\label{eq:Gamma_Reformulation_4} 532 | \end{eqnarray} 533 | 534 | \end_inset 535 | 536 | This completes the proof of Eq. 537 | 538 | \begin_inset CommandInset ref 539 | LatexCommand eqref 540 | reference "eq:Update_Standard" 541 | 542 | \end_inset 543 | 544 | . 545 | For the symmetric case, the procedure from Eqs.
546 | 547 | \begin_inset CommandInset ref 548 | LatexCommand eqref 549 | reference "eq:Splitting_Cij_0" 550 | 551 | \end_inset 552 | 553 | - 554 | \begin_inset CommandInset ref 555 | LatexCommand eqref 556 | reference "eq:Splitting_Cij_4" 557 | 558 | \end_inset 559 | 560 | can be repeated to come up with the expression 561 | \end_layout 562 | 563 | \begin_layout Standard 564 | \begin_inset Formula 565 | \begin{eqnarray*} 566 | C_{s}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\left(\gamma_{1}(i)\gamma_{1}(j)+\gamma_{1}(j)\gamma_{1}(i)\right)\\ 567 | & & +W_{T_{2}}\left(\gamma_{2}(i)\gamma_{2}(j)+\gamma_{2}(j)\gamma_{2}(i)\right), 568 | \end{eqnarray*} 569 | 570 | \end_inset 571 | 572 | where 573 | \begin_inset Formula $\gamma_{k}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i)$ 574 | \end_inset 575 | 576 | . 577 | Then, the steps of Eqs. 578 | 579 | \begin_inset CommandInset ref 580 | LatexCommand eqref 581 | reference "eq:Gamma_Reformulation_0" 582 | 583 | \end_inset 584 | 585 | - 586 | \begin_inset CommandInset ref 587 | LatexCommand eqref 588 | reference "eq:Gamma_Reformulation_3" 589 | 590 | \end_inset 591 | 592 | can be repeated in the same way. 593 | For the asymmetric case, Eqs. 
594 | 595 | \begin_inset CommandInset ref 596 | LatexCommand eqref 597 | reference "eq:Splitting_Cij_0" 598 | 599 | \end_inset 600 | 601 | - 602 | \begin_inset CommandInset ref 603 | LatexCommand eqref 604 | reference "eq:Splitting_Cij_4" 605 | 606 | \end_inset 607 | 608 | yield the expression 609 | \end_layout 610 | 611 | \begin_layout Standard 612 | \begin_inset Formula 613 | \begin{eqnarray*} 614 | C_{a}(i,j) & = & S_{T_{1}}(i,j)+S_{T_{2}}(i,j)+W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)\\ 615 | & & -\gamma_{1}^{x}(i)\sum_{t=1}^{T_{1}}w_{t}\left(y_{t}(j)-\overline{y_{T_{1}}}(j)\right)-\gamma_{1}^{y}(j)\sum_{t=1}^{T_{1}}w_{t}\left(x_{t}(i)-\overline{x_{T_{1}}}(i)\right)\\ 616 | & & -\gamma_{2}^{x}(i)\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(y_{t}(j)-\overline{y_{T_{2}}}(j)\right)-\gamma_{2}^{y}(j)\sum_{t=T_{1}+1}^{T_{2}}w_{t}\left(x_{t}(i)-\overline{x_{T_{2}}}(i)\right). 617 | \end{eqnarray*} 618 | 619 | \end_inset 620 | 621 | Here, we have used 622 | \begin_inset Formula $\gamma_{k}^{x}(i)=\overline{x}(i)-\overline{x_{T_{k}}}(i),\,\gamma_{k}^{y}(i)=\overline{y}(i)-\overline{y_{T_{k}}}(i)$ 623 | \end_inset 624 | 625 | . 626 | The cross-terms cancel out and the expression 627 | \begin_inset Formula $W_{T_{1}}\gamma_{1}^{x}(i)\gamma_{1}^{y}(j)+W_{T_{2}}\gamma_{2}^{x}(i)\gamma_{2}^{y}(j)$ 628 | \end_inset 629 | 630 | can be reformulated through Eqs. 631 | 632 | \begin_inset CommandInset ref 633 | LatexCommand eqref 634 | reference "eq:Gamma_Reformulation_0" 635 | 636 | \end_inset 637 | 638 | - 639 | \begin_inset CommandInset ref 640 | LatexCommand eqref 641 | reference "eq:Gamma_Reformulation_4" 642 | 643 | \end_inset 644 | 645 | to end up with Eq. 646 | 647 | \begin_inset CommandInset ref 648 | LatexCommand eqref 649 | reference "eq:Update_Asym" 650 | 651 | \end_inset 652 | 653 | . 
654 | \end_layout 655 | 656 | \end_body 657 | \end_document 658 | -------------------------------------------------------------------------------- /variational/estimators/moments.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Data Types 4 | ---------- 5 | The standard data type for covariance computations is 6 | float64, because the double precision (but not single precision) is 7 | usually sufficient to compute the long sums involved in covariance 8 | matrix computations. Integer types are avoided even if the data is integer, 9 | because the BLAS matrix multiplication is very fast with floats, but very 10 | slow with integers. If X is of boolean type (0/1), the standard data type 11 | is float32, because this will be sufficient to represent numbers up to 2^23 12 | without rounding error, which is usually sufficient sufficient as the 13 | largest element in np.dot(X.T, X) can then be T, the number of data points. 14 | 15 | Efficient Use 16 | ------------- 17 | In order to get speedup with boolean input, remove_mean=False is required. 18 | Note that you can still do TICA that way. 19 | 20 | Sparsification 21 | -------------- 22 | We aim at computing covariance matrices. For large (T x N) data matrices X, Y, 23 | the bottleneck of this operation is computing the matrix product np.dot(X.T, X), 24 | or np.dot(X.T, Y), with algorithmic complexity O(N^2 T). If X, Y have zero or 25 | constant columns, we can reduce N and thus reduce the algorithmic complexity. 26 | 27 | However, the BLAS matrix product used by np.dot() is highly Cache optimized - 28 | the data is accessed in a way that most operations are done in cache, making the 29 | calculation extremely efficient. 
Thus, even if X, Y have zero or constant columns, 30 | it does not always pay off to interfere with this operation - on the one hand by 31 | spending compute time to determine the sparsity of the matrices, on the other 32 | hand by using slicing operations that reduce the algorithmic complexity, but may 33 | destroy the order of the data and thus produce more cache failures. 34 | 35 | In order to make an informed decision, we have compared the runtime of the following 36 | operations using matrices of various different sizes (T x N) and different degrees 37 | of sparsity. (using an Intel Core i7 with OS/X 10.10.1): 38 | 39 | 1. Compute np.dot(X.T, X) 40 | 2. Compute np.dot(X[:, sel].T, X[:, sel]) where sel selects the nonzero columns 41 | 3. Make a copy X0 = X[:, sel].copy() and then compute np.dot(X0.T, X0) 42 | 43 | It may seem that step 3 is not a good idea because we make the extra effort of 44 | copying the matrix. However, the new copy will have data ordered sequentially in 45 | memory, and therefore better prepared for the algorithmically more expensive but 46 | cache-optimized matrix product. 47 | 48 | We have empirically found that: 49 | 50 | * Making a copy before running np.dot (option 3) is in most cases better than 51 | using the dot product on sliced arrays (option 2). Exceptions are when the 52 | data is extremely sparse, such that only a few columns are selected. 53 | * Copying and subselecting columns (option 3) is only faster than the full 54 | dot product (option 1), if 50% or less columns are selected. This observation 55 | is roughly independent of N. 56 | * The observations above are valid for matrices (T x N) that are sufficiently 57 | large. We assume that "sufficiently large" means that they don't fully fit 58 | in the cache. For small matrices, the trends are less clear and different 59 | rules may apply. 60 | 61 | In order to optimize covariance calculation for large matrices, we therefore 62 | take the following actions: 63 | 64 | 1. 
Given matrix size of X (and Y), determine the minimum number of columns 65 | that need to be constant in order to use sparse computation. 66 | 2. Efficiently determine sparsity of X (and Y). Give up as soon as the 67 | number of constant column candidates drops below the minimum number, to 68 | avoid wasting time on the decision. 69 | 3. Subselect the desired columns and copy the data to a new array X0 (Y0). 70 | 4. Run operation on the new array X0 (Y0), including in-place substraction 71 | of the mean if needed. 72 | 73 | """ 74 | from __future__ import absolute_import 75 | 76 | __author__ = 'noe' 77 | 78 | import math, sys, numbers, warnings 79 | import numpy as np 80 | from .covar_c import covartools 81 | 82 | 83 | def _is_zero(x): 84 | """ Returns True if x is numerically 0 or an array with 0's. """ 85 | if x is None: 86 | return True 87 | if isinstance(x, numbers.Number): 88 | return x == 0.0 89 | if isinstance(x, np.ndarray): 90 | return np.all(x == 0) 91 | return False 92 | 93 | 94 | def _sparsify(X, remove_mean=False, modify_data=False, sparse_mode='auto', sparse_tol=0.0): 95 | """ Determines the sparsity of X and returns a selected sub-matrix 96 | 97 | Only conducts sparsification if the number of constant columns is at least 98 | max(a N - b, min_const_col_number), 99 | 100 | Parameters 101 | ---------- 102 | X : ndarray 103 | data matrix 104 | remove_mean : bool 105 | True: remove column mean from the data, False: don't remove mean. 106 | modify_data : bool 107 | If remove_mean=True, the mean will be removed in the data matrix X, 108 | without creating an independent copy. This option is faster but might 109 | lead to surprises because your input array is changed. 
110 | sparse_mode : str 111 | one of: 112 | * 'dense' : always use dense mode 113 | * 'sparse' : always use sparse mode if possible 114 | * 'auto' : automatic 115 | 116 | Returns 117 | ------- 118 | X0 : ndarray (view of X) 119 | Either X itself (if not sufficiently sparse), or a sliced view of X, 120 | containing only the variable columns 121 | mask : ndarray(N, dtype=bool) or None 122 | Bool selection array that indicates which columns of X were selected for 123 | X0, i.e. X0 = X[:, mask]. mask is None if no sparse selection was made. 124 | xconst : ndarray(N) 125 | Constant column values that are outside the sparse selection, i.e. 126 | X[i, ~mask] = xconst for any row i. xconst=0 if no sparse selection was made. 127 | 128 | """ 129 | if sparse_mode.lower() == 'sparse': 130 | min_const_col_number = 0 # enforce sparsity. A single constant column will lead to sparse treatment 131 | elif sparse_mode.lower() == 'dense': 132 | min_const_col_number = X.shape[1] + 1 # never use sparsity 133 | else: 134 | if remove_mean and not modify_data: # in this case we have to copy the data anyway, and can be permissive 135 | min_const_col_number = max(0.1 * X.shape[1], 50) 136 | else: 137 | # This is a rough heuristic to choose a minimum column number for which sparsity may pay off. 138 | # This heuristic is good for large number of samples, i.e. it may be inadequate for small matrices X. 
139 | if X.shape[1] < 250: 140 | min_const_col_number = X.shape[1] - 0.25 * X.shape[1] 141 | elif X.shape[1] < 1000: 142 | min_const_col_number = X.shape[1] - (0.5 * X.shape[1] - 100) 143 | else: 144 | min_const_col_number = X.shape[1] - (0.8 * X.shape[1] - 400) 145 | 146 | if X.shape[1] > min_const_col_number: 147 | mask = covartools.variable_cols(X, tol=sparse_tol, min_constant=min_const_col_number) # bool vector 148 | nconst = len(np.where(~mask)[0]) 149 | if nconst > min_const_col_number: 150 | xconst = X[0, ~mask] 151 | X = X[:, mask] # sparsify 152 | else: 153 | xconst = None 154 | mask = None 155 | else: 156 | xconst = None 157 | mask = None 158 | 159 | return X, mask, xconst # None, 0 if not sparse 160 | 161 | 162 | def _sparsify_pair(X, Y, remove_mean=False, modify_data=False, symmetrize=False, sparse_mode='auto', sparse_tol=0.0): 163 | """ 164 | """ 165 | T = X.shape[0] 166 | N = math.sqrt(X.shape[1] * Y.shape[1]) 167 | # check each data set separately for sparsity. 168 | X0, mask_X, xconst = _sparsify(X, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 169 | Y0, mask_Y, yconst = _sparsify(Y, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 170 | # if we have nonzero constant columns and the number of samples is too small, do not treat as 171 | # sparse, because then the const-specialized dot product function doesn't pay off. 172 | is_const = not (_is_zero(xconst) and _is_zero(yconst)) 173 | if is_const and (symmetrize or not remove_mean) and 10*T < N: 174 | return X, None, None, Y, None, None 175 | else: 176 | return X0, mask_X, xconst, Y0, mask_Y, yconst 177 | 178 | 179 | def _copy_convert(X, const=None, remove_mean=False, copy=True): 180 | """ Makes a copy or converts the data type if needed 181 | 182 | Copies the data and converts the data type if unsuitable for covariance 183 | calculation. 
The standard data type for covariance computations is 184 | float64, because the double precision (but not single precision) is 185 | usually sufficient to compute the long sums involved in covariance 186 | matrix computations. Integer types are avoided even if the data is integer, 187 | because the BLAS matrix multiplication is very fast with floats, but very 188 | slow with integers. If X is of boolean type (0/1), the standard data type 189 | is float32, because this will be sufficient to represent numbers up to 2^23 190 | without rounding error, which is usually sufficient sufficient as the 191 | largest element in np.dot(X.T, X) can then be T, the number of data points. 192 | 193 | Parameters 194 | ---------- 195 | remove_mean : bool 196 | If True, will enforce float64 even if the input is boolean 197 | copy : bool 198 | If True, enforces a copy even if the data type doesn't require it. 199 | 200 | Return 201 | ------ 202 | X : ndarray 203 | copy or reference to X if no copy was needed. 204 | const : ndarray or None 205 | copy or reference to const if no copy was needed. 
206 | 207 | """ 208 | # determine type 209 | dtype = np.float64 # default: convert to float64 in order to avoid cancellation errors 210 | if X.dtype.kind == 'b' and X.shape[0] < 2**23 and not remove_mean: 211 | dtype = np.float32 # convert to float32 if we can represent all numbers 212 | # copy/convert if needed 213 | if X.dtype not in (np.float64, dtype): # leave as float64 (conversion is expensive), otherwise convert to dtype 214 | X = X.astype(dtype, order='C') 215 | if const is not None: 216 | const = const.astype(dtype, order='C') 217 | elif copy: 218 | X = X.copy(order='C') 219 | if const is not None: 220 | const = const.copy(order='C') 221 | 222 | return X, const 223 | 224 | 225 | def _sum_sparse(xsum, mask_X, xconst, T): 226 | s = np.zeros(len(mask_X)) 227 | s[mask_X] = xsum 228 | s[~mask_X] = T * xconst 229 | return s 230 | 231 | 232 | def _sum(X, xmask=None, xconst=None, Y=None, ymask=None, yconst=None, symmetric=False, remove_mean=False, 233 | weights=None): 234 | """ Computes the column sums and centered column sums. 235 | 236 | If symmetric = False, the sums will be determined as 237 | .. math: 238 | sx &=& \frac{1}{2} \sum_t x_t 239 | sy &=& \frac{1}{2} \sum_t y_t 240 | 241 | If symmetric, the sums will be determined as 242 | 243 | .. 
math: 244 | sx = sy = \frac{1}{2T} \sum_t x_t + y_t 245 | 246 | Returns 247 | ------- 248 | w : float 249 | statistical weight of sx, sy 250 | sx : ndarray 251 | effective row sum of X (including symmetrization if requested) 252 | sx_raw_centered : ndarray 253 | centered raw row sum of X 254 | 255 | optional returns (only if Y is given): 256 | 257 | sy : ndarray 258 | effective row sum of X (including symmetrization if requested) 259 | sy_raw_centered : ndarray 260 | centered raw row sum of Y 261 | 262 | """ 263 | T = X.shape[0] 264 | # Check if weights are given: 265 | if weights is not None: 266 | X = weights[:, None] * X 267 | if Y is not None: 268 | Y = weights[:, None] * Y 269 | # compute raw sums on variable data 270 | sx_raw = X.sum(axis=0) # this is the mean before subtracting it. 271 | sy_raw = 0 272 | if Y is not None: 273 | sy_raw = Y.sum(axis=0) 274 | 275 | # expand raw sums to full data 276 | if xmask is not None: 277 | if weights is not None: 278 | sx_raw = _sum_sparse(sx_raw, xmask, xconst, weights.sum()) 279 | else: 280 | sx_raw = _sum_sparse(sx_raw, xmask, xconst, T) 281 | if ymask is not None: 282 | if weights is not None: 283 | sy_raw = _sum_sparse(sy_raw, ymask, yconst, weights.sum()) 284 | else: 285 | sy_raw = _sum_sparse(sy_raw, ymask, yconst, T) 286 | 287 | # compute effective sums and centered sums 288 | if Y is not None and symmetric: 289 | sx = sx_raw + sy_raw 290 | sy = sx 291 | if weights is not None: 292 | w = 2*np.sum(weights) 293 | else: 294 | w = 2 * T 295 | else: 296 | sx = sx_raw 297 | sy = sy_raw 298 | if weights is not None: 299 | w = np.sum(weights) 300 | else: 301 | w = T 302 | 303 | sx_raw_centered = sx_raw.copy() 304 | if Y is not None: 305 | sy_raw_centered = sy_raw.copy() 306 | 307 | # center mean. 
308 | if remove_mean: 309 | if Y is not None and symmetric: 310 | sx_raw_centered -= 0.5 * sx 311 | sy_raw_centered -= 0.5 * sy 312 | else: 313 | sx_raw_centered = np.zeros(sx.size) 314 | if Y is not None: 315 | sy_raw_centered = np.zeros(sy.size) 316 | 317 | # return 318 | if Y is not None: 319 | return w, sx, sx_raw_centered, sy, sy_raw_centered 320 | else: 321 | return w, sx, sx_raw_centered 322 | 323 | 324 | def _center(X, w, s, mask=None, const=None, inplace=True): 325 | """ Centers the data. 326 | 327 | Parameters 328 | ---------- 329 | w : float 330 | statistical weight of s 331 | inplace : bool 332 | center in place 333 | 334 | Returns 335 | ------- 336 | sx : ndarray 337 | uncentered row sum of X 338 | sx_centered : ndarray 339 | row sum of X after centering 340 | 341 | optional returns (only if Y is given): 342 | 343 | sy_raw : ndarray 344 | uncentered row sum of Y 345 | sy_centered : ndarray 346 | row sum of Y after centering 347 | 348 | """ 349 | xmean = s / float(w) 350 | if mask is None: 351 | X = covartools.subtract_row(X, xmean, inplace=inplace) 352 | else: 353 | X = covartools.subtract_row(X, xmean[mask], inplace=inplace) 354 | if inplace: 355 | const = np.subtract(const, xmean[~mask], const) 356 | else: 357 | const = np.subtract(const, xmean[~mask]) 358 | 359 | return X, const 360 | 361 | 362 | # ==================================================================================== 363 | # SECOND MOMENT MATRICES / COVARIANCES 364 | # ==================================================================================== 365 | 366 | def _M2_dense(X, Y, weights=None): 367 | """ 2nd moment matrix using dense matrix computations. 
368 | 369 | This function is encapsulated such that we can make easy modifications of the basic algorithms 370 | 371 | """ 372 | if weights is not None: 373 | return np.dot((weights[:, None] * X).T, Y) 374 | else: 375 | return np.dot(X.T, Y) 376 | 377 | 378 | def _M2_const(Xvar, mask_X, xvarsum, xconst, Yvar, mask_Y, yvarsum, yconst, weights=None): 379 | """ Computes the unnormalized covariance matrix between X and Y, exploiting constant input columns 380 | 381 | Computes the unnormalized covariance matrix :math:`C = X^\top Y` 382 | (for symmetric=False) or :math:`C = \frac{1}{2} (X^\top Y + Y^\top X)` 383 | (for symmetric=True). Suppose the data matrices can be column-permuted 384 | to have the form 385 | 386 | .. math: 387 | X &=& (X_{\mathrm{var}}, X_{\mathrm{const}}) 388 | Y &=& (Y_{\mathrm{var}}, Y_{\mathrm{const}}) 389 | 390 | with rows: 391 | 392 | .. math: 393 | x_t &=& (x_{\mathrm{var},t}, x_{\mathrm{const}}) 394 | y_t &=& (y_{\mathrm{var},t}, y_{\mathrm{const}}) 395 | 396 | where :math:`x_{\mathrm{const}},\:y_{\mathrm{const}}` are constant vectors. 397 | The resulting matrix has the general form: 398 | 399 | .. math: 400 | C &=& [X_{\mathrm{var}}^\top Y_{\mathrm{var}} x_{sum} y_{\mathrm{const}}^\top ] 401 | & & [x_{\mathrm{const}}^\top y_{sum}^\top x_{sum} x_{sum}^\top ] 402 | 403 | where :math:`x_{sum} = \sum_t x_{\mathrm{var},t}` and 404 | :math:`y_{sum} = \sum_t y_{\mathrm{var},t}`. 405 | 406 | Parameters 407 | ---------- 408 | Xvar : ndarray (T, m) 409 | Part of the data matrix X with :math:`m \le M` variable columns. 410 | mask_X : ndarray (M) 411 | Boolean array of size M of the full columns. False for constant column, 412 | True for variable column in X. 413 | xvarsum : ndarray (m) 414 | Column sum of variable part of data matrix X 415 | xconst : ndarray (M-m) 416 | Values of the constant part of data matrix X 417 | Yvar : ndarray (T, n) 418 | Part of the data matrix Y with :math:`n \le N` variable columns. 
419 | mask_Y : ndarray (N) 420 | Boolean array of size N of the full columns. False for constant column, 421 | True for variable column in Y. 422 | yvarsum : ndarray (n) 423 | Column sum of variable part of data matrix Y 424 | yconst : ndarray (N-n) 425 | Values of the constant part of data matrix Y 426 | weights : None or ndarray (N) 427 | weights for all time steps. 428 | 429 | Returns 430 | ------- 431 | C : ndarray (M, N) 432 | Unnormalized covariance matrix. 433 | 434 | """ 435 | C = np.zeros((len(mask_X), len(mask_Y))) 436 | # Block 11 437 | C[np.ix_(mask_X, mask_Y)] = _M2_dense(Xvar, Yvar, weights=weights) 438 | # other blocks 439 | xsum_is_0 = _is_zero(xvarsum) 440 | ysum_is_0 = _is_zero(yvarsum) 441 | xconst_is_0 = _is_zero(xconst) 442 | yconst_is_0 = _is_zero(yconst) 443 | # TODO: maybe we don't need the checking here, if we do the decision in the higher-level function M2 444 | # TODO: if not zero, we could still exploit the zeros in const and compute (and write!) this outer product 445 | # TODO: only to a sub-matrix 446 | # Block 12 and 21 447 | if weights is not None: 448 | wsum = np.sum(weights) 449 | xvarsum = np.sum(weights[:, None] * Xvar, axis=0) 450 | yvarsum = np.sum(weights[:, None] * Yvar, axis=0) 451 | else: 452 | wsum = Xvar.shape[0] 453 | if not (xsum_is_0 or yconst_is_0) or not (ysum_is_0 or xconst_is_0): 454 | C[np.ix_(mask_X, ~mask_Y)] = np.outer(xvarsum, yconst) 455 | C[np.ix_(~mask_X, mask_Y)] = np.outer(xconst, yvarsum) 456 | # Block 22 457 | if not (xconst_is_0 or yconst_is_0): 458 | C[np.ix_(~mask_X, ~mask_Y)] = np.outer(wsum*xconst, yconst) 459 | return C 460 | 461 | 462 | def _M2_sparse(Xvar, mask_X, Yvar, mask_Y, weights=None): 463 | """ 2nd moment matrix exploiting zero input columns """ 464 | C = np.zeros((len(mask_X), len(mask_Y))) 465 | C[np.ix_(mask_X, mask_Y)] = _M2_dense(Xvar, Yvar, weights=weights) 466 | return C 467 | 468 | 469 | def _M2_sparse_sym(Xvar, mask_X, Yvar, mask_Y, weights=None): 470 | """ 2nd self-symmetric 
moment matrix exploiting zero input columns 471 | 472 | Computes X'X + Y'Y and X'Y + Y'X 473 | 474 | """ 475 | assert len(mask_X) == len(mask_Y), 'X and Y need to have equal sizes for symmetrization' 476 | 477 | Cxxyy = np.zeros((len(mask_X), len(mask_Y))) 478 | Cxxyy[np.ix_(mask_X, mask_X)] = _M2_dense(Xvar, Xvar, weights=weights) 479 | Cxxyy[np.ix_(mask_Y, mask_Y)] += _M2_dense(Yvar, Yvar, weights=weights) 480 | 481 | Cxyyx = np.zeros((len(mask_X), len(mask_Y))) 482 | Cxy = _M2_dense(Xvar, Yvar, weights=weights) 483 | Cyx = _M2_dense(Yvar, Xvar, weights=weights) 484 | Cxyyx[np.ix_(mask_X, mask_Y)] = Cxy 485 | Cxyyx[np.ix_(mask_Y, mask_X)] += Cyx 486 | 487 | return Cxxyy, Cxyyx 488 | 489 | 490 | def _M2(Xvar, Yvar, mask_X=None, mask_Y=None, xsum=0, xconst=0, ysum=0, yconst=0, weights=None): 491 | """ direct (nonsymmetric) second moment matrix. Decide if we need dense, sparse, const""" 492 | if mask_X is None and mask_Y is None: 493 | return _M2_dense(Xvar, Yvar, weights=weights) 494 | else: 495 | # Check if one of the masks is not None, modify it and also adjust the constant columns: 496 | if mask_X is None: 497 | mask_X = np.ones(Xvar.shape[1], dtype=np.bool) 498 | xconst = np.ones(0, dtype=float) 499 | if mask_Y is None: 500 | mask_Y = np.ones(Yvar.shape[1], dtype=np.bool) 501 | yconst = np.ones(0, dtype=float) 502 | if _is_zero(xsum) and _is_zero(ysum) or _is_zero(xconst) and _is_zero(yconst): 503 | return _M2_sparse(Xvar, mask_X, Yvar, mask_Y, weights=weights) 504 | else: 505 | return _M2_const(Xvar, mask_X, xsum[mask_X], xconst, Yvar, mask_Y, ysum[mask_Y], yconst, weights=weights) 506 | 507 | 508 | def _M2_symmetric(Xvar, Yvar, mask_X=None, mask_Y=None, xsum=0, xconst=0, ysum=0, yconst=0, weights=None): 509 | """ symmetric second moment matrices. 
Decide if we need dense, sparse, const""" 510 | if mask_X is None and mask_Y is None: 511 | Cxxyy = _M2_dense(Xvar, Xvar, weights=weights) + _M2_dense(Yvar, Yvar, weights=weights) 512 | Cxy = _M2_dense(Xvar, Yvar, weights=weights) 513 | Cyx = _M2_dense(Yvar, Xvar, weights=weights) 514 | Cxyyx = Cxy + Cyx 515 | else: 516 | # Check if one of the masks is not None, modify it and also adjust the constant columns: 517 | if mask_X is None: 518 | mask_X = np.ones(Xvar.shape[1], dtype=np.bool) 519 | xconst = np.ones(0, dtype=float) 520 | if mask_Y is None: 521 | mask_Y = np.ones(Yvar.shape[1], dtype=np.bool) 522 | yconst = np.ones(0, dtype=float) 523 | if _is_zero(xsum) and _is_zero(ysum) or _is_zero(xconst) and _is_zero(yconst): 524 | Cxxyy, Cxyyx = _M2_sparse_sym(Xvar, mask_X, Yvar, mask_Y, weights=weights) 525 | else: 526 | xvarsum = xsum[mask_X] # to variable part 527 | yvarsum = ysum[mask_Y] # to variable part 528 | Cxxyy = _M2_const(Xvar, mask_X, xvarsum, xconst, Xvar, mask_X, xvarsum, xconst, weights=weights) \ 529 | + _M2_const(Yvar, mask_Y, yvarsum, yconst, Yvar, mask_Y, yvarsum, yconst, weights=weights) 530 | Cxy = _M2_const(Xvar, mask_X, xvarsum, xconst, Yvar, mask_Y, yvarsum, yconst, weights=weights) 531 | Cyx = _M2_const(Yvar, mask_Y, yvarsum, yconst, Xvar, mask_X, xvarsum, xconst, weights=weights) 532 | Cxyyx = Cxy + Cyx 533 | return Cxxyy, Cxyyx 534 | 535 | 536 | # ================================================= 537 | # USER API 538 | # ================================================= 539 | 540 | 541 | def moments_XX(X, remove_mean=False, modify_data=False, weights=None, sparse_mode='auto', sparse_tol=0.0): 542 | """ Computes the first two unnormalized moments of X 543 | 544 | Computes :math:`s = \sum_t x_t` and :math:`C = X^\top X` while exploiting 545 | zero or constant columns in the data matrix. 
546 | 547 | Parameters 548 | ---------- 549 | X : ndarray (T, M) 550 | Data matrix 551 | remove_mean : bool 552 | True: remove column mean from the data, False: don't remove mean. 553 | modify_data : bool 554 | If remove_mean=True, the mean will be removed in the data matrix X, 555 | without creating an independent copy. This option is faster but might 556 | lead to surprises because your input array is changed. 557 | weights: None or ndarray(T, ) 558 | weights assigned to each trajectory point. If None, all data points have weight one. 559 | If ndarray, each data point is assigned a separate weight. 560 | sparse_mode : str 561 | one of: 562 | * 'dense' : always use dense mode 563 | * 'sparse' : always use sparse mode if possible 564 | * 'auto' : automatic 565 | sparse_tol: float 566 | Threshold for considering column to be zero in order to save computing 567 | effort when the data is sparse or almost sparse. 568 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 569 | is not given) of the covariance matrix will be set to zero. If Y is 570 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 571 | covariance matrix will be set to zero. 572 | 573 | Returns 574 | ------- 575 | w : float 576 | statistical weight 577 | s : ndarray (M) 578 | sum 579 | C : ndarray (M, M) 580 | unnormalized covariance matrix 581 | 582 | """ 583 | # Check consistency of inputs: 584 | if weights is not None: 585 | assert X.shape[0] == weights.shape[0], 'X and weights_x must have equal length' 586 | # sparsify 587 | X0, mask_X, xconst = _sparsify(X, remove_mean=remove_mean, modify_data=modify_data, 588 | sparse_mode=sparse_mode, sparse_tol=sparse_tol) 589 | is_sparse = mask_X is not None 590 | # copy / convert 591 | # TODO: do we need to copy xconst? 
592 | X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean, 593 | copy=is_sparse or (remove_mean and not modify_data)) 594 | # sum / center 595 | w, sx, sx0_centered = _sum(X0, xmask=mask_X, xconst=xconst, symmetric=False, remove_mean=remove_mean, 596 | weights=weights) 597 | if remove_mean: 598 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering 599 | # TODO: we could make a second const check here. If after summation not enough zeros have appeared in the 600 | # TODO: consts, we switch back to dense treatment here. 601 | # compute covariance matrix 602 | C = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, xsum=sx0_centered, xconst=xconst, ysum=sx0_centered, yconst=xconst, 603 | weights=weights) 604 | return w, sx, C 605 | 606 | 607 | def moments_XXXY(X, Y, remove_mean=False, symmetrize=False, weights=None, 608 | modify_data=False, sparse_mode='auto', sparse_tol=0.0): 609 | """ Computes the first two unnormalized moments of X and Y 610 | 611 | If symmetrize is False, computes 612 | 613 | .. math: 614 | s_x &=& \sum_t x_t 615 | s_y &=& \sum_t y_t 616 | C_XX &=& X^\top X 617 | C_XY &=& X^\top Y 618 | 619 | If symmetrize is True, computes 620 | 621 | .. math: 622 | s_x = s_y &=& \frac{1}{2} \sum_t(x_t + y_t) 623 | C_XX &=& \frac{1}{2} (X^\top X + Y^\top Y) 624 | C_XY &=& \frac{1}{2} (X^\top Y + Y^\top X) 625 | 626 | while exploiting zero or constant columns in the data matrix. 627 | 628 | Parameters 629 | ---------- 630 | X : ndarray (T, M) 631 | Data matrix 632 | Y : ndarray (T, N) 633 | Second data matrix 634 | remove_mean : bool 635 | True: remove column mean from the data, False: don't remove mean. 636 | symmetrize : bool 637 | Computes symmetrized means and moments (see above) 638 | weights : None or ndarray(T, ) 639 | weights assigned to each trajectory point of X. If None, all data points have weight one. 640 | If ndarray, each data point is assigned a separate weight. 
641 | time_lagged : bool, 642 | indicates that Y is a time-lagged version of X. 643 | modify_data : bool 644 | If remove_mean=True, the mean will be removed in the data matrix X, 645 | without creating an independent copy. This option is faster but might 646 | lead to surprises because your input array is changed. 647 | sparse_mode : str 648 | one of: 649 | * 'dense' : always use dense mode 650 | * 'sparse' : always use sparse mode if possible 651 | * 'auto' : automatic 652 | sparse_tol: float 653 | Threshold for considering column to be zero in order to save computing 654 | effort when the data is sparse or almost sparse. 655 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 656 | is not given) of the covariance matrix will be set to zero. If Y is 657 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 658 | covariance matrix will be set to zero. 659 | 660 | Returns 661 | ------- 662 | w : float 663 | statistical weight 664 | s_x : ndarray (M) 665 | x-sum 666 | s_y : ndarray (N) 667 | y-sum 668 | C_XX : ndarray (M, M) 669 | unnormalized covariance matrix of X 670 | C_XY : ndarray (M, N) 671 | unnormalized covariance matrix of XY 672 | 673 | """ 674 | # Check consistency of inputs: 675 | if Y is not None: 676 | assert Y.shape[0] == X.shape[0], 'X and Y must have equal length.' 
677 | if weights is not None: 678 | assert X.shape[0] == weights.shape[0], 'X and weights_x must have equal length' 679 | # sparsify 680 | X0, mask_X, xconst, Y0, mask_Y, yconst = _sparsify_pair(X, Y, remove_mean=remove_mean, modify_data=modify_data, 681 | symmetrize=symmetrize, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 682 | is_sparse = mask_X is not None and mask_Y is not None 683 | # copy / convert 684 | copy = is_sparse or (remove_mean and not modify_data) 685 | X0, xconst = _copy_convert(X0, const=xconst, remove_mean=remove_mean, copy=copy) 686 | Y0, yconst = _copy_convert(Y0, const=yconst, remove_mean=remove_mean, copy=copy) 687 | # sum / center 688 | w, sx, sx_centered, sy, sy_centered = _sum(X0, xmask=mask_X, xconst=xconst, Y=Y0, ymask=mask_Y, yconst=yconst, 689 | symmetric=symmetrize, remove_mean=remove_mean, weights=weights) 690 | if remove_mean: 691 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering 692 | _center(Y0, w, sy, mask=mask_Y, const=yconst, inplace=True) # fast in-place centering 693 | 694 | if symmetrize: 695 | Cxx, Cxy = _M2_symmetric(X0, Y0, mask_X=mask_X, mask_Y=mask_Y, 696 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst, weights=weights) 697 | else: 698 | Cxx = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, 699 | xsum=sx_centered, xconst=xconst, ysum=sx_centered, yconst=xconst, weights=weights) 700 | Cxy = _M2(X0, Y0, mask_X=mask_X, mask_Y=mask_Y, 701 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst, weights=weights) 702 | 703 | return w, sx, sy, Cxx, Cxy 704 | 705 | 706 | def moments_block(X, Y, remove_mean=False, modify_data=False, 707 | sparse_mode='auto', sparse_tol=0.0): 708 | """ Computes the first two unnormalized moments of X and Y 709 | 710 | Computes 711 | 712 | .. 
math: 713 | s_x &=& \sum_t x_t 714 | s_y &=& \sum_t y_t 715 | C_XX &=& X^\top X 716 | C_XY &=& X^\top Y 717 | C_YX &=& Y^\top X 718 | C_YY &=& Y^\top Y 719 | 720 | while exploiting zero or constant columns in the data matrix. 721 | 722 | Parameters 723 | ---------- 724 | X : ndarray (T, M) 725 | Data matrix 726 | Y : ndarray (T, N) 727 | Second data matrix 728 | remove_mean : bool 729 | True: remove column mean from the data, False: don't remove mean. 730 | modify_data : bool 731 | If remove_mean=True, the mean will be removed in the data matrix X, 732 | without creating an independent copy. This option is faster but might 733 | lead to surprises because your input array is changed. 734 | sparse_mode : str 735 | one of: 736 | * 'dense' : always use dense mode 737 | * 'sparse' : always use sparse mode if possible 738 | * 'auto' : automatic 739 | sparse_tol: float 740 | Threshold for considering column to be zero in order to save computing 741 | effort when the data is sparse or almost sparse. 742 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 743 | is not given) of the covariance matrix will be set to zero. If Y is 744 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 745 | covariance matrix will be set to zero. 746 | 747 | Returns 748 | ------- 749 | w : float 750 | statistical weight of this estimation 751 | s : [ndarray (M), ndarray (M)] 752 | list of two elements with s[0]=sx and s[0]=sy 753 | C : [[ndarray(M,M), ndarray(M,N)], [ndarray(N,M),ndarray(N,N)]] 754 | list of two lists with two elements. 
755 | C[0,0] = Cxx, C[0,1] = Cxy, C[1,0] = Cyx, C[1,1] = Cyy 756 | 757 | """ 758 | # sparsify 759 | X0, mask_X, xconst = _sparsify(X, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 760 | Y0, mask_Y, yconst = _sparsify(Y, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 761 | # copy / convert 762 | copy = sparse_mode or (remove_mean and not modify_data) 763 | X0, xconst = _copy_convert(X0, const=xconst, copy=copy) 764 | Y0, yconst = _copy_convert(Y0, const=yconst, copy=copy) 765 | # sum / center 766 | w, sx, sx_centered, sy, sy_centered = _sum(X0, xmask=mask_X, xconst=xconst, Y=Y0, ymask=mask_Y, yconst=yconst, 767 | symmetric=False, remove_mean=remove_mean) 768 | if remove_mean: 769 | _center(X0, w, sx, mask=mask_X, const=xconst, inplace=True) # fast in-place centering 770 | _center(Y0, w, sy, mask=mask_Y, const=yconst, inplace=True) # fast in-place centering 771 | 772 | Cxx = _M2(X0, X0, mask_X=mask_X, mask_Y=mask_X, 773 | xsum=sx_centered, xconst=xconst, ysum=sx_centered, yconst=xconst) 774 | Cxy = _M2(X0, Y0, mask_X=mask_X, mask_Y=mask_Y, 775 | xsum=sx_centered, xconst=xconst, ysum=sy_centered, yconst=yconst) 776 | Cyy = _M2(Y0, Y0, mask_X=mask_Y, mask_Y=mask_Y, 777 | xsum=sy_centered, xconst=yconst, ysum=sy_centered, yconst=yconst) 778 | 779 | return w, [sx, sy], [[Cxx, Cxy], [Cxy.T, Cyy]] 780 | 781 | 782 | def covar(X, remove_mean=False, modify_data=False, weights=None, sparse_mode='auto', sparse_tol=0.0): 783 | """ Computes the covariance matrix of X 784 | 785 | Computes 786 | 787 | .. math: 788 | C_XX &=& X^\top X 789 | 790 | while exploiting zero or constant columns in the data matrix. 791 | WARNING: Directly use moments_XX if you can. This function does an additional 792 | constant-matrix multiplication and does not return the mean. 793 | 794 | Parameters 795 | ---------- 796 | X : ndarray (T, M) 797 | Data matrix 798 | remove_mean : bool 799 | True: remove column mean from the data, False: don't remove mean. 
800 | modify_data : bool 801 | If remove_mean=True, the mean will be removed in the data matrix X, 802 | without creating an independent copy. This option is faster but might 803 | lead to surprises because your input array is changed. 804 | weights : None or ndarray(T, ) 805 | weights assigned to each trajectory point of X. If None, all data points have weight one. 806 | If ndarray, each data point is assigned a separate weight. 807 | sparse_mode : str 808 | one of: 809 | * 'dense' : always use dense mode 810 | * 'sparse' : always use sparse mode if possible 811 | * 'auto' : automatic 812 | sparse_tol: float 813 | Threshold for considering column to be zero in order to save computing 814 | effort when the data is sparse or almost sparse. 815 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 816 | is not given) of the covariance matrix will be set to zero. If Y is 817 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 818 | covariance matrix will be set to zero. 819 | 820 | Returns 821 | ------- 822 | C_XX : ndarray (M, M) 823 | Covariance matrix of X 824 | 825 | See also 826 | -------- 827 | moments_XX 828 | 829 | """ 830 | w, s, M = moments_XX(X, remove_mean=remove_mean, weights=weights, modify_data=modify_data, 831 | sparse_mode=sparse_mode, sparse_tol=sparse_tol) 832 | return M / float(w) 833 | 834 | 835 | def covars(X, Y, remove_mean=False, modify_data=False, symmetrize=False, weights=None, sparse_mode='auto', 836 | sparse_tol=0.0): 837 | """ Computes the covariance and cross-covariance matrix of X and Y 838 | 839 | If symmetrize is False, computes 840 | 841 | .. math: 842 | C_XX &=& X^\top X 843 | C_XY &=& X^\top Y 844 | 845 | If symmetrize is True, computes 846 | 847 | .. math: 848 | C_XX &=& \frac{1}{2} (X^\top X + Y^\top Y) 849 | C_XY &=& \frac{1}{2} (X^\top Y + Y^\top X) 850 | 851 | while exploiting zero or constant columns in the data matrix. 852 | WARNING: Directly use moments_XXXY if you can. 
This function does an additional 853 | constant-matrix multiplication and does not return the mean. 854 | 855 | Parameters 856 | ---------- 857 | X : ndarray (T, M) 858 | Data matrix 859 | Y : ndarray (T, N) 860 | Second data matrix 861 | remove_mean : bool 862 | True: remove column mean from the data, False: don't remove mean. 863 | modify_data : bool 864 | If remove_mean=True, the mean will be removed in the data matrix X, 865 | without creating an independent copy. This option is faster but might 866 | lead to surprises because your input array is changed. 867 | symmetrize : bool 868 | Computes symmetrized means and moments (see above) 869 | weights : None or ndarray(T, ) 870 | weights assigned to each trajectory point of X. If None, all data points have weight one. 871 | If ndarray, each data point is assigned a separate weight. 872 | sparse_mode : str 873 | one of: 874 | * 'dense' : always use dense mode 875 | * 'sparse' : always use sparse mode if possible 876 | * 'auto' : automatic 877 | sparse_tol: float 878 | Threshold for considering column to be zero in order to save computing 879 | effort when the data is sparse or almost sparse. 880 | If max(abs(X[:, i])) < sparse_tol, then row i (and also column i if Y 881 | is not given) of the covariance matrix will be set to zero. If Y is 882 | given and max(abs(Y[:, i])) < sparse_tol, then column i of the 883 | covariance matrix will be set to zero. 884 | 885 | Returns 886 | ------- 887 | C_XX : ndarray (M, M) 888 | Covariance matrix of X 889 | C_XY : ndarray (M, N) 890 | Covariance matrix of XY 891 | 892 | See also 893 | -------- 894 | moments_XXXY 895 | 896 | """ 897 | w, sx, sy, Mxx, Mxy = moments_XXXY(X, Y, remove_mean=remove_mean, modify_data=modify_data, weights=weights, 898 | symmetrize=symmetrize, sparse_mode=sparse_mode, sparse_tol=sparse_tol) 899 | return Mxx / float(w), Mxy / float(w) 900 | --------------------------------------------------------------------------------