├── .pypirc.enc ├── tests ├── test_2.png ├── test_1.py └── test_2.py ├── .gitignore ├── docs ├── _templates │ └── function.rst ├── index.rst ├── Makefile └── conf.py ├── README.md ├── .travis.yml ├── setup.py ├── LICENSE └── covar.pyx /.pypirc.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmcgibbo/covar/HEAD/.pypirc.enc -------------------------------------------------------------------------------- /tests/test_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmcgibbo/covar/HEAD/tests/test_2.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | covar.cpp 4 | covar.egg-info 5 | docs/generated 6 | docs/_build 7 | tests/__pycache__ 8 | -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | ``{{module}}.{{objname}}`` 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `covar`: shrinkage covariance estimation 2 | ======================================= 3 | [![Build Status](https://travis-ci.org/rmcgibbo/covar.svg?branch=master)](https://travis-ci.org/rmcgibbo/covar) 4 | 5 | 6 | This Python package contains two functions, `cov_shrink_ss` and `cov_shrink_rblw`, which implement 7 | plug-in shrinkage estimators for the covariance matrix. 8 | 9 | The `cov_shrink_ss` estimator is described by [Schafer and Strimmer (2005)](http://www.degruyter.com/view/j/sagmb.2005.4.1/sagmb.2005.4.1.1175/sagmb.2005.4.1.1175.xml), 10 | where it is called "Target D: (diagonal, unequal variance)". 11 | 12 | See the [documentation](https://pythonhosted.org/covar/) for more details. 13 | 14 | ### Installation 15 | 16 | ``` 17 | pip install covar 18 | ``` 19 | 20 | ### Dependencies 21 | 1. Python (2.7, or 3.3+) 22 | 2. numpy 23 | 3. scipy (0.16+) 24 | 4. 
cython 25 | -------------------------------------------------------------------------------- /tests/test_1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from covar import cov_shrink_ss 3 | 4 | from rpy2.robjects import r 5 | import rpy2.rinterface 6 | from rpy2.robjects.functions import SignatureTranslatedFunction 7 | from rpy2.robjects.packages import importr 8 | import rpy2.robjects.numpy2ri 9 | rpy2.robjects.numpy2ri.activate() 10 | 11 | 12 | def setup(): 13 | global corpcor 14 | try: 15 | corpcor = importr('corpcor') 16 | except rpy2.rinterface.RRuntimeError: 17 | r("install.packages('corpcor', repos='http://cran.us.r-project.org')") 18 | corpcor = importr('corpcor') 19 | 20 | 21 | def test_1(): 22 | for X in [np.random.randn(10,3), np.random.randn(100,3)]: 23 | r_result = corpcor.cov_shrink(X, lambda_var=0, verbose=False) 24 | py_result = cov_shrink_ss(X) 25 | 26 | np.testing.assert_array_almost_equal(r_result, py_result[0]) 27 | np.testing.assert_almost_equal(r.attr(r_result, 'lambda')[0], py_result[1]) 28 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | branches: 4 | only: 5 | - master 6 | 7 | before_install: 8 | - openssl aes-256-cbc -K $encrypted_cd6c015d3c94_key -iv $encrypted_cd6c015d3c94_iv -in .pypirc.enc -out ~/.pypirc -d 9 | 10 | install: 11 | - MINICONDA=Miniconda-latest-Linux-x86_64.sh 12 | - wget http://repo.continuum.io/miniconda/$MINICONDA 13 | - bash $MINICONDA -b 14 | - export PATH=$HOME/miniconda/bin:$PATH 15 | - conda config --add channels r 16 | - conda install --yes r nose numpy scipy cython rpy2 matplotlib 17 | 18 | env: 19 | matrix: 20 | - CONDA_PY=2.7 21 | - CONDA_PY=3.3 22 | - CONDA_PY=3.4 23 | - CONDA_PY=3.5 24 | 25 | script: 26 | - python setup.py install 27 | - nosetests 28 | 29 | after_success: 30 | - '[ 
"${CONDA_PY}" = "3.5" ] && 31 | [ "${TRAVIS_PULL_REQUEST}" = "false" ] && 32 | [ "$TRAVIS_BRANCH" != "master" ] && 33 | conda install --yes numpydoc sphinx sphinx_rtd_theme && 34 | python setup.py build_sphinx && 35 | python setup.py upload_docs --upload-dir build/sphinx/html/' 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Shrinkage covariance estimator for python 2 | """ 3 | from setuptools import setup, Extension 4 | from Cython.Distutils import build_ext 5 | 6 | 7 | ########################## 8 | VERSION = "0.2" 9 | __version__ = VERSION 10 | ########################## 11 | 12 | DOCLINES = __doc__.split("\n") 13 | CLASSIFIERS = """\ 14 | Development Status :: 3 - Alpha 15 | Intended Audience :: Science/Research 16 | Intended Audience :: Developers 17 | License :: OSI Approved :: BSD 18 | Programming Language :: Python 19 | Operating System :: OS Independent 20 | """ 21 | 22 | extensions = [ 23 | Extension('covar', ['covar.pyx'], language='c++'), 24 | ] 25 | 26 | setup( 27 | name='covar', 28 | author="Robert T. McGibbon", 29 | author_email='rmcgibbo@gmail.com', 30 | cmdclass={'build_ext': build_ext}, 31 | url="https://github.com/rmcgibbo/covar", 32 | description=DOCLINES[0], 33 | setup_requires=['cython', 'scipy >= 0.16', 'numpy >= 1.6'], 34 | install_requires=['scipy >= 0.16', 'numpy >= 1.6'], 35 | long_description="\n".join(DOCLINES[2:]), 36 | version=__version__, 37 | license='BSD', 38 | zip_safe=False, 39 | ext_modules=extensions, 40 | ) 41 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
currentmodule:: covar 2 | 3 | Covar: shrinkage covariance estimation 4 | ======================================= 5 | 6 | This Python package contains two functions, :func:`cov_shrink_ss` and 7 | :func:`cov_shrink_rblw`, which implement plug-in shrinkage estimators for the covariance matrix. 8 | 9 | The :func:`cov_shrink_ss` estimator is described by `Schafer and Strimmer (2005) <http://doi.org/10.2202/1544-6115.1175>`_, where it is 10 | called "Target D: (diagonal, unequal variance)". The :func:`cov_shrink_rblw` estimator is described by `Chen, Wiesel, and Hero (2009) <http://doi.org/10.1109/ICASSP.2009.4960239>`_. 11 | 12 | .. figure:: /../tests/test_2.png 13 | :width: 600 14 | 15 | Installation 16 | ~~~~~~~~~~~~ 17 | .. code-block:: bash 18 | 19 | $ pip install covar 20 | 21 | Dependencies 22 | ~~~~~~~~~~~~ 23 | Python (2.7, or 3.3+), Numpy (1.6 or later), Scipy (0.16 or later), Cython 24 | 25 | 26 | .. toctree:: 27 | :maxdepth: 1 28 | 29 | .. raw:: html 30 | 31 |
32 | 33 | .. autosummary:: 34 | :toctree: generated/ 35 | 36 | covar.cov_shrink_ss 37 | covar.cov_shrink_rblw 38 | 39 | .. raw:: html 40 | 41 |
42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Robert T. McGibbon 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | -------------------------------------------------------------------------------- /tests/test_2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os.path 3 | import numpy as np 4 | import scipy.stats 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | matplotlib.rc('font',family='serif') 8 | import matplotlib.pyplot as plt 9 | from covar import cov_shrink_ss, cov_shrink_rblw 10 | 11 | DIRNAME = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | 14 | def test_1(): 15 | random = np.random.RandomState(0) 16 | p = 100 17 | sigma = scipy.stats.wishart(scale=np.eye(p), seed=random).rvs() 18 | Ns = [int(x) for x in [p/10, p/2, 2*p, 10*p]] 19 | x = np.arange(p) 20 | 21 | plt.figure(figsize=(8,8)) 22 | 23 | for i, N in enumerate(Ns): 24 | X = random.multivariate_normal(mean=np.zeros(p), cov=sigma, size=N) 25 | S1 = np.cov(X.T) 26 | S2 = cov_shrink_ss(X)[0] 27 | S3 = cov_shrink_rblw(np.cov(X.T), len(X))[0] 28 | 29 | plt.subplot(3,2,i+1) 30 | plt.title('p/n = %.1f' % (p/N)) 31 | 32 | plt.plot(x, sorted(np.linalg.eigvalsh(S2), reverse=True), 'b', lw=2, label='cov_shrink_ss') 33 | plt.plot(x, sorted(np.linalg.eigvalsh(S3), reverse=True), 'g', alpha=0.7, lw=2, label='cov_shrink_rblw') 34 | plt.plot(x, sorted(np.linalg.eigvalsh(sigma), reverse=True), 'k--', lw=2, label='true') 35 | plt.plot(x, sorted(np.linalg.eigvalsh(S1), reverse=True), 'r--', lw=2, label='sample covariance') 36 | 37 | if i == 1: 38 | plt.legend(fontsize=10) 39 | 40 | # plt.ylim(max(plt.ylim()[0], 1e-4), plt.ylim()[1]) 41 | plt.figtext(.05, .05, 42 | """Ordered eigenvalues of the sample covariance matrix (red), 43 | cov_shrink_ss()-estimated covariance matrix (blue), 44 | cov_shrink_rblw()-estimated covariance matrix (green), and 45 | true eigenvalues (dashed black). The data generated by sampling 46 | from a p-variate normal distribution for p=100 and various 47 | ratios of p/n. 
Note that for the larger value of p/n, the 48 | cov_shrink_rblw() estimator is identical to the sample 49 | covariance matrix.""") 50 | 51 | # plt.yscale('log') 52 | plt.ylabel('Eigenvalue') 53 | 54 | plt.tight_layout() 55 | plt.savefig('%s/test_2.png' % DIRNAME, dpi=300) 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/covar.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/covar.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/covar" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/covar" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /covar.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport cython 3 | from scipy.linalg.cython_blas cimport dgemm 4 | 5 | 6 | @cython.boundscheck(False) 7 | @cython.wraparound(False) 8 | @cython.cdivision(True) 9 | def cov_shrink_ss(const double[:, ::1] X, shrinkage=None): 10 | r"""Compute a shrinkage estimate of the covariance matrix using 11 | the Schafer and Strimmer (2005) method. 12 | 13 | Parameters 14 | ---------- 15 | X : array, shape=(n, p) 16 | Data matrix. Each row represents a data point, and each column 17 | represents a feature. 18 | shrinkage : float, optional 19 | The covariance shrinkage intensity (range 0-1). 
If shrinkage is not 20 | specified (the default) it is estimated using an analytic formula 21 | from Schafer and Strimmer (2005). For ``shrinkage=0`` the empirical 22 | covariance matrix is recovered. Values outside [0, 1] are clamped. 23 | 24 | Returns 25 | ------- 26 | cov : array, shape=(p, p) 27 | Estimated covariance matrix of the data. 28 | shrinkage : float 29 | The applied covariance shrinkage intensity. 30 | 31 | References 32 | ---------- 33 | .. [1] Schafer, J., and K. Strimmer. 2005. A shrinkage approach to 34 | large-scale covariance estimation and implications for functional 35 | genomics. Statist. Appl. Genet. Mol. Biol. 4:32. 36 | http://doi.org/10.2202/1544-6115.1175 37 | 38 | Notes 39 | ----- 40 | This shrinkage estimator corresponds to "Target D": (diagonal, unequal 41 | variance) as described in [1]. The estimator takes the form 42 | 43 | .. math:: 44 | \hat{\Sigma} = (1-\gamma) \Sigma^{sample} + \gamma T, 45 | 46 | where :math:`\Sigma^{sample}` is the (noisy but unbiased) empirical 47 | covariance matrix, 48 | 49 | .. math:: 50 | \Sigma^{sample}_{ij} = \frac{1}{n-1} \sum_{k=1}^n 51 | (x_{ki} - \bar{x}_i)(x_{kj} - \bar{x}_j), 52 | 53 | the matrix :math:`T` is the shrinkage target, a less noisy but biased 54 | estimator for the covariance, and the scalar :math:`\gamma \in [0, 1]` is 55 | the shrinkage intensity (regularization strength). This approach uses a 56 | diagonal shrinkage target, :math:`T`: 57 | 58 | .. math:: 59 | T_{ij} = \begin{cases} 60 | \Sigma^{sample}_{ii} &\text{ if } i = j\\ 61 | 0 &\text{ otherwise}, 62 | \end{cases} 63 | 64 | The idea is that by taking a weighted average of these two estimators, we 65 | can get a combined estimator which is more accurate than either is 66 | individually, especially when :math:`p` is large. The optimal weighting, 67 | :math:`\gamma`, is determined **automatically** by minimizing the mean 68 | squared error. See [1] for details on how this can be done. The formula 69 | for :math:`\gamma` is 70 | 71 | .. 
math:: 72 | \gamma = \frac{\sum_{i \neq j} \hat{Var}(r_{ij})}{\sum_{i \neq j} r^2_{ij}} 73 | 74 | where :math:`r` is the sample correlation matrix, 75 | 76 | .. math:: 77 | r_{ij} = \frac{\Sigma^{sample}_{ij}}{\sigma_i \sigma_j}, 78 | 79 | and :math:`\hat{Var}(r_{ij})` is given by 80 | 81 | .. math:: 82 | \hat{Var}(r_{ij}) = \frac{n}{(n-1)^3 \sigma_i^2 \sigma_j^2} \sum_{k=1}^n 83 | (w_{kij} - \bar{w}_{ij})^2, 84 | 85 | with :math:`w_{kij} = (x_{ki} - \bar{x}_i)(x_{kj} - \bar{x}_j)`, and 86 | :math:`\bar{w}_{ij} = \frac{1}{n}\sum_{k=1}^n w_{kij}`. 87 | 88 | This method is equivalent to the ``cov.shrink`` method in the R package 89 | ``corpcor``, if the argument ``lambda.var`` is set to ``0``. See 90 | https://cran.r-project.org/web/packages/corpcor/ for details. 91 | 92 | See Also 93 | -------- 94 | cov_shrink_rblw : similar method, using a different shrinkage target, 95 | :math:`T`. 96 | sklearn.covariance.ledoit_wolf : very similar approach, but uses a different 97 | shrinkage target, :math:`T`. 
98 | """ 99 | cdef int n, p, i, j, k 100 | n, p = X.shape[0], X.shape[1] 101 | 102 | cdef double gamma_num = 0 103 | cdef double gamma_den = 0 104 | cdef double s_ij, s_ii, gamma 105 | 106 | cdef double[::1] X_mean = np.mean(X, axis=0) 107 | cdef double[::1] X_std = np.std(X, axis=0) 108 | cdef double[:, ::1] X_meaned = np.empty_like(X) 109 | cdef double[:, ::1] w_ij_bar = np.zeros((p, p)) 110 | cdef double[:, ::1] r = np.zeros((p, p)) 111 | cdef double[:, ::1] var_r = np.zeros((p, p)) 112 | cdef double[:, ::1] out = np.zeros((p, p)) 113 | 114 | for i in range(n): 115 | for j in range(p): 116 | X_meaned[i, j] = X[i, j] - X_mean[j] 117 | 118 | cy_dgemm_TN(X_meaned, X_meaned, w_ij_bar, 1.0/n) 119 | 120 | if shrinkage is not None: 121 | gamma = max(0.0, min(1.0, float(shrinkage))) 122 | else: 123 | for i in range(p): 124 | for j in range(p): 125 | r[i, j] = (n / ((n - 1.0) * X_std[i] * X_std[j])) * w_ij_bar[i, j] 126 | 127 | for k in range(n): 128 | for i in range(p): 129 | for j in range(p): 130 | var_r[i,j] += (X_meaned[k,i]*X_meaned[k,j] - w_ij_bar[i,j])**2 131 | 132 | 133 | for i in range(p): 134 | for j in range(p): 135 | var_r[i,j] *= (n / ((n-1.0)**3 * X_std[i]*X_std[i]*X_std[j]*X_std[j])) 136 | 137 | 138 | for i in range(p): 139 | for j in range(p): 140 | if i != j: 141 | gamma_num += var_r[i,j] 142 | gamma_den += r[i,j]**2 143 | 144 | gamma = max(0, min(1, gamma_num / gamma_den)) 145 | 146 | for i in range(p): 147 | for j in range(p): 148 | s_ij = (n / (n-1.0)) * w_ij_bar[i, j] 149 | out[i, j] = (1.0-gamma) * s_ij 150 | if i == j: 151 | out[i, i] += gamma * s_ij 152 | if out[i, j] == -0: 153 | out[i, j] = 0 154 | 155 | return np.asarray(out), gamma 156 | 157 | 158 | @cython.boundscheck(False) 159 | @cython.wraparound(False) 160 | @cython.cdivision(True) 161 | def cov_shrink_rblw(const double[:, ::1] S, int n, shrinkage=None): 162 | r"""Compute a shrinkage estimate of the covariance matrix using 163 | the Rao-Blackwellized Ledoit-Wolf estimator described by 
Chen et al. 164 | 165 | Parameters 166 | ---------- 167 | S : array, shape=(p, p) 168 | Sample covariance matrix (e.g. estimated with np.cov(X.T)) 169 | n : int 170 | Number of data points used in the estimate of S. 171 | shrinkage : float, optional 172 | The covariance shrinkage intensity (range 0-1). If shrinkage is not 173 | specified (the default) it is estimated using an analytic formula 174 | from Chen et al. (2009). Values outside [0, 1] are clamped. 175 | 176 | Returns 177 | ------- 178 | sigma : array, shape=(p, p) 179 | Estimated shrunk covariance matrix 180 | shrinkage : float 181 | The applied covariance shrinkage intensity. 182 | 183 | Notes 184 | ----- 185 | This shrinkage estimator takes the form 186 | 187 | .. math:: 188 | \hat{\Sigma} = (1-\gamma) \Sigma^{sample} + \gamma T 189 | 190 | where :math:`\Sigma^{sample}` is the (noisy but unbiased) empirical 191 | covariance matrix, 192 | 193 | .. math:: 194 | \Sigma^{sample}_{ij} = \frac{1}{n-1} \sum_{k=1}^n 195 | (x_{ki} - \bar{x}_i)(x_{kj} - \bar{x}_j), 196 | 197 | the matrix :math:`T` is the shrinkage target, a less noisy but biased 198 | estimator for the covariance, and the scalar :math:`\gamma \in [0, 1]` is 199 | the shrinkage intensity (regularization strength). This approach uses a 200 | scaled identity target, :math:`T`: 201 | 202 | .. math:: 203 | T = \frac{\mathrm{Tr}(S)}{p} I_p 204 | 205 | The shrinkage intensity, :math:`\gamma`, is determined using the RBLW 206 | estimator from [2]. The formula for :math:`\gamma` is 207 | 208 | .. math:: 209 | \gamma = \min(\alpha + \frac{\beta}{U}, 1) 210 | 211 | where :math:`\alpha`, :math:`\beta`, and :math:`U` are 212 | 213 | .. math:: 214 | \alpha &= \frac{n-2}{n(n+2)} \\ 215 | \beta &= \frac{(p+1)n - 2}{n(n+2)} \\ 216 | U &= \frac{p\, \mathrm{Tr}(S^2)}{\mathrm{Tr}^2(S)} - 1 217 | 218 | One particularly useful property of this estimator is that it's **very 219 | fast**, because it doesn't require access to the data matrix at all (unlike 220 | :func:`cov_shrink_ss`). 
It only requires the sample covariance matrix 221 | and the number of data points `n`, as sufficient statistics. 222 | 223 | For reference, note that [2] defines another estimator, called the oracle 224 | approximating shrinkage estimator (OAS), but makes some mathematical errors 225 | during the derivation, and furthermore their example code published with 226 | the paper does not implement the proposed formulas. 227 | 228 | References 229 | ---------- 230 | .. [2] Chen, Yilun, Ami Wiesel, and Alfred O. Hero III. "Shrinkage 231 | estimation of high dimensional covariance matrices" ICASSP (2009) 232 | http://doi.org/10.1109/ICASSP.2009.4960239 233 | 234 | See Also 235 | -------- 236 | cov_shrink_ss : similar method, using a different shrinkage target, :math:`T`. 237 | sklearn.covariance.ledoit_wolf : very similar approach using the same 238 | shrinkage target, :math:`T`, but a different method for estimating the 239 | shrinkage intensity, :math:`\gamma`. 240 | 241 | """ 242 | cdef int i, j 243 | cdef int p = S.shape[0] 244 | if S.shape[1] != p: 245 | raise ValueError('S must be a (p x p) matrix') 246 | 247 | cdef double alpha = (n-2.0)/(n*(n+2.0)) # float literals: n, p are C ints, avoid truncating integer division 248 | cdef double beta = ((p+1.0)*n - 2.0) / (n*(n+2.0)) 249 | 250 | cdef double trace_S = 0 # np.trace(S); must start at zero before accumulation 251 | cdef double trace_S2 = 0 # np.trace(S.dot(S)) 252 | for i in range(p): 253 | trace_S += S[i,i] 254 | for j in range(p): 255 | trace_S2 += S[i,j]*S[i,j] 256 | 257 | cdef double U = ((p * trace_S2 / (trace_S*trace_S)) - 1) 258 | cdef double rho = min(alpha + beta/U, 1.0) if shrinkage is None else max(0.0, min(1.0, float(shrinkage))) # honor an explicit shrinkage, clamped to [0, 1] 259 | 260 | F = (trace_S / p) * np.eye(p) 261 | return (1-rho)*np.asarray(S) + rho*F, rho 262 | 263 | 264 | 265 | ############################# Private utilities ############################# 266 | 267 | @cython.boundscheck(False) 268 | cdef inline int cy_dgemm_TN(double[:, ::1] a, double[:, ::1] b, double[:, ::1] c, double alpha=1.0, double beta=0.0) nogil: 269 | """C = beta*C + alpha*dot(A.T, B) 270 | """ 271 | cdef int m, k, n 272 | m = a.shape[1] 273 | k = 
a.shape[0] 274 | n = b.shape[1] 275 | if a.shape[0] != b.shape[0] or a.shape[1] != c.shape[0] or b.shape[1] != c.shape[1]: 276 | return -1 277 | 278 | dgemm("N", "T", &n, &m, &k, &alpha, &b[0,0], &n, &a[0,0], &m, &beta, &c[0,0], &n) 279 | return 0 280 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # covar documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Sep 30 19:48:28 2015. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | import shlex 19 | import sphinx_rtd_theme 20 | 21 | 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 25 | #sys.path.insert(0, os.path.abspath('.')) 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # needs_sphinx = '1.3' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 
35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.autosummary', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.intersphinx', 41 | 'numpydoc' 42 | ] 43 | 44 | autosummary_generate = True 45 | napoleon_numpy_docstring = True 46 | 47 | mathjax_path = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' 48 | 49 | html_context = { 50 | 'display_github': True, 51 | 'github_user': 'rmcgibbo', 52 | 'github_repo': 'covar', 53 | "conf_py_path": '/docs/', 54 | 'github_version': 'master', 55 | "source_suffix": '.rst', 56 | } 57 | 58 | intersphinx_mapping = {'sklearn': ('http://scikit-learn.org/stable/', None)} 59 | 60 | 61 | 62 | # Add any paths that contain templates here, relative to this directory. 63 | templates_path = ['_templates'] 64 | 65 | # The suffix(es) of source filenames. 66 | # You can specify multiple suffix as a list of string: 67 | # source_suffix = ['.rst', '.md'] 68 | source_suffix = '.rst' 69 | 70 | # The encoding of source files. 71 | #source_encoding = 'utf-8-sig' 72 | 73 | # The master toctree document. 74 | master_doc = 'index' 75 | 76 | # General information about the project. 77 | project = 'covar' 78 | copyright = '2015, Robert T. McGibbon' 79 | author = 'Robert T. McGibbon' 80 | 81 | # The version info for the project you're documenting, acts as replacement for 82 | # |version| and |release|, also used in various other places throughout the 83 | # built documents. 84 | # 85 | # The short X.Y version. 86 | version = '0.2' 87 | # The full version, including alpha/beta/rc tags. 88 | release = '0.2' 89 | 90 | # The language for content autogenerated by Sphinx. Refer to documentation 91 | # for a list of supported languages. 92 | # 93 | # This is also used if you do content translation via gettext catalogs. 94 | # Usually you set "language" from the command line for these cases. 
# Name the language explicitly: Sphinx 5+ warns about ``language = None`` and
# falls back to 'en' anyway, so 'en' gives the same output on every version.
language = 'en'

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '_templates']

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Directories searched for custom themes, relative to this directory. The
# single entry is whatever sphinx_rtd_theme reports as its theme path.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]


# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Directories holding custom static files (such as style sheets), relative to
# this directory.  They are copied after the builtin static files, so a file
# named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
#html_search_language = 'en'

# A dictionary with options for the search language support, empty by default.
# Now only 'ja' uses this config value
#html_search_options = {'type': 'default'}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = 'covardoc'

# -- Options for LaTeX output ---------------------------------------------

# Every LaTeX override is left at its default, so the mapping stays empty.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',

    # Latex figure (float) alignment
    #'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    # `author` holds the same name the tuple previously spelled out literally.
    (master_doc, 'covar.tex', 'covar Documentation', author, 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'covar', 'covar Documentation', [author], 1),
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files.
# List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    # The description replaces the unfilled sphinx-quickstart placeholder
    # with the project's one-line summary from the README.
    (master_doc, 'covar', 'covar Documentation',
     author, 'covar', 'Shrinkage covariance estimation.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False