42 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015, Robert T. McGibbon
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 |
25 |
--------------------------------------------------------------------------------
/tests/test_2.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os.path
3 | import numpy as np
4 | import scipy.stats
5 | import matplotlib
6 | matplotlib.use('Agg')
7 | matplotlib.rc('font',family='serif')
8 | import matplotlib.pyplot as plt
9 | from covar import cov_shrink_ss, cov_shrink_rblw
10 |
11 | DIRNAME = os.path.dirname(os.path.realpath(__file__))
12 |
13 |
def test_1():
    """Plot eigenvalue spectra of the covariance estimators at several p/n.

    A (p = 100)-dimensional covariance matrix ``sigma`` is drawn from a
    Wishart distribution.  For each sample size N in (p/10, p/2, 2p, 10p),
    N points are sampled from a zero-mean normal with covariance ``sigma``
    and the descending eigenvalues of the sample covariance, of both
    shrinkage estimates, and of ``sigma`` itself are plotted in one subplot.
    The figure is saved as ``test_2.png`` in this file's directory.

    This is a smoke test: it makes no assertions about the estimates.
    """
    random = np.random.RandomState(0)
    p = 100
    sigma = scipy.stats.wishart(scale=np.eye(p), seed=random).rvs()
    Ns = [p // 10, p // 2, 2 * p, 10 * p]
    x = np.arange(p)
    # eigvalsh returns eigenvalues in ascending order; reverse once, outside
    # the loop, since sigma does not change between sample sizes.
    true_eigs = np.linalg.eigvalsh(sigma)[::-1]

    fig = plt.figure(figsize=(8, 8))
    try:
        for i, N in enumerate(Ns):
            X = random.multivariate_normal(mean=np.zeros(p), cov=sigma, size=N)
            S1 = np.cov(X.T)
            S2 = cov_shrink_ss(X)[0]
            # Reuse the sample covariance instead of computing it a second time.
            S3 = cov_shrink_rblw(S1, len(X))[0]

            plt.subplot(3, 2, i + 1)
            plt.title('p/n = %.1f' % (p / float(N)))

            plt.plot(x, np.linalg.eigvalsh(S2)[::-1], 'b', lw=2,
                     label='cov_shrink_ss')
            plt.plot(x, np.linalg.eigvalsh(S3)[::-1], 'g', alpha=0.7, lw=2,
                     label='cov_shrink_rblw')
            plt.plot(x, true_eigs, 'k--', lw=2, label='true')
            plt.plot(x, np.linalg.eigvalsh(S1)[::-1], 'r--', lw=2,
                     label='sample covariance')

            if i == 1:
                plt.legend(fontsize=10)

            plt.ylabel('Eigenvalue')

        # The caption belongs to the figure, not to a subplot: draw it once.
        plt.figtext(.05, .05,
"""Ordered eigenvalues of the sample covariance matrix (red),
cov_shrink_ss()-estimated covariance matrix (blue),
cov_shrink_rblw()-estimated covariance matrix (green), and
true eigenvalues (dashed black). The data generated by sampling
from a p-variate normal distribution for p=100 and various
ratios of p/n. Note that for the larger value of p/n, the
cov_shrink_rblw() estimator is identical to the sample
covariance matrix.""")

        plt.tight_layout()
        plt.savefig(os.path.join(DIRNAME, 'test_2.png'), dpi=300)
    finally:
        # pyplot keeps every figure registered until it is closed explicitly.
        plt.close(fig)
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
# Every target in this file is a command, not a file to be built; list them
# all so a stray file or directory of the same name cannot mask a target.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes xml pseudoxml linkcheck doctest coverage
23 |
# Print the list of available documentation targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  applehelp  to make an Apple Help Book"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
	@echo "  coverage   to run coverage check of the documentation (if enabled)"
50 |
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | html:
55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
56 | @echo
57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
58 |
59 | dirhtml:
60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
61 | @echo
62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
63 |
64 | singlehtml:
65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
66 | @echo
67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
68 |
69 | pickle:
70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
71 | @echo
72 | @echo "Build finished; now you can process the pickle files."
73 |
74 | json:
75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
76 | @echo
77 | @echo "Build finished; now you can process the JSON files."
78 |
79 | htmlhelp:
80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
81 | @echo
82 | @echo "Build finished; now you can run HTML Help Workshop with the" \
83 | ".hhp project file in $(BUILDDIR)/htmlhelp."
84 |
85 | qthelp:
86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
87 | @echo
88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/covar.qhcp"
91 | @echo "To view the help file:"
92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/covar.qhc"
93 |
94 | applehelp:
95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
96 | @echo
97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
98 | @echo "N.B. You won't be able to view it unless you put it in" \
99 | "~/Library/Documentation/Help or install it in your application" \
100 | "bundle."
101 |
102 | devhelp:
103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
104 | @echo
105 | @echo "Build finished."
106 | @echo "To view the help file:"
107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/covar"
108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/covar"
109 | @echo "# devhelp"
110 |
111 | epub:
112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
113 | @echo
114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
115 |
116 | latex:
117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
118 | @echo
119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
121 | "(use \`make latexpdf' here to do that automatically)."
122 |
123 | latexpdf:
124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
125 | @echo "Running LaTeX files through pdflatex..."
126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
128 |
129 | latexpdfja:
130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
131 | @echo "Running LaTeX files through platex and dvipdfmx..."
132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
134 |
135 | text:
136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
137 | @echo
138 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
139 |
140 | man:
141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
142 | @echo
143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
144 |
145 | texinfo:
146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
147 | @echo
148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
149 | @echo "Run \`make' in that directory to run these through makeinfo" \
150 | "(use \`make info' here to do that automatically)."
151 |
# Build the Texinfo sources, then run makeinfo on them via the generated
# Makefile in the output directory.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	# Use $(MAKE), as the latexpdf targets do, so that the sub-make inherits
	# the same make program and command-line flags.
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
157 |
158 | gettext:
159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
160 | @echo
161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
162 |
163 | changes:
164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
165 | @echo
166 | @echo "The overview file is in $(BUILDDIR)/changes."
167 |
168 | linkcheck:
169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
170 | @echo
171 | @echo "Link check complete; look for any errors in the above output " \
172 | "or in $(BUILDDIR)/linkcheck/output.txt."
173 |
174 | doctest:
175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
176 | @echo "Testing of doctests in the sources finished, look at the " \
177 | "results in $(BUILDDIR)/doctest/output.txt."
178 |
179 | coverage:
180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
181 | @echo "Testing of coverage in the sources finished, look at the " \
182 | "results in $(BUILDDIR)/coverage/python.txt."
183 |
184 | xml:
185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
186 | @echo
187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
188 |
189 | pseudoxml:
190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
191 | @echo
192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
193 |
--------------------------------------------------------------------------------
/covar.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport cython
3 | from scipy.linalg.cython_blas cimport dgemm
4 |
5 |
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def cov_shrink_ss(const double[:, ::1] X, shrinkage=None):
    r"""Compute a shrinkage estimate of the covariance matrix using
    the Schafer and Strimmer (2005) method.

    Parameters
    ----------
    X : array, shape=(n, p)
        Data matrix. Each row represents a data point, and each column
        represents a feature.
    shrinkage : float, optional
        The covariance shrinkage intensity (range 0-1). If shrinkage is not
        specified (the default) it is estimated using an analytic formula
        from Schafer and Strimmer (2005). A supplied value is clamped to
        the interval [0, 1]. For ``shrinkage=0`` the empirical covariance
        matrix is recovered.

    Returns
    -------
    cov : array, shape=(p, p)
        Estimated covariance matrix of the data.
    shrinkage : float
        The applied covariance shrinkage intensity.

    References
    ----------
    .. [1] Schafer, J., and K. Strimmer. 2005. A shrinkage approach to
       large-scale covariance estimation and implications for functional
       genomics. Statist. Appl. Genet. Mol. Biol. 4:32.
       http://doi.org/10.2202/1544-6115.1175

    Notes
    -----
    This shrinkage estimator corresponds to "Target D": (diagonal, unequal
    variance) as described in [1]. The estimator takes the form

    .. math::
        \hat{\Sigma} = (1-\gamma) \Sigma^{sample} + \gamma T,

    where :math:`\Sigma^{sample}` is the (noisy but unbiased) empirical
    covariance matrix,

    .. math::
        \Sigma^{sample}_{ij} = \frac{1}{n-1} \sum_{k=1}^n
            (x_{ki} - \bar{x}_i)(x_{kj} - \bar{x}_j),

    the matrix :math:`T` is the shrinkage target, a less noisy but biased
    estimator for the covariance, and the scalar :math:`\gamma \in [0, 1]` is
    the shrinkage intensity (regularization strength). This approach uses a
    diagonal shrinkage target, :math:`T`:

    .. math::
        T_{ij} = \begin{cases}
            \Sigma^{sample}_{ii} &\text{ if } i = j\\
            0 &\text{ otherwise},
        \end{cases}

    The idea is that by taking a weighted average of these two estimators, we
    can get a combined estimator which is more accurate than either is
    individually, especially when :math:`p` is large. The optimal weighting,
    :math:`\gamma`, is determined **automatically** by minimizing the mean
    squared error. See [1] for details on how this can be done. The formula
    for :math:`\gamma` is

    .. math::
        \gamma = \frac{\sum_{i \neq j} \hat{Var}(r_{ij})}{\sum_{i \neq j} r^2_{ij}}

    where :math:`r` is the sample correlation matrix,

    .. math::
        r_{ij} = \frac{\Sigma^{sample}_{ij}}{\sigma_i \sigma_j},

    and :math:`\hat{Var}(r_{ij})` is given by

    .. math::
        \hat{Var}(r_{ij}) = \frac{n}{(n-1)^3 \sigma_i^2 \sigma_j^2} \sum_{k=1}^n
            (w_{kij} - \bar{w}_{ij})^2,

    with :math:`w_{kij} = (x_{ki} - \bar{x}_i)(x_{kj} - \bar{x}_j)`, and
    :math:`\bar{w}_{ij} = \frac{1}{n}\sum_{k=1}^n w_{kij}`.

    This method is equivalent to the ``cov.shrink`` method in the R package
    ``corpcor``, if the argument ``lambda.var`` is set to ``0``. See
    https://cran.r-project.org/web/packages/corpcor/ for details.

    See Also
    --------
    cov_shrink_rblw : similar method, using a different shrinkage target,
        :math:`T`.
    sklearn.covariance.ledoit_wolf : very similar approach, but uses a different
        shrinkage target, :math:`T`.
    """
    cdef int n, p, i, j, k
    n, p = X.shape[0], X.shape[1]

    # Numerator and denominator of the gamma formula in the Notes above.
    cdef double gamma_num = 0
    cdef double gamma_den = 0
    # NOTE(review): s_ii is declared but never used below.
    cdef double s_ij, s_ii, gamma

    # Per-column mean and standard deviation (np.std with its default ddof).
    cdef double[::1] X_mean = np.mean(X, axis=0)
    cdef double[::1] X_std = np.std(X, axis=0)
    cdef double[:, ::1] X_meaned = np.empty_like(X)
    cdef double[:, ::1] w_ij_bar = np.zeros((p, p))
    cdef double[:, ::1] r = np.zeros((p, p))
    cdef double[:, ::1] var_r = np.zeros((p, p))
    cdef double[:, ::1] out = np.zeros((p, p))

    # Center every column: X_meaned[k, j] = x_kj - mean_j.
    for i in range(n):
        for j in range(p):
            X_meaned[i, j] = X[i, j] - X_mean[j]

    # w_ij_bar = (1/n) * X_meaned.T . X_meaned, i.e. the mean over k of w_kij.
    cy_dgemm_TN(X_meaned, X_meaned, w_ij_bar, 1.0/n)

    if shrinkage is not None:
        # Caller-supplied intensity, clamped to [0, 1].
        gamma = max(0.0, min(1.0, float(shrinkage)))
    else:
        # r_ij: n/(n-1) * w_ij_bar scaled by the two column standard deviations.
        for i in range(p):
            for j in range(p):
                r[i, j] = (n / ((n - 1.0) * X_std[i] * X_std[j])) * w_ij_bar[i, j]

        # Accumulate sum_k (w_kij - w_ij_bar)^2 for every pair (i, j).
        for k in range(n):
            for i in range(p):
                for j in range(p):
                    var_r[i,j] += (X_meaned[k,i]*X_meaned[k,j] - w_ij_bar[i,j])**2


        # Apply the n / ((n-1)^3 sigma_i^2 sigma_j^2) prefactor of Var-hat(r_ij).
        for i in range(p):
            for j in range(p):
                var_r[i,j] *= (n / ((n-1.0)**3 * X_std[i]*X_std[i]*X_std[j]*X_std[j]))


        # Only off-diagonal entries enter the shrinkage-intensity ratio.
        for i in range(p):
            for j in range(p):
                if i != j:
                    gamma_num += var_r[i,j]
                    gamma_den += r[i,j]**2

        # Estimated intensity, clamped to [0, 1].
        gamma = max(0, min(1, gamma_num / gamma_den))

    for i in range(p):
        for j in range(p):
            # s_ij: covariance with the 1/(n-1) normalisation.
            s_ij = (n / (n-1.0)) * w_ij_bar[i, j]
            out[i, j] = (1.0-gamma) * s_ij
            if i == j:
                # The diagonal target equals s_ii, so the diagonal is unshrunk.
                out[i, i] += gamma * s_ij
            # `-0` is the integer zero, so this matches any zero entry
            # (including a floating-point negative zero) and stores plain 0.
            if out[i, j] == -0:
                out[i, j] = 0

    return np.asarray(out), gamma
156 |
157 |
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def cov_shrink_rblw(const double[:, ::1] S, int n, shrinkage=None):
    r"""Compute a shrinkage estimate of the covariance matrix using
    the Rao-Blackwellized Ledoit-Wolf estimator described by Chen et al.

    Parameters
    ----------
    S : array, shape=(p, p)
        Sample covariance matrix (e.g. estimated with np.cov(X.T))
    n : int
        Number of data points used in the estimate of S. Must be positive
        when the shrinkage intensity is estimated.
    shrinkage : float, optional
        The covariance shrinkage intensity (range 0-1). If shrinkage is not
        specified (the default) it is estimated using an analytic formula
        from Chen et al. (2009). A supplied value is clamped to the
        interval [0, 1].

    Returns
    -------
    sigma : array, shape=(p, p)
        Estimated shrunk covariance matrix
    shrinkage : float
        The applied covariance shrinkage intensity.

    Raises
    ------
    ValueError
        If ``S`` is not square, or if ``shrinkage`` is None and ``n < 1``.

    Notes
    -----
    This shrinkage estimator takes the form

    .. math::
        \hat{\Sigma} = (1-\gamma) \Sigma^{sample} + \gamma T

    where :math:`\Sigma^{sample}` is the (noisy but unbiased) empirical
    covariance matrix,

    .. math::
        \Sigma^{sample}_{ij} = \frac{1}{n-1} \sum_{k=1}^n
            (x_{ki} - \bar{x}_i)(x_{kj} - \bar{x}_j),

    the matrix :math:`T` is the shrinkage target, a less noisy but biased
    estimator for the covariance, and the scalar :math:`\gamma \in [0, 1]` is
    the shrinkage intensity (regularization strength). This approach uses a
    scaled identity target, :math:`T`:

    .. math::
        T = \frac{\mathrm{Tr}(S)}{p} I_p

    The shrinkage intensity, :math:`\gamma`, is determined using the RBLW
    estimator from [2]. The formula for :math:`\gamma` is

    .. math::
        \gamma = \min\left(\alpha + \frac{\beta}{U},\ 1\right)

    where :math:`\alpha`, :math:`\beta`, and :math:`U` are

    .. math::
        \alpha &= \frac{n-2}{n(n+2)} \\
        \beta  &= \frac{(p+1)n - 2}{n(n+2)} \\
        U      &= \frac{p\, \mathrm{Tr}(S^2)}{\mathrm{Tr}^2(S)} - 1

    One particularly useful property of this estimator is that it's **very
    fast**, because it doesn't require access to the data matrix at all (unlike
    :func:`cov_shrink_ss`). It only requires the sample covariance matrix
    and the number of data points `n`, as sufficient statistics.

    For reference, note that [2] defines another estimator, called the oracle
    approximating shrinkage estimator (OAS), but makes some mathematical errors
    during the derivation, and furthermore their example code published with
    the paper does not implement the proposed formulas.

    References
    ----------
    .. [2] Chen, Yilun, Ami Wiesel, and Alfred O. Hero III. "Shrinkage
       estimation of high dimensional covariance matrices" ICASSP (2009)
       http://doi.org/10.1109/ICASSP.2009.4960239

    See Also
    --------
    cov_shrink_ss : similar method, using a different shrinkage target, :math:`T`.
    sklearn.covariance.ledoit_wolf : very similar approach using the same
        shrinkage target, :math:`T`, but a different method for estimating the
        shrinkage intensity, :math:`\gamma`.

    """
    cdef int i, j
    cdef int p = S.shape[0]
    if S.shape[1] != p:
        raise ValueError('S must be a (p x p) matrix')

    # Both accumulators must start at zero: a C double declared with cdef is
    # not initialised implicitly.
    cdef double trace_S = 0   # np.trace(S)
    cdef double trace_S2 = 0  # np.trace(S.dot(S)) for symmetric S
    cdef double nf, alpha, beta, U, rho

    for i in range(p):
        trace_S += S[i, i]
        for j in range(p):
            trace_S2 += S[i, j] * S[i, j]

    if shrinkage is not None:
        # Caller-supplied intensity, clamped to [0, 1] as in cov_shrink_ss.
        rho = max(0.0, min(1.0, float(shrinkage)))
    else:
        if n < 1:
            raise ValueError('n must be a positive integer')
        # Do the arithmetic in floating point. With C-int operands under
        # cdivision(True), `/` can truncate (alpha would always be 0) and
        # (p+1)*n can overflow.
        nf = n
        alpha = (nf - 2.0) / (nf * (nf + 2.0))
        beta = ((p + 1.0) * nf - 2.0) / (nf * (nf + 2.0))
        U = (p * trace_S2 / (trace_S * trace_S)) - 1.0
        rho = max(0.0, min(alpha + beta / U, 1.0))

    # Shrinkage target: identity scaled by the mean variance Tr(S) / p.
    F = (trace_S / p) * np.eye(p)
    return (1-rho)*np.asarray(S) + rho*F, rho
262 |
263 |
264 |
265 | ############################# Private utilities #############################
266 |
@cython.boundscheck(False)
cdef inline int cy_dgemm_TN(double[:, ::1] a, double[:, ::1] b, double[:, ::1] c, double alpha=1.0, double beta=0.0) nogil:
    """C = beta*C + alpha*dot(A.T, B)

    ``a`` has shape (k, m), ``b`` has shape (k, n) and ``c`` has shape
    (m, n); all three are C-contiguous memoryviews and ``c`` is updated in
    place.

    Returns 0 on success, or -1 (leaving ``c`` untouched) when the shapes
    are not compatible.
    """
    cdef int m, k, n
    m = a.shape[1]
    k = a.shape[0]
    n = b.shape[1]
    # Shape check: a and b must share their row count, and c must be (m, n).
    if a.shape[0] != b.shape[0] or a.shape[1] != c.shape[0] or b.shape[1] != c.shape[1]:
        return -1

    # BLAS dgemm works on column-major matrices, so each row-major buffer is
    # seen by it as the transpose of the matrix it stores. Computing
    # C.T = B.T . A in that view (first operand b with "N", second operand a
    # with "T") therefore writes C = A.T . B into c without any copies.
    dgemm("N", "T", &n, &m, &k, &alpha, &b[0,0], &n, &a[0,0], &m, &beta, &c[0,0], &n)
    return 0
280 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # covar documentation build configuration file, created by
5 | # sphinx-quickstart on Wed Sep 30 19:48:28 2015.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | import sys
17 | import os
18 | import shlex
19 | import sphinx_rtd_theme
20 |
21 |
22 | # If extensions (or modules to document with autodoc) are in another directory,
23 | # add these directories to sys.path here. If the directory is relative to the
24 | # documentation root, use os.path.abspath to make it absolute, like shown here.
25 | #sys.path.insert(0, os.path.abspath('.'))
26 |
27 | # -- General configuration ------------------------------------------------
28 |
29 | # If your documentation needs a minimal Sphinx version, state it here.
30 | # needs_sphinx = '1.3'
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
# Sphinx extensions used to build the documentation.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.intersphinx',
    'numpydoc',
]

# Let autosummary generate the stub pages for documented objects.
autosummary_generate = True
# NOTE(review): this option belongs to sphinx.ext.napoleon, which is not in
# `extensions` above (numpydoc is used instead); kept in case napoleon is
# enabled later.
napoleon_numpy_docstring = True

# The original cdn.mathjax.org host was retired in 2017; load the same
# MathJax 2.x build and configuration from cdnjs instead.
mathjax_path = ('https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/'
                'MathJax.js?config=TeX-AMS-MML_HTMLorMML')

# Values read by the HTML theme to build the "Edit on GitHub" links.
html_context = {
    'display_github': True,
    'github_user': 'rmcgibbo',
    'github_repo': 'covar',
    "conf_py_path": '/docs/',
    'github_version': 'master',
    "source_suffix": '.rst',
}

# Resolve cross-references such as sklearn.covariance.ledoit_wolf against
# the scikit-learn documentation (fetched over HTTPS).
intersphinx_mapping = {'sklearn': ('https://scikit-learn.org/stable/', None)}
59 |
60 |
61 |
62 | # Add any paths that contain templates here, relative to this directory.
63 | templates_path = ['_templates']
64 |
65 | # The suffix(es) of source filenames.
66 | # You can specify multiple suffix as a list of string:
67 | # source_suffix = ['.rst', '.md']
68 | source_suffix = '.rst'
69 |
70 | # The encoding of source files.
71 | #source_encoding = 'utf-8-sig'
72 |
73 | # The master toctree document.
74 | master_doc = 'index'
75 |
76 | # General information about the project.
77 | project = 'covar'
78 | copyright = '2015, Robert T. McGibbon'
79 | author = 'Robert T. McGibbon'
80 |
81 | # The version info for the project you're documenting, acts as replacement for
82 | # |version| and |release|, also used in various other places throughout the
83 | # built documents.
84 | #
85 | # The short X.Y version.
86 | version = '0.2'
87 | # The full version, including alpha/beta/rc tags.
88 | release = '0.2'
89 |
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Set explicitly to English: recent Sphinx releases warn when this is None
# and fall back to 'en' anyway.
language = 'en'
96 |
97 | # There are two options for replacing |today|: either, you set today to some
98 | # non-false value, then it is used:
99 | #today = ''
100 | # Else, today_fmt is used as the format for a strftime call.
101 | #today_fmt = '%B %d, %Y'
102 |
103 | # List of patterns, relative to source directory, that match files and
104 | # directories to ignore when looking for source files.
105 | exclude_patterns = ['_build', '_templates']
106 |
107 | # The reST default role (used for this markup: `text`) to use for all
108 | # documents.
109 | #default_role = None
110 |
111 | # If true, '()' will be appended to :func: etc. cross-reference text.
112 | #add_function_parentheses = True
113 |
114 | # If true, the current module name will be prepended to all description
115 | # unit titles (such as .. function::).
116 | #add_module_names = True
117 |
118 | # If true, sectionauthor and moduleauthor directives will be shown in the
119 | # output. They are ignored by default.
120 | #show_authors = False
121 |
122 | # The name of the Pygments (syntax highlighting) style to use.
123 | pygments_style = 'sphinx'
124 |
125 | # A list of ignored prefixes for module index sorting.
126 | #modindex_common_prefix = []
127 |
128 | # If true, keep warnings as "system message" paragraphs in the built documents.
129 | #keep_warnings = False
130 |
131 | # If true, `todo` and `todoList` produce output, else they produce nothing.
132 | todo_include_todos = False
133 |
134 |
135 | # -- Options for HTML output ----------------------------------------------
136 |
137 | # The theme to use for HTML and HTML Help pages. See the documentation for
138 | # a list of builtin themes.
139 | html_theme = 'sphinx_rtd_theme'
140 |
141 | # Theme options are theme-specific and customize the look and feel of a theme
142 | # further. For a list of options available for each theme, see the
143 | # documentation.
144 | #html_theme_options = {}
145 |
146 | # Add any paths that contain custom themes here, relative to this directory.
147 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
148 |
149 |
150 | # The name for this set of Sphinx documents. If None, it defaults to
151 | # "<project> v<release> documentation".
152 | #html_title = None
153 |
154 | # A shorter title for the navigation bar. Default is the same as html_title.
155 | #html_short_title = None
156 |
157 | # The name of an image file (relative to this directory) to place at the top
158 | # of the sidebar.
159 | #html_logo = None
160 |
161 | # The name of an image file (within the static path) to use as favicon of the
162 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
163 | # pixels large.
164 | #html_favicon = None
165 |
166 | # Add any paths that contain custom static files (such as style sheets) here,
167 | # relative to this directory. They are copied after the builtin static files,
168 | # so a file named "default.css" will overwrite the builtin "default.css".
169 | html_static_path = ['_static']
170 |
171 | # Add any extra paths that contain custom files (such as robots.txt or
172 | # .htaccess) here, relative to this directory. These files are copied
173 | # directly to the root of the documentation.
174 | #html_extra_path = []
175 |
176 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
177 | # using the given strftime format.
178 | #html_last_updated_fmt = '%b %d, %Y'
179 |
180 | # If true, SmartyPants will be used to convert quotes and dashes to
181 | # typographically correct entities.
182 | #html_use_smartypants = True
183 |
184 | # Custom sidebar templates, maps document names to template names.
185 | #html_sidebars = {}
186 |
187 | # Additional templates that should be rendered to pages, maps page names to
188 | # template names.
189 | #html_additional_pages = {}
190 |
191 | # If false, no module index is generated.
192 | #html_domain_indices = True
193 |
194 | # If false, no index is generated.
195 | #html_use_index = True
196 |
197 | # If true, the index is split into individual pages for each letter.
198 | #html_split_index = False
199 |
200 | # If true, links to the reST sources are added to the pages.
201 | #html_show_sourcelink = True
202 |
203 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
204 | #html_show_sphinx = True
205 |
206 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
207 | #html_show_copyright = True
208 |
209 | # If true, an OpenSearch description file will be output, and all pages will
210 | # contain a <link> tag referring to it. The value of this option must be the
211 | # base URL from which the finished HTML is served.
212 | #html_use_opensearch = ''
213 |
214 | # This is the file name suffix for HTML files (e.g. ".xhtml").
215 | #html_file_suffix = None
216 |
217 | # Language to be used for generating the HTML full-text search index.
218 | # Sphinx supports the following languages:
219 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
220 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
221 | #html_search_language = 'en'
222 |
223 | # A dictionary with options for the search language support, empty by default.
224 | # Now only 'ja' uses this config value
225 | #html_search_options = {'type': 'default'}
226 |
227 | # The name of a javascript file (relative to the configuration directory) that
228 | # implements a search results scorer. If empty, the default will be used.
229 | #html_search_scorer = 'scorer.js'
230 |
231 | # Output file base name for HTML help builder.
232 | htmlhelp_basename = 'covardoc'
233 |
234 | # -- Options for LaTeX output ---------------------------------------------
235 |
236 | latex_elements = {
237 | # The paper size ('letterpaper' or 'a4paper').
238 | #'papersize': 'letterpaper',
239 |
240 | # The font size ('10pt', '11pt' or '12pt').
241 | #'pointsize': '10pt',
242 |
243 | # Additional stuff for the LaTeX preamble.
244 | #'preamble': '',
245 |
246 | # Latex figure (float) alignment
247 | #'figure_align': 'htbp',
248 | }
249 |
250 | # Grouping the document tree into LaTeX files. List of tuples
251 | # (source start file, target name, title,
252 | # author, documentclass [howto, manual, or own class]).
253 | latex_documents = [
254 | (master_doc, 'covar.tex', 'covar Documentation',
255 | 'Robert T. McGibbon', 'manual'),
256 | ]
257 |
258 | # The name of an image file (relative to this directory) to place at the top of
259 | # the title page.
260 | #latex_logo = None
261 |
262 | # For "manual" documents, if this is true, then toplevel headings are parts,
263 | # not chapters.
264 | #latex_use_parts = False
265 |
266 | # If true, show page references after internal links.
267 | #latex_show_pagerefs = False
268 |
269 | # If true, show URL addresses after external links.
270 | #latex_show_urls = False
271 |
272 | # Documents to append as an appendix to all manuals.
273 | #latex_appendices = []
274 |
275 | # If false, no module index is generated.
276 | #latex_domain_indices = True
277 |
278 |
279 | # -- Options for manual page output ---------------------------------------
280 |
281 | # One entry per manual page. List of tuples
282 | # (source start file, name, description, authors, manual section).
283 | man_pages = [
284 | (master_doc, 'covar', 'covar Documentation',
285 | [author], 1)
286 | ]
287 |
288 | # If true, show URL addresses after external links.
289 | #man_show_urls = False
290 |
291 |
292 | # -- Options for Texinfo output -------------------------------------------
293 |
294 | # Grouping the document tree into Texinfo files. List of tuples
295 | # (source start file, target name, title, author,
296 | # dir menu entry, description, category)
297 | texinfo_documents = [
298 | (master_doc, 'covar', 'covar Documentation',
299 | author, 'covar', 'One line description of project.',
300 | 'Miscellaneous'),
301 | ]
302 |
303 | # Documents to append as an appendix to all manuals.
304 | #texinfo_appendices = []
305 |
306 | # If false, no module index is generated.
307 | #texinfo_domain_indices = True
308 |
309 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
310 | #texinfo_show_urls = 'footnote'
311 |
312 | # If true, do not generate a @detailmenu in the "Top" node's menu.
313 | #texinfo_no_detailmenu = False
314 |
--------------------------------------------------------------------------------