├── .gitignore
├── DEPENDENCIES.txt
├── LICENSE
├── README.md
├── TODO.txt
├── copula_compatibility_problem.py
├── copulacdf.py
├── copulafit.py
├── copulamnsig.py
├── copulapdf.py
├── copularnd.py
├── copulastat.py
├── cvolume.py
├── debye.py
├── ecdf.py
├── invcopulastat.py
├── kde.py
├── matlab
    ├── copulacdf_test.m
    ├── copulacdf_test.mat
    ├── copulapdf_test.m
    ├── copulapdf_test.mat
    ├── copulastat_test.m
    └── copulastat_test.mat
├── multivariate_stats.py
├── plot_utils.py
└── rstable1.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # PyInstaller
26 | #  Usually these files are written by a python script from a template
27 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 | 
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 | 
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 | 
43 | # Translations
44 | *.mo
45 | *.pot
46 | 
47 | # Django stuff:
48 | *.log
49 | 
50 | # Sphinx documentation
51 | docs/_build/
52 | 
53 | # PyBuilder
54 | target/
55 | 


--------------------------------------------------------------------------------
/DEPENDENCIES.txt:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | #******************************************************************************
 5 | #* 
 6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
 7 | #*
 8 | #* This program is free software: you can redistribute it and/or modify
 9 | #* it under the terms of the GNU General Public License as published by
10 | #* the Free Software Foundation, either version 3 of the License, or
11 | #* (at your option) any later version.
12 | #*
13 | #* This program is distributed in the hope that it will be useful,
14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #* GNU General Public License for more details.
17 | #*
18 | #* You should have received a copy of the GNU General Public License
19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 | #******************************************************************************
21 | 
22 | This file lists the third-party Python packages required to run all of the code in this repository:
23 |   - scipy
24 |   - numpy
25 |   - matplotlib
26 |   - statsmodels
27 |   - pandas


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 
676 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # copula-bayesian-networks
2 | Code for implementing Copula Bayesian Networks
3 | 


--------------------------------------------------------------------------------
/TODO.txt:
--------------------------------------------------------------------------------
 1 | copulacdf.py
 2 |   [ ] - t copula
 3 |   
 4 | copularnd.py
 5 |   [ ] - t copula
 6 |   
 7 | copulafit.py
 8 |   [ ] - t copula PKTE estimation
 9 |   [ ] - MLE estimation for all copula types
10 |   [ ] - AMLE estimation for all copula types 
11 |   
12 | copulapdf.py
13 |   [ ] - 
14 |   
15 | copulastat.py
16 |   [ ] - t copula
17 |   
18 | invcopulastat.py
19 |   [ ] - t copula
20 |   
21 | copulamnsig.py
22 |   [ ] - a first order test on empirical multinomial signature generation


--------------------------------------------------------------------------------
/copula_compatibility_problem.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | #******************************************************************************
 5 | #* 
 6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
 7 | #*
 8 | #* This program is free software: you can redistribute it and/or modify
 9 | #* it under the terms of the GNU General Public License as published by
10 | #* the Free Software Foundation, either version 3 of the License, or
11 | #* (at your option) any later version.
12 | #*
13 | #* This program is distributed in the hope that it will be useful,
14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #* GNU General Public License for more details.
17 | #*
18 | #* You should have received a copy of the GNU General Public License
19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 | #******************************************************************************
21 | 
22 | import math
23 | import numpy as np
24 | 
25 | from scipy.stats import mvn                     # contains inverse CDF of Multivariate Gaussian
26 | from scipy.stats import norm                    # contains PDF of Gaussian
27 | 
28 | import copulacdf
29 | import plot_utils
30 | import matplotlib.pyplot as plt
31 | 
32 | """
 33 | This file showcases what is known as the compatibility problem with copulas.
34 | """
35 | 
36 | 
37 | """
 38 | Here, we showcase the compatibility problem by using a 3-Copula and calculating
 39 | its two-dimensional marginals.
40 | """
def ex1():
    """Plot the squared difference between the two-marginals of two 3-copulas.

    U1 is held at 1-eps while (U2, U3) range over an n-by-n grid.  The
    Gaussian copula CDF is evaluated on those points once with an identity
    correlation matrix and once with a correlated one, and the element-wise
    squared difference is handed to plot_utils.plot_3d.
    """
    n = 10
    eps = np.finfo(float).eps
    grid = np.linspace(0+eps, 1-eps, n)
    UU = np.meshgrid(grid, grid)
    X, Y = UU[0], UU[1]

    # flatten the grid into column vectors, one row per (u2, u3) point
    U2 = np.reshape(X, (X.shape[0]*X.shape[1], 1))
    U3 = np.reshape(Y, (Y.shape[0]*Y.shape[1], 1))
    U1 = np.ones(U2.shape)*(1-eps)
    U = np.concatenate((U1, U2, U3), axis=1)

    R_identity = np.array([[1,0,0],[0,1,0],[0,0,1]])
    R_correlated = np.array([[1,0.6,-0.3],[0.6,1,0.4],[-0.3,0.4,1]])

    C1_twomarginal = copulacdf.copulacdf('Gaussian', U, R_identity)
    C2_twomarginal = copulacdf.copulacdf('Gaussian', U, R_correlated)

    # squared error between the two two-marginals, reshaped back onto the grid
    sq_err_vec = (C2_twomarginal - C1_twomarginal)**2
    Z = np.reshape(sq_err_vec, X.shape)
    plot_utils.plot_3d(X, Y, Z, 'Two-Marginal Error')
69 |     
def ex2():
    """Plot the squared difference between the one-marginals of two 3-copulas.

    U1 and U2 are both held at 1-eps while U3 takes the flattened values of
    the second meshgrid output.  The Gaussian copula CDF is evaluated once
    with an identity correlation matrix and once with a correlated one, and
    the element-wise squared difference is handed to plot_utils.plot_3d.
    """
    n = 10
    eps = np.finfo(float).eps
    grid = np.linspace(0+eps, 1-eps, n)
    UU = np.meshgrid(grid, grid)
    X, Y = UU[0], UU[1]

    U3 = np.reshape(Y, (Y.shape[0]*Y.shape[1], 1))
    U1 = np.ones(U3.shape)*(1-eps)
    U2 = np.ones(U3.shape)*(1-eps)
    U = np.concatenate((U1, U2, U3), axis=1)

    R_identity = np.array([[1,0,0],[0,1,0],[0,0,1]])
    R_correlated = np.array([[1,0.6,-0.3],[0.6,1,0.4],[-0.3,0.4,1]])

    C1_onemarginal = copulacdf.copulacdf('Gaussian', U, R_identity)
    C2_onemarginal = copulacdf.copulacdf('Gaussian', U, R_correlated)

    # squared error between the two one-marginals, reshaped back onto the grid
    sq_err_vec = (C2_onemarginal - C1_onemarginal)**2
    Z = np.reshape(sq_err_vec, X.shape)
    plot_utils.plot_3d(X, Y, Z, 'One-Margin Error')
91 | 
if __name__ == '__main__':
    # run both demonstrations in order; each one ends with a plot_utils.plot_3d call
    for example in (ex1, ex2):
        example()


--------------------------------------------------------------------------------
/copulacdf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from scipy.stats import mvn                     # contains inverse CDF of Multivariate Gaussian
 26 | from scipy.stats import norm                    # contains PDF of Gaussian
 27 | from scipy.stats import t
 28 | from statsmodels.sandbox.distributions import multivariate as mvt
 29 | 
 30 | """
 31 | copulacdf.py contains routines which provide Copula CDF values 
 32 | """
 33 | 
 34 | def copulacdf(family, u, *args):
 35 |     """ Generates values of a requested copula family
 36 |     
 37 |     Inputs:
 38 |     u -- u is an N-by-P matrix of values in [0,1], representing N
 39 |          points in the P-dimensional unit hypercube.  
 40 |     
 41 |     rho -- a P-by-P correlation matrix, the first argument required for the Gaussian copula
 42 |     alpha -- a scalar argument describing the dependency for Frank, Gumbel, and Clayton copula's
 43 |     
 44 |     Outputs:
 45 |     y -- the value of the Gaussian Copula
 46 |     """
 47 |     n  = u.shape[0]
 48 |     p  = u.shape[1]
 49 | 
 50 |     num_var_args = len(args)
 51 |     family_lc = family.lower()
 52 |     if(family_lc=='gaussian'):
 53 |         if(num_var_args!=1):
 54 |             raise ValueError("Gaussian family requires one additional argument -- rho (correlation matrix) [P x P]")
 55 |         rho = args[0]
 56 |         rho_expected_shape = (p,p)
 57 |         if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape):
 58 |             raise ValueError("Gaussian family requires rho to be of type numpy.ndarray with shape=[P x P]")
 59 |         y = _gaussian(u, rho)
 60 |         
 61 |     elif(family_lc=='t'):
 62 |         if(num_var_args!=2):
 63 |             raise ValueError("T family requires two additional arguments -- rho (correlation matrix) [P x P] and nu [scalar]")
 64 |         rho = args[0]
 65 |         nu  = args[1]
 66 |         rho_expected_shape = (p,p)
 67 |         if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape):
 68 |             raise ValueError("T family requires rho to be of type numpy.ndarray with shape=[P x P]")
 69 |         y = _t(u, rho, nu)        
 70 |     elif(family_lc=='clayton'):
 71 |         if(num_var_args!=1):
 72 |             raise ValueError("Clayton family requires one additional argument -- alpha [scalar]")
 73 |         alpha = args[0]
 74 |         y = _clayton(u, alpha)
 75 |     elif(family_lc=='frank'):
 76 |         if(num_var_args!=1):
 77 |             raise ValueError("Frank family requires one additional argument -- alpha [scalar]")
 78 |         alpha = args[0]
 79 |         y = _frank(u, alpha)
 80 |     elif(family_lc=='gumbel'):
 81 |         if(num_var_args!=1):
 82 |             raise ValueError("Gumbel family requires one additional argument -- alpha [scalar]")
 83 |         alpha = args[0]
 84 |         y = _gumbel(u, alpha)
 85 |     else:
 86 |         raise ValueError("Unrecognized family of copula")
 87 |     
 88 |     return y
 89 | 
 90 | def _gaussian(u, rho):
 91 |     """ Generates values of the Gaussian copula
 92 |     
 93 |     Inputs:
 94 |     u -- u is an N-by-P matrix of values in [0,1], representing N
 95 |          points in the P-dimensional unit hypercube.  
 96 |     rho -- a P-by-P correlation matrix.
 97 |     
 98 |     Outputs:
 99 |     y -- the value of the Gaussian Copula
100 |     """
101 |     n  = u.shape[0]
102 |     p  = u.shape[1]
103 |     lo = np.full((1,p), -10)
104 |     hi = norm.ppf(u)
105 |     
106 |     mu = np.zeros(p)
107 |     
108 |     # need to use ppf(q, loc=0, scale=1) as replacement for norminv
109 |     # need to use mvn.mvnun as replacement for mvncdf
110 |     # the upper bound needs to be the output of the ppf call, right now it is set to random above
111 |     y = np.zeros(n)
112 |     # I don't know if mvnun is vectorized, I couldn't get that to work
113 |     for ii in np.arange(n):
114 |         # do some error checking.  if we have any -inf or inf values,
115 |         # 
116 |         p,i = mvn.mvnun(lo, hi[ii,:], mu, rho)
117 |         y[ii] = p
118 |     
119 |     return y
120 | 
121 | def _t(u, rho, nu):
122 |     """ Generates values of the T copula
123 |     
124 |     Inputs:
125 |     u -- u is an N-by-P matrix of values in [0,1], representing N
126 |          points in the P-dimensional unit hypercube.  
127 |     rho -- a P-by-P correlation matrix.
128 |     nu  -- degrees of freedom for T Copula
129 |     
130 |     Outputs:
131 |     y -- the value of the T Copula
132 |     """
133 |     n  = u.shape[0]
134 |     p  = u.shape[1]
135 |     loIntegrationVal = -40
136 |     lo = np.full((1,p), loIntegrationVal)        # more accuracy, but slower :/
137 |     hi = t.ppf(u, nu)
138 |     
139 |     mu = np.zeros(p)
140 |     
141 |     y = np.zeros(n)
142 |     for ii in np.arange(n):
143 |         x = hi[ii,:]
144 |         x[x<-40] = -40
145 |         p = mvt.mvstdtprob(lo[0], x, rho, nu)
146 |         y[ii] = p
147 |     
148 |     return y
149 |     
150 | def _clayton(u, alpha):
151 |     # C(u1,u2) = (u1^(-alpha) + u2^(-alpha) - 1)^(-1/alpha)
152 |     if(alpha<0):
153 |         raise ValueError("Clayton family -- invalid alpha argument. alpha must be >=0")
154 |     elif(alpha==0):
155 |         y = np.prod(u,1)
156 |     else:
157 |         tmp1 = np.power(u, -alpha) 
158 |         tmp2 = np.sum(tmp1,1) - 1
159 |         y = np.power(tmp2, -1.0/alpha)
160 |         
161 |     return y
162 | 
163 | def _frank(u, alpha):
164 |     # C(u1,u2) = -(1/alpha)*log(1 + (exp(-alpha*u1)-1)*(exp(-alpha*u2)-1)/(exp(-alpha)-1))
165 |     if(alpha==0):
166 |         y = np.prod(u,1)
167 |     else:
168 |         tmp1 = np.exp(-alpha*np.sum(u,1)) - np.sum(np.exp(-alpha*u),1)
169 |         y = -np.log( (np.exp(-alpha) + tmp1) / np.expm1(-alpha)) / alpha;
170 |         
171 |     return y
172 | 
173 | def _gumbel(u, alpha):
174 |     # C(u1,u2) = exp(-( (-log(u1))^alpha + (-log(u2))^alpha )^(1/alpha))
175 |     n = u.shape[0]
176 |     p = u.shape[1]
177 |     
178 |     if(alpha<1):
179 |         raise ValueError("Gumbel family -- invalid alpha argument. alpha must be >=1")
180 |     elif(alpha==1):
181 |         y = np.prod(u,1)
182 |     else:
183 |         # TODO: NaN checking like Matlab here would be nice :)
184 |         exparg = np.zeros(n)
185 |         for ii in np.arange(p):
186 |             tmp1 = np.power(-1*np.log(u[:,ii]), alpha)
187 |             exparg = exparg + tmp1
188 |         exparg = np.power(exparg, 1.0/alpha)
189 |         y = np.exp(-1*exparg)
190 |         
191 |     return y
192 | 
def test_python_vs_matlab(family):
    """ Compares copulacdf() output against reference values saved from Matlab

    The copula CDF of the requested family is evaluated on a 10-by-10 grid
    of (u1,u2) points and compared with the matching column stored in
    'matlab/copulacdf_test.mat'.  The outcome is printed and the Python
    result is plotted with plot_utils.plot_3d.

    Inputs:
    family -- copula family to check (case-insensitive), one of
              'Gaussian', 'T', 'Clayton', 'Frank' or 'Gumbel'

    Raises:
    ValueError -- if the family is not recognized
    """
    # imported here (not only under the __main__ guard) so that the function
    # also works when this module is imported rather than run as a script
    import scipy.io
    import plot_utils

    rho = 0.8
    nu = 2
    Rho = np.array([[1,rho],[rho,1]])

    # family -> (display name, dependency arguments passed to copulacdf,
    #            keyword arguments for np.allclose)
    test_cases = {
        'gaussian': ('Gaussian', (Rho,), {}),
        # a high tolerance is required b/c of the way that mvt is
        # implemented in python
        't': ('T', (Rho, nu), {'atol': 0.01}),
        'clayton': ('Clayton', (0.3,), {}),
        'frank': ('Frank', (0.3,), {}),
        'gumbel': ('Gumbel', (1.5,), {}),
    }
    family_lc = family.lower()
    if family_lc not in test_cases:
        raise ValueError("Unrecognized family of copula")
    display_name, dep_args, tol_kwargs = test_cases[family_lc]

    # generate all u1,u2 combinations
    n = 10
    eps = np.finfo(float).eps
    u = np.linspace(0+eps,1-eps,n)
    UU = np.meshgrid(u,u)
    U2 = np.reshape(UU[0], (UU[0].shape[0]*UU[0].shape[1], 1))
    U1 = np.reshape(UU[1], (UU[1].shape[0]*UU[1].shape[1], 1))
    U = np.concatenate((U1,U2),axis=1)

    # test the python data against Matlab
    # TODO: make python execute the matlab script which generates these samples
    matlab_data = scipy.io.loadmat('matlab/copulacdf_test.mat')
    cdf_matlab = matlab_data[family_lc + '_copula_cdf'][:,0]
    cdf_python = copulacdf(family, U, *dep_args)

    # compare the two; the single-argument print(...) form behaves the same
    # under Python 2 and Python 3
    if np.allclose(cdf_python, cdf_matlab, **tol_kwargs):
        print('%s Copula Python calculation matches Matlab!' % display_name)
    else:
        print('%s Copula Python calculation does NOT match Matlab!' % display_name)

    # plot the copula for fun
    X = UU[0]
    Y = UU[1]
    Z = np.reshape(cdf_python, UU[0].shape)
    plot_utils.plot_3d(X, Y, Z, '%s Copula CDF' % display_name)
313 | 
if __name__ == '__main__':
    # these modules are only needed by test_python_vs_matlab
    import scipy.io
    import plot_utils

    # other families that can be checked: 'Gaussian', 'Clayton', 'Frank', 'Gumbel'
    for family_to_check in ('T',):
        test_python_vs_matlab(family_to_check)
323 |     


--------------------------------------------------------------------------------
/copulafit.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | import multivariate_stats
 26 | from invcopulastat import invcopulastat
 27 | from scipy.stats import kendalltau
 28 | from numpy.linalg import eig
 29 | 
 30 | """
  31 | copulafit.py contains routines which use various techniques, as specified
  32 | by the user, to fit data to a family of copulas (i.e. find the dependency
  33 | parameter).
 34 | """
 35 | 
 36 | def copulafit(family, X, algorithm):
 37 |     """
 38 |     Attempts to determine the dependency parameter of the copula family
 39 |     type specified, using the algorithm that is specified for the data
 40 |     given by the matrix X
 41 |     
 42 |     Inputs:
 43 |       family -- the copula family to fit to, must be:
 44 |         'Gaussian'
 45 |         't'
 46 |         'Clayton'
 47 |         'Gumbel'
 48 |         'Frank'
 49 |       X -- the data to determine the copula dependency parameter for. Must be
 50 |            a numpy array of shape = M x N, where M is the number of samples 
 51 |            and N is the dimensionality of the data
 52 |       algorithm -- must be one of the following strings:
 53 |         'MLE'  - Maximum Likelihood method
 54 |         'AMLE' - Approximate Maximum Likelihood method
 55 |         'PKTE' - Use's Pairwise Kendall's Tau estimator's relationship to the 
 56 |                        copula family's dependency parameter (only applicalble
 57 |                        to Clayton, Gumbel, or Frank copula's currently)
 58 |                        
 59 |     Outputs:
 60 |       the dependency parameter for the copula
 61 |                        
 62 |     """
 63 |     algorithm_lc  = algorithm.lower()
 64 |     family_lc     = family.lower()
 65 |     dep_param_est = None
 66 |     if(algorithm_lc=='MLE'):
 67 |         raise ValueError('MLE method not yet supported!')
 68 |     elif(algorithm_lc=='AMLE'):
 69 |         raise ValueError('Approximate MLE method not yet supported!')
 70 |     elif(algorithm_lc=='PKTE'):
 71 |         if(family_lc=='gaussian'):
 72 |             dep_param_est = _gaussian_PKTE(X)
 73 |         elif(family_lc=='t'):
 74 |             dep_param_est = _t_PKTE(X)
 75 |         elif(family_lc=='clayton'):
 76 |             dep_param_est = _clayton_PKTE(X)
 77 |         elif(family_lc=='gumbel'):
 78 |             dep_param_est = _gumbel_PKTE(X)
 79 |         elif(family_lc=='frank'):
 80 |             dep_param_est = _frank_PKTE(X)
 81 |     else:
 82 |         raise ValueError('Unsupported Algorithm or options!')
 83 |     
 84 |     return dep_param_est
 85 |     
 86 | def _gaussian_PKTE(X):
 87 |     # the algorithm for this comes from the paper:
 88 |     # "Gaussian Copula Precision Estimation with Missing Values" 
 89 |     # by Huahua Wang, Faridel Fazayeli, Soumyadeep Chatterjee, Arindam Banerjee
 90 |     N = X.shape[1]
 91 |     sigma_hat = np.ones((N,N))
 92 |     for dim1 in range(0,N-1):
 93 |         for dim2 in range(dim1+1,N):
 94 |             rho = np.sin(math.pi/2 * kendalltau(X[:,dim1],X[:,dim2]))
 95 |             # correlation matrix is symmetric
 96 |             sigma_hat[dim1][dim2] = rho
 97 |             sigma_hat[dim2][dim1] = rho
 98 |             
 99 |     # ensure that sigma_hat is positive semidefinite
100 |     sigma_hat = _nearPD(sigma_hat)
101 |             
102 |     return sigma_hat
103 | 
# TODO: T copula stuff
def _t_PKTE(X):
    """
    Placeholder estimator for the t copula.

    Returns a tuple (correlation estimate, degrees of freedom), where the
    correlation estimate is whatever _gaussian_PKTE(X) produces and the
    degrees of freedom are currently fixed at 1.
    """
    # TODO: use MLE to estimate degrees of freedom
    degrees_of_freedom = 1

    # the correlation matrix is estimated exactly as for the Gaussian copula
    correlation = _gaussian_PKTE(X)

    return (correlation, degrees_of_freedom)
113 |     
def _clayton_PKTE(X):
    """
    Estimates the Clayton dependency parameter by inverting the empirical
    Kendall's tau of X through invcopulastat.
    """
    empirical_tau = multivariate_stats.kendalls_tau(X)
    return invcopulastat('Clayton', 'kendall', empirical_tau)
121 | 
def _gumbel_PKTE(X):
    """
    Estimates the Gumbel dependency parameter by inverting the empirical
    Kendall's tau of X through invcopulastat.
    """
    empirical_tau = multivariate_stats.kendalls_tau(X)
    return invcopulastat('Gumbel', 'kendall', empirical_tau)
129 | 
130 | 
def _frank_PKTE(X):
    """
    Estimates the Frank dependency parameter by inverting the empirical
    Kendall's tau of X through invcopulastat.
    """
    empirical_tau = multivariate_stats.kendalls_tau(X)
    return invcopulastat('Frank', 'kendall', empirical_tau)
138 | 
139 | def _getAplus(A):
140 |     eigval, eigvec = eig(A)
141 |     Q = np.matrix(eigvec)
142 |     xdiag = np.matrix(np.diag(np.maximum(eigval, 0)))
143 |     return Q*xdiag*Q.T
144 | 
def _getPs(A, W=None):
    """
    Applies _getAplus to A in the coordinates scaled by the element-wise
    square root of W, then undoes the scaling.

    Inputs:
      A -- square matrix to project
      W -- weight matrix (the caller in this file passes an identity matrix);
           when omitted, the identity of matching size is used

    Outputs:
      a numpy matrix
    """
    if W is None:
        # the advertised default used to crash on W**.5
        W = np.identity(np.shape(A)[0])
    # element-wise square root; equals the matrix square root for diagonal W
    W05 = np.matrix(np.asarray(W)**.5)
    return  W05.I * _getAplus(W05 * A * W05) * W05.I
148 | 
149 | def _getPu(A, W=None):
150 |     Aret = np.array(A.copy())
151 |     Aret[W > 0] = np.array(W)[W > 0]
152 |     return np.matrix(Aret)
153 | 
def _nearPD(A, nit=10):
    """
    Iteratively adjusts A by alternating between _getPs and _getPu for nit
    rounds, carrying a correction term from one round to the next.

    NOTE(review): this looks like Higham's nearest-correlation-matrix
    iteration -- confirm against the reference before relying on that.

    Inputs:
      A   -- square numpy array
      nit -- number of iterations to run

    Outputs:
      the result of the last _getPu call (A.copy() itself when nit is 0)
    """
    # the weight matrix used for the norm (assumed to be the identity matrix
    # here); the algorithm should work for any diagonal weight matrix
    weights = np.identity(A.shape[0])

    correction = 0
    iterate = A.copy()
    for _ in range(nit):
        shifted = iterate - correction
        projected = _getPs(shifted, W=weights)
        correction = projected - shifted
        iterate = _getPu(projected, W=weights)
    return iterate


--------------------------------------------------------------------------------
/copulamnsig.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from cvolume import cvolume
 26 | import multivariate_stats
 27 | 
 28 | from ecdf import probability_integral_transform
 29 | from scipy.stats import entropy
 30 | 
 31 | def copulamnsig(family, K, *args):
 32 |     """
 33 |     Computes the copula multinomial signature as described in the paper
 34 |     "Highly Efficient Learning of Mixed Copula Networks" for a specified 
 35 |     copula family.  Essentially, it breaks up the unit grid into a K x K boxes, 
 36 |     and computes the probability of a sample from that copula pdf falling in 
 37 |     that grid.  This is then aggregated into a multinomial probability 
 38 |     distribution.  This so called "multinomial" signature of a copula is then 
 39 |     used to efficiently determine the structure of the Bayesian network, as well
 40 |     as the copula which would describe the dependency between the nodes.
 41 |     
 42 |     The grid over the unit cube is numbered as follows, for a 4 x 4 grid
 43 |       ___________________
 44 |       | 4 | 8 | 12 | 16 | 
 45 |       |---|---|----|----| 
 46 |       | 3 | 7 | 11 | 15 |
 47 |       |-----------------|
 48 |       | 2 | 6 | 10 | 14 |
 49 |       |-----------------|
 50 |       | 1 | 5 |  9 | 13 |
 51 |       |___|___|____|____|
 52 |     
 53 |     Currently, this computes the multinomial signature for a specified copula
 54 |     family of 2 dimensions.  It would be nice to expand this to multiple
 55 |     dimensions, and we can use the general formula for C-volume
 56 |     
 57 |       family - the copula type, must be:
 58 |         'Gaussian'
 59 |         'T'
 60 |         'Clayton'
 61 |         'Frank'
 62 |         'Gumbel'
 63 |       args - must be atleast of length 2, for which the first element in args
 64 |              is expected to be a string which describes the dependency value
 65 |              being provided, must be one of the following:
 66 |         'kendall' - means kendall's Tau is being provided
 67 |         'spearman' - means spearman's rho is being provided
 68 |         'native' - means that the dependency parameter of the copula family
 69 |                    itself is being provided directly
 70 |             the second argmuent  must be the value of the dependency type 
 71 |             provided. For kendall and spearman, a scalar value is expected.  
 72 |             For native, if the family type is Frank, Gumbel, or Clayton, then 
 73 |             a scalar value is expected, which represents the dependency
 74 |             parameter.  If the family type is Gaussian, then a 2 x 2 numpy array
 75 |             is expected, which represents the correlation matrix defining the
 76 |             Gaussian copula.  If the family is T, then the 2nd argument is the
 77 |             2x2 numpy array representing the correlation matrix, and the 3rd
 78 |             argument is the degrees of freedom
 79 |     """
 80 |     coords_list = _makeCoordsList(K)
 81 |             
 82 |     # mnsig is a list of dictionaries.  The (list index+1) corresponds to the
 83 |     # grid of interest in the unit cube.  In the dictionary, the actual lower
 84 |     # left coordinates of the box and the upper right coordinates of the box
 85 |     # are stored as keys 'u1v1' and 'u2v2', and then the actual value of the 
 86 |     # multinomial signature in that grid is stored as 'val'
 87 |     
 88 |     mnsig = []
 89 |     for coord in coords_list:
 90 |         # compute the C-volume and store
 91 |         u1v1 = coord[0]
 92 |         u1v2 = coord[1]
 93 |         u2v1 = coord[2]
 94 |         u2v2 = coord[3]
 95 |         try:
 96 |             val = cvolume(family, u1v1, u1v2, u2v1, u2v2, *args)
 97 |         except ValueError:
 98 |             val = np.array([-1])        # for compatibility we put the numpy wrapper
 99 | 
100 |         mnsig.append(val[0])
101 |     
102 |     return mnsig
103 | 
def empirical_copulamnsig(X, K):
    """
    Computes an empirical copula multinomial signature based on the dataset
    provided by X.  X must be a numpy array of dimensions [M x N], where M is
    the number of data points in the dataset and N is the dimensionality of
    the data.

    X is first mapped to U with the probability integral transform, F(X) = U.
    Then, for every pair of dimensions i < j, the fraction of samples falling
    into each of the K x K grid boxes is recorded.  The boxes are laid out
    exactly as in copulamnsig, so the indices are directly comparable:
          ___________________
          | 4 | 8 | 12 | 16 |
          |---|---|----|----|
          | 3 | 7 | 11 | 15 |
          |-----------------|
          | 2 | 6 | 10 | 14 |
          |-----------------|
          | 1 | 5 |  9 | 13 |
          |___|___|____|____|

    Returns a list with one dictionary per pair of dimensions, holding
      'rv1'  - the (1-based) index of the first random variable
      'rv2'  - the (1-based) index of the second random variable
      'esig' - numpy array of length K*K with the per-box fractions
    """
    num_samples = X.shape[0]
    num_dims = X.shape[1]

    U = probability_integral_transform(X)

    # lower-left and upper-right corner of every box, pulled out once:
    # (u_low, v_low, u_high, v_high)
    box_bounds = [(box[0][0][0], box[0][0][1], box[3][0][0], box[3][0][1])
                  for box in _makeCoordsList(K)]

    signatures = []
    for first in range(0, num_dims-1):
        for second in range(first+1, num_dims):
            fractions = np.zeros(K*K)
            u_column = U[:,first]
            v_column = U[:,second]

            # there is probably a more efficient way to do this than to loop
            # over each value, but this is a first cut at implementing this
            for sample in range(0, num_samples):
                u_val = u_column[sample]
                v_val = v_column[sample]
                for box_index, (u_low, v_low, u_high, v_high) in enumerate(box_bounds):
                    if(u_low <= u_val < u_high and v_low <= v_val < v_high):
                        # credit the box and move on to the next sample
                        fractions[box_index] = (fractions[box_index] + 1.0/num_samples)
                        break

            pair_entry = {}
            pair_entry['rv1'] = first+1
            pair_entry['rv2'] = second+1
            pair_entry['esig'] = fractions
            signatures.append(pair_entry)

    return signatures
175 | 
176 | def _makeCoordsList(K):
177 |     eps = np.finfo(float).eps
178 |     u = np.linspace(0+eps, 1-eps, K+1)
179 |     v = np.linspace(0+eps, 1-eps, K+1)
180 |     
181 |     coords_list = []
182 |     for ii in range(0,len(u)-1):
183 |         for jj in range(0,len(v)-1):
184 |             u1 = u[ii]
185 |             u2 = u[ii+1]
186 |             v1 = v[jj]
187 |             v2 = v[jj+1]
188 |             u1v1 = np.array([[u1,v1]])
189 |             u1v2 = np.array([[u1,v2]])
190 |             u2v1 = np.array([[u2,v1]])
191 |             u2v2 = np.array([[u2,v2]])
192 |             x = []
193 |             x.append(u1v1)
194 |             x.append(u1v2)
195 |             x.append(u2v1)
196 |             x.append(u2v2)
197 |             coords_list.append(x)
198 |     
199 |     return coords_list
200 | 
# the master function, which computes the correct copula family to choose from
# will compare the empirical signatures to the actual signature for reference
# will do the following:
#  1.) compute the empirical kendall's tau
#  2.) compute the multinomial signature for that kendall's tau
#      for all the copula families
#  3.) minimize the distance metric
def optimalCopulaFamily(X, K=4, family_search=['Gaussian', 'Clayton', 'Gumbel', 'Frank']):
    """
    This function, given a multivariate data set X, computes the best copula family which fits
    the data, using the procedure described in the paper "Highly Efficient Learning of Mixed
    Copula Networks," by Gal Elidan

      X - the multivariate dataset for which we desire the copula.  Must be a numpy array of
          dimension [M x N], where M is the number of data points, and N is the dimensionality
          of the dataset
      K - the square root of the number of grid points (for now, we assume square gridding of the
          unit cube)
      family_search - a list of all the copula families to search.  Currently, what is supported is
          Gaussian, Clayton, Gumbel, and Frank.  The list is only read, never modified.

    Only the empirical signature of the first pair of dimensions is compared against the
    family signatures.

    Returns the tuple (optimalFamily, depParams, tau), where tau is the value of Kendall's tau
    that was actually used for the selected family (i.e. after any clamping applied for
    Clayton/Gumbel) and depParams is invcopulastat(optimalFamily, 'kendall', tau).

    Raises ValueError if no family in family_search yields a finite distance.
    """
    # invcopulastat is only imported under this module's __main__ guard, so
    # import it here to keep the function usable when the module is imported
    from invcopulastat import invcopulastat

    # compute the empirical Kendall's Tau
    tau_hat = multivariate_stats.kendalls_tau(X)

    # compute empirical multinomial signature
    empirical_mnsig = np.asarray(empirical_copulamnsig(X, K)[0]['esig'], dtype=float)
    # replace any 0 values w/ smallest possible float value
    empirical_mnsig[empirical_mnsig==0] = np.spacing(1)

    # The Clayton and Gumbel copulas capture only positive concordance.  Like
    # any estimator, tau_hat has some variance, so a slightly negative value
    # is treated as 0 for these families, while a clearly negative value
    # excludes the family altogether (distance = infinity).
    positive_only_families = ('clayton', 'gumbel')
    negative_tolerance = 0.05

    # compute the multinomial signature for each of the copula families specified
    # and simultaneously compute the kullback leibler divergence between the empirical
    # and the computed, and store that info
    distances = {}
    tau_used = {}
    for family in family_search:
        # work on a per-family copy so that clamping done for one family does
        # not leak into the families evaluated after it
        tau_family = tau_hat
        if(family.lower() in positive_only_families):
            if(tau_family<-negative_tolerance):
                distances[family] = np.inf
                continue
            elif(tau_family<0):
                tau_family = 0
            elif(tau_family>=1):
                tau_family = 1 - np.spacing(1)     # as close to 1 as possible in our precision
        # any other copula families with restrictions can go here

        # copulamnsig returns a plain list; convert it so that the boolean
        # mask below really is a mask (on a list it would overwrite item 0)
        mnsig = np.asarray(copulamnsig(family,K,'kendall',tau_family), dtype=float)
        # replace any 0 values w/ smallest possible float value
        mnsig[mnsig==0] = np.spacing(1)

        # compute KL divergence, see
        # http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.entropy.html
        distances[family] = entropy(mnsig, empirical_mnsig)
        tau_used[family] = tau_family

    # search for the minimum distance, that is the optimal copula family to use;
    # walk the families in the order given so that ties resolve deterministically
    optimalFamily = None
    minDistance = np.inf
    for family in family_search:
        distance = distances[family]
        if distance<minDistance:
            minDistance = distance
            optimalFamily = family

    if optimalFamily is None:
        raise ValueError('None of the requested copula families could be fit to the data!')

    tau_optimal = tau_used[optimalFamily]
    depParams = invcopulastat(optimalFamily, 'kendall', tau_optimal)

    return (optimalFamily, depParams, tau_optimal)
291 | 
def testHELM(tau, M, N, familyToTest, numMCSims, copulaFamiliesToTest):
    """
    Monte-Carlo check of optimalCopulaFamily.

    For each of numMCSims runs, M samples of an N-dimensional copula of
    family familyToTest with Kendall's tau equal to tau are generated, the
    margins are transformed (even columns through the normal inverse CDF,
    odd columns through the exponential inverse CDF), and
    optimalCopulaFamily is asked to pick a family out of
    copulaFamiliesToTest.

    Returns a dictionary keyed by lower-cased family name holding the
    percentage of runs in which that family was selected.  If the Gaussian
    sampler rejects the correlation matrix, the (all zero) results collected
    so far are returned immediately; for other families a failing run is
    skipped.
    """
    # these names are only imported under this module's __main__ guard, so
    # import them here to keep the function usable when the module is imported
    import sys
    from scipy.stats import norm
    from scipy.stats import expon
    from copularnd import copularnd
    from invcopulastat import invcopulastat

    results = {}
    for fam in copulaFamiliesToTest:
        results[fam.lower()] = 0

    for ii in range(0,numMCSims):
        # generate samples of the requested copula with the requested tau
        if(familyToTest.lower()=='gaussian'):
            r = invcopulastat(familyToTest, 'kendall', tau)

            # equicorrelation matrix: r everywhere, 1 on the diagonal
            Rho = np.empty((N,N))
            Rho[:] = r
            np.fill_diagonal(Rho, 1)
            try:
                U = copularnd(familyToTest, M, Rho)
            except ValueError:
                # copularnd will throw a ValueError if Rho is not a positive semidefinite matrix
                return results      # return 0, which will then be ignored by tests

        else:       # assume Clayton, Frank, or Gumbel
            try:
                alpha = invcopulastat(familyToTest, 'kendall', tau)
                U = copularnd(familyToTest, M, N, alpha)
            except ValueError:
                continue

        lst = []
        for jj in range(0,N):
            U_conditioned = U[:,jj]
            # if there are any 1's, condition it
            U_conditioned[U_conditioned==1] = 0.99
            if(jj%2==0):
                lst.append(norm.ppf(U_conditioned))
            else:
                lst.append(expon.ppf(U_conditioned))

        # combine the margins into the joint distribution w/ the copula
        X = np.vstack(lst)
        X = X.T

        ret = optimalCopulaFamily(X, family_search=copulaFamiliesToTest)
        ret_family = ret[0].lower()
        # aggregate results
        results[ret_family] = results[ret_family] + 1.0

        # display some progress -- as a real percentage of the requested runs
        percent_done = 100.0*(ii+1)/numMCSims
        sys.stdout.write("\rComputing " + str(familyToTest) + " Copula (DIM=%d) (tau=%f)-- %d%%" % (N,tau,percent_done))
        sys.stdout.flush()

    sys.stdout.write("\r")

    # convert results to percentage (nothing to convert when no run was requested);
    # iterate the result keys so that each entry is scaled exactly once
    if(numMCSims>0):
        for fam_lc in list(results):
            results[fam_lc] = results[fam_lc]/float(numMCSims) * 100

    return results
353 | 
def plotPieChartResults(results, family, title):
    """
    Shows a pie chart of the per-family percentages in results (as produced
    by testHELM), with the slice belonging to family pulled out.

      results - dictionary mapping lower-cased family name to a value
      family  - the family whose slice is exploded (matched in lower case);
                a ValueError is raised if it is not a key of results
      title   - the title of the plot
    """
    # plt is only imported under this module's __main__ guard
    import matplotlib.pyplot as plt

    colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']      # for the pie chart

    # materialise keys and values together so that they stay aligned and so
    # that .index() is available regardless of what .keys() returns
    labels = list(results.keys())
    values = [results[label] for label in labels]

    # explode the slice of the requested family; one entry per slice
    expTup = [0] * len(labels)
    expTup[labels.index(family.lower())] = 0.1
    plt.pie(values, explode=expTup, labels=labels,
            colors=colors, autopct='%1.1f%%', shadow=True, startangle=90)
    plt.title(title)
    plt.show()
363 |     
364 | 
def testHELM_parametric(K,M,N,tauVec,families):
    """
    Runs testHELM for every combination of family in families and tau in
    tauVec, using 1000 Monte-Carlo simulations per combination.

    Returns a nested dictionary: results[family][tau] is the dictionary
    returned by testHELM for that combination.  K is accepted but not used
    by this function.
    """
    # Monte-Carlo style simulations to test each copula generation
    numMCSims = 1000

    resultsAggregate = {}
    for family in families:
        per_tau = {}
        for tau in tauVec:
            per_tau[tau] = testHELM(tau, M, N, family, numMCSims, families)
        resultsAggregate[family] = per_tau

    return resultsAggregate
381 | 
def visualizeMNSig():
    """
    Produces one summary figure per value of Kendall's tau and saves it as a
    PNG under 'figures/HELM_performance/'.

    Each figure has six panels: the multinomial signatures of the four
    families (231), scatter plots of samples drawn from the Gaussian (232),
    Clayton (233), Frank (235) and Gumbel (236) copulas, and a stacked bar
    chart of the family-selection percentages from testHELM_parametric (234).

    The function takes no arguments; K, M, N, the tau grid and the family
    list are fixed below.  It uses invcopulastat, copularnd, plt and os,
    which in this module are only imported under the __main__ guard.
    """
    # some tests on the copula multinomial signature
    
    K = 4
    M = 1000
    N = 3
    tauVec = np.arange(-0.9,0.95,0.05)
    # the families to test against and pick optimal copula
    families = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']
    
    # selection percentages for every (family, tau) combination
    helmAccuracyResults = testHELM_parametric(K,M,N,tauVec,families)
        
    resultsAggregate = {}
    
    # theoretical multinomial signature for every (family, tau) combination
    for family in families:
        famResults = {}
        for tau in tauVec:
            mnsig = copulamnsig(family,K,'kendall',tau)
            famResults[tau] = mnsig
        resultsAggregate[family] = famResults

    # visualize the results
    for tau in tauVec:
        # we would also like to visualize this copula on the side, to try to 
        # understand what may be a better way todo model selection
        # NOTE: -1 is used below as the "parameter could not be computed"
        # marker for r and for each alpha
        try:
            r = invcopulastat('Gaussian', 'kendall', tau)
        except ValueError:
            r = -1
        # N x N matrix with 1 on the diagonal and r everywhere else
        Rho = np.empty((N,N))
        for jj in range(0,N):
            for kk in range(0,N):
                if(jj==kk):
                    Rho[jj][kk] = 1
                else:
                    Rho[jj][kk] = r
        
        try:
            alpha_clayton = invcopulastat('Clayton', 'kendall', tau)
        except ValueError:
            alpha_clayton = -1
        
        try:
            alpha_gumbel  = invcopulastat('Gumbel', 'kendall', tau)
        except ValueError:
            alpha_gumbel = -1
            
        try:
            alpha_frank   = invcopulastat('Frank', 'kendall', tau)
        except ValueError:
            alpha_frank   = -1
        
        # draw samples only for the families whose parameter is available;
        # a sampler failure is replaced by an all-zero sample matrix
        if(r!=-1):
            try:
                U_gauss   = copularnd('Gaussian', M, Rho)
            except ValueError:
                U_gauss   = np.zeros((M,N))
        if(alpha_clayton!=-1):
            try:
                U_clayton = copularnd('Clayton', M, N, alpha_clayton)
            except ValueError:
                U_clayton   = np.zeros((M,N))
        if(alpha_frank!=-1):
            try:
                U_frank   = copularnd('Frank', M, N, alpha_frank)
            except ValueError:
                U_frank   = np.zeros((M,N))
        if(alpha_gumbel!=-1):
            try:
                U_gumbel  = copularnd('Gumbel', M, N, alpha_gumbel)
            except ValueError:
                U_gumbel  = np.zeros((M,N))
        
        # get each family's MN signature and plot it
        plt.figure(figsize=(30,20))
        
        # a signature is only drawn when its entries sum to a positive value
        plt.subplot(231)
        if(np.sum(resultsAggregate['Gaussian'][tau])>0):
            plt.plot(np.arange(1,K*K+1), resultsAggregate['Gaussian'][tau], 'b.-', label='Gaussian Copula')
        if(np.sum(resultsAggregate['Clayton'][tau])>0):
            plt.plot(np.arange(1,K*K+1), resultsAggregate['Clayton'][tau], 'g.-', label='Clayton Copula')
        if(np.sum(resultsAggregate['Gumbel'][tau])>0):
            plt.plot(np.arange(1,K*K+1), resultsAggregate['Gumbel'][tau], 'r.-', label='Gumbel Copula')
        if(np.sum(resultsAggregate['Frank'][tau])>0):
            plt.plot(np.arange(1,K*K+1), resultsAggregate['Frank'][tau], 'k.-', label='Frank Copula')
        
        plt.title(r'Copula Multinomial Signature $\tau$=' + "{0:.2f}".format(tau) + ' K=' + str(K))
        plt.legend()
        plt.grid()
        
        # scatter plots show the first two dimensions of each sample set
        plt.subplot(232)
        if(r!=-1):
            plt.scatter(U_gauss[:,0], U_gauss[:,1])
        plt.grid()
        plt.title(r'Gaussian Copula, $\rho$=' + "{0:.2f}".format(r) + r' $\tau$=' + "{0:.2f}".format(tau))
        
        plt.subplot(233)
        if(alpha_clayton!=-1):
            plt.scatter(U_clayton[:,0], U_clayton[:,1])
        plt.grid()
        plt.title(r'Clayton Copula, $\alpha$=' + "{0:.2f}".format(alpha_clayton) + r' $\tau$=' + "{0:.2f}".format(tau))
        
        plt.subplot(235)
        if(alpha_frank!=-1):
            plt.scatter(U_frank[:,0], U_frank[:,1])
        plt.grid()
        plt.title(r'Frank Copula, $\alpha$=' + "{0:.2f}".format(alpha_frank) + r' $\tau$=' + "{0:.2f}".format(tau))
        
        plt.subplot(236)
        if(alpha_gumbel!=-1):
            plt.scatter(U_gumbel[:,0], U_gumbel[:,1])
        plt.grid()
        plt.title(r'Gumbel Copula, $\alpha$=' + "{0:.2f}".format(alpha_gumbel) + r' $\tau$=' + "{0:.2f}".format(tau))
        
        plt.subplot(234)
        # index manually to ensure accuracy
        # each array holds, for one selected family, the selection percentage
        # per reference family in the order Clayton, Gaussian, Gumbel, Frank
        cla = np.array([helmAccuracyResults['Clayton'][tau]['clayton'],
                        helmAccuracyResults['Gaussian'][tau]['clayton'],
                        helmAccuracyResults['Gumbel'][tau]['clayton'],
                        helmAccuracyResults['Frank'][tau]['clayton']])
        gau = np.array([helmAccuracyResults['Clayton'][tau]['gaussian'],
                        helmAccuracyResults['Gaussian'][tau]['gaussian'],
                        helmAccuracyResults['Gumbel'][tau]['gaussian'],
                        helmAccuracyResults['Frank'][tau]['gaussian']])
        gum = np.array([helmAccuracyResults['Clayton'][tau]['gumbel'],
                        helmAccuracyResults['Gaussian'][tau]['gumbel'],
                        helmAccuracyResults['Gumbel'][tau]['gumbel'],
                        helmAccuracyResults['Frank'][tau]['gumbel']])
        fra = np.array([helmAccuracyResults['Clayton'][tau]['frank'],
                        helmAccuracyResults['Gaussian'][tau]['frank'],
                        helmAccuracyResults['Gumbel'][tau]['frank'],
                        helmAccuracyResults['Frank'][tau]['frank']])
        ind = np.arange(4)
        width = 0.2
        # stacked bars: every series sits on top of the sum of the previous ones
        p1 = plt.bar(ind,cla,width,color='b')
        p2 = plt.bar(ind,gau,width,color='g',bottom=cla)
        p3 = plt.bar(ind,gum,width,color='k',bottom=cla+gau)
        p4 = plt.bar(ind,fra,width,color='r',bottom=cla+gau+gum)
        plt.xticks(ind+width/2.,('Clayton', 'Gaussian', 'Gumbel', 'Frank'))
        plt.legend( (p1[0], p2[0], p3[0], p4[0]), ('Clayton', 'Gaussian', 'Gumbel', 'Frank') )

        plt.grid()
        # NOTE(review): the file name contains a space before '_K_', and the
        # target directory is not created here -- presumably it must already
        # exist; confirm
        plt.savefig(os.path.join('figures/HELM_performance/', 
                     'HELM_DIM_' + str(N) + '_tau_' + "{0:.2f}".format(tau) + ' _K_' + str(K) + '.png'))
        
        plt.close()
528 | 
529 | 
if __name__=='__main__':
    # these imports also provide the module-level names (copularnd,
    # invcopulastat, norm, expon, sys, plt, os) used by the test and
    # visualization functions defined above
    from copularnd import copularnd
    from invcopulastat import invcopulastat
    from scipy.stats import norm
    from scipy.stats import expon
    import sys
    import matplotlib.pyplot as plt
    import os

    # some tests on the copula multinomial signature
    tau = 0.4
    K = 4
    mnsig = copulamnsig('Gumbel',K,'kendall',tau)
    # add up mnsig to make sure we sum to 1 as a simple sanity check
    val_total = 0
    for val in mnsig:
        val_total = val_total + val

    # single-argument print with parentheses behaves the same whether print
    # is a statement or a function
    if(np.isclose(val_total, 1.0)):
        print('CopulaMNSig total probability check passed!')
    else:
        print('CopulaMNSig total probability check failed!')


    M = 1000
    N = 2

    # Monte-Carlo style simulations to test each copula generation
    numMCSims = 100
    # the families to test against and pick optimal copula
    families = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # disabled experiments, kept for reference:
    #
    # for family in families:
    #     title = 'Reference Bivariate ' + str(family) + ' Copula - HELM Identification Breakdown'
    #     results = testHELM(tau, M, N, family, numMCSims, families)
    #     plotPieChartResults(results, family, title)
    #
    # N = 3
    # for family in families:
    #     title = 'Reference Bivariate ' + str(family) + ' Copula - HELM Identification Breakdown'
    #     results = testHELM(tau, M, N, family, numMCSims, families)
    #     plotPieChartResults(results, family, title)
    #
    #tauVec = np.arange(-0.9,0.95,0.05)
    #resultsAggregate = testHELM_parametric(K,M,N,tauVec)

    visualizeMNSig()
578 |     
579 |     
580 |     


--------------------------------------------------------------------------------
/copulapdf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from scipy.stats import mvn                     # contains inverse CDF of Multivariate Gaussian
 26 | from scipy.stats import norm                    # contains PDF of Gaussian
 27 | from scipy.stats import t
 28 | from scipy.special import gammaln
 29 | 
 30 | from numpy.linalg import solve
 31 | from numpy.linalg import cholesky
 32 | from numpy.linalg import LinAlgError
 33 | 
 34 | from copulacdf import copulacdf
 35 | 
 36 | """
 37 | copulapdf.py contains routines which provide Copula PDF values 
 38 | """
 39 | 
 40 | def copulapdf(family, u, *args):
 41 |     """ Generates values of a requested copula family
 42 |     
 43 |     Inputs:
 44 |     u -- u is an N-by-P matrix of values in [0,1], representing N
 45 |          points in the P-dimensional unit hypercube.  
 46 |     
 47 |     rho -- a P-by-P correlation matrix, the first argument required for the Gaussian copula
 48 |     alpha -- a scalar argument describing the dependency for Frank, Gumbel, and Clayton copula's
 49 |     
 50 |     Outputs:
 51 |     y -- the value of the Gaussian Copula
 52 |     """
 53 |     n  = u.shape[0]
 54 |     p  = u.shape[1]
 55 | 
 56 |     num_var_args = len(args)
 57 |     family_lc = family.lower()
 58 |     if(family_lc=='gaussian'):
 59 |         if(num_var_args!=1):
 60 |             raise ValueError("Gaussian family requires one additional argument -- rho (correlation matrix) [P x P]")
 61 |         rho = args[0]
 62 |         rho_expected_shape = (p,p)
 63 |         if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape):
 64 |             raise ValueError("Gaussian family requires rho to be of type numpy.ndarray with shape=[P x P]")
 65 |         y = _gaussian(u, rho)
 66 |         
 67 |     elif(family_lc=='t'):
 68 |         rho = args[0]
 69 |         rho_expected_shape = (p,p)
 70 |         if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape):
 71 |             raise ValueError("T family requires rho to be of type numpy.ndarray with shape=[P x P]")
 72 |         nu = int(args[1])       # force to be an integer
 73 |         if(nu<1):
 74 |             raise ValueError("T family Degrees of Freedom argument must be an integer >= 1")
 75 |         return _t(u, rho, nu)
 76 |     elif(family_lc=='clayton'):
 77 |         if(num_var_args!=1):
 78 |             raise ValueError("Clayton family requires one additional argument -- alpha [scalar]")
 79 |         alpha = args[0]
 80 |         if(type(alpha)!=float):
 81 |             raise ValueError('Clayton family requires a scalar alpha value')
 82 |         y = _clayton(u, alpha)
 83 |     elif(family_lc=='frank'):
 84 |         if(num_var_args!=1):
 85 |             raise ValueError("Frank family requires one additional argument -- alpha [scalar]")
 86 |         alpha = args[0]
 87 |         if(type(alpha)!=float):
 88 |             raise ValueError('Clayton family requires a scalar alpha value')
 89 |         y = _frank(u, alpha)
 90 |     elif(family_lc=='gumbel'):
 91 |         if(num_var_args!=1):
 92 |             raise ValueError("Gumbel family requires one additional argument -- alpha [scalar]")
 93 |         alpha = args[0]
 94 |         if(type(alpha)!=float):
 95 |             raise ValueError('Clayton family requires a scalar alpha value')
 96 |         y = _gumbel(u, alpha)
 97 |     else:
 98 |         raise ValueError("Unrecognized family of copula")
 99 |     
100 |     return y
101 | 
def _gaussian(u, rho):
    """Gaussian copula density evaluated at every row of u.

    u   -- N-by-P array of points in the unit hypercube
    rho -- P-by-P correlation matrix; a ValueError is raised when its
           Cholesky factorization fails
    Returns a length-N array of density values.
    """
    try:
        chol = cholesky(rho)
    except LinAlgError:
        raise ValueError('Provided Rho matrix is not Positive Definite!')

    # map the uniforms to standard-normal scores, then whiten them with the
    # Cholesky factor so the quadratic form becomes a plain sum of squares
    scores = norm.ppf(u)
    whitened = solve(chol, scores.T).T

    # sum of the logs of the Cholesky diagonal == log(sqrt(det(rho)))
    log_sqrt_det = np.sum(np.log(np.diag(chol)))
    exponent = -0.5 * np.sum(np.power(whitened, 2) - np.power(scores, 2), axis=1)
    return np.exp(exponent - log_sqrt_det)
115 | 
def _t(u, rho, nu):
    """Student-t copula density evaluated at every row of u.

    u   -- N-by-P array of points in the unit hypercube
    rho -- P-by-P correlation matrix; a ValueError is raised when its
           Cholesky factorization fails
    nu  -- degrees of freedom (converted to float)
    Returns a length-N array of density values.
    """
    d = u.shape[1]
    nu = float(nu)
    
    try:
        R = cholesky(rho)
    except LinAlgError:
        raise ValueError('Provided Rho matrix is not Positive Definite!')
    
    # univariate t quantiles of the uniforms
    ticdf = t.ppf(u, nu)
    
    # whiten with the Cholesky factor so the quadratic form is a sum of squares
    z = solve(R,ticdf.T).T
    quad_form = np.sum(np.power(z,2),axis=1)
    
    # sum of the logs of the Cholesky diagonal == log(sqrt(det(rho)))
    logSqrtDetRho = np.sum(np.log(np.diag(R)))
    const = gammaln((nu+d)/2.0) + (d-1)*gammaln(nu/2.0) - d*gammaln((nu+1)/2.0) - logSqrtDetRho
    
    # log of the joint t kernel (numer) minus the log of the product of the
    # univariate t kernels (denom)
    numer = -((nu+d)/2.0) * np.log(1.0 + quad_form/nu)
    denom = np.sum(-((nu+1)/2.0) * np.log(1 + (np.power(ticdf,2))/nu), axis=1)
    y = np.exp(const + numer - denom)
    
    return y
138 | 
def _clayton(u, alpha):
    """Bivariate Clayton copula density evaluated at every row of u.

    u     -- N-by-2 array of points in the unit square
    alpha -- dependency parameter, must be >= 0
    Returns an (N,1) array of ones when alpha == 0, otherwise a length-N
    array holding the closed form of d2C/dudv.
    Raises ValueError for more than two columns or a negative alpha.
    """
    num_points, num_dims = u.shape[0], u.shape[1]
    if num_dims > 2:
        raise ValueError('Maximum dimensionality supported is 2 for the Clayton Copula Family')
    if alpha < 0:
        raise ValueError('Dependency parameter for Clayton copula must be >= 0')
    if alpha == 0:
        return np.ones((num_points, 1))

    first, second = u[:,0], u[:,1]
    # closed form of d2C/dudv, built from its three factors
    margin_term = np.power(first*second, -1*(alpha+1))
    generator_sum = np.power(first, -alpha) + np.power(second, -alpha) - 1
    tail_term = np.power(generator_sum, -1*(2*alpha+1)/alpha)
    return (alpha + 1) * margin_term * tail_term
153 | 
def _frank(u, alpha):
    """Bivariate Frank copula density evaluated at every row of u.

    u     -- N-by-2 array of points in the unit square
    alpha -- dependency parameter; alpha == 0 is the independence copula
    Returns an (N,1) array of ones when alpha == 0 (same convention as the
    Clayton and Gumbel helpers in this file), otherwise a length-N array.
    Raises ValueError for more than two columns.
    """
    n = u.shape[0]
    d = u.shape[1]
    if(d>2):
        # the closed form below only uses the first two columns consistently
        raise ValueError('Maximum dimensionality supported is 2 for the Frank Copula Family')
    
    if alpha == 0:
        y = np.ones((n,1))
    else:
        summer = np.sum(u,1)            # u1 + u2
        differ = np.diff(u,1,1)
        differ = differ[:,0]            # u2 - u1
        # closed form of d2C/dudv written with cosh so that the exponentials
        # are evaluated at half-arguments
        denom = np.power(np.cosh(alpha*differ/2)*2 - np.exp(alpha*(summer-2)/2) - np.exp(-alpha*summer/2), 2)
        y = alpha*(1-np.exp(-alpha)) / denom
        
    return y
165 | 
def _gumbel(U, alpha):
    """Bivariate Gumbel copula density evaluated at every row of U.

    U     -- N-by-2 array of points in the unit square
    alpha -- dependency parameter, must be >= 1
    Returns an (N,1) array of ones when alpha == 1, otherwise a length-N
    array holding the closed form of d2C/dudv (which needs the Gumbel CDF).
    Raises ValueError for more than two columns or alpha < 1.
    """
    num_points, num_dims = U.shape[0], U.shape[1]
    if num_dims > 2:
        raise ValueError('Maximum dimensionality supported is 2 for the Gumbel Copula Family')
    if alpha < 1:
        raise ValueError('Bad dependency parameter for Gumbel copula')
    if alpha == 1:
        return np.ones((num_points, 1))

    # closed form of d2C/dudv of the Gumbel copula
    C = copulacdf('Gumbel', U, alpha)
    u = U[:,0]
    v = U[:,1]
    log_u = np.log(u)
    log_v = np.log(v)
    # (-ln u)^alpha + (-ln v)^alpha, shared by both factors
    generator_sum = np.power(-1*log_u, alpha) + np.power(-1*log_v, alpha)
    p1 = C*1.0/(u*v)*np.power(generator_sum, -2.0 + 2.0/alpha)*np.power(log_u*log_v, alpha-1.0)
    p2 = 1.0 + (alpha - 1.0)*np.power(generator_sum, -1.0/alpha)
    return p1*p2
185 | 
def test_python_vs_matlab(family):
    """Compare copulapdf against reference values stored by Matlab and plot the PDF.

    family -- 'Gaussian', 'T', 'Clayton', 'Gumbel' or 'Frank' (case-insensitive);
              any other value is silently ignored after the reference file
              has been loaded.

    The reference values are read from 'matlab/copulapdf_test.mat' (relative
    to the current working directory).  The outcome of the comparison is
    printed and the Python result is shown with plot_utils.plot_3d.
    """
    # Imported here because at module level they are only imported under the
    # __main__ guard; without this the function fails when the module is
    # imported rather than run as a script.
    import scipy.io
    import plot_utils
    
    n = 10
    
    # generate all u1,u2 combinations on an n-by-n grid
    u = np.linspace(0.1,0.9,n)
    UU = np.meshgrid(u,u)
    U2 = np.reshape(UU[0], (UU[0].shape[0]*UU[0].shape[1], 1))
    U1 = np.reshape(UU[1], (UU[1].shape[0]*UU[1].shape[1], 1))
    U = np.concatenate((U1,U2),axis=1)
    
    rho = 0.8
    Rho = np.array([[1,rho],[rho,1]])
    nu = 2
    
    # family -> (display label, extra arguments handed to copulapdf)
    test_cases = {
        'gaussian': ('Gaussian', (Rho,)),
        't':        ('T',        (Rho, nu)),
        'clayton':  ('Clayton',  (0.3,)),
        'gumbel':   ('Gumbel',   (1.5,)),
        'frank':    ('Frank',    (0.3,)),
    }
    
    # test the python data against Matlab
    # TODO: make python execute the matlab script which generates these samples
    matlab_data = scipy.io.loadmat('matlab/copulapdf_test.mat')
    
    family_lc = family.lower()
    if family_lc not in test_cases:
        return
    label, params = test_cases[family_lc]
    
    pdf_python = copulapdf(family, U, *params)
    # reference variables are named '<family>_copula_pdf' in the .mat file
    pdf_matlab = matlab_data[family_lc + '_copula_pdf'][:,0]
    
    # compare the two
    if(np.allclose(pdf_python, pdf_matlab)):
        print('%s Copula Python calculation matches Matlab!' % label)
    else:
        print('%s Copula Python calculation does NOT match Matlab!' % label)
    
    # plot the Copula for fun
    X = UU[0]
    Y = UU[1]
    Z = np.reshape(pdf_python, UU[0].shape)
    
    plot_utils.plot_3d(X,Y,Z, '%s Copula PDF' % label)
299 |     
if __name__=='__main__':
    import scipy.io
    import plot_utils
    
    # cross-check every supported family against the stored Matlab results
    for family_name in ('Gaussian', 'T', 'Clayton', 'Gumbel', 'Frank'):
        test_python_vs_matlab(family_name)


--------------------------------------------------------------------------------
/copularnd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | import sys
 26 | 
 27 | from scipy.stats import norm                    # contains PDF of Gaussian
 28 | from scipy.stats import multivariate_normal
 29 | from scipy.stats import uniform
 30 | from scipy.stats import gamma
 31 | from scipy.stats import logser
 32 | from scipy.stats import t
 33 | from rstable1 import rstable1
 34 | from statsmodels.sandbox.distributions import multivariate as mvt
 35 | 
 36 | import scipy.io as sio
 37 | 
 38 | """
 39 | copularnd.py contains routines which provide samples of a copula density
 40 | """
 41 | def copularnd(family, M, *args):
 42 |     """ Generates values of the Gaussian copula
 43 |     
 44 |     Inputs:
 45 |     family -- Should be either 'gaussian', 't', 'clayton', 'frank', or 'gumbel'
 46 |     M      -- the number of samples to generate
 47 |     args   -- variable number of arguments depending on which type of copula you are trying to simulate
 48 |                 Gaussian -- should be provided a NxN rho matrix as a numpy array datatype
 49 |                 t        -- should be provided a NxN rho matrix and a nu value
 50 |                 Clayton/Frank/Gumbel - should be provided a N for the dimensionality, and a scalar alpha value
 51 |     
 52 |     Outputs:
 53 |     U -- a M x N matrix of samples from the copula density chosen
 54 |     """
 55 | 
 56 |     num_var_args = len(args)
 57 |     family_lc = family.lower()
 58 |     if(family_lc=='gaussian'):
 59 |         if(num_var_args!=1):
 60 |             raise ValueError("Gaussian family requires one additional argument -- rho (correlation matrix) [P x P]")
 61 |         rho = args[0]
 62 |         shape0 = rho.shape[0]
 63 |         shape1 = rho.shape[1]
 64 |         if(shape0!=shape1):
 65 |             raise ValueError("Gaussian family requires rho to be of type numpy.ndarray with shape=[P x P]")
 66 |         U = _gaussian(M, rho)
 67 |     elif(family_lc=='t'):
 68 |         if(num_var_args!=2):
 69 |             raise ValueError("T family requires two additional argument -- rho (correlation matrix) [P x P] and nu [scalar]")
 70 |         rho = args[0]
 71 |         shape0 = rho.shape[0]
 72 |         shape1 = rho.shape[1]
 73 |         if(shape0!=shape1):
 74 |             raise ValueError("T family requires rho to be of type numpy.ndarray with shape=[P x P]")
 75 |         nu = args[1]
 76 |         U = _t(M, rho, nu)
 77 |     elif(family_lc=='clayton'):
 78 |         if(num_var_args!=2):
 79 |             raise ValueError("Clayton family requires two additional arguments -- N, alpha [scalar]")
 80 |         N     = args[0]
 81 |         alpha = args[1]
 82 |         U = _clayton(M, N, alpha)
 83 |     elif(family_lc=='frank'):
 84 |         if(num_var_args!=2):
 85 |             raise ValueError("Frank family requires two additional arguments -- N, alpha [scalar]")
 86 |         N     = args[0]
 87 |         alpha = args[1]
 88 |         U = _frank(M, N, alpha)
 89 |     elif(family_lc=='gumbel'):
 90 |         if(num_var_args!=2):
 91 |             raise ValueError("Gumbel family requires two additional arguments -- N, alpha [scalar]")
 92 |         N     = args[0]
 93 |         alpha = args[1]
 94 |         U = _gumbel(M, N, alpha)
 95 |     else:
 96 |         raise ValueError("Unrecognized family of copula")
 97 |     
 98 |     return U
 99 | 
def _gaussian(M, Rho):
    """
    Draws M samples from the Gaussian copula whose dependency is
    described by Rho (a square numpy matrix).  The underlying
    multivariate normal is zero mean; its samples are pushed through
    the standard normal CDF to obtain the copula samples.
    """
    dim = Rho.shape[0]
    zero_mean = np.zeros(dim)
    mvn_dist = multivariate_normal(zero_mean, Rho)
    return norm.cdf(mvn_dist.rvs(size=M))
113 |     
def _t(M, Rho, nu):
    """
    Draws M samples from the T copula with dependency matrix Rho (a
    square numpy matrix) and nu degrees of freedom.  Zero-mean
    multivariate T samples are pushed through the univariate T CDF.
    """
    dim = Rho.shape[0]
    zero_mean = np.zeros(dim)
    t_samples = mvt.multivariate_t_rvs(zero_mean, Rho, nu, M)
    return t.cdf(t_samples, nu)
121 | 
# We generate the Archimedean copulas as follows:
# Random pairs from these copulas can be generated sequentially: first
# generate u1 as a uniform r.v.  Then generate u2 from the conditional
# distribution F(u2 | u1; alpha) by generating uniform random values, then
# inverting the conditional CDF.
# This method is outlined in Nelsen's "An Introduction to Copulas".
128 | 
def _clayton(M, N, alpha):
    """
    Draws M samples from the N-dimensional Clayton copula.

    M     -- number of samples
    N     -- dimensionality, must be >= 2
    alpha -- dependency parameter, must be >= 0; a value below machine
             epsilon is treated as the independence copula
    Returns an M x N array of samples.
    Raises ValueError for a negative alpha or N < 2.
    """
    if(alpha<0):
        raise ValueError('Alpha must be >=0 for Clayton Copula Family')
    if(N<2):
        raise ValueError('Dimensionality Argument [N] must be an integer >= 2')
    elif(N==2):
        u1 = uniform.rvs(size=M)
        p = uniform.rvs(size=M)
        if(alpha<np.spacing(1)):
            u2 = p
        else:
            # inverse of the conditional CDF F(u2 | u1; alpha) evaluated at p
            u2 = u1*np.power((np.power(p,(-alpha/(1.0+alpha))) - 1 + np.power(u1,alpha)),(-1.0/alpha))
        
        U = np.column_stack((u1,u2))
    elif(alpha<np.spacing(1)):
        # independence limit, handled like the 2-D case; the frailty below
        # would otherwise need a gamma shape of 1/alpha == 1/0
        U = uniform.rvs(size=(M,N))
    else:
        # Algorithm 1 described in both the SAS Copula Procedure, as well as the
        # paper: "High Dimensional Archimedean Copula Generation Algorithm"
        U = np.empty((M,N))
        shape = 1.0/alpha
        for ii in range(0,M):
            # one gamma distributed frailty per sample
            v = gamma.rvs(shape)
            
            # sample N independent uniform random variables
            x_i = uniform.rvs(size=N)
            scaled = -1*np.log(x_i)/v
            
            U[ii,:] = np.power(1.0 + scaled, -1.0/alpha)

    return U
164 | 
def _frank(M, N, alpha):
    """
    Draws M samples from the N-dimensional Frank copula.

    M     -- number of samples
    N     -- dimensionality, must be >= 2
    alpha -- dependency parameter; may be negative only when N == 2
    Returns an M x N array of samples.
    Raises ValueError for N < 2, or for alpha <= 0 when N >= 3.
    """
    if(N<2):
        raise ValueError('Dimensionality Argument [N] must be an integer >= 2')
    elif(N==2):        
        u1 = uniform.rvs(size=M)
        p = uniform.rvs(size=M)
        if abs(alpha) > math.log(sys.float_info.max):
            # exp(-alpha) would overflow/underflow: use the limiting copula,
            # u2 = u1 for large positive alpha and u2 = 1-u1 for large
            # negative alpha
            if alpha > 0:
                u2 = u1
            else:
                u2 = 1.0 - u1
        elif abs(alpha) > math.sqrt(np.spacing(1)):
            # inverse of the conditional CDF F(u2 | u1; alpha) evaluated at p
            odds = np.exp(-alpha*u1)*(1-p)/p
            u2 = -1*np.log((odds + np.exp(-alpha))/(1 + odds))/alpha
        else:
            # alpha ~ 0: independence
            u2 = p
        
        U = np.column_stack((u1,u2))
    else:
        # Algorithm 1 described in both the SAS Copula Procedure, as well as the
        # paper: "High Dimensional Archimedean Copula Generation Algorithm"
        if(alpha<=0):
            raise ValueError('For N>=3, alpha >0 in Frank Copula')
            
        U = np.empty((M,N))
        # parameter of the log-series frailty; it does not depend on the row
        p = -1.0*np.expm1(-1*alpha)
        if(p==1):
            # boundary case protection
            p = 1 - np.spacing(1)
        for ii in range(0,M):
            v = logser.rvs(p, size=1)
            # sample N independent uniform random variables
            x_i = uniform.rvs(size=N)
            scaled = -1*np.log(x_i)/v
            U[ii,:] = -1.0*np.log1p( np.exp(-scaled)*np.expm1(-1.0*alpha))/alpha
            
    return U
202 | 
def _gumbel(M, N, alpha):
    """
    Draws M samples from the N-dimensional Gumbel copula.

    M     -- number of samples
    N     -- dimensionality, must be >= 2
    alpha -- dependency parameter, must be >= 1
    Returns an M x N array of samples.
    Raises ValueError for alpha < 1 or N < 2.
    """
    if alpha < 1:
        raise ValueError('Alpha must be >=1 for Gumbel Copula Family!')
    if N < 2:
        raise ValueError('Dimensionality Argument [N] must be an integer >= 2')

    if N == 2:
        if alpha < (1 + math.sqrt(np.spacing(1))):
            # alpha ~ 1: two independent uniforms
            first = uniform.rvs(size=M)
            second = uniform.rvs(size=M)
        else:
            # use the Marshal-Olkin method
            # Generate the frailty as Stable(1/alpha,1), c.f. Devroye, Thm. IV.6.7
            angle = (uniform.rvs(size=M) - .5) * math.pi   # uniform on (-pi/2, pi/2)
            shifted = angle + math.pi/2
            expo = -1*np.log(uniform.rvs(size=M))
            ratio = np.cos(angle - shifted/alpha)/expo
            frailty = np.power(np.sin(shifted/alpha)/ratio,(1.0/alpha)) * ratio/np.cos(angle)

            # Frees&Valdez, eqn 3.5
            first = np.exp(-1* (np.power(-1*np.log(uniform.rvs(size=M)), 1.0/alpha) / frailty) )
            second = np.exp(-1* (np.power(-1*np.log(uniform.rvs(size=M)), 1.0/alpha) / frailty) )

        return np.column_stack((first, second))

    # Algorithm 1 described in both the SAS Copula Procedure, as well as the
    # paper: "High Dimensional Archimedean Copula Generation Algorithm"
    samples = np.empty((M,N))
    # arguments of the stable frailty draw; none of them changes per row
    stable_a = 1.0/alpha
    stable_b = 1
    stable_g = np.power(np.cos(math.pi/(2.0*alpha)), alpha)
    stable_d = 0
    stable_pm = 1
    for row in range(0,M):
        v = rstable1(1,stable_a,stable_b,stable_g,stable_d,stable_pm)
        # sample N independent uniform random variables
        x_i = uniform.rvs(size=N)
        scaled = -1*np.log(x_i)/v

        samples[row,:] = np.exp(-1*np.power(scaled, 1.0/alpha))

    return samples
248 | 
249 | 
if __name__=='__main__':
    import matplotlib.pyplot as plt
    from plot_utils import pairs

    # demo script: draw samples from every family in 2-D and 3-D and show
    # their pairwise scatter plots
    M = 1000
    rh = 0.6
    nu = 2
    alpha = 5
    plot_order = ('Gaussian', 'T', 'Clayton', 'Frank', 'Gumbel')

    # Generate 2-D Copula RV's
    N = 2
    Rho = np.array([[1,rh],[rh,1]])
    samples_2d = {}
    samples_2d['Gaussian'] = copularnd('gaussian', M, Rho)
    samples_2d['T'] = copularnd('t', M, Rho, nu)
    for label in ('Clayton', 'Frank', 'Gumbel'):
        samples_2d[label] = copularnd(label.lower(), M, N, alpha)

    # Generate 3-D Copula RV's (the Archimedean families are drawn in the
    # order Gumbel, Frank, Clayton)
    N = 3
    Rho = np.array([[1,rh,rh],[rh,1,rh],[rh,rh,1]])
    samples_3d = {}
    samples_3d['Gaussian'] = copularnd('gaussian', M, Rho)
    samples_3d['T'] = copularnd('t', M, Rho, nu)
    for label in ('Gumbel', 'Frank', 'Clayton'):
        samples_3d[label] = copularnd(label.lower(), M, N, alpha)

    # plots
    for samples in (samples_2d, samples_3d):
        for label in plot_order:
            pairs(samples[label], label)
288 |     


--------------------------------------------------------------------------------
/copulastat.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from debye import debye
 26 | 
 27 | """
 28 | copulastat.py contains routines which provide copula dependency measures
 29 | the copula family type and the copula's specific dependency parameter.
 30 | 
 31 | The relationships used in the functions are well known, and documented in
 32 | many copula research papers, including Nelsen's Introduction to Copula's.
 33 | """
 34 | 
 35 | def copulastat(family, dependency, *args):
 36 |     dependency_lc = dependency.lower()
 37 |     if(dependency_lc!='kendall' and dependency_lc!='spearman'):
 38 |         raise ValueError('Invalid dependency argument -- must be kendall or spearman')
 39 |     dep_param = args[0]
 40 |     if(family.lower()=='gaussian'):
 41 |         r = _gaussian(dependency_lc, dep_param)
 42 |     elif(family.lower()=='t'):
 43 |         nu = args[1]
 44 |         r = _t(dependency_lc, dep_param, nu)
 45 |     elif(family.lower()=='clayton'):
 46 |         if(dep_param<0):
 47 |             raise ValueError('Invalid alpha value for Clayton Copula!')
 48 |         r = _clayton(dependency_lc, dep_param)
 49 |     elif(family.lower()=='gumbel'):
 50 |         r = _gumbel(dependency_lc, dep_param)
 51 |     elif(family.lower()=='frank'):
 52 |         r = _frank(dependency_lc, dep_param)
 53 |     else:
 54 |         raise ValueError('Unsupported Copula Family!')
 55 |     
 56 |     return r
 57 | 
 58 | def _gaussian(dependency, rho):
 59 |     if(dependency=='kendall'):
 60 |         r = 2*np.arcsin(rho)/math.pi
 61 |     elif(dependency=='spearman'):
 62 |         r = 6*np.arcsin(rho/2)/math.pi
 63 |     return r
 64 | 
 65 | def _t(dependency, rho, nu):
 66 |     if(dependency=='kendall'):
 67 |         r = 2*np.arcsin(rho)/math.pi
 68 |     elif(dependency=='spearman'):
 69 |         # we use nu in spearman's rho
 70 |         raise NotImplementedError('Spearmans Rho currently unsupported for T Copula')
 71 | 
 72 | def _clayton(dependency, alpha):
 73 |     if(dependency=='kendall'):
 74 |         r = alpha / (2 + alpha)
 75 |     elif(dependency=='spearman'):
 76 |         a = -0.1002
 77 |         b = 0.1533
 78 |         c = -0.5024
 79 |         d = -0.05629
 80 |         poly_coefs = [a,b,c,d,-1*(a+b+c+d-1),0]
 81 |         r = np.polyval(poly_coefs, alpha/(2+alpha))
 82 |     
 83 |     return r
 84 | 
 85 | def _gumbel(dependency, alpha):
 86 |     if(dependency=='kendall'):
 87 |         r = 1 - 1/alpha
 88 |     elif(dependency=='spearman'):
 89 |         a = -.2015
 90 |         b = .4208
 91 |         c = .2429
 92 |         d = -1.453
 93 |         poly_coefs = [a,b,c,d,-1*(a+b+c+d+1),1]
 94 |         r = np.polyval(poly_coefs, 1/alpha)
 95 |     
 96 |     return r
 97 | 
 98 | def _frank(dependency, alpha):
 99 |     if(dependency=='kendall'):
100 |         r = 1 + 4 * (debye(alpha,1)-1) / alpha
101 |     elif(dependency=='spearman'):
102 |         r = 1 + 12 * (debye(alpha,2) - debye(alpha,1)) / alpha
103 |     
104 |     return r
105 | 
106 | def test_python_vs_matlab(family):
107 |     # test the python data against Matlab
108 |     # TODO: make python execute the matlab script which generates these samples
109 |     matlab_data = scipy.io.loadmat('matlab/copulastat_test.mat')
110 |     
111 |     if(family.lower()=='gaussian'):
112 |         rho = 0.3
113 |         gauss_ktau_rho_0_3_python = copulastat(family,'kendall',rho)
114 |         gauss_srho_rho_0_3_python = copulastat(family,'spearman',rho)
115 |         rho = 0.7
116 |         gauss_ktau_rho_0_7_python = copulastat(family,'kendall',rho)
117 |         gauss_srho_rho_0_7_python = copulastat(family,'spearman',rho)
118 |         rho = 1.0
119 |         gauss_ktau_rho_1_0_python = copulastat(family,'kendall',rho)
120 |         gauss_srho_rho_1_0_python = copulastat(family,'spearman',rho)
121 |         
122 |         p1 = np.isclose(gauss_ktau_rho_0_3_python, matlab_data['gauss_ktau_rho_0_3'])
123 |         p2 = np.isclose(gauss_srho_rho_0_3_python, matlab_data['gauss_srho_rho_0_3'])
124 |         p3 = np.isclose(gauss_ktau_rho_0_7_python, matlab_data['gauss_ktau_rho_0_7'])
125 |         p4 = np.isclose(gauss_srho_rho_0_7_python, matlab_data['gauss_srho_rho_0_7'])
126 |         p5 = np.isclose(gauss_ktau_rho_1_0_python, matlab_data['gauss_ktau_rho_1_0'])
127 |         p6 = np.isclose(gauss_srho_rho_1_0_python, matlab_data['gauss_srho_rho_1_0'])
128 |         
129 |         if(p1 and p2 and p3 and p4 and p5 and p6):
130 |             print 'Gaussian CopulaStat tests PASSED!'
131 |         else:
132 |             print 'Gaussian CopulaStat tests FAILED!'
133 |     elif(family.lower()=='t'):
134 |         pass
135 |     elif(family.lower()=='clayton'):
136 |         alpha = 0.3
137 |         clayton_ktau_alpha_0_3_python = copulastat(family,'kendall',alpha)
138 |         clayton_srho_alpha_0_3_python = copulastat(family,'spearman',alpha)
139 |         alpha = 0.7
140 |         clayton_ktau_alpha_0_7_python = copulastat(family,'kendall',alpha)
141 |         clayton_srho_alpha_0_7_python = copulastat(family,'spearman',alpha)
142 |         alpha = 1.0
143 |         clayton_ktau_alpha_1_0_python = copulastat(family,'kendall',alpha)
144 |         clayton_srho_alpha_1_0_python = copulastat(family,'spearman',alpha)
145 |         
146 |         p1 = np.isclose(clayton_ktau_alpha_0_3_python, matlab_data['clayton_ktau_alpha_0_3'])
147 |         p2 = np.isclose(clayton_srho_alpha_0_3_python, matlab_data['clayton_srho_alpha_0_3'])
148 |         p3 = np.isclose(clayton_ktau_alpha_0_7_python, matlab_data['clayton_ktau_alpha_0_7'])
149 |         p4 = np.isclose(clayton_srho_alpha_0_7_python, matlab_data['clayton_srho_alpha_0_7'])
150 |         p5 = np.isclose(clayton_ktau_alpha_1_0_python, matlab_data['clayton_ktau_alpha_1_0'])
151 |         p6 = np.isclose(clayton_srho_alpha_1_0_python, matlab_data['clayton_srho_alpha_1_0'])
152 |         
153 |         if(p1 and p2 and p3 and p4 and p5 and p6):
154 |             print 'Clayton CopulaStat tests PASSED!'
155 |         else:
156 |             print 'Clayton CopulaStat tests FAILED!'
157 |     elif(family.lower()=='gumbel'):
158 |         alpha = 1.0
159 |         gumbel_ktau_alpha_1_0_python = copulastat(family,'kendall',alpha)
160 |         gumbel_srho_alpha_1_0_python = copulastat(family,'spearman',alpha)
161 |         alpha = 3.0
162 |         gumbel_ktau_alpha_3_0_python = copulastat(family,'kendall',alpha)
163 |         gumbel_srho_alpha_3_0_python = copulastat(family,'spearman',alpha)
164 |         
165 |         p1 = np.isclose(gumbel_ktau_alpha_1_0_python, matlab_data['gumbel_ktau_alpha_1_0'])
166 |         p2 = np.isclose(gumbel_srho_alpha_1_0_python, matlab_data['gumbel_srho_alpha_1_0'])
167 |         p3 = np.isclose(gumbel_ktau_alpha_3_0_python, matlab_data['gumbel_ktau_alpha_3_0'])
168 |         p4 = np.isclose(gumbel_srho_alpha_3_0_python, matlab_data['gumbel_srho_alpha_3_0'])
169 |         
170 |         if(p1 and p2 and p3 and p4):
171 |             print 'Gumbel CopulaStat tests PASSED!'
172 |         else:
173 |             print 'Gumbel CopulaStat tests FAILED!'
174 |     elif(family.lower()=='frank'):
175 |         alpha = 0.3
176 |         frank_ktau_alpha_0_3_python = copulastat(family,'kendall',alpha)
177 |         frank_srho_alpha_0_3_python = copulastat(family,'spearman',alpha)
178 |         alpha = 0.7
179 |         frank_ktau_alpha_0_7_python = copulastat(family,'kendall',alpha)
180 |         frank_srho_alpha_0_7_python = copulastat(family,'spearman',alpha)
181 |         alpha = 1.0
182 |         frank_ktau_alpha_1_0_python = copulastat(family,'kendall',alpha)
183 |         frank_srho_alpha_1_0_python = copulastat(family,'spearman',alpha)
184 |         
185 |         p1 = np.isclose(frank_ktau_alpha_0_3_python, matlab_data['frank_ktau_alpha_0_3'])
186 |         p2 = np.isclose(frank_srho_alpha_0_3_python, matlab_data['frank_srho_alpha_0_3'])
187 |         p3 = np.isclose(frank_ktau_alpha_0_7_python, matlab_data['frank_ktau_alpha_0_7'])
188 |         p4 = np.isclose(frank_srho_alpha_0_7_python, matlab_data['frank_srho_alpha_0_7'])
189 |         p5 = np.isclose(frank_ktau_alpha_1_0_python, matlab_data['frank_ktau_alpha_1_0'])
190 |         p6 = np.isclose(frank_srho_alpha_1_0_python, matlab_data['frank_srho_alpha_1_0'])
191 |         
192 |         if(p1 and p2 and p3 and p4 and p5 and p6):
193 |             print 'Frank CopulaStat tests PASSED!'
194 |         else:
195 |             print 'Frank CopulaStat tests FAILED!'
196 |     
if __name__=='__main__':
    # NOTE(review): scipy.io is bound at module scope here; presumably
    # test_python_vs_matlab relies on it to load the MATLAB reference data --
    # confirm before moving or removing this import.
    import scipy.io

    # run the comparison once for every copula family that has reference data
    for copula_family in ('Gaussian', 'Clayton', 'Gumbel', 'Frank'):
        test_python_vs_matlab(copula_family)
204 | 


--------------------------------------------------------------------------------
/cvolume.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from invcopulastat import invcopulastat
 26 | from copulacdf import copulacdf
 27 | 
 28 | def cvolume(family, u1v1, u1v2, u2v1, u2v2, *args):
 29 |     """
 30 |     Computes the C-Volume of a specified copula family with dependency parameter
 31 |     defined in the args.
 32 |       family - the copula type, must be:
 33 |         'Gaussian'
 34 |         'T'
 35 |         'Clayton'
 36 |         'Frank'
 37 |         'Gumbel'
 38 |       u1v1 - a N x 2 matrix of values between [0,1] that represents the bottom
 39 |              left coordinate of the grid for which the C-Volume is desired
 40 |       u1v2 - a N x 2 matrix of values between [0,1] that represent the top
 41 |              left coordinate of the grid for which the C-Volume is desired
 42 |       u2v1 - a N x 2 matrix of values between [0,1] that represent the bottom
 43 |              right coordinate of the grid for which the C-volume is desired
 44 |       u2v2 - a N x 2 matrix of values between [0,1] that represents the top
 45 |              right coordinate of the grid for which the C-Volume is desired
 46 |       args - must be atleast of length 2, for which the first element in args
 47 |              is expected to be a string which describes the dependency value
 48 |              being provided, must be one of the following:
 49 |         'kendall' - means kendall's Tau is being provided
 50 |         'spearman' - means spearman's rho is being provided
 51 |         'native' - means that the dependency parameter of the copula family
 52 |                    itself is being provided directly
 53 |              the second argmuent  must be the value of the dependency type 
 54 |             provided. For kendall and spearman, a scalar value is expected.  
 55 |             For native, if the family type is Frank, Gumbel, or Clayton, then 
 56 |             a scalar value is expected, which represents the dependency
 57 |             parameter.  If the family type is Gaussian, then a 2 x 2 numpy array
 58 |             is expected, which represents the correlation matrix defining the
 59 |             Gaussian copula.  If the family is T, then the 2nd argument is the
 60 |             2x2 numpy array representing the correlation matrix, and the 3rd
 61 |             argument is the degrees of freedom
 62 |     """    
 63 |     family_lc = family.lower()
 64 |     if(family_lc=='gaussian'):
 65 |         if(len(args)<2):
 66 |             raise ValueError("Gaussian Family expects 2 variable arguments, the dependency type and value")
 67 |         if(args[0]=='kendall' or args[0]=='spearman'):
 68 |             # get the correlation parameter
 69 |             r = invcopulastat(family, args[0], args[1])
 70 |         else:
 71 |             r = args[1]
 72 |         
 73 |         cvol = _gaussian(u1v1, u1v2, u2v1, u2v2, r)
 74 |     elif(family_lc=='t'):
 75 |         if(len(args)<2):
 76 |             raise ValueError("T Family expects atleast 2 variable arguments, the dependency type and value")
 77 |         
 78 |         if(args[0]=='kendall' or args[0]=='spearman'):
 79 |             raise ValueError('T Family does not accept Kendalls Tau or Spearmans Rho, only native parameters')
 80 |         else:
 81 |             r = args[1]
 82 |             nu = args[2]
 83 |             
 84 |             cvol = _gaussian(u1v1, u1v2, u2v1, u2v2, r, nu)
 85 |             
 86 |     elif(family_lc=='clayton'):
 87 |         if(len(args)<2):
 88 |             raise ValueError("Clayton Family expects 2 variable arguments, the dependency type and value")
 89 |         
 90 |         if(args[0]=='kendall' or args[0]=='spearman'):
 91 |             # get the correlation parameter and degrees of freedom
 92 |             alpha = invcopulastat(family, args[0], args[1])
 93 |         else:
 94 |             alpha = args[1]
 95 |         
 96 |         cvol = _clayton(u1v1, u1v2, u2v1, u2v2, alpha)
 97 |         
 98 |     elif(family_lc=='frank'):
 99 |         if(len(args)<2):
100 |             raise ValueError("Frank Family expects 2 variable arguments, the dependency type and value")
101 |         if(args[0]=='kendall' or args[0]=='spearman'):
102 |             # get the correlation parameter and degrees of freedom
103 |             alpha = invcopulastat(family, args[0], args[1])
104 |         else:
105 |             alpha = args[1]
106 |         
107 |         cvol = _frank(u1v1, u1v2, u2v1, u2v2, alpha)
108 | 
109 |     elif(family_lc=='gumbel'):
110 |         if(len(args)<2):
111 |             raise ValueError("Gumbel Family expects 2 variable arguments, the dependency type and value")
112 |         if(args[0]=='kendall' or args[0]=='spearman'):
113 |             # get the correlation parameter and degrees of freedom
114 |             alpha = invcopulastat(family, args[0], args[1])
115 |         else:
116 |             alpha = args[1]
117 |         
118 |         cvol = _gumbel(u1v1, u1v2, u2v1, u2v2, alpha)
119 | 
120 |     return cvol
121 | 
122 |     
def _gaussian(u1v1, u1v2, u2v1, u2v2, r):
    """
    C-Volume of the Gaussian copula with scalar correlation coefficient r
    over the rectangle described by its four corner point sets.
    """
    # 2 x 2 correlation matrix: unit diagonal, r on the off-diagonal
    corr = np.eye(2)
    corr[0, 1] = r
    corr[1, 0] = r

    # copula CDF evaluated at each corner of the rectangle
    top_right = copulacdf('Gaussian', u2v2, corr)
    bottom_right = copulacdf('Gaussian', u2v1, corr)
    top_left = copulacdf('Gaussian', u1v2, corr)
    bottom_left = copulacdf('Gaussian', u1v1, corr)

    # C-Volume as defined by Nelsen
    return top_right - bottom_right - top_left + bottom_left
136 | 
def _t(u1v1, u1v2, u2v1, u2v2, r, nu):
    """
    C-Volume of the T copula with scalar correlation coefficient r and
    degrees of freedom nu over the rectangle described by its four corner
    point sets.
    """
    # generate the Rho matrix from r: unit diagonal, r on the off-diagonal
    Rho = np.ones((2,2))
    Rho[0][1] = r
    Rho[1][0] = r

    # this is the equation for C Volume as defined by Nelsen
    cvol = copulacdf('T', u2v2, Rho, nu) - \
           copulacdf('T', u2v1, Rho, nu) - \
           copulacdf('T', u1v2, Rho, nu) + \
           copulacdf('T', u1v1, Rho, nu)

    return cvol
152 | 
def _clayton(u1v1, u1v2, u2v1, u2v2, alpha):
    """
    C-Volume of the Clayton copula with dependency parameter alpha over the
    rectangle described by its four corner point sets.
    """
    # evaluate the copula CDF at the corners, in the order used by the formula
    corners = (u2v2, u2v1, u1v2, u1v1)
    c22, c21, c12, c11 = [copulacdf('Clayton', corner, alpha) for corner in corners]

    # C-Volume as defined by Nelsen
    return c22 - c21 - c12 + c11
162 | 
def _frank(u1v1, u1v2, u2v1, u2v2, alpha):
    """
    C-Volume of the Frank copula with dependency parameter alpha over the
    rectangle described by its four corner point sets.
    """
    # evaluate the copula CDF at the corners, in the order used by the formula
    corners = (u2v2, u2v1, u1v2, u1v1)
    c22, c21, c12, c11 = [copulacdf('Frank', corner, alpha) for corner in corners]

    # C-Volume as defined by Nelsen
    return c22 - c21 - c12 + c11
172 | 
def _gumbel(u1v1, u1v2, u2v1, u2v2, alpha):
    """
    C-Volume of the Gumbel copula with dependency parameter alpha over the
    rectangle described by its four corner point sets.
    """
    # evaluate the copula CDF at the corners, in the order used by the formula
    corners = (u2v2, u2v1, u1v2, u1v1)
    c22, c21, c12, c11 = [copulacdf('Gumbel', corner, alpha) for corner in corners]

    # C-Volume as defined by Nelsen
    return c22 - c21 - c12 + c11
182 | 


--------------------------------------------------------------------------------
/debye.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | #******************************************************************************
 5 | #* 
 6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
 7 | #*
 8 | #* This program is free software: you can redistribute it and/or modify
 9 | #* it under the terms of the GNU General Public License as published by
10 | #* the Free Software Foundation, either version 3 of the License, or
11 | #* (at your option) any later version.
12 | #*
13 | #* This program is distributed in the hope that it will be useful,
14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #* GNU General Public License for more details.
17 | #*
18 | #* You should have received a copy of the GNU General Public License
19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 | #******************************************************************************
21 | 
22 | import numpy as np
23 | import scipy.integrate as integrate
24 | 
def debye(x, n):
    """
    Evaluate the Debye function

        D_n(x) = n / x**n * integral_0^x  t**n / (exp(t) - 1) dt

    See http://en.wikipedia.org/wiki/Debye_function for details

    Inputs:
    x -- the upper integration limit
    n -- the order of the Debye function

    Outputs:
    the value of D_n(x); for x == 0 the limiting value 1.0 is returned
    """

    # ensure n is a float
    n = float(n)

    # the closed form is 0/0 at x == 0; D_n(x) tends to 1 as x tends to 0
    if x == 0:
        return 1.0

    # expm1 avoids the cancellation of exp(t) - 1 for small t
    integral, _abserr = integrate.quad(lambda t: pow(t, n)/np.expm1(t), 0.0, x)
    return n*integral/pow(x, n)


--------------------------------------------------------------------------------
/ecdf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from scipy.interpolate import interp1d
 26 | 
 27 | """
 28 | ecdf.py contains routines which help perform empirical CDF estimation.
 29 | """
 30 | 
 31 | def ecdf(x_i, npoints):
 32 |     """ Generates an Empirical CDF using the indicator function.
 33 |     
 34 |     Inputs:
 35 |     x_i -- the input data set, should be a numpy array
 36 |     npoints -- the number of desired points in the empirical CDF estimate
 37 |      
 38 |     Outputs:
 39 |     y -- the empirical CDF
 40 |     """
 41 |     # define the points over which we will generate the kernel density estimate
 42 |     x = np.linspace(min(x_i), max(x_i), npoints)
 43 |     n = float(x_i.size)
 44 |     y = np.zeros(npoints)
 45 |     
 46 |     for ii in np.arange(x.size):
 47 |         idxs = np.where(x_i<=x[ii])
 48 |         y[ii] = np.sum(idxs[0].size)/n
 49 |     
 50 |     return (x,y)
 51 | 
 52 | def kde_integral(kde):
 53 |     """ Generates a "smoother" Empirical CDF by integrating the KDE.  For this,
 54 |         the user should first generate the KDE using kde.py, and then pass the
 55 |         density estimate to this function
 56 |         
 57 |         Inputs:
 58 |         kde -- the kernel density estimate
 59 |         
 60 |         Outputs:
 61 |         y -- the smoothed CDF estimate
 62 |     """
 63 |     y = np.cumsum(kde)/sum(kde)
 64 |     
 65 |     return y
 66 | 
 67 | def probability_integral_transform(X):
 68 |     """
 69 |     Takes a data array X of dimension [M x N], and converts it to a uniform
 70 |     random variable using the probability integral transform, U = F(X)
 71 |     """
 72 |     M = X.shape[0]
 73 |     N = X.shape[1]
 74 |     
 75 |     # convert X to U by using the probability integral transform:  F(X) = U
 76 |     U = np.empty(X.shape)
 77 |     for ii in range(0,N):
 78 |         x_ii = X[:,ii]
 79 |         
 80 |         # estimate the empirical cdf    
 81 |         (xx,pp) = ecdf(x_ii, M)
 82 |         f = interp1d(xx, pp)    # TODO: experiment w/ different kinds of interpolation?
 83 |                                 # for example, cubic, or spline etc...?
 84 |         
 85 |         # plug this RV sample into the empirical cdf to get uniform RV
 86 |         u_ii = f(x_ii)           
 87 |         U[:,ii] = u_ii
 88 |         
 89 |     return U
 90 | 
 91 | if __name__=='__main__':
 92 |     import matplotlib.pyplot as plt
 93 |     import sys
 94 |     import kde
 95 |     
 96 |     from scipy.stats import norm
 97 |     from scipy.stats import expon
 98 |     
 99 |     # test the E_CDF estimation
100 |     N1 = 100 # number of data in data set 1
101 |     m1 = -1  # mean value
102 |     s1 = 0.1 # % variance 
103 | 
104 |     N2 = 500 # number of data in data set 2
105 |     m2 = 2   # mean value
106 |     s2 = 0.5 # variance 
107 |     
108 |     h = 0.1       # bandwidth
109 |     npoints = 100 # number of abscis points in kde
110 | 
111 |     x1 = math.sqrt(s1)*np.random.randn(N1,1) + m1
112 |     x2 = math.sqrt(s2)*np.random.randn(N2,1) + m2
113 |     x = np.concatenate((x1,x2),axis=0)
114 |     
115 |     # Kernel Density Estimate
116 |     (xx,kde_estimate) = kde.kde(x,'Gaussian',h, npoints)
117 |     plt.plot(xx,kde_estimate, 'r', label='Kernel Density Estimate')
118 |     
119 |     # the histogram of the data
120 |     n, bins, patches = plt.hist(x, 50, normed=1, facecolor='green', alpha=0.75, label='Histogram')
121 |     
122 |     # empirical CDF
123 |     (xx,pp) = ecdf(x, npoints)
124 |     plt.plot(xx,pp, 'k', label='Empirical CDF')
125 |     
126 |     # Smooth Empirical CDF (KDE Integral)
127 |     kde_integral = kde_integral(kde_estimate)
128 |     plt.plot(xx,kde_integral, 'm', label='Smooth Empirical CDF')
129 |     plt.legend(loc='upper left')
130 |     plt.show()
131 |     
132 |     # test the probability integral transform
133 |     M = 100
134 |     N = 2
135 |     X = np.empty((M,N))
136 |     X[:,0] = norm.rvs(size=M)
137 |     X[:,1] = expon.rvs(size=M)
138 |     
139 |     U = probability_integral_transform(X)
140 |     
141 |     f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
142 |     ax1.hist(X[:,0])
143 |     ax1.set_title('Guassian RV')
144 |     ax2.hist(U[:,0])
145 |     ax2.set_title('Gaussian Transformed to Uniform')
146 |     ax3.hist(X[:,1])
147 |     ax3.set_title('Exponential RV')
148 |     ax4.hist(U[:,1])
149 |     ax4.set_title('Exponential Transformed to Uniform')
150 |     plt.show()
151 |     


--------------------------------------------------------------------------------
/invcopulastat.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | from debye import debye
 26 | from scipy.optimize import fsolve
 27 | 
 28 | from copulastat import copulastat
 29 | 
 30 | """
 31 | invcopulastat.py contains routines which invert copula dependency measures: given the
 32 | copula family type and a dependency measure, they return the copula's dependency parameter.
 33 | 
 34 | The relationships used in the functions are well known, and documented in
 35 | many copula research papers, including Nelsen's An Introduction to Copulas.
 36 | """
 37 | 
 38 | def invcopulastat(family, dependency, val):
 39 |     dependency_lc = dependency.lower()
 40 |     if(dependency_lc!='kendall' and dependency_lc!='spearman'):
 41 |         raise ValueError('Invalid dependency argument -- must be kendall or spearman')
 42 |     if(family.lower()=='gaussian'):
 43 |         r = _gaussian(dependency_lc, val)
 44 |     elif(family.lower()=='t'):
 45 |         r = _t(dependency_lc, val)
 46 |     elif(family.lower()=='clayton'):
 47 |         r = _clayton(dependency_lc, val)
 48 |     elif(family.lower()=='gumbel'):
 49 |         r = _gumbel(dependency_lc, val)
 50 |     elif(family.lower()=='frank'):
 51 |         r = _frank(dependency_lc, val)
 52 |     else:
 53 |         raise ValueError('Unsupported Copula Family!')
 54 |     
 55 |     return r
 56 | 
 57 | def _gaussian(dependency, val):
 58 |     if(dependency=='kendall'):
 59 |         r = np.sin(math.pi/2.0*val)
 60 |     elif(dependency=='spearman'):
 61 |         r = 2*np.sin(math.pi/6.0*val)
 62 |     return r
 63 | 
 64 | def _t(dependency, val):
 65 |     if(dependency=='kendall'):
 66 |         r = np.sin(math.pi/2.0*val)
 67 |     elif(dependency=='spearman'):
 68 |         r = 2*np.sin(math.pi/6.0*val)
 69 |     return r
 70 | 
 71 | def _clayton(dependency, val):
 72 |     if(dependency=='kendall'):
 73 |         if(val<0 or val>=1):
 74 |             raise ValueError('Valid values of Kendall\'s Tau for the Clayton Copula are [0,1)')
 75 |         d = 2.0*val/(1.0-val)
 76 |     elif(dependency=='spearman'):
 77 |         raise NotImplementedError('Spearmans Rho currently unsupported for Clayton Copula family!')
 78 |     
 79 |     return d
 80 | 
 81 | def _gumbel(dependency, val):
 82 |     if(dependency=='kendall'):
 83 |         if(val<0 or val>=1):
 84 |             raise ValueError('Valid values of Kendall\'s Tau for the Gumbel Copula are [0,1)')
 85 |         d = 1.0/(1.0-val)
 86 |     elif(dependency=='spearman'):
 87 |         raise NotImplementedError('Spearmans Rho currently unsupported for Gumbel Copula family!')
 88 |     
 89 |     return d
 90 | 
 91 | def _frank_kendall_fopt(alpha, tau):
 92 |     return 4*( debye(alpha,1) - 1 )/alpha + 1 - tau
 93 | 
 94 | def _frank(dependency, val):
 95 |     if(dependency=='kendall'):
 96 |         return fsolve(_frank_kendall_fopt, 1, args=(val))[0]
 97 |     elif(dependency=='spearman'):
 98 |         # TODO --  use function solvers in scipy to invert debye function for the closed form solution
 99 |         raise NotImplementedError('Spearmans Rho currently unsupported for Frank Copula family!')
100 |     
101 |     return r
102 | 
103 | def test_python_vs_matlab(family):
104 |     # DISCLAIMER: this code assumes copulastat is working properly and tested
105 |     
106 |     if(family.lower()=='gaussian'):
107 |         dependency = 'kendall'
108 |         rho = 0.3
109 |         rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho))
110 |         p1 = np.isclose(rho, rho_calc)
111 |         
112 |         rho = 0.7
113 |         rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho))
114 |         p2 = np.isclose(rho, rho_calc)
115 |         
116 |         dependency = 'spearman'
117 |         rho = 0.3
118 |         rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho))
119 |         p3 = np.isclose(rho, rho_calc)
120 |         
121 |         rho = 0.7
122 |         rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho))
123 |         p4 = np.isclose(rho, rho_calc)
124 |         
125 |         if(p1 and p2 and p3 and p4):
126 |             print 'Gaussian CopulaStat tests PASSED!'
127 |         else:
128 |             print 'Gaussian CopulaStat tests FAILED!'
129 |         
130 |     elif(family.lower()=='t'):
131 |         pass
132 |     
133 |     elif(family.lower()=='clayton' or family.lower()=='gumbel' or family.lower()=='frank'):
134 |         dependency = 'kendall'
135 |         alpha = 0.3
136 |         tau = copulastat(family, dependency, alpha)
137 |         alpha_calc = invcopulastat(family, dependency, tau)
138 |         p1 = np.isclose(alpha, alpha_calc)
139 |         
140 |         alpha = 0.7
141 |         tau = copulastat(family, dependency, alpha)
142 |         alpha_calc = invcopulastat(family, dependency, tau)
143 |         p2 = np.isclose(alpha, alpha_calc)
144 |         
145 |         if(p1 and p2):
146 |             print family + ' CopulaStat tests PASSED!'
147 |         else:
148 |             print family + ' CopulaStat tests FAILED!'
149 |     
if __name__=='__main__':
    # run the round-trip self test once for every supported family
    for copula_family in ('Gaussian', 'Clayton', 'Gumbel', 'Frank'):
        test_python_vs_matlab(copula_family)
155 | 


--------------------------------------------------------------------------------
/kde.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #******************************************************************************
  5 | #* 
  6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
  7 | #*
  8 | #* This program is free software: you can redistribute it and/or modify
  9 | #* it under the terms of the GNU General Public License as published by
 10 | #* the Free Software Foundation, either version 3 of the License, or
 11 | #* (at your option) any later version.
 12 | #*
 13 | #* This program is distributed in the hope that it will be useful,
 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #* GNU General Public License for more details.
 17 | #*
 18 | #* You should have received a copy of the GNU General Public License
 19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 20 | #******************************************************************************
 21 | 
 22 | import math
 23 | import numpy as np
 24 | 
 25 | """
 26 | kde.py contains routines which help perform Kernel Density Estimation (KDE).
 27 | """
 28 | 
 29 | def kde(x_i, kernel, h, n_points):
 30 |     """ Perform Kernel Density Estimation on a given set of points.
 31 | 
 32 |     Inputs:
 33 |     x_i -- the input data set, should be a numpy array
 34 |     kernel -- the kernel to use, must be a string of one of the choices:
 35 |                 Uniform
 36 |                 Triangular
 37 |                 Epanechnikov
 38 |                 Quartic
 39 |                 Triweight
 40 |                 Tricube
 41 |                 Gaussian
 42 |                 Cosine
 43 |                 Logistic
 44 |                 Silverman
 45 |     h -- the kernel bandwidth setting
 46 |     npoints -- the number of desired points in the kernel density estimate
 47 |      
 48 |     Outputs:
 49 |     y -- the kernel density estimate
 50 |     """
 51 |     # define the points over which we will generate the kernel density estimate
 52 |     x = np.linspace(min(x_i), max(x_i), n_points)
 53 |     n = x_i.size
 54 |     y = np.zeros(n_points)
 55 |     
 56 |     for ii in np.arange(n_points):
 57 |         # apply the kernel to the point of interest
 58 |         if(kernel.lower()=='uniform'):
 59 |             y[ii] = 1.0/(n*h) * np.sum(uniform_kernel( (x[ii]-x_i)/h ) )
 60 |         elif(kernel.lower()=='triangular'):
 61 |             y[ii] = 1.0/(n*h) * np.sum(triangle_kernel( (x[ii]-x_i)/h ) )
 62 |         elif(kernel.lower()=='epanechnikov'):
 63 |             y[ii] = 1.0/(n*h) * np.sum(epanechnikov_kernel( (x[ii]-x_i)/h ) )
 64 |         elif(kernel.lower()=='quartic'):
 65 |             y[ii] = 1.0/(n*h) * np.sum(quartic_kernel( (x[ii]-x_i)/h ) )
 66 |         elif(kernel.lower()=='triweight'):
 67 |             y[ii] = 1.0/(n*h) * np.sum(triweight_kernel( (x[ii]-x_i)/h ) )
 68 |         elif(kernel.lower()=='tricube'):
 69 |             y[ii] = 1.0/(n*h) * np.sum(tricube_kernel( (x[ii]-x_i)/h ) )
 70 |         elif(kernel.lower()=='gaussian'):
 71 |             y[ii] = 1.0/(n*h) * np.sum(gaussian_kernel( (x[ii]-x_i)/h ) )
 72 |         elif(kernel.lower()=='cosine'):
 73 |             y[ii] = 1.0/(n*h) * np.sum(cosine_kernel( (x[ii]-x_i)/h ) )
 74 |         elif(kernel.lower()=='logistic'):
 75 |             y[ii] = 1.0/(n*h) * np.sum(logistic_kernel( (x[ii]-x_i)/h ) )
 76 |         elif(kernel.lower()=='silverman'):
 77 |             y[ii] = 1.0/(n*h) * np.sum(silverman_kernel( (x[ii]-x_i)/h ) )
 78 |         else:
 79 |             print 'In here:)'
 80 | 
 81 |     return (x,y)
 82 |     
 83 | def uniform_kernel(u):
 84 |     """
 85 |     %UNIFORM_KDE - the uniform kernel
 86 |     """
 87 |     idxs = np.where(abs(u)<=1)
 88 |     y = np.zeros(u.size)
 89 |     y[idxs[0]] = 1.0/2.0
 90 |     
 91 |     return y
 92 | 
 93 | def triangle_kernel(u):
 94 |     """
 95 |     %TRIANGLE_KDE - the triangular kernel
 96 |     """
 97 |     idxs = np.where(abs(u)<=1)
 98 |     y = np.zeros(u.size)
 99 |     y[idxs[0]] = 1.0-abs(u[idxs[0]])
100 |     
101 |     return y
102 | 
def epanechnikov_kernel(u):
    """
    The Epanechnikov kernel: 3/4 * (1 - u^2) for |u| <= 1 and 0 elsewhere.

    u may be a scalar or an array of any shape; the result is a float array
    with the same shape as u.
    """
    u = np.asarray(u, dtype=float)
    inside = np.abs(u) <= 1

    y = np.zeros(u.shape)
    y[inside] = 3.0/4.0*(1-np.power(u[inside],2))

    return y
112 | 
def quartic_kernel(u):
    """
    The quartic (biweight) kernel: 15/16 * (1 - u^2)^2 for |u| <= 1 and 0
    elsewhere.

    u may be a scalar or an array of any shape; the result is a float array
    with the same shape as u.
    """
    u = np.asarray(u, dtype=float)
    inside = np.abs(u) <= 1

    y = np.zeros(u.shape)
    y[inside] = 15.0/16.0*np.power((1-np.power(u[inside],2)),2)

    return y
122 | 
def triweight_kernel(u):
    """
    The triweight kernel: 35/32 * (1 - u^2)^3 for |u| <= 1 and 0 elsewhere.

    u may be a scalar or an array of any shape; the result is a float array
    with the same shape as u.
    """
    u = np.asarray(u, dtype=float)
    inside = np.abs(u) <= 1

    y = np.zeros(u.shape)
    y[inside] = 35.0/32.0*np.power((1-np.power(u[inside],2)),3)

    return y
132 | 
def tricube_kernel(u):
    """
    The tricube kernel: 70/81 * (1 - |u|^3)^3 for |u| <= 1 and 0 elsewhere.

    u may be a scalar or an array of any shape; the result is a float array
    with the same shape as u.
    """
    u = np.asarray(u, dtype=float)
    inside = np.abs(u) <= 1

    y = np.zeros(u.shape)
    y[inside] = 70.0/81.0*np.power((1-np.power(np.abs(u[inside]),3)),3)

    return y
142 | 
def gaussian_kernel(u):
    """
    The Gaussian kernel, i.e. the standard normal density evaluated at u:
    1/sqrt(2*pi) * exp(-u^2 / 2).
    """
    normalization = 1.0/math.sqrt(2*math.pi)
    exponent = -np.square(u)/2.0

    return normalization * np.exp(exponent)
150 | 
def cosine_kernel(u):
    """
    The cosine kernel: pi/4 * cos(pi/2 * u) for |u| <= 1 and 0 elsewhere.

    u may be a scalar or an array of any shape; the result is a float array
    with the same shape as u.
    """
    u = np.asarray(u, dtype=float)
    inside = np.abs(u) <= 1

    y = np.zeros(u.shape)
    y[inside] = math.pi/4.0*np.cos(math.pi/2.0*u[inside])

    return y
160 | 
def logistic_kernel(u):
    """
    The logistic kernel: 1 / (exp(u) + 2 + exp(-u)).
    """
    growing = np.exp(u)
    decaying = np.exp(-u)

    return 1.0/(growing + 2.0 + decaying)
168 | 
def silverman_kernel(u):
    """
    The Silverman kernel:
        1/2 * exp(-|u|/sqrt(2)) * sin(|u|/sqrt(2) + pi/4)

    The same |u|/sqrt(2) scaling is used in the exponential and in the sine;
    with this scaling the kernel integrates to one.
    """
    scaled = np.abs(u)/math.sqrt(2)
    y = 1.0/2.0 * np.exp(-scaled) * np.sin(scaled + math.pi/4)

    return y
176 | 
177 | 
if __name__=='__main__':
    import matplotlib.pyplot as plt

    # TODO: put in argument to allow user to test windows and drive this
    # flag from it instead of editing the source
    plot_kernels = False

    if plot_kernels:
        # plot every kernel, each in its own figure, over the same abscissa
        x_i = np.linspace(-2,2,100)
        kernels = [('Uniform Kernel',      uniform_kernel),
                   ('Triangle Kernel',     triangle_kernel),
                   ('Epanechnikov Kernel', epanechnikov_kernel),
                   ('Quartic Kernel',      quartic_kernel),
                   ('Triweight Kernel',    triweight_kernel),
                   ('Tricube Kernel',      tricube_kernel),
                   ('Gaussian Kernel',     gaussian_kernel),
                   ('Cosine Kernel',       cosine_kernel),
                   ('Logistic Kernel',     logistic_kernel),
                   ('Silverman Kernel',    silverman_kernel)]
        for title, kernel in kernels:
            plt.plot(x_i, kernel(x_i))
            plt.title(title)
            plt.show()

    # test the KDE estimation on a two-component Gaussian mixture
    N1 = 100 # number of data in data set 1
    m1 = -1  # mean value
    s1 = 0.1 # variance

    N2 = 500 # number of data in data set 2
    m2 = 2   # mean value
    s2 = 0.5 # variance

    h = 0.1       # bandwidth
    npoints = 100 # number of abscissa points in kde

    # randn has unit variance, so scale by the standard deviation sqrt(s)
    x1 = math.sqrt(s1)*np.random.randn(N1,1) + m1
    x2 = math.sqrt(s2)*np.random.randn(N2,1) + m2
    x = np.concatenate((x1,x2),axis=0)

    # the histogram of the data, normalized to a density so that it is
    # comparable to the KDE curve ('normed' was removed from Matplotlib;
    # 'density' is its replacement)
    plt.hist(x, 50, density=True, facecolor='green', alpha=0.75)

    (xx,pp) = kde(x,'Gaussian',h, npoints)
    plt.plot(xx,pp, 'b')
    plt.title('Kernel Density Estimate')
    plt.show()


--------------------------------------------------------------------------------
/matlab/copulacdf_test.m:
--------------------------------------------------------------------------------
 1 | % Matlab test script which generates copula samples similar to copulacdf.py
 2 | % for comparison purposes
 3 | 
 4 | clear;
 5 | clc;
 6 | 
 7 | % remove the old copulacdf_test.mat
 8 | delete('copulacdf_test.mat')
 9 | 
10 | % data which will define where we want to know the value of the Copula
11 | u = linspace(0+eps,1-eps,10);
12 | d = 2;
13 | [U1,U2] = meshgrid(u,u);
14 | 
15 | % Generate samples of Gaussian copula
16 | rho = 0.8;
17 | Rho = [1 rho; rho 1];
18 | gaussian_copula_cdf = copulacdf('gaussian',[U1(:) U2(:)], Rho);
19 | 
20 | % Generate samples of T copula
21 | nu = 2;
22 | t_copula_cdf = copulacdf('t',[U1(:) U2(:)], Rho, nu);
23 | 
24 | % Generate samples of the Clayton copula
25 | alpha = 0.3;
26 | clayton_copula_cdf = copulacdf('clayton',[U1(:) U2(:)], alpha);
27 | 
28 | % Generate samples of the Frank copula
29 | frank_copula_cdf = copulacdf('frank',[U1(:) U2(:)], alpha);
30 | 
31 | % Generate samples of the Gumbel Copula
32 | alpha = 1.5;
33 | gumbel_copula_cdf = copulacdf('gumbel',[U1(:) U2(:)], alpha);
34 | 
35 | % save them all for testing against python generated data
36 | save('copulacdf_test.mat', ...
37 |         'gaussian_copula_cdf', ...
38 |         't_copula_cdf', ...
39 |         'clayton_copula_cdf', ...
40 |         'frank_copula_cdf', ...
41 |         'gumbel_copula_cdf')
42 | 


--------------------------------------------------------------------------------
/matlab/copulacdf_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticresearch/copula-py/ff347131b02adc76de70b03e0aa6578bd0d86f52/matlab/copulacdf_test.mat


--------------------------------------------------------------------------------
/matlab/copulapdf_test.m:
--------------------------------------------------------------------------------
 1 | % Matlab test script which generates copula samples similar to copulapdf.py
 2 | % for comparison purposes
 3 | 
 4 | % remove the old copulapdf_test.mat
 5 | delete('copulapdf_test.mat')
 6 | 
 7 | % data which will define where we want to know the value of the Copula
 8 | u = linspace(0.1,0.9,10);
 9 | d = 2;
10 | [U1,U2] = meshgrid(u,u);
11 | 
12 | % Generate samples of Gaussian copula
13 | rho = 0.8;
14 | Rho = [1 rho; rho 1];
15 | gaussian_copula_pdf = copulapdf('gaussian',[U1(:) U2(:)], Rho);
16 | 
17 | % Generate samples of T copula
18 | nu = 2;
19 | t_copula_pdf = copulapdf('t',[U1(:) U2(:)], Rho, nu);
20 | 
21 | % Generate samples of the Clayton copula
22 | alpha = 0.3;
23 | clayton_copula_pdf = copulapdf('clayton',[U1(:) U2(:)], alpha);
24 | 
25 | % Generate samples of the Frank copula
26 | frank_copula_pdf = copulapdf('frank',[U1(:) U2(:)], alpha);
27 | 
28 | % Generate samples of the Gumbel Copula
29 | alpha = 1.5;
30 | gumbel_copula_pdf = copulapdf('gumbel',[U1(:) U2(:)], alpha);
31 | 
32 | % save them all for testing against python generated data
33 | save('copulapdf_test.mat', ...
34 |         'gaussian_copula_pdf', ...
35 |         't_copula_pdf', ...
36 |         'clayton_copula_pdf', ...
37 |         'frank_copula_pdf', ...
38 |         'gumbel_copula_pdf')
39 | 


--------------------------------------------------------------------------------
/matlab/copulapdf_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticresearch/copula-py/ff347131b02adc76de70b03e0aa6578bd0d86f52/matlab/copulapdf_test.mat


--------------------------------------------------------------------------------
/matlab/copulastat_test.m:
--------------------------------------------------------------------------------
 1 | % generate test data for copulastat.py
 2 | 
 3 | clear;
 4 | clc;
 5 | 
 6 | % remove the old copulastat_test.mat
 7 | delete('copulastat_test.mat')
 8 | 
 9 | gauss_ktau_rho_0_3 = copulastat('Gaussian', 0.3, 'type', 'kendall');
10 | gauss_srho_rho_0_3 = copulastat('Gaussian', 0.3, 'type', 'spearman');
11 | gauss_ktau_rho_0_7 = copulastat('Gaussian', 0.7, 'type', 'kendall');
12 | gauss_srho_rho_0_7 = copulastat('Gaussian', 0.7, 'type', 'spearman');
13 | gauss_ktau_rho_1_0 = copulastat('Gaussian', 1.0, 'type', 'kendall');
14 | gauss_srho_rho_1_0 = copulastat('Gaussian', 1.0, 'type', 'spearman');
15 | 
16 | clayton_ktau_alpha_0_3 = copulastat('clayton', 0.3, 'type', 'kendall');
17 | clayton_srho_alpha_0_3 = copulastat('clayton', 0.3, 'type', 'spearman');
18 | clayton_ktau_alpha_0_7 = copulastat('clayton', 0.7, 'type', 'kendall');
19 | clayton_srho_alpha_0_7 = copulastat('clayton', 0.7, 'type', 'spearman');
20 | clayton_ktau_alpha_1_0 = copulastat('clayton', 1.0, 'type', 'kendall');
21 | clayton_srho_alpha_1_0 = copulastat('clayton', 1.0, 'type', 'spearman');
22 | 
23 | gumbel_ktau_alpha_1_0 = copulastat('gumbel', 1.0, 'type', 'kendall');
24 | gumbel_srho_alpha_1_0 = copulastat('gumbel', 1.0, 'type', 'spearman');
25 | gumbel_ktau_alpha_3_0 = copulastat('gumbel', 3.0, 'type', 'kendall');
26 | gumbel_srho_alpha_3_0 = copulastat('gumbel', 3.0, 'type', 'spearman');
27 | 
28 | frank_ktau_alpha_0_3 = copulastat('frank', 0.3, 'type', 'kendall');
29 | frank_srho_alpha_0_3 = copulastat('frank', 0.3, 'type', 'spearman');
30 | frank_ktau_alpha_0_7 = copulastat('frank', 0.7, 'type', 'kendall');
31 | frank_srho_alpha_0_7 = copulastat('frank', 0.7, 'type', 'spearman');
32 | frank_ktau_alpha_1_0 = copulastat('frank', 1.0, 'type', 'kendall');
33 | frank_srho_alpha_1_0 = copulastat('frank', 1.0, 'type', 'spearman');
34 | 
35 | save('copulastat_test.mat');


--------------------------------------------------------------------------------
/matlab/copulastat_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticresearch/copula-py/ff347131b02adc76de70b03e0aa6578bd0d86f52/matlab/copulastat_test.mat


--------------------------------------------------------------------------------
/multivariate_stats.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | #******************************************************************************
 5 | #* 
 6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
 7 | #*
 8 | #* This program is free software: you can redistribute it and/or modify
 9 | #* it under the terms of the GNU General Public License as published by
10 | #* the Free Software Foundation, either version 3 of the License, or
11 | #* (at your option) any later version.
12 | #*
13 | #* This program is distributed in the hope that it will be useful,
14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #* GNU General Public License for more details.
17 | #*
18 | #* You should have received a copy of the GNU General Public License
19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 | #******************************************************************************
21 | 
import math
import numpy as np

from scipy.stats import spearmanr
from scipy.stats import kendalltau
try:
    # comb lives in scipy.special; the scipy.misc alias was removed from SciPy
    from scipy.special import comb
except ImportError:
    from scipy.misc  import comb
28 | 
29 | """
30 | Encompasses calculation of Spearman's Rho and Kendall's Tau (and other statistical
31 | measures to be added in the future) for data with dimensionality >= 2
32 | """
33 | 
def spearmans_rho(X):
    """
    Calculates a generalized Spearman's rho for a data set given by X, as
    described by "Multivariate Extensions of Spearman's Rho and Related Statistics".
    The value returned is the average of the pairwise Spearman's rho over
    every distinct pair of columns of X.

    Inputs:
      X - the input data, should be a numpy array of shape = M x N, where
          M is the number of samples, and N is the dimensionality of the data
    Outputs:
      the averaged pairwise Spearman's rho
    Raises:
      ValueError if X is not two-dimensional or has fewer than 2 columns
    """
    X = np.asarray(X)
    # checking ndim first turns a 1-D input into a clear error instead of an
    # IndexError on X.shape[1]
    if X.ndim != 2 or X.shape[1] < 2:
        raise ValueError('To calculate Spearman\'s Rho, need data of dimensionality >= 2')
    N = X.shape[1]

    srho = 0.0
    for dim1 in range(0,N-1):
        for dim2 in range(dim1+1,N):
            (r,p) = spearmanr(X[:,dim1],X[:,dim2])
            srho = srho + r
    # normalize by the number of distinct column pairs, i.e. N choose 2
    num_pairs = N*(N-1)/2.0
    return srho / num_pairs
55 |     
def kendalls_tau(X):
    """
    Calculates a generalized Kendall's tau for a data set given by X, as
    described by "Multivariate Extensions of Spearman's Rho and Related Statistics".
    The value returned is the average of the pairwise Kendall's tau over
    every distinct pair of columns of X.

    Inputs:
      X - the input data, should be a numpy array of shape = M x N, where
          M is the number of samples, and N is the dimensionality of the data
    Outputs:
      the averaged pairwise Kendall's tau
    Raises:
      ValueError if X is not two-dimensional or has fewer than 2 columns
    """
    X = np.asarray(X)
    # checking ndim first turns a 1-D input into a clear error instead of an
    # IndexError on X.shape[1]
    if X.ndim != 2 or X.shape[1] < 2:
        raise ValueError('To calculate Kendall\'s Tau, need data of dimensionality >= 2')
    N = X.shape[1]

    ktau = 0.0
    for dim1 in range(0,N-1):
        for dim2 in range(dim1+1,N):
            (t,p) = kendalltau(X[:,dim1],X[:,dim2])
            ktau = ktau + t
    # normalize by the number of distinct column pairs, i.e. N choose 2
    num_pairs = N*(N-1)/2.0
    return ktau / num_pairs
78 | 
if __name__=='__main__':
    # small 5-sample, 3-dimensional data set used as a smoke test
    X = np.array([[12,1,-3],
                  [2,4,-4],
                  [1,7,-6],
                  [12,1,2],
                  [2,0,1]])
    srho = spearmans_rho(X)
    ktau = kendalls_tau(X)

    # a single pre-formatted string prints the same text whether print is a
    # statement (Python 2) or a function (Python 3)
    print('%s %s' % (srho, ktau))


--------------------------------------------------------------------------------
/plot_utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | #******************************************************************************
 5 | #* 
 6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
 7 | #*
 8 | #* This program is free software: you can redistribute it and/or modify
 9 | #* it under the terms of the GNU General Public License as published by
10 | #* the Free Software Foundation, either version 3 of the License, or
11 | #* (at your option) any later version.
12 | #*
13 | #* This program is distributed in the hope that it will be useful,
14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #* GNU General Public License for more details.
17 | #*
18 | #* You should have received a copy of the GNU General Public License
19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 | #******************************************************************************
21 | 
22 | from mpl_toolkits.mplot3d import Axes3D
23 | import matplotlib.pyplot as plt
24 | from matplotlib import cm
25 | import pandas as pd
26 | 
def plot_3d(X,Y,Z, titleStr):
    """
    Draws Z over the (X,Y) grid as a colored 3D surface with a colorbar and
    shows the figure.

    Inputs:
      X, Y, Z  - the coordinates handed to Axes3D.plot_surface
      titleStr - the title of the plot
    """
    fig = plt.figure()
    # fig.gca(projection=...) is no longer accepted by recent Matplotlib;
    # add_subplot creates the 3D axes on both old and new versions
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm,
        linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    # label the 3D axes explicitly rather than relying on the current axes
    ax.set_xlabel('U1')
    ax.set_ylabel('U2')
    ax.set_title(titleStr)
    plt.show()
37 | 
def pairs(X, titleStr):
    """
    Shows a scatter-plot matrix of the columns of X, with grids enabled on
    every panel.

    Inputs:
      X        - the data; one variable per column (X.shape[1] columns)
      titleStr - prefix of each column label; column k (1-based) is labelled
                 '<titleStr> U<k>'
    """
    # generate the column names
    colNames = [titleStr + ' U' + str(col+1) for col in range(0,X.shape[1])]

    df = pd.DataFrame(X, columns=colNames)

    # scatter_matrix lives in pandas.plotting; fall back to the legacy
    # pandas.tools.plotting location for old pandas releases
    plotting = getattr(pd, 'plotting', None)
    if plotting is None:
        plotting = pd.tools.plotting
    axes = plotting.scatter_matrix(df, alpha=0.2)

    # turn grids on (an argument-less grid() call would toggle instead)
    for axRow in axes:
        for ax in axRow:
            ax.grid(True)

    plt.tight_layout()
    plt.show()


--------------------------------------------------------------------------------
/rstable1.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | #******************************************************************************
 5 | #* 
 6 | #* Copyright (C) 2015  Kiran Karra <kiran.karra@gmail.com>
 7 | #*
 8 | #* This program is free software: you can redistribute it and/or modify
 9 | #* it under the terms of the GNU General Public License as published by
10 | #* the Free Software Foundation, either version 3 of the License, or
11 | #* (at your option) any later version.
12 | #*
13 | #* This program is distributed in the hope that it will be useful,
14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #* GNU General Public License for more details.
17 | #*
18 | #* You should have received a copy of the GNU General Public License
19 | #* along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 | #******************************************************************************
21 | 
22 | import math
23 | import numpy as np
24 | 
25 | from scipy.stats import uniform
26 | from scipy.stats import expon
27 | 
28 | """
29 | Algorithms copied directly from R source code of the copula package
30 |     - rstable1.R
31 |     - retstable.c
32 | """
33 | 
def rstable1(n, alpha, beta, gamma=1, delta=0, pm=1):
    """
    Draws n random variates via _rstable_c and applies the scale gamma and
    the shift delta to them.

    Inputs:
      n     - the number of variates to generate
      alpha - the stability parameter handed to the sampler
              NOTE(review): the formulas below presumably require
              0 < alpha < 1 -- confirm against the R source
      beta  - accepted to mirror the R signature; not used by this port
      gamma - scale factor multiplied onto every variate
      delta - shift added to every variate
      pm    - accepted to mirror the R signature; not used by this port
    Outputs:
      a numpy array of shape (n,)
    """
    return _rstable_c(n, alpha) * gamma + delta

def _rstable_c(n, alpha):
    """
    Returns n draws of _rstable0 multiplied by cos(pi/2*alpha)^(-1/alpha).
    """
    return np.power(np.cos(math.pi/2.0*alpha), -1.0/alpha) * _rstable0(alpha, n)

def _rstable0(alpha, n=1):
    """
    Returns n independent draws (one by default) of
    (A(pi*U, alpha) / W^(1-alpha))^(1/alpha), with U uniform on [0,1) and W a
    non-zero standard exponential variate.
    """
    U = uniform.rvs(size=n)
    W = expon.rvs(size=n)
    # re-draw any exponential variate that came out exactly zero, since W
    # appears in a denominator below
    zeros = (W == 0)
    while zeros.any():
        W[zeros] = expon.rvs(size=int(zeros.sum()))
        zeros = (W == 0)
    return np.power(_A(math.pi*U,alpha)/np.power(W,1.0-alpha),1.0/alpha)

def _A(x, alpha):
    """
    Evaluates _A_3 with Ialpha = 1 - alpha.
    """
    Ialpha = 1.0-alpha
    return _A_3(x, alpha, Ialpha)

def _A_3(x, alpha, Ialpha):
    """
    Computes
      (Ialpha*S(Ialpha*x))^Ialpha * (alpha*S(alpha*x))^alpha / S(x)
    where S(t) = sin(t)/t.
    """
    # np.sinc is the normalized sinc, sin(pi*t)/(pi*t); dividing the argument
    # by pi turns it into sin(t)/t
    return np.power(Ialpha* np.sinc(Ialpha*x/math.pi), Ialpha) * \
            np.power(alpha * np.sinc(alpha *x/math.pi), alpha) / np.sinc(x/math.pi)
57 | 


--------------------------------------------------------------------------------