├── .gitignore ├── DEPENDENCIES.txt ├── LICENSE ├── README.md ├── TODO.txt ├── copula_compatibility_problem.py ├── copulacdf.py ├── copulafit.py ├── copulamnsig.py ├── copulapdf.py ├── copularnd.py ├── copulastat.py ├── cvolume.py ├── debye.py ├── ecdf.py ├── invcopulastat.py ├── kde.py ├── matlab ├── copulacdf_test.m ├── copulacdf_test.mat ├── copulapdf_test.m ├── copulapdf_test.mat ├── copulastat_test.m └── copulastat_test.mat ├── multivariate_stats.py ├── plot_utils.py └── rstable1.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /DEPENDENCIES.txt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | This file describes the dependencies required to run all of the code: 23 | - scipy 24 | - numpy 25 | - matplotlib 26 | - statsmodels 27 | - pandas -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | 676 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # copula-bayesian-networks 2 | Code for implementing Copula Bayesian Networks 3 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | copulacdf.py 2 | [ ] - t copula 3 | 4 | copularnd.py 5 | [ ] - t copula 6 | 7 | copulafit.py 8 | [ ] - t copula PKTE estimation 9 | [ ] - MLE estimation for all copula types 10 | [ ] - AMLE estimation for all copula types 11 | 12 | copulapdf.py 13 | [ ] - 14 | 15 | copulastat.py 16 | [ ] - t copula 17 | 18 | invcopulastat.py 19 | [ ] - t copula 20 | 21 | copulamnsig.py 22 | [ ] - a first order test on empirical multinomial signature generation -------------------------------------------------------------------------------- /copula_compatibility_problem.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from scipy.stats import mvn # contains inverse CDF of Multivariate Gaussian 26 | from scipy.stats import norm # contains PDF of Gaussian 27 | 28 | import copulacdf 29 | import plot_utils 30 | import matplotlib.pyplot as plt 31 | 32 | """ 33 | This file showcases what is known as the compatibility problem with copula's. 34 | """ 35 | 36 | 37 | """ 38 | Here, we showcase the compatibility problem by using a 3-Copula and calculating 39 | the two marginal's. 40 | """ 41 | def ex1(): 42 | n = 10 43 | eps = np.finfo(float).eps 44 | u = np.linspace(0+eps,1-eps,n) 45 | UU = np.meshgrid(u,u) 46 | U2 = np.reshape(UU[0], (UU[0].shape[0]*UU[0].shape[1], 1)) 47 | U3 = np.reshape(UU[1], (UU[1].shape[0]*UU[1].shape[1], 1)) 48 | U1 = np.ones(U2.shape)*(1-eps) 49 | U = np.concatenate((U1,U2,U3),axis=1) 50 | 51 | R1 = np.array([[1,0,0],[0,1,0],[0,0,1]]) 52 | R2 = np.array([[1,0.6,-0.3],[0.6,1,0.4],[-0.3,0.4,1]]) 53 | 54 | C1_twomarginal = copulacdf.copulacdf('Gaussian',U,R1) 55 | C2_twomarginal = copulacdf.copulacdf('Gaussian',U,R2) 56 | 57 | #X = UU[0] 58 | #Y = UU[1] 59 | #Z = np.reshape(C1_twomarginal,UU[0].shape) 60 | #plot_utils.plot_3d(X,Y,Z, 'C1 Two-Marginal') 61 | 62 | # compute error between C1_twomarginal and C2_twomarginal 63 | sq_err_vec = (C2_twomarginal-C1_twomarginal)**2 64 | 65 | X = UU[0] 66 | Y = UU[1] 67 | Z = np.reshape(sq_err_vec,UU[0].shape) 68 | plot_utils.plot_3d(X,Y,Z, 'Two-Marginal Error') 69 | 70 | def ex2(): 71 | n = 10 72 | eps = np.finfo(float).eps 73 | u = np.linspace(0+eps,1-eps,n) 74 | UU = np.meshgrid(u,u) 75 | U3 = np.reshape(UU[1], (UU[1].shape[0]*UU[1].shape[1], 1)) 76 | U1 = np.ones(U3.shape)*(1-eps) 77 | U2 = np.ones(U3.shape)*(1-eps) 78 | U = np.concatenate((U1,U2,U3),axis=1) 79 | 80 | R1 = np.array([[1,0,0],[0,1,0],[0,0,1]]) 81 | R2 = np.array([[1,0.6,-0.3],[0.6,1,0.4],[-0.3,0.4,1]]) 82 | 83 | C1_onemarginal = copulacdf.copulacdf('Gaussian',U,R1) 84 | C2_onemarginal = copulacdf.copulacdf('Gaussian',U,R2) 85 | sq_err_vec = (C2_onemarginal-C1_onemarginal)**2 86 | 87 | X = UU[0] 88 | Y = UU[1] 89 | Z = np.reshape(sq_err_vec,UU[0].shape) 90 | plot_utils.plot_3d(X,Y,Z, 'One-Margin Error') 91 | 92 | if __name__=='__main__': 93 | ex1() 94 | ex2() -------------------------------------------------------------------------------- /copulacdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from scipy.stats import mvn # contains inverse CDF of Multivariate Gaussian 26 | from scipy.stats import norm # contains PDF of Gaussian 27 | from scipy.stats import t 28 | from statsmodels.sandbox.distributions import multivariate as mvt 29 | 30 | """ 31 | copulacdf.py contains routines which provide Copula CDF values 32 | """ 33 | 34 | def copulacdf(family, u, *args): 35 | """ Generates values of a requested copula family 36 | 37 | Inputs: 38 | u -- u is an N-by-P matrix of values in [0,1], representing N 39 | points in the P-dimensional unit hypercube. 40 | 41 | rho -- a P-by-P correlation matrix, the first argument required for the Gaussian copula 42 | alpha -- a scalar argument describing the dependency for Frank, Gumbel, and Clayton copula's 43 | 44 | Outputs: 45 | y -- the value of the Gaussian Copula 46 | """ 47 | n = u.shape[0] 48 | p = u.shape[1] 49 | 50 | num_var_args = len(args) 51 | family_lc = family.lower() 52 | if(family_lc=='gaussian'): 53 | if(num_var_args!=1): 54 | raise ValueError("Gaussian family requires one additional argument -- rho (correlation matrix) [P x P]") 55 | rho = args[0] 56 | rho_expected_shape = (p,p) 57 | if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape): 58 | raise ValueError("Gaussian family requires rho to be of type numpy.ndarray with shape=[P x P]") 59 | y = _gaussian(u, rho) 60 | 61 | elif(family_lc=='t'): 62 | if(num_var_args!=2): 63 | raise ValueError("T family requires two additional arguments -- rho (correlation matrix) [P x P] and nu [scalar]") 64 | rho = args[0] 65 | nu = args[1] 66 | rho_expected_shape = (p,p) 67 | if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape): 68 | raise ValueError("T family requires rho to be of type numpy.ndarray with shape=[P x P]") 69 | y = _t(u, rho, nu) 70 | elif(family_lc=='clayton'): 71 | if(num_var_args!=1): 72 | raise ValueError("Clayton family requires one additional argument -- alpha [scalar]") 73 | alpha = args[0] 74 | y = _clayton(u, alpha) 75 | elif(family_lc=='frank'): 76 | if(num_var_args!=1): 77 | raise ValueError("Frank family requires one additional argument -- alpha [scalar]") 78 | alpha = args[0] 79 | y = _frank(u, alpha) 80 | elif(family_lc=='gumbel'): 81 | if(num_var_args!=1): 82 | raise ValueError("Gumbel family requires one additional argument -- alpha [scalar]") 83 | alpha = args[0] 84 | y = _gumbel(u, alpha) 85 | else: 86 | raise ValueError("Unrecognized family of copula") 87 | 88 | return y 89 | 90 | def _gaussian(u, rho): 91 | """ Generates values of the Gaussian copula 92 | 93 | Inputs: 94 | u -- u is an N-by-P matrix of values in [0,1], representing N 95 | points in the P-dimensional unit hypercube. 96 | rho -- a P-by-P correlation matrix. 97 | 98 | Outputs: 99 | y -- the value of the Gaussian Copula 100 | """ 101 | n = u.shape[0] 102 | p = u.shape[1] 103 | lo = np.full((1,p), -10) 104 | hi = norm.ppf(u) 105 | 106 | mu = np.zeros(p) 107 | 108 | # need to use ppf(q, loc=0, scale=1) as replacement for norminv 109 | # need to use mvn.mvnun as replacement for mvncdf 110 | # the upper bound needs to be the output of the ppf call, right now it is set to random above 111 | y = np.zeros(n) 112 | # I don't know if mvnun is vectorized, I couldn't get that to work 113 | for ii in np.arange(n): 114 | # do some error checking. if we have any -inf or inf values, 115 | # 116 | p,i = mvn.mvnun(lo, hi[ii,:], mu, rho) 117 | y[ii] = p 118 | 119 | return y 120 | 121 | def _t(u, rho, nu): 122 | """ Generates values of the T copula 123 | 124 | Inputs: 125 | u -- u is an N-by-P matrix of values in [0,1], representing N 126 | points in the P-dimensional unit hypercube. 127 | rho -- a P-by-P correlation matrix. 128 | nu -- degrees of freedom for T Copula 129 | 130 | Outputs: 131 | y -- the value of the T Copula 132 | """ 133 | n = u.shape[0] 134 | p = u.shape[1] 135 | loIntegrationVal = -40 136 | lo = np.full((1,p), loIntegrationVal) # more accuracy, but slower :/ 137 | hi = t.ppf(u, nu) 138 | 139 | mu = np.zeros(p) 140 | 141 | y = np.zeros(n) 142 | for ii in np.arange(n): 143 | x = hi[ii,:] 144 | x[x<-40] = -40 145 | p = mvt.mvstdtprob(lo[0], x, rho, nu) 146 | y[ii] = p 147 | 148 | return y 149 | 150 | def _clayton(u, alpha): 151 | # C(u1,u2) = (u1^(-alpha) + u2^(-alpha) - 1)^(-1/alpha) 152 | if(alpha<0): 153 | raise ValueError("Clayton family -- invalid alpha argument. alpha must be >=0") 154 | elif(alpha==0): 155 | y = np.prod(u,1) 156 | else: 157 | tmp1 = np.power(u, -alpha) 158 | tmp2 = np.sum(tmp1,1) - 1 159 | y = np.power(tmp2, -1.0/alpha) 160 | 161 | return y 162 | 163 | def _frank(u, alpha): 164 | # C(u1,u2) = -(1/alpha)*log(1 + (exp(-alpha*u1)-1)*(exp(-alpha*u2)-1)/(exp(-alpha)-1)) 165 | if(alpha==0): 166 | y = np.prod(u,1) 167 | else: 168 | tmp1 = np.exp(-alpha*np.sum(u,1)) - np.sum(np.exp(-alpha*u),1) 169 | y = -np.log( (np.exp(-alpha) + tmp1) / np.expm1(-alpha)) / alpha; 170 | 171 | return y 172 | 173 | def _gumbel(u, alpha): 174 | # C(u1,u2) = exp(-( (-log(u1))^alpha + (-log(u2))^alpha )^(1/alpha)) 175 | n = u.shape[0] 176 | p = u.shape[1] 177 | 178 | if(alpha<1): 179 | raise ValueError("Gumbel family -- invalid alpha argument. alpha must be >=1") 180 | elif(alpha==1): 181 | y = np.prod(u,1) 182 | else: 183 | # TODO: NaN checking like Matlab here would be nice :) 184 | exparg = np.zeros(n) 185 | for ii in np.arange(p): 186 | tmp1 = np.power(-1*np.log(u[:,ii]), alpha) 187 | exparg = exparg + tmp1 188 | exparg = np.power(exparg, 1.0/alpha) 189 | y = np.exp(-1*exparg) 190 | 191 | return y 192 | 193 | def test_python_vs_matlab(family): 194 | # generate U1, U2 195 | n = 10 196 | p = 2 197 | 198 | # generate all u1,u2 combinations 199 | eps = np.finfo(float).eps 200 | u = np.linspace(0+eps,1-eps,n) 201 | UU = np.meshgrid(u,u) 202 | U2 = np.reshape(UU[0], (UU[0].shape[0]*UU[0].shape[1], 1)) 203 | U1 = np.reshape(UU[1], (UU[1].shape[0]*UU[1].shape[1], 1)) 204 | U = np.concatenate((U1,U2),axis=1) 205 | 206 | rho = 0.8 207 | nu = 2 208 | Rho = np.array([[1,rho],[rho,1]]) 209 | 210 | alpha = 0.3 211 | 212 | # test the python data against Matlab 213 | # TODO: make python execute the matlab script which generates these samples 214 | matlab_data = scipy.io.loadmat('matlab/copulacdf_test.mat') 215 | 216 | if(family.lower()=='gaussian'): 217 | gaussian_copula_cdf_python = copulacdf(family,U,Rho) 218 | gaussian_copula_cdf_matlab = matlab_data['gaussian_copula_cdf'] 219 | gaussian_copula_cdf_matlab = gaussian_copula_cdf_matlab[:,0] 220 | 221 | # compare the two 222 | gaussian_copula_test_result = np.allclose(gaussian_copula_cdf_python,gaussian_copula_cdf_matlab) 223 | if(gaussian_copula_test_result): 224 | print 'Gaussian Copula Python calculation matches Matlab!' 225 | else: 226 | print 'Gaussian Copula Python calculation does NOT match Matlab!' 227 | 228 | # plot the Guassian Copula for fun 229 | X = UU[0] 230 | Y = UU[1] 231 | Z = np.reshape(gaussian_copula_cdf_python,UU[0].shape) 232 | 233 | plot_utils.plot_3d(X,Y,Z, 'Gaussian Copula CDF') 234 | 235 | elif(family.lower()=='t'): 236 | t_copula_cdf_python = copulacdf(family,U,Rho,nu) 237 | t_copula_cdf_matlab = matlab_data['t_copula_cdf'] 238 | t_copula_cdf_matlab = t_copula_cdf_matlab[:,0] 239 | 240 | # compare the two 241 | t_copula_test_result = np.allclose(t_copula_cdf_python,t_copula_cdf_matlab,atol=0.01) # a high tolerance required 242 | # b/c of way that mvt 243 | # is implemented in python 244 | if(t_copula_test_result): 245 | print 'T Copula Python calculation matches Matlab!' 246 | else: 247 | print 'T Copula Python calculation does NOT match Matlab!' 248 | 249 | # plot the Guassian Copula for fun 250 | X = UU[0] 251 | Y = UU[1] 252 | Z = np.reshape(t_copula_cdf_python,UU[0].shape) 253 | 254 | plot_utils.plot_3d(X,Y,Z, 'T Copula CDF') 255 | 256 | elif(family.lower()=='clayton'): 257 | clayton_copula_cdf_python = copulacdf(family,U,alpha) 258 | clayton_copula_cdf_matlab = matlab_data['clayton_copula_cdf'] 259 | clayton_copula_cdf_matlab = clayton_copula_cdf_matlab[:,0] 260 | 261 | # compare the two 262 | clayton_copula_test_result = np.allclose(clayton_copula_cdf_python,clayton_copula_cdf_matlab) 263 | if(clayton_copula_test_result): 264 | print 'Clayton Copula Python calculation matches Matlab!' 265 | else: 266 | print 'Clayton Copula Python calculation does NOT match Matlab!' 267 | 268 | # plot the Clayton Copula for fun 269 | X = UU[0] 270 | Y = UU[1] 271 | Z = np.reshape(clayton_copula_cdf_python,UU[0].shape) 272 | 273 | plot_utils.plot_3d(X,Y,Z, 'Clayton Copula CDF') 274 | 275 | elif(family.lower()=='frank'): 276 | frank_copula_cdf_python = copulacdf(family,U,alpha) 277 | frank_copula_cdf_matlab = matlab_data['frank_copula_cdf'] 278 | frank_copula_cdf_matlab = frank_copula_cdf_matlab[:,0] 279 | 280 | # compare the two 281 | frank_copula_test_result = np.allclose(frank_copula_cdf_python,frank_copula_cdf_matlab) 282 | if(frank_copula_test_result): 283 | print 'Frank Copula Python calculation matches Matlab!' 284 | else: 285 | print 'Frank Copula Python calculation does NOT match Matlab!' 286 | 287 | # plot the Clayton Copula for fun 288 | X = UU[0] 289 | Y = UU[1] 290 | Z = np.reshape(frank_copula_cdf_python,UU[0].shape) 291 | 292 | plot_utils.plot_3d(X,Y,Z, 'Frank Copula CDF') 293 | 294 | elif(family.lower()=='gumbel'): 295 | alpha = 1.5 296 | gumbel_copula_cdf_python = copulacdf(family,U,alpha) 297 | gumbel_copula_cdf_matlab = matlab_data['gumbel_copula_cdf'] 298 | gumbel_copula_cdf_matlab = gumbel_copula_cdf_matlab[:,0] 299 | 300 | # compare the two 301 | gumbel_copula_test_result = np.allclose(gumbel_copula_cdf_python,gumbel_copula_cdf_matlab) 302 | if(gumbel_copula_test_result): 303 | print 'Gumbel Copula Python calculation matches Matlab!' 304 | else: 305 | print 'Gumbel Copula Python calculation does NOT match Matlab!' 306 | 307 | # plot the Clayton Copula for fun 308 | X = UU[0] 309 | Y = UU[1] 310 | Z = np.reshape(gumbel_copula_cdf_python,UU[0].shape) 311 | 312 | plot_utils.plot_3d(X,Y,Z, 'Gumbel Copula CDF') 313 | 314 | if __name__=='__main__': 315 | import scipy.io 316 | import plot_utils 317 | 318 | #test_python_vs_matlab('Gaussian') 319 | test_python_vs_matlab('T') 320 | #test_python_vs_matlab('Clayton') 321 | #test_python_vs_matlab('Frank') 322 | #test_python_vs_matlab('Gumbel') 323 | -------------------------------------------------------------------------------- /copulafit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | import multivariate_stats 26 | from invcopulastat import invcopulastat 27 | from scipy.stats import kendalltau 28 | from numpy.linalg import eig 29 | 30 | """ 31 | copulafit.py contains routines which provide use various techniques, as 32 | specified by the user to fit data to a family of copula (i.e. find the 33 | dependency parameter). 34 | """ 35 | 36 | def copulafit(family, X, algorithm): 37 | """ 38 | Attempts to determine the dependency parameter of the copula family 39 | type specified, using the algorithm that is specified for the data 40 | given by the matrix X 41 | 42 | Inputs: 43 | family -- the copula family to fit to, must be: 44 | 'Gaussian' 45 | 't' 46 | 'Clayton' 47 | 'Gumbel' 48 | 'Frank' 49 | X -- the data to determine the copula dependency parameter for. Must be 50 | a numpy array of shape = M x N, where M is the number of samples 51 | and N is the dimensionality of the data 52 | algorithm -- must be one of the following strings: 53 | 'MLE' - Maximum Likelihood method 54 | 'AMLE' - Approximate Maximum Likelihood method 55 | 'PKTE' - Use's Pairwise Kendall's Tau estimator's relationship to the 56 | copula family's dependency parameter (only applicalble 57 | to Clayton, Gumbel, or Frank copula's currently) 58 | 59 | Outputs: 60 | the dependency parameter for the copula 61 | 62 | """ 63 | algorithm_lc = algorithm.lower() 64 | family_lc = family.lower() 65 | dep_param_est = None 66 | if(algorithm_lc=='MLE'): 67 | raise ValueError('MLE method not yet supported!') 68 | elif(algorithm_lc=='AMLE'): 69 | raise ValueError('Approximate MLE method not yet supported!') 70 | elif(algorithm_lc=='PKTE'): 71 | if(family_lc=='gaussian'): 72 | dep_param_est = _gaussian_PKTE(X) 73 | elif(family_lc=='t'): 74 | dep_param_est = _t_PKTE(X) 75 | elif(family_lc=='clayton'): 76 | dep_param_est = _clayton_PKTE(X) 77 | elif(family_lc=='gumbel'): 78 | dep_param_est = _gumbel_PKTE(X) 79 | elif(family_lc=='frank'): 80 | dep_param_est = _frank_PKTE(X) 81 | else: 82 | raise ValueError('Unsupported Algorithm or options!') 83 | 84 | return dep_param_est 85 | 86 | def _gaussian_PKTE(X): 87 | # the algorithm for this comes from the paper: 88 | # "Gaussian Copula Precision Estimation with Missing Values" 89 | # by Huahua Wang, Faridel Fazayeli, Soumyadeep Chatterjee, Arindam Banerjee 90 | N = X.shape[1] 91 | sigma_hat = np.ones((N,N)) 92 | for dim1 in range(0,N-1): 93 | for dim2 in range(dim1+1,N): 94 | rho = np.sin(math.pi/2 * kendalltau(X[:,dim1],X[:,dim2])) 95 | # correlation matrix is symmetric 96 | sigma_hat[dim1][dim2] = rho 97 | sigma_hat[dim2][dim1] = rho 98 | 99 | # ensure that sigma_hat is positive semidefinite 100 | sigma_hat = _nearPD(sigma_hat) 101 | 102 | return sigma_hat 103 | 104 | # TODO: T copula stuff 105 | def _t_PKTE(X): 106 | # first estimate correlation matrix 107 | sigma_hat = _gaussian_PKTE(X) 108 | 109 | # TODO: use MLE to estimate degrees of freedom 110 | nu = 1 111 | 112 | return (sigma_hat, nu) 113 | 114 | def _clayton_PKTE(X): 115 | # calculate empirical kendall's tau 116 | ktau = multivariate_stats.kendalls_tau(X) 117 | # inverse to find dependency parameter 118 | alpha_hat = invcopulastat('Clayton', 'kendall', ktau) 119 | 120 | return alpha_hat 121 | 122 | def _gumbel_PKTE(X): 123 | # calculate empirical kendall's tau 124 | ktau = multivariate_stats.kendalls_tau(X) 125 | # inverse to find dependency parameter 126 | alpha_hat = invcopulastat('Gumbel', 'kendall', ktau) 127 | 128 | return alpha_hat 129 | 130 | 131 | def _frank_PKTE(X): 132 | # calculate empirical kendall's tau 133 | ktau = multivariate_stats.kendalls_tau(X) 134 | # inverse to find dependency parameter 135 | alpha_hat = invcopulastat('Frank', 'kendall', ktau) 136 | 137 | return alpha_hat 138 | 139 | def _getAplus(A): 140 | eigval, eigvec = eig(A) 141 | Q = np.matrix(eigvec) 142 | xdiag = np.matrix(np.diag(np.maximum(eigval, 0))) 143 | return Q*xdiag*Q.T 144 | 145 | def _getPs(A, W=None): 146 | W05 = np.matrix(W**.5) 147 | return W05.I * _getAplus(W05 * A * W05) * W05.I 148 | 149 | def _getPu(A, W=None): 150 | Aret = np.array(A.copy()) 151 | Aret[W > 0] = np.array(W)[W > 0] 152 | return np.matrix(Aret) 153 | 154 | def _nearPD(A, nit=10): 155 | n = A.shape[0] 156 | W = np.identity(n) 157 | 158 | # W is the matrix used for the norm (assumed to be Identity matrix here) 159 | # the algorithm should work for any diagonal W 160 | deltaS = 0 161 | Yk = A.copy() 162 | for k in range(nit): 163 | Rk = Yk - deltaS 164 | Xk = _getPs(Rk, W=W) 165 | deltaS = Xk - Rk 166 | Yk = _getPu(Xk, W=W) 167 | return Yk -------------------------------------------------------------------------------- /copulamnsig.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from cvolume import cvolume 26 | import multivariate_stats 27 | 28 | from ecdf import probability_integral_transform 29 | from scipy.stats import entropy 30 | 31 | def copulamnsig(family, K, *args): 32 | """ 33 | Computes the copula multinomial signature as described in the paper 34 | "Highly Efficient Learning of Mixed Copula Networks" for a specified 35 | copula family. Essentially, it breaks up the unit grid into a K x K boxes, 36 | and computes the probability of a sample from that copula pdf falling in 37 | that grid. This is then aggregated into a multinomial probability 38 | distribution. This so called "multinomial" signature of a copula is then 39 | used to efficiently determine the structure of the Bayesian network, as well 40 | as the copula which would describe the dependency between the nodes. 41 | 42 | The grid over the unit cube is numbered as follows, for a 4 x 4 grid 43 | ___________________ 44 | | 4 | 8 | 12 | 16 | 45 | |---|---|----|----| 46 | | 3 | 7 | 11 | 15 | 47 | |-----------------| 48 | | 2 | 6 | 10 | 14 | 49 | |-----------------| 50 | | 1 | 5 | 9 | 13 | 51 | |___|___|____|____| 52 | 53 | Currently, this computes the multinomial signature for a specified copula 54 | family of 2 dimensions. It would be nice to expand this to multiple 55 | dimensions, and we can use the general formula for C-volume 56 | 57 | family - the copula type, must be: 58 | 'Gaussian' 59 | 'T' 60 | 'Clayton' 61 | 'Frank' 62 | 'Gumbel' 63 | args - must be atleast of length 2, for which the first element in args 64 | is expected to be a string which describes the dependency value 65 | being provided, must be one of the following: 66 | 'kendall' - means kendall's Tau is being provided 67 | 'spearman' - means spearman's rho is being provided 68 | 'native' - means that the dependency parameter of the copula family 69 | itself is being provided directly 70 | the second argmuent must be the value of the dependency type 71 | provided. For kendall and spearman, a scalar value is expected. 72 | For native, if the family type is Frank, Gumbel, or Clayton, then 73 | a scalar value is expected, which represents the dependency 74 | parameter. If the family type is Gaussian, then a 2 x 2 numpy array 75 | is expected, which represents the correlation matrix defining the 76 | Gaussian copula. If the family is T, then the 2nd argument is the 77 | 2x2 numpy array representing the correlation matrix, and the 3rd 78 | argument is the degrees of freedom 79 | """ 80 | coords_list = _makeCoordsList(K) 81 | 82 | # mnsig is a list of dictionaries. The (list index+1) corresponds to the 83 | # grid of interest in the unit cube. In the dictionary, the actual lower 84 | # left coordinates of the box and the upper right coordinates of the box 85 | # are stored as keys 'u1v1' and 'u2v2', and then the actual value of the 86 | # multinomial signature in that grid is stored as 'val' 87 | 88 | mnsig = [] 89 | for coord in coords_list: 90 | # compute the C-volume and store 91 | u1v1 = coord[0] 92 | u1v2 = coord[1] 93 | u2v1 = coord[2] 94 | u2v2 = coord[3] 95 | try: 96 | val = cvolume(family, u1v1, u1v2, u2v1, u2v2, *args) 97 | except ValueError: 98 | val = np.array([-1]) # for compatibility we put the numpy wrapper 99 | 100 | mnsig.append(val[0]) 101 | 102 | return mnsig 103 | 104 | def empirical_copulamnsig(X, K): 105 | """ 106 | Computes an empirical copula multinomial signature based on the dataset 107 | provided by U. U must be a numpy array of dimensions [M x N], where M is 108 | the number of data points in the dataset and, N is the dimensionality of the 109 | data 110 | """ 111 | M = X.shape[0] 112 | N = X.shape[1] 113 | 114 | # convert X to U by using the probability integral transform: F(X) = U 115 | U = probability_integral_transform(X) 116 | 117 | # generate the coordinates so we can then compare and see where each sample 118 | # falls into in the unit cube 119 | coords_list = _makeCoordsList(K) 120 | 121 | # this will be a list of dictionaries which has all the combinations of the 122 | # empirical binomial signature 123 | esig = [] 124 | 125 | # for all i < j, compute pairwise bivariate multinomial signature 126 | for dim1 in range(0,N-1): 127 | for dim2 in range(dim1+1,N): 128 | # to compute the pairwise bivariate multinomial signature, what 129 | # we do is essentially grid as before, and compute a histogram 130 | # for each grid .. whcih is our empirical estimate 131 | # the grid is lay-ed out in the exact same way as described before, 132 | # so the index of mnsig from copulamnsig and the index of the value 133 | # generated here will be directly comparable 134 | # ___________________ 135 | # | 4 | 8 | 12 | 16 | 136 | # |---|---|----|----| 137 | # | 3 | 7 | 11 | 15 | 138 | # |-----------------| 139 | # | 2 | 6 | 10 | 14 | 140 | # |-----------------| 141 | # | 1 | 5 | 9 | 13 | 142 | # |___|___|____|____| 143 | tmp = {} 144 | # RV 1 that we are comparing 145 | tmp['rv1'] = dim1+1 146 | # RV 2 that we are comparing 147 | tmp['rv2'] = dim2+1 148 | # the value for the zone -- initialize to 0 149 | esig_vec = np.zeros(K*K) 150 | 151 | # there is probably a more efficient way to do this than to loop 152 | # over each value, but this is a first cut at implementing this 153 | u = U[:,dim1] 154 | v = U[:,dim2] 155 | 156 | for ii in range(0,M): 157 | # find which zone this specific (u,v) sample falls in 158 | for jj in range(0,K*K): 159 | u1 = coords_list[jj][0][0][0] 160 | v1 = coords_list[jj][0][0][1] 161 | u2 = coords_list[jj][3][0][0] 162 | v2 = coords_list[jj][3][0][1] 163 | 164 | if(u[ii] >= u1 and u[ii] < u2 and 165 | v[ii] >= v1 and v[ii] < v2): 166 | # add one to the zone that it falls into 167 | esig_vec[jj] = (esig_vec[jj] + 1.0/M) 168 | # process the next pair by kicking out of this loop 169 | break 170 | tmp['esig'] = esig_vec 171 | 172 | esig.append(tmp) 173 | 174 | return esig 175 | 176 | def _makeCoordsList(K): 177 | eps = np.finfo(float).eps 178 | u = np.linspace(0+eps, 1-eps, K+1) 179 | v = np.linspace(0+eps, 1-eps, K+1) 180 | 181 | coords_list = [] 182 | for ii in range(0,len(u)-1): 183 | for jj in range(0,len(v)-1): 184 | u1 = u[ii] 185 | u2 = u[ii+1] 186 | v1 = v[jj] 187 | v2 = v[jj+1] 188 | u1v1 = np.array([[u1,v1]]) 189 | u1v2 = np.array([[u1,v2]]) 190 | u2v1 = np.array([[u2,v1]]) 191 | u2v2 = np.array([[u2,v2]]) 192 | x = [] 193 | x.append(u1v1) 194 | x.append(u1v2) 195 | x.append(u2v1) 196 | x.append(u2v2) 197 | coords_list.append(x) 198 | 199 | return coords_list 200 | 201 | # the master function, which computes the correct copula family to choose from 202 | # will compare the empirical signatures to the actual signature for refernence 203 | # will do the following: 204 | # 1.) compute the empirical kendall's tau 205 | # 2.) load the precomputed multinomial signature for that kendall's tau 206 | # for all the copula families 207 | # 3.) minimize the distance metric 208 | def optimalCopulaFamily(X, K=4, family_search=['Gaussian', 'Clayton', 'Gumbel', 'Frank']): 209 | """ 210 | This function, given a multivariate data set X, computes the best copula family which fits 211 | the data, using the procedure described in the paper "Highly Efficient Learning of Mixed 212 | Copula Networks," by Gal Elidan 213 | 214 | X - the multivariate dataset for which we desire the copula. Must be a numpy array of 215 | dimension [M x N], where M is the number of data points, and N is the dimensionality 216 | of the dataset 217 | K - the square root of the number of grid points (for now, we assume square gridding of the 218 | unit cube) 219 | family_search - a list of all the copula families to search. Currently, what is supported is 220 | Gaussian, Clayton, Gumbel, and Frank. As more copula's are added, the default list will 221 | be expanded. 222 | """ 223 | # compute the empirical Kendall's Tau 224 | tau_hat = multivariate_stats.kendalls_tau(X) 225 | 226 | # compute empirical multinomial signature 227 | empirical_mnsig = empirical_copulamnsig(X, K) 228 | empirical_mnsig = empirical_mnsig[0]['esig'] 229 | # replace any 0 values w/ smallest possible float value 230 | empirical_mnsig[empirical_mnsig==0] = np.spacing(1) 231 | 232 | # compute the multinomial signature for each of the copula families specified 233 | # and simultaneously compute the kullback leibler divergence between the empirical 234 | # and the computed, and store that info 235 | distances = {} 236 | for family in family_search: 237 | # because the Clayton and Gumbel Copula's have restrictions for the valid values of 238 | # Kendall's tau, we do checks here to ensure those restrictions are met, because there 239 | # will be a certain variance associated with the tau_hat measurement 240 | 241 | if(family.lower()=='clayton'): 242 | # here we add some additional optimizatons as follows. We know that the Clayton copula 243 | # captures only positive concordance. Like any estimator, tau_hat will have some variance 244 | # associated with it. Thus, the optimization we make is as follows, if tau_hat is within 245 | # a configurable amount less than 0, then we will set tau_hat to 0 and continue processing. 246 | # However, if tau_hat is greater than that, we theoretically wouldn't have to test against 247 | # the Clayton copula model, so we set the KL-divergence to be infinity to exclude 248 | # this family from being selected 249 | if(tau_hat<-0.05): 250 | distances[family] = np.inf 251 | continue 252 | elif(tau_hat>=-0.05 and tau_hat<0): 253 | tau_hat = 0 254 | elif(tau_hat>=1): 255 | tau_hat = 1 - np.spacing(1) # as close to 1 as possible in our precision 256 | elif(family.lower()=='gumbel'): 257 | # here we add some additional optimizatons as follows. We know that the Gumbel copula 258 | # captures only positive concordance. Like any estimator, tau_hat will have some variance 259 | # associated with it. Thus, the optimization we make is as follows, if tau_hat is within 260 | # a configurable amount less than 0, then we will set tau_hat to 0 and continue processing. 261 | # However, if tau_hat is greater than that, we theoretically wouldn't have to test against 262 | # the Gumbel copula model, so we set the KL-divergence to be infinity to exclude 263 | # this family from being selected 264 | if(tau_hat<-0.05): 265 | distances[family] = np.inf 266 | continue 267 | elif(tau_hat>=-0.05 and tau_hat<0): 268 | tau_hat = 0 269 | elif(tau_hat>=1): 270 | tau_hat = 1 - np.spacing(1) # as close to 1 as possible in our precision 271 | # any other copula families with restrictions can go here 272 | 273 | mnsig = copulamnsig(family,K,'kendall',tau_hat) 274 | # replace any 0 values w/ smallest possible float value 275 | mnsig[mnsig==0] = np.spacing(1) 276 | 277 | # compute KL divergence, see 278 | # http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.entropy.html 279 | distances[family] = entropy(mnsig, empirical_mnsig) 280 | 281 | # search for the minimum distance, that is the optimal copula family to use 282 | minDistance = np.inf 283 | for family, distance in distances.iteritems(): 284 | if distance0): 460 | plt.plot(np.arange(1,K*K+1), resultsAggregate['Gaussian'][tau], 'b.-', label='Gaussian Copula') 461 | if(np.sum(resultsAggregate['Clayton'][tau])>0): 462 | plt.plot(np.arange(1,K*K+1), resultsAggregate['Clayton'][tau], 'g.-', label='Clayton Copula') 463 | if(np.sum(resultsAggregate['Gumbel'][tau])>0): 464 | plt.plot(np.arange(1,K*K+1), resultsAggregate['Gumbel'][tau], 'r.-', label='Gumbel Copula') 465 | if(np.sum(resultsAggregate['Frank'][tau])>0): 466 | plt.plot(np.arange(1,K*K+1), resultsAggregate['Frank'][tau], 'k.-', label='Frank Copula') 467 | 468 | plt.title(r'Copula Multinomial Signature $\tau$=' + "{0:.2f}".format(tau) + ' K=' + str(K)) 469 | plt.legend() 470 | plt.grid() 471 | 472 | plt.subplot(232) 473 | if(r!=-1): 474 | plt.scatter(U_gauss[:,0], U_gauss[:,1]) 475 | plt.grid() 476 | plt.title(r'Gaussian Copula, $\rho$=' + "{0:.2f}".format(r) + r' $\tau$=' + "{0:.2f}".format(tau)) 477 | 478 | plt.subplot(233) 479 | if(alpha_clayton!=-1): 480 | plt.scatter(U_clayton[:,0], U_clayton[:,1]) 481 | plt.grid() 482 | plt.title(r'Clayton Copula, $\alpha$=' + "{0:.2f}".format(alpha_clayton) + r' $\tau$=' + "{0:.2f}".format(tau)) 483 | 484 | plt.subplot(235) 485 | if(alpha_frank!=-1): 486 | plt.scatter(U_frank[:,0], U_frank[:,1]) 487 | plt.grid() 488 | plt.title(r'Frank Copula, $\alpha$=' + "{0:.2f}".format(alpha_frank) + r' $\tau$=' + "{0:.2f}".format(tau)) 489 | 490 | plt.subplot(236) 491 | if(alpha_gumbel!=-1): 492 | plt.scatter(U_gumbel[:,0], U_gumbel[:,1]) 493 | plt.grid() 494 | plt.title(r'Gumbel Copula, $\alpha$=' + "{0:.2f}".format(alpha_gumbel) + r' $\tau$=' + "{0:.2f}".format(tau)) 495 | 496 | plt.subplot(234) 497 | # index manually to ensure accuracy 498 | cla = np.array([helmAccuracyResults['Clayton'][tau]['clayton'], 499 | helmAccuracyResults['Gaussian'][tau]['clayton'], 500 | helmAccuracyResults['Gumbel'][tau]['clayton'], 501 | helmAccuracyResults['Frank'][tau]['clayton']]) 502 | gau = np.array([helmAccuracyResults['Clayton'][tau]['gaussian'], 503 | helmAccuracyResults['Gaussian'][tau]['gaussian'], 504 | helmAccuracyResults['Gumbel'][tau]['gaussian'], 505 | helmAccuracyResults['Frank'][tau]['gaussian']]) 506 | gum = np.array([helmAccuracyResults['Clayton'][tau]['gumbel'], 507 | helmAccuracyResults['Gaussian'][tau]['gumbel'], 508 | helmAccuracyResults['Gumbel'][tau]['gumbel'], 509 | helmAccuracyResults['Frank'][tau]['gumbel']]) 510 | fra = np.array([helmAccuracyResults['Clayton'][tau]['frank'], 511 | helmAccuracyResults['Gaussian'][tau]['frank'], 512 | helmAccuracyResults['Gumbel'][tau]['frank'], 513 | helmAccuracyResults['Frank'][tau]['frank']]) 514 | ind = np.arange(4) 515 | width = 0.2 516 | p1 = plt.bar(ind,cla,width,color='b') 517 | p2 = plt.bar(ind,gau,width,color='g',bottom=cla) 518 | p3 = plt.bar(ind,gum,width,color='k',bottom=cla+gau) 519 | p4 = plt.bar(ind,fra,width,color='r',bottom=cla+gau+gum) 520 | plt.xticks(ind+width/2.,('Clayton', 'Gaussian', 'Gumbel', 'Frank')) 521 | plt.legend( (p1[0], p2[0], p3[0], p4[0]), ('Clayton', 'Gaussian', 'Gumbel', 'Frank') ) 522 | 523 | plt.grid() 524 | plt.savefig(os.path.join('figures/HELM_performance/', 525 | 'HELM_DIM_' + str(N) + '_tau_' + "{0:.2f}".format(tau) + ' _K_' + str(K) + '.png')) 526 | 527 | plt.close() 528 | 529 | 530 | if __name__=='__main__': 531 | from copularnd import copularnd 532 | from invcopulastat import invcopulastat 533 | from scipy.stats import norm 534 | from scipy.stats import expon 535 | import sys 536 | import matplotlib.pyplot as plt 537 | import os 538 | 539 | # some tests on the copula multinomial signature 540 | tau = 0.4 541 | K = 4 542 | mnsig = copulamnsig('Gumbel',K,'kendall',tau) 543 | # iterate through mnsig to make sure we add upto 1 as a simple sanity check 544 | val_total = 0 545 | for ii in range(0,len(mnsig)): 546 | val_total = val_total + mnsig[ii] #['val'] 547 | 548 | if(np.isclose(val_total, 1.0)): 549 | print 'CopulaMNSig total probability check passed!' 550 | else: 551 | print 'CopulaMNSig total probability check failed!' 552 | 553 | 554 | M = 1000 555 | N = 2 556 | 557 | # Monte-Carlo style simulations to test each copula generation 558 | numMCSims = 100 559 | # the families to test against and pick optimal copula 560 | families = ['Gaussian', 'Clayton', 'Gumbel', 'Frank'] 561 | 562 | """ 563 | for family in families: 564 | title = 'Reference Bivariate ' + str(family) + ' Copula - HELM Identification Breakdown' 565 | results = testHELM(tau, M, N, family, numMCSims, families) 566 | plotPieChartResults(results, family, title) 567 | 568 | N = 3 569 | for family in families: 570 | title = 'Reference Bivariate ' + str(family) + ' Copula - HELM Identification Breakdown' 571 | results = testHELM(tau, M, N, family, numMCSims, families) 572 | plotPieChartResults(results, family, title) 573 | """ 574 | #tauVec = np.arange(-0.9,0.95,0.05) 575 | #resultsAggregate = testHELM_parametric(K,M,N,tauVec) 576 | 577 | visualizeMNSig() 578 | 579 | 580 | -------------------------------------------------------------------------------- /copulapdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from scipy.stats import mvn # contains inverse CDF of Multivariate Gaussian 26 | from scipy.stats import norm # contains PDF of Gaussian 27 | from scipy.stats import t 28 | from scipy.special import gammaln 29 | 30 | from numpy.linalg import solve 31 | from numpy.linalg import cholesky 32 | from numpy.linalg import LinAlgError 33 | 34 | from copulacdf import copulacdf 35 | 36 | """ 37 | copulapdf.py contains routines which provide Copula PDF values 38 | """ 39 | 40 | def copulapdf(family, u, *args): 41 | """ Generates values of a requested copula family 42 | 43 | Inputs: 44 | u -- u is an N-by-P matrix of values in [0,1], representing N 45 | points in the P-dimensional unit hypercube. 46 | 47 | rho -- a P-by-P correlation matrix, the first argument required for the Gaussian copula 48 | alpha -- a scalar argument describing the dependency for Frank, Gumbel, and Clayton copula's 49 | 50 | Outputs: 51 | y -- the value of the Gaussian Copula 52 | """ 53 | n = u.shape[0] 54 | p = u.shape[1] 55 | 56 | num_var_args = len(args) 57 | family_lc = family.lower() 58 | if(family_lc=='gaussian'): 59 | if(num_var_args!=1): 60 | raise ValueError("Gaussian family requires one additional argument -- rho (correlation matrix) [P x P]") 61 | rho = args[0] 62 | rho_expected_shape = (p,p) 63 | if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape): 64 | raise ValueError("Gaussian family requires rho to be of type numpy.ndarray with shape=[P x P]") 65 | y = _gaussian(u, rho) 66 | 67 | elif(family_lc=='t'): 68 | rho = args[0] 69 | rho_expected_shape = (p,p) 70 | if(type(rho)!=np.ndarray or rho.shape!=rho_expected_shape): 71 | raise ValueError("T family requires rho to be of type numpy.ndarray with shape=[P x P]") 72 | nu = int(args[1]) # force to be an integer 73 | if(nu<1): 74 | raise ValueError("T family Degrees of Freedom argument must be an integer >= 1") 75 | return _t(u, rho, nu) 76 | elif(family_lc=='clayton'): 77 | if(num_var_args!=1): 78 | raise ValueError("Clayton family requires one additional argument -- alpha [scalar]") 79 | alpha = args[0] 80 | if(type(alpha)!=float): 81 | raise ValueError('Clayton family requires a scalar alpha value') 82 | y = _clayton(u, alpha) 83 | elif(family_lc=='frank'): 84 | if(num_var_args!=1): 85 | raise ValueError("Frank family requires one additional argument -- alpha [scalar]") 86 | alpha = args[0] 87 | if(type(alpha)!=float): 88 | raise ValueError('Clayton family requires a scalar alpha value') 89 | y = _frank(u, alpha) 90 | elif(family_lc=='gumbel'): 91 | if(num_var_args!=1): 92 | raise ValueError("Gumbel family requires one additional argument -- alpha [scalar]") 93 | alpha = args[0] 94 | if(type(alpha)!=float): 95 | raise ValueError('Clayton family requires a scalar alpha value') 96 | y = _gumbel(u, alpha) 97 | else: 98 | raise ValueError("Unrecognized family of copula") 99 | 100 | return y 101 | 102 | def _gaussian(u, rho): 103 | try: 104 | R = cholesky(rho) 105 | except LinAlgError: 106 | raise ValueError('Provided Rho matrix is not Positive Definite!') 107 | 108 | x = norm.ppf(u) 109 | z = solve(R,x.T) 110 | z = z.T 111 | logSqrtDetRho = np.sum(np.log(np.diag(R))) 112 | y = np.exp(-0.5 * np.sum( np.power(z,2) - np.power(x,2) , axis=1 ) - logSqrtDetRho) 113 | 114 | return y 115 | 116 | def _t(u, rho, nu): 117 | d = u.shape[1] 118 | nu = float(nu) 119 | 120 | try: 121 | R = cholesky(rho) 122 | except LinAlgError: 123 | raise ValueError('Provided Rho matrix is not Positive Definite!') 124 | 125 | ticdf = t.ppf(u, nu) 126 | 127 | z = solve(R,ticdf.T) 128 | z = z.T 129 | logSqrtDetRho = np.sum(np.log(np.diag(R))) 130 | const = gammaln((nu+d)/2.0) + (d-1)*gammaln(nu/2.0) - d*gammaln((nu+1)/2.0) - logSqrtDetRho 131 | sq = np.power(z,2) 132 | summer = np.sum(np.power(z,2),axis=1) 133 | numer = -((nu+d)/2.0) * np.log(1.0 + np.sum(np.power(z,2),axis=1)/nu) 134 | denom = np.sum(-((nu+1)/2) * np.log(1 + (np.power(ticdf,2))/nu), axis=1) 135 | y = np.exp(const + numer - denom) 136 | 137 | return y 138 | 139 | def _clayton(u, alpha): 140 | n = u.shape[0] 141 | d = u.shape[1] 142 | if(d>2): 143 | raise ValueError('Maximum dimensionality supported is 2 for the Clayton Copula Family') 144 | if alpha<0: 145 | raise ValueError('Dependency parameter for Clayton copula must be >= 0') 146 | elif alpha==0: 147 | y = np.ones((n,1)) 148 | else: 149 | # below is the closed form of d2C/dudv of the Clayton copula 150 | y = (alpha + 1) * np.power( u[:,0]*u[:,1], -1*(alpha+1) ) * np.power( np.power(u[:,0], -alpha) + np.power(u[:,1], -alpha) - 1, -1*(2*alpha+1)/alpha ) 151 | 152 | return y 153 | 154 | def _frank(u, alpha): 155 | if alpha == 0: 156 | y = ones(n,1); 157 | else: 158 | summer = np.sum(u,1) 159 | differ = np.diff(u,1,1) 160 | differ = differ[:,0] 161 | denom = np.power(np.cosh(alpha*differ/2)*2 - np.exp(alpha*(summer-2)/2) - np.exp(-alpha*summer/2), 2) 162 | y = alpha*(1-np.exp(-alpha)) / denom 163 | 164 | return y 165 | 166 | def _gumbel(U, alpha): 167 | n = U.shape[0] 168 | d = U.shape[1] 169 | if(d>2): 170 | raise ValueError('Maximum dimensionality supported is 2 for the Gumbel Copula Family') 171 | 172 | if(alpha < 1): 173 | raise ValueError('Bad dependency parameter for Gumbel copula') 174 | elif alpha==1: 175 | y = np.ones((n,1)) 176 | else: 177 | # below is the closed form of d2C/dudv of the Gumbel copula 178 | C = copulacdf('Gumbel', U, alpha) 179 | u = U[:,0] 180 | v = U[:,1] 181 | p1 = C*1.0/(u*v)*np.power(np.power(-1*np.log(u),alpha) + np.power(-1*np.log(v),alpha), -2.0 + 2.0/alpha)*np.power(np.log(u)*np.log(v),alpha-1.0) 182 | p2 = 1.0 + (alpha - 1.0)*np.power(np.power(-1*np.log(u),alpha) + np.power(-1*np.log(v),alpha), -1.0/alpha ) 183 | y = p1*p2 184 | return y 185 | 186 | def test_python_vs_matlab(family): 187 | # generate U1, U2 188 | n = 10 189 | p = 2 190 | 191 | # generate all u1,u2 combinations 192 | eps = np.finfo(float).eps 193 | u = np.linspace(0.1,0.9,n) 194 | UU = np.meshgrid(u,u) 195 | U2 = np.reshape(UU[0], (UU[0].shape[0]*UU[0].shape[1], 1)) 196 | U1 = np.reshape(UU[1], (UU[1].shape[0]*UU[1].shape[1], 1)) 197 | U = np.concatenate((U1,U2),axis=1) 198 | 199 | rho = 0.8 200 | Rho = np.array([[1,rho],[rho,1]]) 201 | nu = 2 202 | 203 | alpha = 0.3 204 | 205 | # test the python data against Matlab 206 | # TODO: make python execute the matlab script which generates these samples 207 | matlab_data = scipy.io.loadmat('matlab/copulapdf_test.mat') 208 | 209 | if(family.lower()=='gaussian'): 210 | gaussian_copula_pdf_python = copulapdf(family,U,Rho) 211 | gaussian_copula_pdf_matlab = matlab_data['gaussian_copula_pdf'][:,0] 212 | 213 | # compare the two 214 | gaussian_copula_test_result = np.allclose(gaussian_copula_pdf_python,gaussian_copula_pdf_matlab) 215 | if(gaussian_copula_test_result): 216 | print 'Gaussian Copula Python calculation matches Matlab!' 217 | else: 218 | print 'Gaussian Copula Python calculation does NOT match Matlab!' 219 | 220 | # plot the Copula for fun 221 | X = UU[0] 222 | Y = UU[1] 223 | Z = np.reshape(gaussian_copula_pdf_python,UU[0].shape) 224 | 225 | plot_utils.plot_3d(X,Y,Z, 'Gaussian Copula PDF') 226 | 227 | elif(family.lower()=='t'): 228 | t_copula_pdf_python = copulapdf(family,U,Rho,nu) 229 | t_copula_pdf_matlab = matlab_data['t_copula_pdf'][:,0] 230 | 231 | # compare the two 232 | t_copula_test_result = np.allclose(t_copula_pdf_python,t_copula_pdf_matlab) 233 | if(t_copula_test_result): 234 | print 'T Copula Python calculation matches Matlab!' 235 | else: 236 | print 'T Copula Python calculation does NOT match Matlab!' 237 | 238 | # plot the Copula for fun 239 | X = UU[0] 240 | Y = UU[1] 241 | Z = np.reshape(t_copula_pdf_python,UU[0].shape) 242 | 243 | plot_utils.plot_3d(X,Y,Z, 'T Copula PDF') 244 | 245 | elif(family.lower()=='clayton'): 246 | clayton_copula_pdf_python = copulapdf(family,U,alpha) 247 | clayton_copula_pdf_matlab = matlab_data['clayton_copula_pdf'][:,0] 248 | 249 | # compare the two 250 | clayton_copula_test_result = np.allclose(clayton_copula_pdf_python,clayton_copula_pdf_matlab) 251 | if(clayton_copula_test_result): 252 | print 'Clayton Copula Python calculation matches Matlab!' 253 | else: 254 | print 'Clayton Copula Python calculation does NOT match Matlab!' 255 | 256 | # plot the Copula for fun 257 | X = UU[0] 258 | Y = UU[1] 259 | Z = np.reshape(clayton_copula_pdf_python,UU[0].shape) 260 | 261 | plot_utils.plot_3d(X,Y,Z, 'Clayton Copula PDF') 262 | 263 | elif(family.lower()=='gumbel'): 264 | alpha = 1.5 265 | gumbel_copula_pdf_python = copulapdf(family,U,alpha) 266 | gumbel_copula_pdf_matlab = matlab_data['gumbel_copula_pdf'][:,0] 267 | 268 | # compare the two 269 | gumbel_copula_test_result = np.allclose(gumbel_copula_pdf_python,gumbel_copula_pdf_matlab) 270 | if(gumbel_copula_test_result): 271 | print 'Gumbel Copula Python calculation matches Matlab!' 272 | else: 273 | print 'Gumbel Copula Python calculation does NOT match Matlab!' 274 | 275 | # plot the Copula for fun 276 | X = UU[0] 277 | Y = UU[1] 278 | Z = np.reshape(gumbel_copula_pdf_python,UU[0].shape) 279 | 280 | plot_utils.plot_3d(X,Y,Z, 'Gumbel Copula PDF') 281 | 282 | elif(family.lower()=='frank'): 283 | frank_copula_pdf_python = copulapdf(family,U,alpha) 284 | frank_copula_pdf_matlab = matlab_data['frank_copula_pdf'][:,0] 285 | 286 | # compare the two 287 | frank_copula_test_result = np.allclose(frank_copula_pdf_python,frank_copula_pdf_matlab) 288 | if(frank_copula_test_result): 289 | print 'Frank Copula Python calculation matches Matlab!' 290 | else: 291 | print 'Frank Copula Python calculation does NOT match Matlab!' 292 | 293 | # plot the Copula for fun 294 | X = UU[0] 295 | Y = UU[1] 296 | Z = np.reshape(frank_copula_pdf_python,UU[0].shape) 297 | 298 | plot_utils.plot_3d(X,Y,Z, 'Frank Copula PDF') 299 | 300 | if __name__=='__main__': 301 | import scipy.io 302 | import plot_utils 303 | 304 | test_python_vs_matlab('Gaussian') 305 | test_python_vs_matlab('T') 306 | test_python_vs_matlab('Clayton') 307 | test_python_vs_matlab('Gumbel') 308 | test_python_vs_matlab('Frank') -------------------------------------------------------------------------------- /copularnd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | import sys 26 | 27 | from scipy.stats import norm # contains PDF of Gaussian 28 | from scipy.stats import multivariate_normal 29 | from scipy.stats import uniform 30 | from scipy.stats import gamma 31 | from scipy.stats import logser 32 | from scipy.stats import t 33 | from rstable1 import rstable1 34 | from statsmodels.sandbox.distributions import multivariate as mvt 35 | 36 | import scipy.io as sio 37 | 38 | """ 39 | copularnd.py contains routines which provide samples of a copula density 40 | """ 41 | def copularnd(family, M, *args): 42 | """ Generates values of the Gaussian copula 43 | 44 | Inputs: 45 | family -- Should be either 'gaussian', 't', 'clayton', 'frank', or 'gumbel' 46 | M -- the number of samples to generate 47 | args -- variable number of arguments depending on which type of copula you are trying to simulate 48 | Gaussian -- should be provided a NxN rho matrix as a numpy array datatype 49 | t -- should be provided a NxN rho matrix and a nu value 50 | Clayton/Frank/Gumbel - should be provided a N for the dimensionality, and a scalar alpha value 51 | 52 | Outputs: 53 | U -- a M x N matrix of samples from the copula density chosen 54 | """ 55 | 56 | num_var_args = len(args) 57 | family_lc = family.lower() 58 | if(family_lc=='gaussian'): 59 | if(num_var_args!=1): 60 | raise ValueError("Gaussian family requires one additional argument -- rho (correlation matrix) [P x P]") 61 | rho = args[0] 62 | shape0 = rho.shape[0] 63 | shape1 = rho.shape[1] 64 | if(shape0!=shape1): 65 | raise ValueError("Gaussian family requires rho to be of type numpy.ndarray with shape=[P x P]") 66 | U = _gaussian(M, rho) 67 | elif(family_lc=='t'): 68 | if(num_var_args!=2): 69 | raise ValueError("T family requires two additional argument -- rho (correlation matrix) [P x P] and nu [scalar]") 70 | rho = args[0] 71 | shape0 = rho.shape[0] 72 | shape1 = rho.shape[1] 73 | if(shape0!=shape1): 74 | raise ValueError("T family requires rho to be of type numpy.ndarray with shape=[P x P]") 75 | nu = args[1] 76 | U = _t(M, rho, nu) 77 | elif(family_lc=='clayton'): 78 | if(num_var_args!=2): 79 | raise ValueError("Clayton family requires two additional arguments -- N, alpha [scalar]") 80 | N = args[0] 81 | alpha = args[1] 82 | U = _clayton(M, N, alpha) 83 | elif(family_lc=='frank'): 84 | if(num_var_args!=2): 85 | raise ValueError("Frank family requires two additional arguments -- N, alpha [scalar]") 86 | N = args[0] 87 | alpha = args[1] 88 | U = _frank(M, N, alpha) 89 | elif(family_lc=='gumbel'): 90 | if(num_var_args!=2): 91 | raise ValueError("Gumbel family requires two additional arguments -- N, alpha [scalar]") 92 | N = args[0] 93 | alpha = args[1] 94 | U = _gumbel(M, N, alpha) 95 | else: 96 | raise ValueError("Unrecognized family of copula") 97 | 98 | return U 99 | 100 | def _gaussian(M, Rho): 101 | """ 102 | Generates samples from the Gaussian Copula, w/ dependency 103 | matrix described by Rho. Rho should be a numpy square matrix. 104 | It is assumed that we have a 0 mean. 105 | """ 106 | N = Rho.shape[0] 107 | mu = np.zeros(N) 108 | y = multivariate_normal(mu,Rho) 109 | mvnData = y.rvs(size=M) 110 | U = norm.cdf(mvnData) 111 | 112 | return U 113 | 114 | def _t(M, Rho, nu): 115 | N = Rho.shape[0] 116 | mu = np.zeros(N) # zero mean 117 | x = mvt.multivariate_t_rvs(mu,Rho,nu,M) # generate T RV's 118 | U = t.cdf(x, nu) 119 | 120 | return U 121 | 122 | # We generate the Archimedean Copula's as follows: 123 | # Random pairs from these copulae can be generated sequentially: first 124 | # generate u1 as a uniform r.v. Then generate u2 from the conditional 125 | # distribution F(u2 | u1; alpha) by generating uniform random values, then 126 | # inverting the conditional CDF. 127 | # This method is outlined in Nelsen's Introduction to Copula's 128 | 129 | def _clayton(M, N, alpha): 130 | if(alpha<0): 131 | raise ValueError('Alpha must be >=0 for Clayton Copula Family') 132 | if(N<2): 133 | raise ValueError('Dimensionality Argument [N] must be an integer >= 2') 134 | elif(N==2): 135 | u1 = uniform.rvs(size=M) 136 | p = uniform.rvs(size=M) 137 | if(alpha= 2') 168 | elif(N==2): 169 | u1 = uniform.rvs(size=M) 170 | p = uniform.rvs(size=M) 171 | if abs(alpha) > math.log(sys.float_info.max): 172 | u2 = (u1 < 0).astype(int) + np.sign(alpha)*u1 # u1 or 1-u1 173 | elif abs(alpha) > math.sqrt(np.spacing(1)): 174 | u2 = -1*np.log((np.exp(-alpha*u1)*(1-p)/p + np.exp(-alpha))/(1 + np.exp(-alpha*u1)*(1-p)/p))/alpha 175 | else: 176 | u2 = p 177 | 178 | U = np.column_stack((u1,u2)) 179 | else: 180 | # Algorithm 1 described in both the SAS Copula Procedure, as well as the 181 | # paper: "High Dimensional Archimedean Copula Generation Algorithm" 182 | if(alpha<=0): 183 | raise ValueError('For N>=3, alpha >0 in Frank Copula') 184 | 185 | U = np.empty((M,N)) 186 | #v_vec = np.empty(M) 187 | for ii in range(0,M): 188 | p = -1.0*np.expm1(-1*alpha) 189 | if(p==1): 190 | # boundary case protection 191 | p = 1 - np.spacing(1) 192 | v = logser.rvs(p, size=1) 193 | #v_vec[ii] = v 194 | # sample N independent uniform random variables 195 | x_i = uniform.rvs(size=N) 196 | t = -1*np.log(x_i)/v 197 | U[ii,:] = -1.0*np.log1p( np.exp(-t)*np.expm1(-1.0*alpha))/alpha 198 | 199 | #sio.savemat('logser_v.mat', {'v':v_vec}) 200 | 201 | return U 202 | 203 | def _gumbel(M, N, alpha): 204 | if alpha < 1: 205 | raise ValueError('Alpha must be >=1 for Gumbel Copula Family!') 206 | if(N<2): 207 | raise ValueError('Dimensionality Argument [N] must be an integer >= 2') 208 | elif(N==2): 209 | if alpha < (1 + math.sqrt(np.spacing(1))): 210 | u1 = uniform.rvs(size=M); 211 | u2 = uniform.rvs(size=M); 212 | else: 213 | # use the Marshal-Olkin method 214 | # Generate gamma as Stable(1/alpha,1), c.f. Devroye, Thm. IV.6.7 215 | u = (uniform.rvs(size=M) - .5) * math.pi # Generate M uniformly distributed RV's between -pi/2 and pi/2 216 | u2 = u + math.pi/2 217 | e = -1*np.log(uniform.rvs(size=M)) 218 | t = np.cos(u - u2/alpha)/e 219 | gamma = np.power(np.sin(u2/alpha)/t,(1.0/alpha)) * t/np.cos(u); 220 | 221 | # Frees&Valdez, eqn 3.5 222 | u1 = np.exp(-1* (np.power(-1*np.log(uniform.rvs(size=M)), 1.0/alpha) / gamma) ) 223 | u2 = np.exp(-1* (np.power(-1*np.log(uniform.rvs(size=M)), 1.0/alpha) / gamma) ) 224 | 225 | U = np.column_stack((u1,u2)) 226 | else: 227 | # Algorithm 1 described in both the SAS Copula Procedure, as well as the 228 | # paper: "High Dimensional Archimedean Copula Generation Algorithm" 229 | U = np.empty((M,N)) 230 | #v_vec = np.empty(M) 231 | for ii in range(0,M): 232 | a = 1.0/alpha 233 | b = 1 234 | g = np.power(np.cos(math.pi/(2.0*alpha)), alpha) 235 | d = 0 236 | pm = 1 237 | v = rstable1(1,a,b,g,d,pm) 238 | #v_vec[ii] = v 239 | # sample N independent uniform random variables 240 | x_i = uniform.rvs(size=N) 241 | t = -1*np.log(x_i)/v 242 | 243 | U[ii,:] = np.exp(-1*np.power(t, 1.0/alpha)) 244 | 245 | #sio.savemat('gamma_v.mat', {'v':v_vec}) 246 | 247 | return U 248 | 249 | 250 | if __name__=='__main__': 251 | import matplotlib.pyplot as plt 252 | from plot_utils import pairs 253 | M = 1000 254 | rh = 0.6 255 | Rho = np.array([[1,rh],[rh,1]]) 256 | nu = 2 257 | N = 2 258 | alpha = 5 259 | 260 | # Generate 2-D Copula RV's 261 | Ug2d = copularnd('gaussian', M, Rho) 262 | Ut2d = copularnd('t', M, Rho, nu) 263 | Uc2d = copularnd('clayton', M, N, alpha) 264 | Uf2d = copularnd('frank', M, N, alpha) 265 | Ugu2d = copularnd('gumbel', M, N, alpha) 266 | 267 | # Generate 3-D Copula RV's 268 | N = 3 269 | Rho = np.array([[1,rh,rh],[rh,1,rh],[rh,rh,1]]) 270 | Ug3d = copularnd('gaussian', M, Rho) 271 | Ut3d = copularnd('t', M, Rho, nu) 272 | Ugu3d = copularnd('gumbel',M,N,alpha) 273 | Uf3d = copularnd('frank',M,N,alpha) 274 | Uc3d = copularnd('clayton',M,N,alpha) 275 | 276 | # plots 277 | pairs(Ug2d, 'Gaussian') 278 | pairs(Ut2d, 'T') 279 | pairs(Uc2d, 'Clayton') 280 | pairs(Uf2d, 'Frank') 281 | pairs(Ugu2d, 'Gumbel') 282 | 283 | pairs(Ug3d, 'Gaussian') 284 | pairs(Ut3d, 'T') 285 | pairs(Uc3d, 'Clayton') 286 | pairs(Uf3d, 'Frank') 287 | pairs(Ugu3d, 'Gumbel') 288 | -------------------------------------------------------------------------------- /copulastat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from debye import debye 26 | 27 | """ 28 | copulastat.py contains routines which provide copula dependency measures 29 | the copula family type and the copula's specific dependency parameter. 30 | 31 | The relationships used in the functions are well known, and documented in 32 | many copula research papers, including Nelsen's Introduction to Copula's. 33 | """ 34 | 35 | def copulastat(family, dependency, *args): 36 | dependency_lc = dependency.lower() 37 | if(dependency_lc!='kendall' and dependency_lc!='spearman'): 38 | raise ValueError('Invalid dependency argument -- must be kendall or spearman') 39 | dep_param = args[0] 40 | if(family.lower()=='gaussian'): 41 | r = _gaussian(dependency_lc, dep_param) 42 | elif(family.lower()=='t'): 43 | nu = args[1] 44 | r = _t(dependency_lc, dep_param, nu) 45 | elif(family.lower()=='clayton'): 46 | if(dep_param<0): 47 | raise ValueError('Invalid alpha value for Clayton Copula!') 48 | r = _clayton(dependency_lc, dep_param) 49 | elif(family.lower()=='gumbel'): 50 | r = _gumbel(dependency_lc, dep_param) 51 | elif(family.lower()=='frank'): 52 | r = _frank(dependency_lc, dep_param) 53 | else: 54 | raise ValueError('Unsupported Copula Family!') 55 | 56 | return r 57 | 58 | def _gaussian(dependency, rho): 59 | if(dependency=='kendall'): 60 | r = 2*np.arcsin(rho)/math.pi 61 | elif(dependency=='spearman'): 62 | r = 6*np.arcsin(rho/2)/math.pi 63 | return r 64 | 65 | def _t(dependency, rho, nu): 66 | if(dependency=='kendall'): 67 | r = 2*np.arcsin(rho)/math.pi 68 | elif(dependency=='spearman'): 69 | # we use nu in spearman's rho 70 | raise NotImplementedError('Spearmans Rho currently unsupported for T Copula') 71 | 72 | def _clayton(dependency, alpha): 73 | if(dependency=='kendall'): 74 | r = alpha / (2 + alpha) 75 | elif(dependency=='spearman'): 76 | a = -0.1002 77 | b = 0.1533 78 | c = -0.5024 79 | d = -0.05629 80 | poly_coefs = [a,b,c,d,-1*(a+b+c+d-1),0] 81 | r = np.polyval(poly_coefs, alpha/(2+alpha)) 82 | 83 | return r 84 | 85 | def _gumbel(dependency, alpha): 86 | if(dependency=='kendall'): 87 | r = 1 - 1/alpha 88 | elif(dependency=='spearman'): 89 | a = -.2015 90 | b = .4208 91 | c = .2429 92 | d = -1.453 93 | poly_coefs = [a,b,c,d,-1*(a+b+c+d+1),1] 94 | r = np.polyval(poly_coefs, 1/alpha) 95 | 96 | return r 97 | 98 | def _frank(dependency, alpha): 99 | if(dependency=='kendall'): 100 | r = 1 + 4 * (debye(alpha,1)-1) / alpha 101 | elif(dependency=='spearman'): 102 | r = 1 + 12 * (debye(alpha,2) - debye(alpha,1)) / alpha 103 | 104 | return r 105 | 106 | def test_python_vs_matlab(family): 107 | # test the python data against Matlab 108 | # TODO: make python execute the matlab script which generates these samples 109 | matlab_data = scipy.io.loadmat('matlab/copulastat_test.mat') 110 | 111 | if(family.lower()=='gaussian'): 112 | rho = 0.3 113 | gauss_ktau_rho_0_3_python = copulastat(family,'kendall',rho) 114 | gauss_srho_rho_0_3_python = copulastat(family,'spearman',rho) 115 | rho = 0.7 116 | gauss_ktau_rho_0_7_python = copulastat(family,'kendall',rho) 117 | gauss_srho_rho_0_7_python = copulastat(family,'spearman',rho) 118 | rho = 1.0 119 | gauss_ktau_rho_1_0_python = copulastat(family,'kendall',rho) 120 | gauss_srho_rho_1_0_python = copulastat(family,'spearman',rho) 121 | 122 | p1 = np.isclose(gauss_ktau_rho_0_3_python, matlab_data['gauss_ktau_rho_0_3']) 123 | p2 = np.isclose(gauss_srho_rho_0_3_python, matlab_data['gauss_srho_rho_0_3']) 124 | p3 = np.isclose(gauss_ktau_rho_0_7_python, matlab_data['gauss_ktau_rho_0_7']) 125 | p4 = np.isclose(gauss_srho_rho_0_7_python, matlab_data['gauss_srho_rho_0_7']) 126 | p5 = np.isclose(gauss_ktau_rho_1_0_python, matlab_data['gauss_ktau_rho_1_0']) 127 | p6 = np.isclose(gauss_srho_rho_1_0_python, matlab_data['gauss_srho_rho_1_0']) 128 | 129 | if(p1 and p2 and p3 and p4 and p5 and p6): 130 | print 'Gaussian CopulaStat tests PASSED!' 131 | else: 132 | print 'Gaussian CopulaStat tests FAILED!' 133 | elif(family.lower()=='t'): 134 | pass 135 | elif(family.lower()=='clayton'): 136 | alpha = 0.3 137 | clayton_ktau_alpha_0_3_python = copulastat(family,'kendall',alpha) 138 | clayton_srho_alpha_0_3_python = copulastat(family,'spearman',alpha) 139 | alpha = 0.7 140 | clayton_ktau_alpha_0_7_python = copulastat(family,'kendall',alpha) 141 | clayton_srho_alpha_0_7_python = copulastat(family,'spearman',alpha) 142 | alpha = 1.0 143 | clayton_ktau_alpha_1_0_python = copulastat(family,'kendall',alpha) 144 | clayton_srho_alpha_1_0_python = copulastat(family,'spearman',alpha) 145 | 146 | p1 = np.isclose(clayton_ktau_alpha_0_3_python, matlab_data['clayton_ktau_alpha_0_3']) 147 | p2 = np.isclose(clayton_srho_alpha_0_3_python, matlab_data['clayton_srho_alpha_0_3']) 148 | p3 = np.isclose(clayton_ktau_alpha_0_7_python, matlab_data['clayton_ktau_alpha_0_7']) 149 | p4 = np.isclose(clayton_srho_alpha_0_7_python, matlab_data['clayton_srho_alpha_0_7']) 150 | p5 = np.isclose(clayton_ktau_alpha_1_0_python, matlab_data['clayton_ktau_alpha_1_0']) 151 | p6 = np.isclose(clayton_srho_alpha_1_0_python, matlab_data['clayton_srho_alpha_1_0']) 152 | 153 | if(p1 and p2 and p3 and p4 and p5 and p6): 154 | print 'Clayton CopulaStat tests PASSED!' 155 | else: 156 | print 'Clayton CopulaStat tests FAILED!' 157 | elif(family.lower()=='gumbel'): 158 | alpha = 1.0 159 | gumbel_ktau_alpha_1_0_python = copulastat(family,'kendall',alpha) 160 | gumbel_srho_alpha_1_0_python = copulastat(family,'spearman',alpha) 161 | alpha = 3.0 162 | gumbel_ktau_alpha_3_0_python = copulastat(family,'kendall',alpha) 163 | gumbel_srho_alpha_3_0_python = copulastat(family,'spearman',alpha) 164 | 165 | p1 = np.isclose(gumbel_ktau_alpha_1_0_python, matlab_data['gumbel_ktau_alpha_1_0']) 166 | p2 = np.isclose(gumbel_srho_alpha_1_0_python, matlab_data['gumbel_srho_alpha_1_0']) 167 | p3 = np.isclose(gumbel_ktau_alpha_3_0_python, matlab_data['gumbel_ktau_alpha_3_0']) 168 | p4 = np.isclose(gumbel_srho_alpha_3_0_python, matlab_data['gumbel_srho_alpha_3_0']) 169 | 170 | if(p1 and p2 and p3 and p4): 171 | print 'Gumbel CopulaStat tests PASSED!' 172 | else: 173 | print 'Gumbel CopulaStat tests FAILED!' 174 | elif(family.lower()=='frank'): 175 | alpha = 0.3 176 | frank_ktau_alpha_0_3_python = copulastat(family,'kendall',alpha) 177 | frank_srho_alpha_0_3_python = copulastat(family,'spearman',alpha) 178 | alpha = 0.7 179 | frank_ktau_alpha_0_7_python = copulastat(family,'kendall',alpha) 180 | frank_srho_alpha_0_7_python = copulastat(family,'spearman',alpha) 181 | alpha = 1.0 182 | frank_ktau_alpha_1_0_python = copulastat(family,'kendall',alpha) 183 | frank_srho_alpha_1_0_python = copulastat(family,'spearman',alpha) 184 | 185 | p1 = np.isclose(frank_ktau_alpha_0_3_python, matlab_data['frank_ktau_alpha_0_3']) 186 | p2 = np.isclose(frank_srho_alpha_0_3_python, matlab_data['frank_srho_alpha_0_3']) 187 | p3 = np.isclose(frank_ktau_alpha_0_7_python, matlab_data['frank_ktau_alpha_0_7']) 188 | p4 = np.isclose(frank_srho_alpha_0_7_python, matlab_data['frank_srho_alpha_0_7']) 189 | p5 = np.isclose(frank_ktau_alpha_1_0_python, matlab_data['frank_ktau_alpha_1_0']) 190 | p6 = np.isclose(frank_srho_alpha_1_0_python, matlab_data['frank_srho_alpha_1_0']) 191 | 192 | if(p1 and p2 and p3 and p4 and p5 and p6): 193 | print 'Frank CopulaStat tests PASSED!' 194 | else: 195 | print 'Frank CopulaStat tests FAILED!' 196 | 197 | if __name__=='__main__': 198 | import scipy.io 199 | 200 | test_python_vs_matlab('Gaussian') 201 | test_python_vs_matlab('Clayton') 202 | test_python_vs_matlab('Gumbel') 203 | test_python_vs_matlab('Frank') 204 | -------------------------------------------------------------------------------- /cvolume.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from invcopulastat import invcopulastat 26 | from copulacdf import copulacdf 27 | 28 | def cvolume(family, u1v1, u1v2, u2v1, u2v2, *args): 29 | """ 30 | Computes the C-Volume of a specified copula family with dependency parameter 31 | defined in the args. 32 | family - the copula type, must be: 33 | 'Gaussian' 34 | 'T' 35 | 'Clayton' 36 | 'Frank' 37 | 'Gumbel' 38 | u1v1 - a N x 2 matrix of values between [0,1] that represents the bottom 39 | left coordinate of the grid for which the C-Volume is desired 40 | u1v2 - a N x 2 matrix of values between [0,1] that represent the top 41 | left coordinate of the grid for which the C-Volume is desired 42 | u2v1 - a N x 2 matrix of values between [0,1] that represent the bottom 43 | right coordinate of the grid for which the C-volume is desired 44 | u2v2 - a N x 2 matrix of values between [0,1] that represents the top 45 | right coordinate of the grid for which the C-Volume is desired 46 | args - must be atleast of length 2, for which the first element in args 47 | is expected to be a string which describes the dependency value 48 | being provided, must be one of the following: 49 | 'kendall' - means kendall's Tau is being provided 50 | 'spearman' - means spearman's rho is being provided 51 | 'native' - means that the dependency parameter of the copula family 52 | itself is being provided directly 53 | the second argmuent must be the value of the dependency type 54 | provided. For kendall and spearman, a scalar value is expected. 55 | For native, if the family type is Frank, Gumbel, or Clayton, then 56 | a scalar value is expected, which represents the dependency 57 | parameter. If the family type is Gaussian, then a 2 x 2 numpy array 58 | is expected, which represents the correlation matrix defining the 59 | Gaussian copula. If the family is T, then the 2nd argument is the 60 | 2x2 numpy array representing the correlation matrix, and the 3rd 61 | argument is the degrees of freedom 62 | """ 63 | family_lc = family.lower() 64 | if(family_lc=='gaussian'): 65 | if(len(args)<2): 66 | raise ValueError("Gaussian Family expects 2 variable arguments, the dependency type and value") 67 | if(args[0]=='kendall' or args[0]=='spearman'): 68 | # get the correlation parameter 69 | r = invcopulastat(family, args[0], args[1]) 70 | else: 71 | r = args[1] 72 | 73 | cvol = _gaussian(u1v1, u1v2, u2v1, u2v2, r) 74 | elif(family_lc=='t'): 75 | if(len(args)<2): 76 | raise ValueError("T Family expects atleast 2 variable arguments, the dependency type and value") 77 | 78 | if(args[0]=='kendall' or args[0]=='spearman'): 79 | raise ValueError('T Family does not accept Kendalls Tau or Spearmans Rho, only native parameters') 80 | else: 81 | r = args[1] 82 | nu = args[2] 83 | 84 | cvol = _gaussian(u1v1, u1v2, u2v1, u2v2, r, nu) 85 | 86 | elif(family_lc=='clayton'): 87 | if(len(args)<2): 88 | raise ValueError("Clayton Family expects 2 variable arguments, the dependency type and value") 89 | 90 | if(args[0]=='kendall' or args[0]=='spearman'): 91 | # get the correlation parameter and degrees of freedom 92 | alpha = invcopulastat(family, args[0], args[1]) 93 | else: 94 | alpha = args[1] 95 | 96 | cvol = _clayton(u1v1, u1v2, u2v1, u2v2, alpha) 97 | 98 | elif(family_lc=='frank'): 99 | if(len(args)<2): 100 | raise ValueError("Frank Family expects 2 variable arguments, the dependency type and value") 101 | if(args[0]=='kendall' or args[0]=='spearman'): 102 | # get the correlation parameter and degrees of freedom 103 | alpha = invcopulastat(family, args[0], args[1]) 104 | else: 105 | alpha = args[1] 106 | 107 | cvol = _frank(u1v1, u1v2, u2v1, u2v2, alpha) 108 | 109 | elif(family_lc=='gumbel'): 110 | if(len(args)<2): 111 | raise ValueError("Gumbel Family expects 2 variable arguments, the dependency type and value") 112 | if(args[0]=='kendall' or args[0]=='spearman'): 113 | # get the correlation parameter and degrees of freedom 114 | alpha = invcopulastat(family, args[0], args[1]) 115 | else: 116 | alpha = args[1] 117 | 118 | cvol = _gumbel(u1v1, u1v2, u2v1, u2v2, alpha) 119 | 120 | return cvol 121 | 122 | 123 | def _gaussian(u1v1, u1v2, u2v1, u2v2, r): 124 | # generate the Rho matrix from r 125 | Rho = np.ones((2,2)) 126 | Rho[0][1] = r 127 | Rho[1][0] = r 128 | 129 | # this is the equation for C Volume as defined by Nelsen 130 | cvol = copulacdf('Gaussian', u2v2, Rho) - \ 131 | copulacdf('Gaussian', u2v1, Rho) - \ 132 | copulacdf('Gaussian', u1v2, Rho) + \ 133 | copulacdf('Gaussian', u1v1, Rho) 134 | 135 | return cvol 136 | 137 | def _t(u1v1, u1v2, u2v1, u2v2, r, nu): 138 | # generate the Rho matrix from r 139 | Rho = np.ones((2,2)) 140 | Rho[0][1] = r 141 | Rho[1][0] = r 142 | 143 | # this is the equation for C Volume as defined by Nelsen 144 | cvol = copulacdf('T', u2v2, Rho, nu) - \ 145 | copulacdf('T', u2v1, Rho, nu) - \ 146 | copulacdf('T', u1v2, Rho, nu) + \ 147 | copulacdf('T', u1v1, Rho, nu) 148 | 149 | return cvol 150 | 151 | return None 152 | 153 | def _clayton(u1v1, u1v2, u2v1, u2v2, alpha): 154 | 155 | # this is the equation for C Volume as defined by Nelsen 156 | cvol = copulacdf('Clayton', u2v2, alpha) - \ 157 | copulacdf('Clayton', u2v1, alpha) - \ 158 | copulacdf('Clayton', u1v2, alpha) + \ 159 | copulacdf('Clayton', u1v1, alpha) 160 | 161 | return cvol 162 | 163 | def _frank(u1v1, u1v2, u2v1, u2v2, alpha): 164 | 165 | # this is the equation for C Volume as defined by Nelsen 166 | cvol = copulacdf('Frank', u2v2, alpha) - \ 167 | copulacdf('Frank', u2v1, alpha) - \ 168 | copulacdf('Frank', u1v2, alpha) + \ 169 | copulacdf('Frank', u1v1, alpha) 170 | 171 | return cvol 172 | 173 | def _gumbel(u1v1, u1v2, u2v1, u2v2, alpha): 174 | 175 | # this is the equation for C Volume as defined by Nelsen 176 | cvol = copulacdf('Gumbel', u2v2, alpha) - \ 177 | copulacdf('Gumbel', u2v1, alpha) - \ 178 | copulacdf('Gumbel', u1v2, alpha) + \ 179 | copulacdf('Gumbel', u1v1, alpha) 180 | 181 | return cvol 182 | -------------------------------------------------------------------------------- /debye.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import numpy as np 23 | import scipy.integrate as integrate 24 | 25 | def debye(x, n): 26 | """ 27 | Evaluate the Debye function. 28 | See http://en.wikipedia.org/wiki/Debye_function for details 29 | """ 30 | 31 | # ensure n is a float 32 | n = float(n) 33 | 34 | sol = integrate.quad( lambda t: pow(t,n)/(np.exp(t)-1.0) , 0.0, x) 35 | return n*sol[0]/pow(x,n) -------------------------------------------------------------------------------- /ecdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from scipy.interpolate import interp1d 26 | 27 | """ 28 | e_cdf.py contains routines which help perform empirical CDF Estimation. 29 | """ 30 | 31 | def ecdf(x_i, npoints): 32 | """ Generates an Empirical CDF using the indicator function. 33 | 34 | Inputs: 35 | x_i -- the input data set, should be a numpy array 36 | npoints -- the number of desired points in the empirical CDF estimate 37 | 38 | Outputs: 39 | y -- the empirical CDF 40 | """ 41 | # define the points over which we will generate the kernel density estimate 42 | x = np.linspace(min(x_i), max(x_i), npoints) 43 | n = float(x_i.size) 44 | y = np.zeros(npoints) 45 | 46 | for ii in np.arange(x.size): 47 | idxs = np.where(x_i<=x[ii]) 48 | y[ii] = np.sum(idxs[0].size)/n 49 | 50 | return (x,y) 51 | 52 | def kde_integral(kde): 53 | """ Generates a "smoother" Empirical CDF by integrating the KDE. For this, 54 | the user should first generate the KDE using kde.py, and then pass the 55 | density estimate to this function 56 | 57 | Inputs: 58 | kde -- the kernel density estimate 59 | 60 | Outputs: 61 | y -- the smoothed CDF estimate 62 | """ 63 | y = np.cumsum(kde)/sum(kde) 64 | 65 | return y 66 | 67 | def probability_integral_transform(X): 68 | """ 69 | Takes a data array X of dimension [M x N], and converts it to a uniform 70 | random variable using the probability integral transform, U = F(X) 71 | """ 72 | M = X.shape[0] 73 | N = X.shape[1] 74 | 75 | # convert X to U by using the probability integral transform: F(X) = U 76 | U = np.empty(X.shape) 77 | for ii in range(0,N): 78 | x_ii = X[:,ii] 79 | 80 | # estimate the empirical cdf 81 | (xx,pp) = ecdf(x_ii, M) 82 | f = interp1d(xx, pp) # TODO: experiment w/ different kinds of interpolation? 83 | # for example, cubic, or spline etc...? 84 | 85 | # plug this RV sample into the empirical cdf to get uniform RV 86 | u_ii = f(x_ii) 87 | U[:,ii] = u_ii 88 | 89 | return U 90 | 91 | if __name__=='__main__': 92 | import matplotlib.pyplot as plt 93 | import sys 94 | import kde 95 | 96 | from scipy.stats import norm 97 | from scipy.stats import expon 98 | 99 | # test the E_CDF estimation 100 | N1 = 100 # number of data in data set 1 101 | m1 = -1 # mean value 102 | s1 = 0.1 # % variance 103 | 104 | N2 = 500 # number of data in data set 2 105 | m2 = 2 # mean value 106 | s2 = 0.5 # variance 107 | 108 | h = 0.1 # bandwidth 109 | npoints = 100 # number of abscis points in kde 110 | 111 | x1 = math.sqrt(s1)*np.random.randn(N1,1) + m1 112 | x2 = math.sqrt(s2)*np.random.randn(N2,1) + m2 113 | x = np.concatenate((x1,x2),axis=0) 114 | 115 | # Kernel Density Estimate 116 | (xx,kde_estimate) = kde.kde(x,'Gaussian',h, npoints) 117 | plt.plot(xx,kde_estimate, 'r', label='Kernel Density Estimate') 118 | 119 | # the histogram of the data 120 | n, bins, patches = plt.hist(x, 50, normed=1, facecolor='green', alpha=0.75, label='Histogram') 121 | 122 | # empirical CDF 123 | (xx,pp) = ecdf(x, npoints) 124 | plt.plot(xx,pp, 'k', label='Empirical CDF') 125 | 126 | # Smooth Empirical CDF (KDE Integral) 127 | kde_integral = kde_integral(kde_estimate) 128 | plt.plot(xx,kde_integral, 'm', label='Smooth Empirical CDF') 129 | plt.legend(loc='upper left') 130 | plt.show() 131 | 132 | # test the probability integral transform 133 | M = 100 134 | N = 2 135 | X = np.empty((M,N)) 136 | X[:,0] = norm.rvs(size=M) 137 | X[:,1] = expon.rvs(size=M) 138 | 139 | U = probability_integral_transform(X) 140 | 141 | f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2) 142 | ax1.hist(X[:,0]) 143 | ax1.set_title('Guassian RV') 144 | ax2.hist(U[:,0]) 145 | ax2.set_title('Gaussian Transformed to Uniform') 146 | ax3.hist(X[:,1]) 147 | ax3.set_title('Exponential RV') 148 | ax4.hist(U[:,1]) 149 | ax4.set_title('Exponential Transformed to Uniform') 150 | plt.show() 151 | -------------------------------------------------------------------------------- /invcopulastat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from debye import debye 26 | from scipy.optimize import fsolve 27 | 28 | from copulastat import copulastat 29 | 30 | """ 31 | invcopulastat.py contains routines which provide the inverse copula dependency 32 | measures the copula family type and the copula's specific dependency parameter. 33 | 34 | The relationships used in the functions are well known, and documented in 35 | many copula research papers, including Nelsen's Introduction to Copula's. 36 | """ 37 | 38 | def invcopulastat(family, dependency, val): 39 | dependency_lc = dependency.lower() 40 | if(dependency_lc!='kendall' and dependency_lc!='spearman'): 41 | raise ValueError('Invalid dependency argument -- must be kendall or spearman') 42 | if(family.lower()=='gaussian'): 43 | r = _gaussian(dependency_lc, val) 44 | elif(family.lower()=='t'): 45 | r = _t(dependency_lc, val) 46 | elif(family.lower()=='clayton'): 47 | r = _clayton(dependency_lc, val) 48 | elif(family.lower()=='gumbel'): 49 | r = _gumbel(dependency_lc, val) 50 | elif(family.lower()=='frank'): 51 | r = _frank(dependency_lc, val) 52 | else: 53 | raise ValueError('Unsupported Copula Family!') 54 | 55 | return r 56 | 57 | def _gaussian(dependency, val): 58 | if(dependency=='kendall'): 59 | r = np.sin(math.pi/2.0*val) 60 | elif(dependency=='spearman'): 61 | r = 2*np.sin(math.pi/6.0*val) 62 | return r 63 | 64 | def _t(dependency, val): 65 | if(dependency=='kendall'): 66 | r = np.sin(math.pi/2.0*val) 67 | elif(dependency=='spearman'): 68 | r = 2*np.sin(math.pi/6.0*val) 69 | return r 70 | 71 | def _clayton(dependency, val): 72 | if(dependency=='kendall'): 73 | if(val<0 or val>=1): 74 | raise ValueError('Valid values of Kendall\'s Tau for the Clayton Copula are [0,1)') 75 | d = 2.0*val/(1.0-val) 76 | elif(dependency=='spearman'): 77 | raise NotImplementedError('Spearmans Rho currently unsupported for Clayton Copula family!') 78 | 79 | return d 80 | 81 | def _gumbel(dependency, val): 82 | if(dependency=='kendall'): 83 | if(val<0 or val>=1): 84 | raise ValueError('Valid values of Kendall\'s Tau for the Gumbel Copula are [0,1)') 85 | d = 1.0/(1.0-val) 86 | elif(dependency=='spearman'): 87 | raise NotImplementedError('Spearmans Rho currently unsupported for Gumbel Copula family!') 88 | 89 | return d 90 | 91 | def _frank_kendall_fopt(alpha, tau): 92 | return 4*( debye(alpha,1) - 1 )/alpha + 1 - tau 93 | 94 | def _frank(dependency, val): 95 | if(dependency=='kendall'): 96 | return fsolve(_frank_kendall_fopt, 1, args=(val))[0] 97 | elif(dependency=='spearman'): 98 | # TODO -- use function solvers in scipy to invert debye function for the closed form solution 99 | raise NotImplementedError('Spearmans Rho currently unsupported for Frank Copula family!') 100 | 101 | return r 102 | 103 | def test_python_vs_matlab(family): 104 | # DISCLAIMER: this code assumes copulastat is working properly and tested 105 | 106 | if(family.lower()=='gaussian'): 107 | dependency = 'kendall' 108 | rho = 0.3 109 | rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho)) 110 | p1 = np.isclose(rho, rho_calc) 111 | 112 | rho = 0.7 113 | rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho)) 114 | p2 = np.isclose(rho, rho_calc) 115 | 116 | dependency = 'spearman' 117 | rho = 0.3 118 | rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho)) 119 | p3 = np.isclose(rho, rho_calc) 120 | 121 | rho = 0.7 122 | rho_calc = invcopulastat(family, dependency, copulastat(family, dependency, rho)) 123 | p4 = np.isclose(rho, rho_calc) 124 | 125 | if(p1 and p2 and p3 and p4): 126 | print 'Gaussian CopulaStat tests PASSED!' 127 | else: 128 | print 'Gaussian CopulaStat tests FAILED!' 129 | 130 | elif(family.lower()=='t'): 131 | pass 132 | 133 | elif(family.lower()=='clayton' or family.lower()=='gumbel' or family.lower()=='frank'): 134 | dependency = 'kendall' 135 | alpha = 0.3 136 | tau = copulastat(family, dependency, alpha) 137 | alpha_calc = invcopulastat(family, dependency, tau) 138 | p1 = np.isclose(alpha, alpha_calc) 139 | 140 | alpha = 0.7 141 | tau = copulastat(family, dependency, alpha) 142 | alpha_calc = invcopulastat(family, dependency, tau) 143 | p2 = np.isclose(alpha, alpha_calc) 144 | 145 | if(p1 and p2): 146 | print family + ' CopulaStat tests PASSED!' 147 | else: 148 | print family + ' CopulaStat tests FAILED!' 149 | 150 | if __name__=='__main__': 151 | test_python_vs_matlab('Gaussian') 152 | test_python_vs_matlab('Clayton') 153 | test_python_vs_matlab('Gumbel') 154 | test_python_vs_matlab('Frank') 155 | -------------------------------------------------------------------------------- /kde.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | """ 26 | kde.py contains routines which help perform Kernel Density Estimation (KDE). 27 | """ 28 | 29 | def kde(x_i, kernel, h, n_points): 30 | """ Perform Kernel Density Estimation on a given set of points. 31 | 32 | Inputs: 33 | x_i -- the input data set, should be a numpy array 34 | kernel -- the kernel to use, must be a string of one of the choices: 35 | Uniform 36 | Triangular 37 | Epanechnikov 38 | Quartic 39 | Triweight 40 | Tricube 41 | Gaussian 42 | Cosine 43 | Logistic 44 | Silverman 45 | h -- the kernel bandwidth setting 46 | npoints -- the number of desired points in the kernel density estimate 47 | 48 | Outputs: 49 | y -- the kernel density estimate 50 | """ 51 | # define the points over which we will generate the kernel density estimate 52 | x = np.linspace(min(x_i), max(x_i), n_points) 53 | n = x_i.size 54 | y = np.zeros(n_points) 55 | 56 | for ii in np.arange(n_points): 57 | # apply the kernel to the point of interest 58 | if(kernel.lower()=='uniform'): 59 | y[ii] = 1.0/(n*h) * np.sum(uniform_kernel( (x[ii]-x_i)/h ) ) 60 | elif(kernel.lower()=='triangular'): 61 | y[ii] = 1.0/(n*h) * np.sum(triangle_kernel( (x[ii]-x_i)/h ) ) 62 | elif(kernel.lower()=='epanechnikov'): 63 | y[ii] = 1.0/(n*h) * np.sum(epanechnikov_kernel( (x[ii]-x_i)/h ) ) 64 | elif(kernel.lower()=='quartic'): 65 | y[ii] = 1.0/(n*h) * np.sum(quartic_kernel( (x[ii]-x_i)/h ) ) 66 | elif(kernel.lower()=='triweight'): 67 | y[ii] = 1.0/(n*h) * np.sum(triweight_kernel( (x[ii]-x_i)/h ) ) 68 | elif(kernel.lower()=='tricube'): 69 | y[ii] = 1.0/(n*h) * np.sum(tricube_kernel( (x[ii]-x_i)/h ) ) 70 | elif(kernel.lower()=='gaussian'): 71 | y[ii] = 1.0/(n*h) * np.sum(gaussian_kernel( (x[ii]-x_i)/h ) ) 72 | elif(kernel.lower()=='cosine'): 73 | y[ii] = 1.0/(n*h) * np.sum(cosine_kernel( (x[ii]-x_i)/h ) ) 74 | elif(kernel.lower()=='logistic'): 75 | y[ii] = 1.0/(n*h) * np.sum(logistic_kernel( (x[ii]-x_i)/h ) ) 76 | elif(kernel.lower()=='silverman'): 77 | y[ii] = 1.0/(n*h) * np.sum(silverman_kernel( (x[ii]-x_i)/h ) ) 78 | else: 79 | print 'In here:)' 80 | 81 | return (x,y) 82 | 83 | def uniform_kernel(u): 84 | """ 85 | %UNIFORM_KDE - the uniform kernel 86 | """ 87 | idxs = np.where(abs(u)<=1) 88 | y = np.zeros(u.size) 89 | y[idxs[0]] = 1.0/2.0 90 | 91 | return y 92 | 93 | def triangle_kernel(u): 94 | """ 95 | %TRIANGLE_KDE - the triangular kernel 96 | """ 97 | idxs = np.where(abs(u)<=1) 98 | y = np.zeros(u.size) 99 | y[idxs[0]] = 1.0-abs(u[idxs[0]]) 100 | 101 | return y 102 | 103 | def epanechnikov_kernel(u): 104 | """ 105 | %EPANECHNIKOV_KDGE - the epanechnikov kernel 106 | """ 107 | idxs = np.where(abs(u)<=1) 108 | y = np.zeros(u.size) 109 | y[idxs[0]] = 3.0/4.0*(1-np.power(u[idxs[0]],2)) 110 | 111 | return y 112 | 113 | def quartic_kernel(u): 114 | """ 115 | %QUARTIC_KDE - the quartic kernel 116 | """ 117 | idxs = np.where(abs(u)<=1) 118 | y = np.zeros(u.size) 119 | y[idxs[0]] = 15.0/16.0*np.power((1-np.power(u[idxs[0]],2)),2) 120 | 121 | return y 122 | 123 | def triweight_kernel(u): 124 | """ 125 | %QUARTIC_KDE - the triweight kernel 126 | """ 127 | idxs = np.where(abs(u)<=1) 128 | y = np.zeros(u.size) 129 | y[idxs[0]] = 35.0/32.0*np.power((1-np.power(u[idxs[0]],2)),3) 130 | 131 | return y 132 | 133 | def tricube_kernel(u): 134 | """ 135 | %QUARTIC_KDE - the quartic kernel 136 | """ 137 | idxs = np.where(abs(u)<=1) 138 | y = np.zeros(u.size) 139 | y[idxs[0]] = 70.0/81.0*np.power((1-np.power(abs(u[idxs[0]]),3)),3) 140 | 141 | return y 142 | 143 | def gaussian_kernel(u): 144 | """ 145 | %GAUSSIAN_KDE - the gaussian kernel 146 | """ 147 | y = 1.0/math.sqrt(2*math.pi) * np.exp(-np.power(u,2)/2.0) 148 | 149 | return y 150 | 151 | def cosine_kernel(u): 152 | """ 153 | %COSINE_KDE - the cosine kernel 154 | """ 155 | idxs = np.where(abs(u)<=1) 156 | y = np.zeros(u.size) 157 | y[idxs[0]] = math.pi/4.0*np.cos(math.pi/2.0*u[idxs[0]]) 158 | 159 | return y 160 | 161 | def logistic_kernel(u): 162 | """ 163 | %LOGISTIC_KDE - the logistic kernel 164 | """ 165 | y = 1.0/(np.exp(u) + 2.0 + np.exp(-u)) 166 | 167 | return y 168 | 169 | def silverman_kernel(u): 170 | """ 171 | %SILVERMAN_KDE - the silverman kernel 172 | """ 173 | y = 1.0/2.0 * np.exp(-abs(u)/math.sqrt(2)) * np.sin(abs(u)/2 + math.pi/4) 174 | 175 | return y 176 | 177 | 178 | if __name__=='__main__': 179 | import matplotlib.pyplot as plt 180 | import sys 181 | 182 | # TODO: put in argument to allow user to test windows and change 183 | # the if from if false to that if condition 184 | 185 | if(False): 186 | # Plot the Uniform Kernel 187 | x_i = np.linspace(-2,2,100) 188 | 189 | y = uniform_kernel(x_i) 190 | plt.plot(x_i,y) 191 | plt.title('Uniform Kernel') 192 | plt.show() 193 | 194 | # Plot the Triangle Kernel 195 | y = triangle_kernel(x_i) 196 | plt.plot(x_i,y) 197 | plt.title('Triangle Kernel') 198 | plt.show() 199 | 200 | # Plot the Epanechnikov Kernel 201 | y = epanechnikov_kernel(x_i) 202 | plt.plot(x_i,y) 203 | plt.title('Epanechnikov Kernel') 204 | plt.show() 205 | 206 | # Plot the Quartic Kernel 207 | y = quartic_kernel(x_i) 208 | plt.plot(x_i,y) 209 | plt.title('Quartic Kernel') 210 | plt.show() 211 | 212 | # Plot the Triweight Kernel 213 | y = triweight_kernel(x_i) 214 | plt.plot(x_i,y) 215 | plt.title('Triweight Kernel') 216 | plt.show() 217 | 218 | # Plot the Tricube Kernel 219 | y = tricube_kernel(x_i) 220 | plt.plot(x_i,y) 221 | plt.title('Tricube Kernel') 222 | plt.show() 223 | 224 | # Plot the Gaussian Kernel 225 | y = gaussian_kernel(x_i) 226 | plt.plot(x_i,y) 227 | plt.title('Gaussian Kernel') 228 | plt.show() 229 | 230 | # Plot the Cosine Kernel 231 | y = cosine_kernel(x_i) 232 | plt.plot(x_i,y) 233 | plt.title('Cosine Kernel') 234 | plt.show() 235 | 236 | # Plot the Logistic Kernel 237 | y = logistic_kernel(x_i) 238 | plt.plot(x_i,y) 239 | plt.title('Logistic Kernel') 240 | plt.show() 241 | 242 | # Plot the Silverman Kernel 243 | y = silverman_kernel(x_i) 244 | plt.plot(x_i,y) 245 | plt.title('Silverman Kernel') 246 | plt.show() 247 | 248 | # test the KDE estimation 249 | N1 = 100 # number of data in data set 1 250 | m1 = -1 # mean value 251 | s1 = 0.1 # % variance 252 | 253 | N2 = 500 # number of data in data set 2 254 | m2 = 2 # mean value 255 | s2 = 0.5 # variance 256 | 257 | h = 0.1 # bandwidth 258 | npoints = 100 # number of abscis points in kde 259 | 260 | x1 = math.sqrt(s1)*np.random.randn(N1,1) + m1 261 | x2 = math.sqrt(s2)*np.random.randn(N2,1) + m2 262 | x = np.concatenate((x1,x2),axis=0) 263 | 264 | # the histogram of the data 265 | n, bins, patches = plt.hist(x, 50, normed=1, facecolor='green', alpha=0.75) 266 | 267 | (xx,pp) = kde(x,'Gaussian',h, npoints) 268 | plt.plot(xx,pp, 'b') 269 | plt.title('Kernel Density Estimate') 270 | plt.show() -------------------------------------------------------------------------------- /matlab/copulacdf_test.m: -------------------------------------------------------------------------------- 1 | % Matlab test script which generates copula samples similar to copulacdf.py 2 | % for comparison purposes 3 | 4 | clear; 5 | clc; 6 | 7 | % remove the old copulacdf_test.mat 8 | delete('copulacdf_test.mat') 9 | 10 | % data which will define where we want to know the value of the Copula 11 | u = linspace(0+eps,1-eps,10); 12 | d = 2; 13 | [U1,U2] = meshgrid(u,u); 14 | 15 | % Generate samples of Gaussian copula 16 | rho = 0.8; 17 | Rho = [1 rho; rho 1]; 18 | gaussian_copula_cdf = copulacdf('gaussian',[U1(:) U2(:)], Rho); 19 | 20 | % Generate samples of T copula 21 | nu = 2; 22 | t_copula_cdf = copulacdf('t',[U1(:) U2(:)], Rho, nu); 23 | 24 | % Generate samples of the Clayton copula 25 | alpha = 0.3; 26 | clayton_copula_cdf = copulacdf('clayton',[U1(:) U2(:)], alpha); 27 | 28 | % Generate samples of the Frank copula 29 | frank_copula_cdf = copulacdf('frank',[U1(:) U2(:)], alpha); 30 | 31 | % Generate samples of the Gumbel Copula 32 | alpha = 1.5; 33 | gumbel_copula_cdf = copulacdf('gumbel',[U1(:) U2(:)], alpha); 34 | 35 | % save them all for testing against python generated data 36 | save('copulacdf_test.mat', ... 37 | 'gaussian_copula_cdf', ... 38 | 't_copula_cdf', ... 39 | 'clayton_copula_cdf', ... 40 | 'frank_copula_cdf', ... 41 | 'gumbel_copula_cdf') 42 | -------------------------------------------------------------------------------- /matlab/copulacdf_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticresearch/copula-py/ff347131b02adc76de70b03e0aa6578bd0d86f52/matlab/copulacdf_test.mat -------------------------------------------------------------------------------- /matlab/copulapdf_test.m: -------------------------------------------------------------------------------- 1 | % Matlab test script which generates copula samples similar to copulapdf.py 2 | % for comparison purposes 3 | 4 | % remove the old copulapdf_test.mat 5 | delete('copulapdf_test.mat') 6 | 7 | % data which will define where we want to know the value of the Copula 8 | u = linspace(0.1,0.9,10); 9 | d = 2; 10 | [U1,U2] = meshgrid(u,u); 11 | 12 | % Generate samples of Gaussian copula 13 | rho = 0.8; 14 | Rho = [1 rho; rho 1]; 15 | gaussian_copula_pdf = copulapdf('gaussian',[U1(:) U2(:)], Rho); 16 | 17 | % Generate samples of T copula 18 | nu = 2; 19 | t_copula_pdf = copulapdf('t',[U1(:) U2(:)], Rho, nu); 20 | 21 | % Generate samples of the Clayton copula 22 | alpha = 0.3; 23 | clayton_copula_pdf = copulapdf('clayton',[U1(:) U2(:)], alpha); 24 | 25 | % Generate samples of the Frank copula 26 | frank_copula_pdf = copulapdf('frank',[U1(:) U2(:)], alpha); 27 | 28 | % Generate samples of the Gumbel Copula 29 | alpha = 1.5; 30 | gumbel_copula_pdf = copulapdf('gumbel',[U1(:) U2(:)], alpha); 31 | 32 | % save them all for testing against python generated data 33 | save('copulapdf_test.mat', ... 34 | 'gaussian_copula_pdf', ... 35 | 't_copula_pdf', ... 36 | 'clayton_copula_pdf', ... 37 | 'frank_copula_pdf', ... 38 | 'gumbel_copula_pdf') 39 | -------------------------------------------------------------------------------- /matlab/copulapdf_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticresearch/copula-py/ff347131b02adc76de70b03e0aa6578bd0d86f52/matlab/copulapdf_test.mat -------------------------------------------------------------------------------- /matlab/copulastat_test.m: -------------------------------------------------------------------------------- 1 | % generate test data for copulastat.py 2 | 3 | clear; 4 | clc; 5 | 6 | % remove the old copulastat_test.mat 7 | delete('copulastat_test.mat') 8 | 9 | gauss_ktau_rho_0_3 = copulastat('Gaussian', 0.3, 'type', 'kendall'); 10 | gauss_srho_rho_0_3 = copulastat('Gaussian', 0.3, 'type', 'spearman'); 11 | gauss_ktau_rho_0_7 = copulastat('Gaussian', 0.7, 'type', 'kendall'); 12 | gauss_srho_rho_0_7 = copulastat('Gaussian', 0.7, 'type', 'spearman'); 13 | gauss_ktau_rho_1_0 = copulastat('Gaussian', 1.0, 'type', 'kendall'); 14 | gauss_srho_rho_1_0 = copulastat('Gaussian', 1.0, 'type', 'spearman'); 15 | 16 | clayton_ktau_alpha_0_3 = copulastat('clayton', 0.3, 'type', 'kendall'); 17 | clayton_srho_alpha_0_3 = copulastat('clayton', 0.3, 'type', 'spearman'); 18 | clayton_ktau_alpha_0_7 = copulastat('clayton', 0.7, 'type', 'kendall'); 19 | clayton_srho_alpha_0_7 = copulastat('clayton', 0.7, 'type', 'spearman'); 20 | clayton_ktau_alpha_1_0 = copulastat('clayton', 1.0, 'type', 'kendall'); 21 | clayton_srho_alpha_1_0 = copulastat('clayton', 1.0, 'type', 'spearman'); 22 | 23 | gumbel_ktau_alpha_1_0 = copulastat('gumbel', 1.0, 'type', 'kendall'); 24 | gumbel_srho_alpha_1_0 = copulastat('gumbel', 1.0, 'type', 'spearman'); 25 | gumbel_ktau_alpha_3_0 = copulastat('gumbel', 3.0, 'type', 'kendall'); 26 | gumbel_srho_alpha_3_0 = copulastat('gumbel', 3.0, 'type', 'spearman'); 27 | 28 | frank_ktau_alpha_0_3 = copulastat('frank', 0.3, 'type', 'kendall'); 29 | frank_srho_alpha_0_3 = copulastat('frank', 0.3, 'type', 'spearman'); 30 | frank_ktau_alpha_0_7 = copulastat('frank', 0.7, 'type', 'kendall'); 31 | frank_srho_alpha_0_7 = copulastat('frank', 0.7, 'type', 'spearman'); 32 | frank_ktau_alpha_1_0 = copulastat('frank', 1.0, 'type', 'kendall'); 33 | frank_srho_alpha_1_0 = copulastat('frank', 1.0, 'type', 'spearman'); 34 | 35 | save('copulastat_test.mat'); -------------------------------------------------------------------------------- /matlab/copulastat_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticresearch/copula-py/ff347131b02adc76de70b03e0aa6578bd0d86f52/matlab/copulastat_test.mat -------------------------------------------------------------------------------- /multivariate_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from scipy.stats import spearmanr 26 | from scipy.stats import kendalltau 27 | from scipy.misc import comb 28 | 29 | """ 30 | Encompasses calculation of Spearman's Rho and Kendall's Tau (and other statistical 31 | measures to be added in the future) for data with dimensionality >= 2 32 | """ 33 | 34 | def spearmans_rho(X): 35 | """ 36 | Calculates a generalized Spearman's rho for a data set given by X, as 37 | described by "Multivariate Extensions of Spearman's Rho and Related Statistics" 38 | Inputs: 39 | X - the input data, should be a numpy array of shape = M x N, where 40 | M is the number of samples, and N is the dimensionality of the data 41 | """ 42 | M = X.shape[0] 43 | N = X.shape[1] 44 | if N<2: 45 | raise ValueError('To calculate Spearman\'s Rho, need data of dimensionality >= 2') 46 | 47 | srho = 0.0 48 | for dim1 in range(0,N-1): 49 | for dim2 in range(dim1+1,N): 50 | (r,p) = spearmanr(X[:,dim1],X[:,dim2]) 51 | srho = srho + r 52 | # normalize 53 | srho = srho / comb(N,2) 54 | return srho 55 | 56 | def kendalls_tau(X): 57 | """ 58 | Calculates a generalized Kendall's tau for a data set given by X, as 59 | described by "Multivariate Extensions of Spearman's Rho and Related Statistics" 60 | 61 | Inputs: 62 | X - the input data, should be a numpy array of shape = M x N, where 63 | M is the number of samples, and N is the dimensionality of the data 64 | """ 65 | M = X.shape[0] 66 | N = X.shape[1] 67 | if N<2: 68 | raise ValueError('To calculate Kendall\'s Tau, need data of dimensionality >= 2') 69 | 70 | ktau = 0.0 71 | for dim1 in range(0,N-1): 72 | for dim2 in range(dim1+1,N): 73 | (t,p) = kendalltau(X[:,dim1],X[:,dim2]) 74 | ktau = ktau + t 75 | # normalize 76 | ktau = ktau / comb(N,2) 77 | return ktau 78 | 79 | if __name__=='__main__': 80 | X = np.array([[12,1,-3], 81 | [2,4,-4], 82 | [1,7,-6], 83 | [12,1,2], 84 | [2,0,1]]) 85 | srho = spearmans_rho(X) 86 | ktau = kendalls_tau(X) 87 | 88 | print srho, ktau -------------------------------------------------------------------------------- /plot_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | from mpl_toolkits.mplot3d import Axes3D 23 | import matplotlib.pyplot as plt 24 | from matplotlib import cm 25 | import pandas as pd 26 | 27 | def plot_3d(X,Y,Z, titleStr): 28 | fig = plt.figure() 29 | ax = fig.gca(projection='3d') 30 | surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, 31 | linewidth=0, antialiased=False) 32 | fig.colorbar(surf, shrink=0.5, aspect=5) 33 | plt.xlabel('U1') 34 | plt.ylabel('U2') 35 | plt.title(titleStr) 36 | plt.show() 37 | 38 | def pairs(X, titleStr): 39 | numCols = X.shape[1] 40 | # generate the columns 41 | colNames = [] 42 | for col in range(0,numCols): 43 | colNames.append(titleStr + ' U' + str(col+1)) 44 | 45 | df = pd.DataFrame(X, columns=colNames) 46 | axes = pd.tools.plotting.scatter_matrix(df, alpha=0.2) 47 | 48 | # turn grids on 49 | for ax1 in axes: 50 | for ax in ax1: 51 | ax.grid() 52 | 53 | plt.tight_layout() 54 | plt.show() -------------------------------------------------------------------------------- /rstable1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | #****************************************************************************** 5 | #* 6 | #* Copyright (C) 2015 Kiran Karra 7 | #* 8 | #* This program is free software: you can redistribute it and/or modify 9 | #* it under the terms of the GNU General Public License as published by 10 | #* the Free Software Foundation, either version 3 of the License, or 11 | #* (at your option) any later version. 12 | #* 13 | #* This program is distributed in the hope that it will be useful, 14 | #* but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | #* GNU General Public License for more details. 17 | #* 18 | #* You should have received a copy of the GNU General Public License 19 | #* along with this program. If not, see . 20 | #****************************************************************************** 21 | 22 | import math 23 | import numpy as np 24 | 25 | from scipy.stats import uniform 26 | from scipy.stats import expon 27 | 28 | """ 29 | Algorithms copied directly from R source code of the copula package 30 | - rstable1.R 31 | - retstable.c 32 | """ 33 | 34 | # delta is assumed to be 0 35 | def rstable1(n, alpha, beta, gamma=1, delta=0, pm=1): 36 | return _rstable_c(n, alpha) * gamma + delta 37 | 38 | def _rstable_c(n, alpha): 39 | return np.power(np.cos(math.pi/2.0*alpha), -1.0/alpha) * _rstable0(alpha) 40 | 41 | def _rstable0(alpha): 42 | U = uniform.rvs(size=1) 43 | while True: 44 | # generate non-zero exponential random variable 45 | W = expon.rvs(size=1) 46 | if(W!=0): 47 | break 48 | return np.power(_A(math.pi*U,alpha)/np.power(W,1.0-alpha),1.0/alpha) 49 | 50 | def _A(x, alpha): 51 | Ialpha = 1.0-alpha 52 | return _A_3(x, alpha, Ialpha) 53 | 54 | def _A_3(x, alpha, Ialpha): 55 | return np.power(Ialpha* np.sinc(Ialpha*x/math.pi), Ialpha) * \ 56 | np.power(alpha * np.sinc(alpha *x/math.pi), alpha) / np.sinc(x/math.pi) 57 | --------------------------------------------------------------------------------