├── LICENSE ├── README.md ├── cifar10_sup.sh ├── cifar10_unsup.sh ├── ckn ├── __init__.py ├── data.py ├── kernels.py ├── layers.py ├── loss.py ├── models.py ├── ops.py └── utils.py ├── experiments ├── cifar10_sup.py └── cifar10_unsup.py └── third-party └── miso_svm-1.0 ├── LICENSE.txt ├── PKG-INFO ├── README.md ├── cblas_alt_template.h ├── cblas_defvar.h ├── common.h ├── ctypes_utils.h ├── linalg.h ├── list.h ├── misc.h ├── miso.cpp ├── miso_svm ├── __init__.py ├── classification.py ├── miso.py └── quick.py ├── setup.py ├── svm.h └── utils.h /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. 
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. 
Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. 
Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | CKN-Pytorch-image 635 | Copyright (C) 2019 CHEN Dexiong 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | CKN-Pytorch-image Copyright (C) 2019 CHEN Dexiong 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Convolutional kernel network with Pytorch 2 | 3 | Re-implementation of Convolutional Kernel Network (CKN) from Mairal (2016) 4 | in Python based on the [Pytorch][1] framework. 5 | The package is available under the **GPL-v3** license. 6 | 7 | Author: Dexiong Chen 8 | 9 | Credits: Ghislain Durif, Mathilde Caron, Alberto Bietti, Julien Mairal 10 | 11 | The code is based on 12 | 13 | >Mairal, Julien. 14 | [End-to-end kernel learning with supervised convolutional kernel networks][5]. NIPS 2016. 
15 | 16 | If you have any issues, please contact dexiong.chen@inria.fr. 17 | 18 | ## Installation 19 | 20 | We strongly recommend users to use [anaconda][2] to install the following packages 21 | 22 | ``` 23 | numpy 24 | scipy 25 | scikit-learn 26 | pytorch=1.2.0 27 | miso_svm 28 | ``` 29 | The Python package `miso_svm` can be installed with (original [repository][3]) 30 | ``` 31 | cd third-party/miso_svm-1.0 32 | python setup.py install 33 | ``` 34 | 35 | ## Results 36 | 37 | Reproduction of the results from [Mairal (2016)][5] with this package. 38 | The results from the original paper (Mairal, 2016) were achieved using 39 | cudnn-based Matlab code available [here][4]. To run the following experiments, please first download the [data][6], put into the folder `./data/cifar-10` and then do 40 | 41 | ```bash 42 | export PYTHONPATH=$PWD:$PYTHONPATH 43 | cd experiments 44 | ``` 45 | 46 | #### Unsupervised CKN 47 | 48 | Here is a summary of the results of **unsupervised** CKN on CIFAR10 image classification dataset with pre-whitening 49 | and without data augmentation or model ensembling. 50 | 51 | ```bash 52 | # Code examples 53 | python cifar10_unsup.py --filters 64 256 --subsamplings 2 6 --kernel-sizes 3 3 54 | ``` 55 | 56 | | #layers | #filters | filter size | subsampling | sigma | Accuracy | 57 | |:---------:|:-------------:|:-------------:|:-----------:|:------------:|:--------:| 58 | | 2 | 64, 256 | 3, 3 | 2, 6 | 0.6 | 77.5 | 59 | | 2 | 256, 1024 | 3, 3 | 2, 6 | 0.6 | 82.0 | 60 | | 2 | 512, 8192 | 3, 2 | 2, 6 | 0.6 | 84.0 | 61 | 62 | #### Supervised CKN 63 | 64 | Here is a summary of the results of **supervised** CKN on CIFAR10 image classification dataset with pre-whitening 65 | and without data augmentation or model ensembling. 
66 | 67 | ```bash 68 | # Code examples 69 | python cifar10_sup.py --epochs 105 --lr 0.1 --alpha 0.001 --loss hinge --alternating --model ckn5 70 | python cifar10_sup.py --epochs 105 --lr 0.1 --alpha 0.1 --loss hinge --alternating --model ckn14 71 | ``` 72 | 73 | | Architecture | Accuracy | training time (GTX1080\_ti) | 74 | |:------------:|:--------:|:--------------------------:| 75 | | CKN-5 | 86.1 | ~60 min | 76 | | CKN-14 | 90.2 | ~260 min | 77 | 78 | 79 | [1]: https://pytorch.org/ 80 | [2]: https://anaconda.org/ 81 | [3]: https://gitlab.inria.fr/gdurif/ckn-tf/tree/prod/miso_svm/ 82 | [4]: https://gitlab.inria.fr/mairal/ckn-cudnn-matlab/ 83 | [5]: http://papers.nips.cc/paper/6184-bayesian-latent-structure-discovery-from-multi-neuron-recordings.pdf 84 | [6]: http://pascal.inrialpes.fr/data2/mairal/data/cifar_white.mat 85 | -------------------------------------------------------------------------------- /cifar10_sup.sh: -------------------------------------------------------------------------------- 1 | . s ## activate virtual environment 2 | cd experiments 3 | python cifar10_sup.py --epochs 105 --lr 0.1 --alpha 0.001 --loss hinge --alternating --model ckn5 4 | -------------------------------------------------------------------------------- /cifar10_unsup.sh: -------------------------------------------------------------------------------- 1 | #filters="64 256" # 77.6% 2 | #filters="256 1024" # 82.0% 3 | filters="512 8192" # 84.0% with kernel-sizes="3 2" 4 | kernels="3 2" 5 | . 
s ## activate virtual environment 6 | cd experiments 7 | python cifar10_unsup.py --filters $filters --subsamplings 2 6 --kernel-sizes $kernels 8 | -------------------------------------------------------------------------------- /ckn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/claying/CKN-Pytorch-image/19ae94bd5964ee0734fe413668f8293b2568304d/ckn/__init__.py -------------------------------------------------------------------------------- /ckn/data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import scipy.io as sio 4 | 5 | import numpy as np 6 | 7 | import torch 8 | import torch.utils.data as data 9 | import torchvision.transforms as transforms 10 | 11 | 12 | class Rescale(object): 13 | def __init__(self): 14 | self.xmax = None 15 | self.xmin = None 16 | 17 | def __call__(self, pic): 18 | if self.xmax is None: 19 | self.xmax = pic.max() 20 | self.xmin = pic.min() 21 | pic = 255 * (pic - self.xmin) / (self.xmax - self.xmin) 22 | return pic.astype('uint8') 23 | return self.xmin + pic * (self.xmax - self.xmin) 24 | 25 | def create_dataset(root, train=True, dataugmentation=False): 26 | # load dataset 27 | if not '.mat' in root: 28 | mean_pix = [x/255.0 for x in [125.3, 123.0, 113.9]] 29 | std_pix = [x/255.0 for x in [63.0, 62.1, 66.7]] 30 | tr = [transforms.ToTensor(), transforms.Normalize(mean=mean_pix, std=std_pix)] 31 | if dataugmentation: 32 | dt = [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()] 33 | tr = dt + tr 34 | dataset = torchvision.datasets.CIFAR10( 35 | root, 36 | train=train, 37 | transform=transforms.Compose(tr), 38 | download=True, 39 | ) 40 | return dataset 41 | else: 42 | tr = [transforms.ToTensor()] 43 | if dataugmentation: 44 | dt = [transforms.ToPILImage(), transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()] 45 | tr = dt + tr 46 | 
dataset = CIFARmatlab( 47 | root, 48 | train=train, 49 | transform=transforms.Compose(tr), 50 | augment=dataugmentation 51 | ) 52 | return dataset 53 | 54 | 55 | class CIFARmatlab(data.Dataset): 56 | def __init__(self, root, train=True, transform=None, augment=False, dtype='float32'): 57 | self.root = os.path.expanduser(root) 58 | self.transform = transform 59 | self.train = train # training set or test set 60 | if self.train: 61 | split = 'tr' 62 | else: 63 | split = 'te' 64 | matdata = sio.loadmat(root) 65 | R = matdata['X' + split][:, :32, :].transpose(2, 1, 0) 66 | G = matdata['X' + split][:, 32: 64, :].transpose(2, 1, 0) 67 | B = matdata['X' + split][:, 64:, :].transpose(2, 1, 0) 68 | data = np.stack([R, G, B], axis=3) 69 | labels = [e[0] for e in matdata['Y' + split]] 70 | data = data.astype(dtype) 71 | labels = labels 72 | if self.train: 73 | self.train_data = data 74 | self.train_labels = labels 75 | else: 76 | self.test_data = data 77 | self.test_labels = labels 78 | self.augment = augment 79 | 80 | def __getitem__(self, index): 81 | """ 82 | Args: 83 | index (int): Index 84 | Returns: 85 | tuple: (image, target) where target is index of the target class. 
86 | """ 87 | if self.train: 88 | img, target = self.train_data[index], self.train_labels[index] 89 | else: 90 | img, target = self.test_data[index], self.test_labels[index] 91 | 92 | if self.transform is not None: 93 | if self.augment: 94 | rs = Rescale() 95 | img = rs(img) 96 | img = self.transform(img) 97 | if self.augment: 98 | img = rs(img) 99 | del rs 100 | target = torch.tensor(target, dtype=torch.long) 101 | return img, target 102 | 103 | def __len__(self): 104 | if self.train: 105 | return len(self.train_data) 106 | else: 107 | return len(self.test_data) 108 | -------------------------------------------------------------------------------- /ckn/kernels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | def exp(x, alpha): 5 | """Element wise non-linearity 6 | kernel_exp is defined as k(x)=exp(alpha * (x-1)) 7 | return: 8 | same shape tensor as x 9 | """ 10 | return torch.exp(alpha*(x - 1.)) 11 | 12 | def poly(x, alpha=None): 13 | return x.pow(2) 14 | 15 | 16 | kernels = { 17 | "exp": exp, 18 | "poly": poly 19 | } -------------------------------------------------------------------------------- /ckn/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | 8 | from scipy import optimize 9 | from sklearn.linear_model.base import LinearModel, LinearClassifierMixin 10 | 11 | from . 
import ops 12 | from .kernels import kernels 13 | from .utils import spherical_kmeans, gaussian_filter_1d, normalize_, EPS 14 | 15 | 16 | class CKNLayer(nn.Conv2d): 17 | def __init__(self, in_channels, out_channels, kernel_size, 18 | padding="SAME", dilation=1, groups=1, subsampling=1, bias=False, 19 | kernel_func="exp", kernel_args=[0.5], kernel_args_trainable=False): 20 | """Define a CKN layer 21 | Args: 22 | kernel_args: an iterable object of paramters for kernel function 23 | """ 24 | if padding == "SAME": 25 | padding = kernel_size // 2 26 | else: 27 | padding = 0 28 | super(CKNLayer, self).__init__(in_channels, out_channels, kernel_size, 29 | stride=1, padding=padding, dilation=dilation, groups=groups, bias=False) 30 | self.normalize_() 31 | self.subsampling = subsampling 32 | self.patch_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1] 33 | 34 | self._need_lintrans_computed = True 35 | 36 | self.kernel_args_trainable = kernel_args_trainable 37 | self.kernel_func = kernel_func 38 | if isinstance(kernel_args, (int, float)): 39 | kernel_args = [kernel_args] 40 | if kernel_func == "exp": 41 | kernel_args = [1./kernel_arg ** 2 for kernel_arg in kernel_args] 42 | self.kernel_args = kernel_args 43 | if kernel_args_trainable: 44 | self.kernel_args = nn.ParameterList( 45 | [nn.Parameter(torch.Tensor([kernel_arg])) for kernel_arg in kernel_args]) 46 | 47 | kernel_func = kernels[kernel_func] 48 | self.kappa = lambda x: kernel_func(x, *self.kernel_args) 49 | 50 | self.register_buffer("ones", 51 | torch.ones(1, self.in_channels // self.groups, *self.kernel_size)) 52 | self.init_pooling_filter() 53 | 54 | self.ckn_bias = None 55 | if bias: 56 | self.ckn_bias = nn.Parameter( 57 | torch.zeros(1, self.in_channels // self.groups, *self.kernel_size)) 58 | 59 | self.register_buffer("lintrans", 60 | torch.Tensor(out_channels, out_channels)) 61 | 62 | def init_pooling_filter(self): 63 | size = 2 * self.subsampling + 1 64 | pooling_filter = 
gaussian_filter_1d(size, self.subsampling/math.sqrt(2)).view(-1, 1) 65 | pooling_filter = pooling_filter.mm(pooling_filter.t()) 66 | pooling_filter = pooling_filter.expand(self.out_channels, 1, size, size) 67 | self.register_buffer("pooling_filter", pooling_filter) 68 | 69 | def train(self, mode=True): 70 | super(CKNLayer, self).train(mode) 71 | self._need_lintrans_computed = True 72 | 73 | def _compute_lintrans(self): 74 | """Compute the linear transformation factor kappa(ZtZ)^(-1/2) 75 | Returns: 76 | lintrans: out_channels x out_channels 77 | """ 78 | if not self._need_lintrans_computed: 79 | return self.lintrans 80 | lintrans = self.weight.view(self.out_channels, -1) 81 | lintrans = lintrans.mm(lintrans.t()) 82 | lintrans = self.kappa(lintrans) 83 | lintrans = ops.matrix_inverse_sqrt(lintrans) 84 | if not self.training: 85 | self._need_lintrans_computed = False 86 | self.lintrans.data = lintrans.data 87 | 88 | return lintrans 89 | 90 | def _conv_layer(self, x_in): 91 | """Convolution layer 92 | Compute x_out = ||x_in|| x kappa(Zt x_in/||x_in||) 93 | Args: 94 | x_in: batch_size x in_channels x H x W 95 | self.filters: out_channels x in_channels x *kernel_size 96 | x_out: batch_size x out_channels x (H - kernel_size + 1) x (W - kernel_size + 1) 97 | """ 98 | if self.ckn_bias is not None: 99 | # compute || x - b || 100 | patch_norm_x = F.conv2d(x_in.pow(2), self.ones, bias=None, 101 | stride=1, padding=self.padding, 102 | dilation=self.dilation, 103 | groups=self.groups) 104 | patch_norm = patch_norm_x - 2 * F.conv2d(x_in, self.ckn_bias, bias=None, 105 | stride=1, padding=self.padding, dilation=self.dilation, 106 | groups=self.groups) 107 | patch_norm = patch_norm + self.ckn_bias.pow(2).sum() 108 | patch_norm = torch.sqrt(patch_norm.clamp(min=EPS)) 109 | 110 | x_out = super(CKNLayer, self).forward(x_in) 111 | bias = torch.sum( 112 | (self.weight * self.ckn_bias).view(self.out_channels, -1), dim=-1) 113 | bias = bias.view(1, self.out_channels, 1, 1) 114 | x_out = 
x_out - bias 115 | x_out = x_out / patch_norm.clamp(min=EPS) 116 | x_out = patch_norm * self.kappa(x_out) 117 | return x_out 118 | 119 | patch_norm = torch.sqrt(F.conv2d(x_in.pow(2), self.ones, bias=None, 120 | stride=1, padding=self.padding, dilation=self.dilation, 121 | groups=self.groups).clamp(min=EPS)) 122 | # patch_norm = patch_norm.clamp(EPS) 123 | 124 | x_out = super(CKNLayer, self).forward(x_in) 125 | x_out = x_out / patch_norm.clamp(min=EPS) 126 | x_out = patch_norm * self.kappa(x_out) 127 | return x_out 128 | 129 | def _mult_layer(self, x_in, lintrans): 130 | """Multiplication layer 131 | Compute x_out = kappa(ZtZ)^(-1/2) x x_in 132 | Args: 133 | x_in: batch_size x in_channels x H x W 134 | lintrans: in_channels x in_channels 135 | x_out: batch_size x in_channels x H x W 136 | """ 137 | batch_size, in_c, H, W = x_in.size() 138 | x_out = torch.bmm( 139 | lintrans.expand(batch_size, in_c, in_c), x_in.view(batch_size, in_c, -1)) 140 | return x_out.view(batch_size, in_c, H, W) 141 | 142 | def _pool_layer(self, x_in): 143 | """Pooling layer 144 | Compute I(z) = \sum_{z'} phi(z') x exp(-\beta_1 ||z'-z||_2^2) 145 | Args: 146 | x_in: batch_size x out_channels x H x W 147 | """ 148 | if self.subsampling <= 1: 149 | return x_in 150 | x_out = F.conv2d(x_in, self.pooling_filter, bias=None, 151 | stride=self.subsampling, padding=self.subsampling, 152 | groups=self.out_channels) 153 | return x_out 154 | 155 | def forward(self, x_in): 156 | """Encode function for a CKN layer 157 | Args: 158 | x_in: batch_size x in_channels x H x W 159 | """ 160 | x_out = self._conv_layer(x_in) 161 | #print(x_out.shape) 162 | x_out = self._pool_layer(x_out) 163 | lintrans = self._compute_lintrans() 164 | x_out = self._mult_layer(x_out, lintrans) 165 | #print(x_out.shape) 166 | return x_out 167 | 168 | def extract_2d_patches(self, x): 169 | """ 170 | x: batch_size x C x H x W 171 | out: (batch_size * nH * nW) x (C * kernel_size) 172 | """ 173 | h, w = self.kernel_size 174 | return 
x.unfold(2, h, 1).unfold(3, w, 1).transpose(1, 3).contiguous().view(-1, self.patch_dim) 175 | 176 | def sample_patches(self, x_in, n_sampling_patches=1000): 177 | """Sample patches from the given Tensor 178 | Args: 179 | x_in (batch_size x in_channels x H x W) 180 | n_sampling_patches (int): number of patches to sample 181 | Returns: 182 | patches: (batch_size x (H - filter_size + 1)) x (in_channels x filter_size) 183 | """ 184 | patches = self.extract_2d_patches(x_in) 185 | 186 | n_sampling_patches = min(patches.size(0), n_sampling_patches) 187 | patches = patches[:n_sampling_patches] 188 | return patches 189 | 190 | def unsup_train_(self, patches): 191 | """Unsupervised training for a CKN layer 192 | Args: 193 | patches: n x (in_channels x *kernel_size) 194 | Updates: 195 | filters: out_channels x in_channels x *kernel_size 196 | """ 197 | if self.ckn_bias is not None: 198 | print("estimating bias") 199 | m_patches = patches.mean(0) 200 | self.ckn_bias.data.copy_(m_patches.view_as(self.ckn_bias.data)) 201 | patches -= m_patches 202 | patches = normalize_(patches) 203 | block_size = None if self.patch_dim < 1000 else 10 * self.patch_dim 204 | weight = spherical_kmeans(patches, self.out_channels, block_size=block_size) 205 | weight = weight.view_as(self.weight.data) 206 | self.weight.data.copy_(weight) 207 | self._need_lintrans_computed = True 208 | 209 | def normalize_(self): 210 | norm = self.weight.data.view( 211 | self.out_channels, -1).norm(p=2, dim=-1).view(-1, 1, 1, 1) 212 | self.weight.data.div_(norm.clamp_(min=EPS)) 213 | 214 | def extra_repr(self): 215 | s = super(CKNLayer, self).extra_repr() 216 | s += ', subsampling={}'.format(self.subsampling) 217 | s += ', kernel=({}, {})'.format(self.kernel_func, self.kernel_args) 218 | return s 219 | 220 | class Linear(nn.Linear, LinearModel, LinearClassifierMixin): 221 | def __init__(self, in_features, out_features, alpha=0.0, fit_bias=True, 222 | penalty="l2", maxiter=1000): 223 | super(Linear, 
self).__init__(in_features, out_features, fit_bias) 224 | self.alpha = alpha 225 | self.fit_bias = fit_bias 226 | self.penalty = penalty 227 | self.maxiter = maxiter 228 | 229 | def forward(self, input, scale_bias=1.0): 230 | # out = super(Linear, self).forward(input) 231 | out = F.linear(input, self.weight, scale_bias * self.bias) 232 | return out 233 | 234 | def fit(self, x, y, criterion=None): 235 | # self.cuda() 236 | use_cuda = self.weight.is_cuda 237 | # print(use_cuda) 238 | if criterion is None: 239 | criterion = nn.CrossEntropyLoss() 240 | # reduction = criterion.reduction 241 | # criterion.reduction = 'sum' 242 | if isinstance(x, np.ndarray) or isinstance(y, np.ndarray): 243 | x = torch.from_numpy(x) 244 | y = torch.from_numpy(y) 245 | if use_cuda: 246 | x = x.cuda() 247 | y = y.cuda() 248 | 249 | alpha = self.alpha * x.shape[1] / x.shape[0] 250 | if self.bias is not None: 251 | scale_bias = (x ** 2).mean(-1).sqrt().mean().item() 252 | alpha *= scale_bias ** 2 253 | self.real_alpha = alpha 254 | self.scale_bias = scale_bias 255 | 256 | def eval_loss(w): 257 | w = w.reshape((self.out_features, -1)) 258 | if self.weight.grad is not None: 259 | self.weight.grad = None 260 | if self.bias is None: 261 | self.weight.data.copy_(torch.from_numpy(w)) 262 | else: 263 | if self.bias.grad is not None: 264 | self.bias.grad = None 265 | self.weight.data.copy_(torch.from_numpy(w[:, :-1])) 266 | self.bias.data.copy_(torch.from_numpy(w[:, -1])) 267 | y_pred = self(x, scale_bias=scale_bias).squeeze_(-1) 268 | loss = criterion(y_pred, y) 269 | loss.backward() 270 | if alpha != 0.0: 271 | if self.penalty == "l2": 272 | penalty = 0.5 * alpha * torch.norm(self.weight)**2 273 | elif self.penalty == "l1": 274 | penalty = alpha * torch.norm(self.weight, p=1) 275 | penalty.backward() 276 | loss = loss + penalty 277 | return loss.item() 278 | 279 | def eval_grad(w): 280 | dw = self.weight.grad.data 281 | if alpha != 0.0: 282 | if self.penalty == "l2": 283 | dw.add_(alpha, 
self.weight.data) 284 | if self.bias is not None: 285 | db = self.bias.grad.data 286 | dw = torch.cat((dw, db.view(-1, 1)), dim=1) 287 | return dw.cpu().numpy().ravel().astype("float64") 288 | 289 | w_init = self.weight.data 290 | if self.bias is not None: 291 | w_init = torch.cat((w_init, 1./scale_bias * self.bias.data.view(-1, 1)), dim=1) 292 | w_init = w_init.cpu().numpy().astype("float64") 293 | 294 | w = optimize.fmin_l_bfgs_b( 295 | eval_loss, w_init, fprime=eval_grad, maxiter=self.maxiter, disp=0) 296 | if isinstance(w, tuple): 297 | w = w[0] 298 | 299 | w = w.reshape((self.out_features, -1)) 300 | self.weight.grad.data.zero_() 301 | if self.bias is None: 302 | self.weight.data.copy_(torch.from_numpy(w)) 303 | else: 304 | self.bias.grad.data.zero_() 305 | self.weight.data.copy_(torch.from_numpy(w[:, :-1])) 306 | self.bias.data.copy_(scale_bias * torch.from_numpy(w[:, -1])) 307 | # criterion.reduction = reduction 308 | 309 | def fit2(self, x, y, criterion=None): 310 | from miso_svm import MisoClassifier 311 | if isinstance(x, torch.Tensor): 312 | x = x.numpy() 313 | if isinstance(y, torch.Tensor): 314 | y = y.numpy() 315 | scale_bias = np.sqrt((x ** 2).mean(-1)).mean() 316 | print(scale_bias) 317 | alpha = self.alpha * scale_bias ** 2 * x.shape[1] 318 | alpha /= x.shape[0] 319 | x = np.hstack([x, scale_bias * np.ones((x.shape[0], 1), dtype=x.dtype)]) 320 | y = y.astype('float32') 321 | clf = MisoClassifier(Lambda=alpha, eps=1e-04, max_iterations=100 * x.shape[0], verbose=False) 322 | clf.fit(x, y) 323 | self.weight.data.copy_(torch.from_numpy(clf.W[:, :-1])) 324 | self.bias.data.copy_(scale_bias * torch.from_numpy(clf.W[:, -1])) 325 | 326 | def decision_function(self, x): 327 | x = torch.from_numpy(x) 328 | if self.weight.is_cuda: 329 | x = x.cuda() 330 | return self(x).data.cpu().numpy() 331 | 332 | def predict(self, x): 333 | return np.argmax(self.decision_function(x), axis=1) 334 | 335 | def predict_proba(self, x): 336 | return self._predict_proba_lr(x) 
337 | 338 | @property 339 | def coef_(self): 340 | return self.weight.data.cpu().numpy() 341 | 342 | @property 343 | def intercept_(self): 344 | return self.bias.data.cpu().numpy() 345 | 346 | class Preprocessor(nn.Module): 347 | def __init__(self): 348 | super(Preprocessor, self).__init__() 349 | self.fitted = True 350 | 351 | def forward(self, input): 352 | out = input - input.mean(dim=1, keepdim=True) 353 | return out / out.norm(dim=1, keepdim=True).clamp(min=EPS) 354 | 355 | def fit(self, input): 356 | pass 357 | 358 | def fit_transform(self, input): 359 | self.fit(input) 360 | return self(input) 361 | -------------------------------------------------------------------------------- /ckn/loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | from torch.nn.modules.loss import _Loss 6 | 7 | 8 | class HingeLoss(_Loss): 9 | def __init__(self, nclass=10, weight=None, size_average=None, reduce=None, 10 | reduction='elementwise_mean', pos_weight=None, squared=True): 11 | super(HingeLoss, self).__init__(size_average, reduce, reduction) 12 | self.nclass = nclass 13 | self.squared = squared 14 | self.register_buffer('weight', weight) 15 | self.register_buffer('pos_weight', pos_weight) 16 | 17 | def forward(self, input, target): 18 | if not (target.size(0) == input.size(0)): 19 | raise ValueError( 20 | "Target size ({}) must be the same as input size ({})".format(target.size(), input.size())) 21 | if self.pos_weight is not None: 22 | pos_weight = 1 + (self.pos_weight - 1) * target 23 | target = 2 * F.one_hot(target, num_classes=self.nclass) - 1 24 | target = target.float() 25 | loss = F.relu(1. 
- target * input) 26 | if self.squared: 27 | loss = 0.5 * loss ** 2 28 | if self.weight is not None: 29 | loss = loss * self.weight 30 | if self.pos_weight is not None: 31 | loss = loss * pos_weight 32 | loss = loss.sum(dim=-1) 33 | if self.reduction == 'none': 34 | return loss 35 | elif self.reduction == 'elementwise_mean': 36 | return loss.mean() 37 | else: 38 | return loss.sum() 39 | 40 | LOSS = { 41 | 'ce': nn.CrossEntropyLoss, 42 | 'hinge': HingeLoss, 43 | } 44 | -------------------------------------------------------------------------------- /ckn/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from sklearn.model_selection import cross_val_score 7 | 8 | from timeit import default_timer as timer 9 | 10 | from .layers import CKNLayer, Linear, Preprocessor 11 | from miso_svm import MisoClassifier 12 | 13 | 14 | class CKNSequential(nn.Module): 15 | def __init__(self, in_channels, out_channels_list, kernel_sizes, 16 | subsamplings, kernel_funcs=None, kernel_args_list=None, 17 | kernel_args_trainable=False, **kwargs): 18 | 19 | assert len(out_channels_list) == len(kernel_sizes) == len(subsamplings), "incompatible dimensions" 20 | super(CKNSequential, self).__init__() 21 | 22 | self.n_layers = len(out_channels_list) 23 | self.in_channels = in_channels 24 | self.out_channels = out_channels_list[-1] 25 | 26 | ckn_layers = [] 27 | 28 | for i in range(self.n_layers): 29 | if kernel_funcs is None: 30 | kernel_func = "exp" 31 | else: 32 | kernel_func = kernel_funcs[i] 33 | if kernel_args_list is None: 34 | kernel_args = 0.5 35 | else: 36 | kernel_args = kernel_args_list[i] 37 | 38 | ckn_layer = CKNLayer(in_channels, out_channels_list[i], 39 | kernel_sizes[i], subsampling=subsamplings[i], 40 | kernel_func=kernel_func, kernel_args=kernel_args, 41 | kernel_args_trainable=kernel_args_trainable, 
**kwargs) 42 | 43 | ckn_layers.append(ckn_layer) 44 | in_channels = out_channels_list[i] 45 | 46 | self.ckn_layers = nn.Sequential(*ckn_layers) 47 | 48 | def __getitem__(self, idx): 49 | return self.ckn_layers[idx] 50 | 51 | def __len__(self): 52 | return len(self.ckn_layers) 53 | 54 | def __iter__(self): 55 | return self.ckn_layers._modules.values().__iter__() 56 | 57 | def forward_at(self, x, i=0): 58 | assert x.size(1) == self.ckn_layers[i].in_channels, "bad dimension" 59 | return self.ckn_layers[i](x) 60 | 61 | def forward(self, x): 62 | return self.ckn_layers(x) 63 | 64 | def representation(self, x, n=0): 65 | if n == -1: 66 | n = self.n_layers 67 | for i in range(n): 68 | x = self.forward_at(x, i) 69 | return x 70 | 71 | def normalize_(self): 72 | for module in self.ckn_layers: 73 | module.normalize_() 74 | 75 | def unsup_train_(self, data_loader, n_sampling_patches=100000, use_cuda=False, top_layers=None): 76 | """ 77 | x: size x C x H x W 78 | top_layers: module object represents layers before this layer 79 | """ 80 | self.train(False) 81 | if use_cuda: 82 | self.cuda() 83 | with torch.no_grad(): 84 | for i, ckn_layer in enumerate(self.ckn_layers): 85 | print() 86 | print('-------------------------------------') 87 | print(' TRAINING LAYER {}'.format(i + 1)) 88 | print('-------------------------------------') 89 | n_patches = 0 90 | try: 91 | n_patches_per_batch = (n_sampling_patches + len(data_loader) - 1) // len(data_loader) 92 | except: 93 | n_patches_per_batch = 1000 94 | patches = torch.Tensor(n_sampling_patches, ckn_layer.patch_dim) 95 | if use_cuda: 96 | patches = patches.cuda() 97 | 98 | for data, _ in data_loader: 99 | if use_cuda: 100 | data = data.cuda() 101 | # data = Variable(data, volatile=True) 102 | if top_layers is not None: 103 | data = top_layers(data) 104 | data = self.representation(data, i) 105 | data_patches = ckn_layer.sample_patches(data.data, n_patches_per_batch) 106 | size = data_patches.size(0) 107 | if n_patches + size > 
n_sampling_patches: 108 | size = n_sampling_patches - n_patches 109 | data_patches = data_patches[:size] 110 | patches[n_patches: n_patches + size] = data_patches 111 | n_patches += size 112 | if n_patches >= n_sampling_patches: 113 | break 114 | 115 | print("total number of patches: {}".format(n_patches)) 116 | patches = patches[:n_patches] 117 | ckn_layer.unsup_train_(patches) 118 | 119 | class CKNet(nn.Module): 120 | def __init__(self, nclass, in_channels, out_channels_list, kernel_sizes, 121 | subsamplings, kernel_funcs=None, kernel_args_list=None, 122 | kernel_args_trainable=False, image_size=32, 123 | fit_bias=True, alpha=0.0, maxiter=1000, **kwargs): 124 | super(CKNet, self).__init__() 125 | self.features = CKNSequential( 126 | in_channels, out_channels_list, kernel_sizes, 127 | subsamplings, kernel_funcs, kernel_args_list, 128 | kernel_args_trainable, **kwargs) 129 | 130 | out_features = out_channels_list[-1] 131 | factor = 1 132 | for s in subsamplings: 133 | factor *= s 134 | factor = (image_size - 1) // factor + 1 135 | self.out_features = factor * factor * out_features 136 | self.nclass = nclass 137 | 138 | self.initialize_scaler() 139 | self.classifier = Linear( 140 | self.out_features, nclass, fit_bias=fit_bias, alpha=alpha, maxiter=maxiter) 141 | 142 | def initialize_scaler(self, scaler=None): 143 | pass 144 | 145 | def forward(self, input): 146 | features = self.representation(input) 147 | return self.classifier(features) 148 | 149 | def representation(self, input): 150 | features = self.features(input).view(input.shape[0], -1) 151 | if hasattr(self, 'scaler'): 152 | features = self.scaler(features) 153 | return features 154 | 155 | def unsup_train_ckn(self, data_loader, n_sampling_patches=1000000, 156 | use_cuda=False): 157 | self.features.unsup_train_(data_loader, n_sampling_patches, use_cuda=use_cuda) 158 | 159 | def unsup_train_classifier(self, data_loader, criterion=None, use_cuda=False): 160 | encoded_train, encoded_target = self.predict( 161 
| data_loader, only_representation=True, use_cuda=use_cuda) 162 | self.classifier.fit(encoded_train, encoded_target, criterion) 163 | 164 | def predict(self, data_loader, only_representation=False, use_cuda=False): 165 | self.eval() 166 | if use_cuda: 167 | self.cuda() 168 | n_samples = len(data_loader.dataset) 169 | batch_start = 0 170 | for i, (data, target) in enumerate(data_loader): 171 | batch_size = data.shape[0] 172 | if use_cuda: 173 | data = data.cuda() 174 | with torch.no_grad(): 175 | if only_representation: 176 | batch_out = self.representation(data).data.cpu() 177 | else: 178 | batch_out = self(data).data.cpu() 179 | if i == 0: 180 | output = batch_out.new_empty(n_samples, batch_out.shape[-1]) 181 | target_output = target.new_empty(n_samples) 182 | output[batch_start:batch_start+batch_size] = batch_out 183 | target_output[batch_start:batch_start+batch_size] = target 184 | batch_start += batch_size 185 | return output, target_output 186 | 187 | def normalize_(self): 188 | self.features.normalize_() 189 | 190 | def print_norm(self): 191 | norms = [] 192 | with torch.no_grad(): 193 | for module in self.features: 194 | norms.append(module.weight.sum().item()) 195 | norms.append(self.classifier.weight.sum().item()) 196 | print(norms) 197 | 198 | class UnsupCKNet(CKNet): 199 | def initialize_scaler(self): 200 | self.scaler = Preprocessor() 201 | 202 | def unsup_train(self, data_loader, n_sampling_patches=1000000, 203 | use_cuda=False): 204 | self.train(False) 205 | print("Training CKN layers") 206 | tic = timer() 207 | self.unsup_train_ckn(data_loader, n_sampling_patches, use_cuda=use_cuda) 208 | toc = timer() 209 | print("Finished, elapsed time: {:.2f}min".format((toc - tic)/60)) 210 | print() 211 | print("Training classifier") 212 | tic = timer() 213 | self.unsup_train_classifier(data_loader, use_cuda=use_cuda) 214 | toc = timer() 215 | print("Finished, elapsed time: {:.2f}min".format((toc - tic)/60)) 216 | 217 | def unsup_cross_val(self, data_loader, 
test_loader=None, n_sampling_patches=500000, 218 | alpha_grid=None, kfold=5, scoring='accuracy', 219 | use_cuda=False): 220 | self.train(False) 221 | if alpha_grid is None: 222 | alpha_grid = np.arange(-15, 15) 223 | print("Training CKN layers") 224 | tic = timer() 225 | self.unsup_train_ckn(data_loader, n_sampling_patches, use_cuda=use_cuda) 226 | toc = timer() 227 | print("Finished, elapsed time: {:.2f}min".format((toc - tic)/60)) 228 | print() 229 | print("Start cross-validation") 230 | best_score = -float('inf') 231 | best_alpha = 0 232 | tic = timer() 233 | encoded_train, encoded_target = self.predict( 234 | data_loader, only_representation=True, use_cuda=use_cuda) 235 | 236 | n_samples = len(encoded_target) * (1 - 1. / kfold) 237 | 238 | clf = self.classifier 239 | n_jobs = None if use_cuda else -1 240 | iter_since_best = 0 241 | print(encoded_train.shape) 242 | print(encoded_target.shape) 243 | 244 | if test_loader is not None: 245 | encoded_test, encoded_label = self.predict( 246 | test_loader, only_representation=True, use_cuda=use_cuda) 247 | 248 | encoded_train = encoded_train.numpy() 249 | encoded_target = encoded_target.numpy().astype('float32') 250 | encoded_test = encoded_test.numpy() 251 | encoded_label = encoded_label.numpy().astype('float32') 252 | 253 | for alpha in alpha_grid: 254 | alpha = 1. / (2. * n_samples * 2.**alpha) 255 | #alpha = 1. / (2. * 2. 
** alpha) 256 | print("lambda={}".format(alpha)) 257 | clf = MisoClassifier( 258 | Lambda=alpha, max_iterations=int(1000*n_samples), verbose=True, seed=31, threads=0) 259 | if test_loader is None: 260 | score = cross_val_score(clf, encoded_train, 261 | encoded_target, 262 | cv=kfold, scoring=scoring, n_jobs=n_jobs) 263 | score = score.mean() 264 | else: 265 | clf.fit(encoded_train, encoded_target) 266 | score = clf.score(encoded_test, encoded_label) 267 | print("val score={}".format(score)) 268 | if score > best_score: 269 | best_score = score 270 | best_alpha = alpha 271 | iter_since_best = 0 272 | else: 273 | iter_since_best += 1 274 | if iter_since_best >= 3: 275 | break 276 | print("best lambda={}, best val score={}".format(best_alpha, best_score)) 277 | if test_loader is None: 278 | clf = MisoClassifier( 279 | Lambda=best_alpha, max_iterations=int(1000*n_samples), verbose=True, seed=31, threads=0) 280 | clf.fit(encoded_train, encoded_target) 281 | toc = timer() 282 | #self.classifier.weight.data.copy_(torch.from_numpy(clf.coef_)) 283 | self.classifier.weight.data.copy_(torch.from_numpy(clf.W)) 284 | print("Finished, elapsed time: {:.2f}min".format((toc - tic)/60)) 285 | return best_score 286 | 287 | class UnsupCKNetCifar10(UnsupCKNet): 288 | def __init__(self, filters, kernel_sizes, subsamplings, sigma): 289 | super(UnsupCKNetCifar10, self).__init__( 290 | 10, 3, filters, kernel_sizes, subsamplings, 291 | kernel_args_list=sigma, fit_bias=False, maxiter=5000) 292 | 293 | class SupCKNetCifar10_5(CKNet): 294 | def __init__(self, alpha=0.0, **kwargs): 295 | kernel_sizes = [3, 1, 3, 1, 3] 296 | filters = [128, 128, 128, 128, 128] 297 | subsamplings = [2, 1, 2, 1, 3] 298 | kernel_funcs = ['exp', 'poly', 'exp', 'poly', 'exp'] 299 | kernel_args_list = [0.5, 2, 0.5, 2, 0.5] 300 | super(SupCKNetCifar10_5, self).__init__( 301 | 10, 3, filters, kernel_sizes, subsamplings, kernel_funcs=kernel_funcs, 302 | kernel_args_list=kernel_args_list, fit_bias=True, alpha=alpha, 
maxiter=5000, **kwargs) 303 | 304 | class SupCKNetCifar10_14(CKNet): 305 | def __init__(self, alpha=0.0, **kwargs): 306 | kernel_sizes = [3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1] 307 | filters = [256, 128, 256, 128, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256] 308 | subsamplings = [1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2] 309 | kernel_funcs = ['exp', 'poly', 'exp', 'poly', 'exp', 'poly', 'exp', 'poly', 310 | 'exp', 'poly', 'exp', 'poly', 'exp', 'poly'] 311 | kernel_args_list = [0.5, 2, 0.5, 2, 0.5, 2, 0.5, 2, 0.5, 2, 0.5, 2, 0.5, 2] 312 | super(SupCKNetCifar10_14, self).__init__( 313 | 10, 3, filters, kernel_sizes, subsamplings, kernel_funcs=kernel_funcs, 314 | kernel_args_list=kernel_args_list, fit_bias=True, alpha=alpha, maxiter=5000, **kwargs) 315 | 316 | SUPMODELS = { 317 | 'ckn14': SupCKNetCifar10_14, 318 | 'ckn5': SupCKNetCifar10_5 319 | } 320 | -------------------------------------------------------------------------------- /ckn/ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | class MatrixInverseSqrt(torch.autograd.Function): 6 | """Matrix inverse square root for a symmetric definite positive matrix 7 | """ 8 | @staticmethod 9 | def forward(ctx, input, eps=1e-2): 10 | use_cuda = input.is_cuda 11 | #if input.size(0) < 300: 12 | # input = input.cpu() 13 | input = input.cpu() 14 | #print(torch.isnan(input).any()) 15 | e, v = torch.symeig(input, eigenvectors=True) 16 | if use_cuda: 17 | e = e.cuda() 18 | v = v.cuda() 19 | e.clamp_(min=0) 20 | e_sqrt = e.sqrt_().add_(eps) 21 | ctx.save_for_backward(e_sqrt, v) 22 | e_rsqrt = e_sqrt.reciprocal() 23 | 24 | output = v.mm(torch.diag(e_rsqrt).mm(v.t())) 25 | return output 26 | 27 | @staticmethod 28 | def backward(ctx, grad_output): 29 | e_sqrt, v = ctx.saved_variables 30 | ei = e_sqrt.expand_as(v) 31 | ej = e_sqrt.view([-1, 1]).expand_as(v) 32 | f = torch.reciprocal((ei + ej) * ei * ej) 33 | grad_input = 
-v.mm((f*(v.t().mm(grad_output.mm(v)))).mm(v.t())) 34 | return grad_input, None 35 | 36 | 37 | def matrix_inverse_sqrt(input, eps=1e-2): 38 | """Wrapper for MatrixInverseSqrt""" 39 | return MatrixInverseSqrt.apply(input, eps) 40 | -------------------------------------------------------------------------------- /ckn/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import math 4 | import random 5 | import numpy as np 6 | 7 | EPS = 1e-6 8 | 9 | 10 | def gaussian_filter_1d(size, sigma=None): 11 | """Create 1D Gaussian filter 12 | """ 13 | if size == 1: 14 | return torch.ones(1) 15 | if sigma is None: 16 | sigma = (size - 1.) / (2.*math.sqrt(2)) 17 | m = (size - 1) / 2. 18 | filt = torch.arange(-m, m+1) 19 | filt = torch.exp(-filt.pow(2)/(2.*sigma*sigma)) 20 | return filt/torch.sum(filt) 21 | 22 | def spherical_kmeans(x, n_clusters, max_iters=100, block_size=None, verbose=True, init=None): 23 | """Spherical kmeans 24 | Args: 25 | x (Tensor n_samples x n_features): data points 26 | n_clusters (int): number of clusters 27 | """ 28 | print(x.shape) 29 | use_cuda = x.is_cuda 30 | n_samples, n_features = x.size() 31 | if init is None: 32 | indices = torch.randperm(n_samples)[:n_clusters] 33 | if use_cuda: 34 | indices = indices.cuda() 35 | clusters = x[indices] 36 | 37 | prev_sim = np.inf 38 | tmp = x.new_empty(n_samples) 39 | assign = x.new_empty(n_samples, dtype=torch.long) 40 | if block_size is None or block_size == 0: 41 | block_size = x.shape[0] 42 | 43 | for n_iter in range(max_iters): 44 | # assign data points to clusters 45 | for i in range(0, n_samples, block_size): 46 | end_i = min(i + block_size, n_samples) 47 | cos_sim = x[i: end_i].mm(clusters.t()) 48 | tmp[i: end_i], assign[i: end_i] = cos_sim.max(dim=-1) 49 | # cos_sim = x.mm(clusters.t()) 50 | # tmp, assign = cos_sim.max(dim=-1) 51 | sim = tmp.mean() 52 | if (n_iter + 1) % 10 == 0 and verbose: 53 | print("Spherical 
kmeans iter {}, objective value {}".format( 54 | n_iter + 1, sim)) 55 | 56 | # update clusters 57 | for j in range(n_clusters): 58 | index = assign == j 59 | if index.sum().item() == 0: 60 | idx = tmp.argmin() 61 | clusters[j] = x[idx] 62 | tmp[idx] = 1. 63 | else: 64 | xj = x[index] 65 | c = xj.mean(0) 66 | clusters[j] = c / c.norm().clamp(min=EPS) 67 | 68 | if torch.abs(prev_sim - sim)/(torch.abs(sim)+1e-20) < EPS: 69 | break 70 | prev_sim = sim 71 | return clusters 72 | 73 | def normalize_(x, p=2, dim=-1): 74 | norm = x.norm(p=p, dim=dim, keepdim=True) 75 | x.div_(norm.clamp(min=EPS)) 76 | return x 77 | 78 | def accuracy(output, target, topk=(1,)): 79 | """Computes the precision@k for the specified values of k""" 80 | maxk = max(topk) 81 | batch_size = target.size(0) 82 | 83 | _, pred = output.topk(maxk, 1, True, True) 84 | pred = pred.t() 85 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 86 | 87 | res = [] 88 | for k in topk: 89 | correct_k = correct[:k].view(-1).float().sum(0) 90 | res.append(correct_k.mul_(100.0 / batch_size).item()) 91 | return res 92 | 93 | def count_parameters(model): 94 | count = 0 95 | for param in model.parameters(): 96 | count += np.prod(param.data.size()) 97 | return count 98 | 99 | if __name__ == "__main__": 100 | x = torch.rand(10000,50) 101 | x = normalize(x, dim=-1) 102 | print(x.norm(2, dim=-1)) 103 | z = spherical_kmeans(x, 32) 104 | print(z) 105 | print(z.norm(2, dim=-1)) 106 | -------------------------------------------------------------------------------- /experiments/cifar10_sup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import argparse 4 | import torch 5 | from torch import nn 6 | from torch import optim 7 | from torch.utils.data import DataLoader 8 | 9 | from ckn.data import create_dataset 10 | from ckn.utils import accuracy, count_parameters 11 | from ckn.models import SUPMODELS 12 | from ckn.loss import LOSS 13 | 14 | from timeit import 
default_timer as timer 15 | 16 | 17 | def load_args(): 18 | parser = argparse.ArgumentParser( 19 | description="CKN for CIFAR10 image classification") 20 | parser.add_argument('--seed', type=int, default=1234) 21 | parser.add_argument('--datapath', type=str, default='../data/cifar-10/cifar_white.mat', 22 | help='path to the dataset') 23 | parser.add_argument('--batch-size', default=128, type=int, help='batch size') 24 | parser.add_argument('--model', default='ckn14', choices=list(SUPMODELS.keys()), help='which model to use') 25 | parser.add_argument( 26 | '--sampling-patches', type=int, default=150000, help='number of subsampled patches for initilization') 27 | parser.add_argument('--lr', default=1.0, type=float, help='initial learning rate') 28 | parser.add_argument('--epochs', default=100, type=int, help='number of epochs') 29 | parser.add_argument('--alternating', action='store_true', help='use alternating opitmization') 30 | parser.add_argument('--alpha', default=0.1, type=float, help='regularization parameter') 31 | parser.add_argument('--loss', default='hinge', choices=list(LOSS.keys()), help='loss function') 32 | parser.add_argument('--outpath', type=str, default=None, help='output path') 33 | parser.add_argument('--augmentation', action='store_true', help='data augmentation') 34 | args = parser.parse_args() 35 | args.gpu = torch.cuda.is_available() 36 | 37 | return args 38 | 39 | def sup_train(model, data_loader, args): 40 | criterion = LOSS[args.loss]() 41 | if args.alternating: 42 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9) 43 | else: 44 | alpha = args.alpha * model.out_features / args.batch_size 45 | optimizer = optim.SGD([ 46 | {'params': model.features.parameters()}, 47 | {'params': model.classifier.parameters(), 'weight_decay': alpha}], lr=args.lr, momentum=0.9) 48 | # lr_scheduler = None 49 | if args.model == 'ckn14': 50 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [90, 100], gamma=0.1) 51 | if 
args.augmentation: 52 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [90, 120], gamma=0.1) 53 | else: 54 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [60, 85, 100], gamma=0.1) 55 | if args.augmentation: 56 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [60, 100, 130], gamma=0.1) 57 | 58 | if args.gpu: 59 | model.cuda() 60 | print("Initialing CKN") 61 | tic = timer() 62 | model.unsup_train_ckn( 63 | data_loader['init'], args.sampling_patches, use_cuda=args.gpu) 64 | toc = timer() 65 | print("Finished, elapsed time: {:.2f}min".format((toc - tic)/60)) 66 | 67 | epoch_loss = None 68 | best_loss = float('inf') 69 | best_acc = 0 70 | 71 | for epoch in range(args.epochs): 72 | print('Epoch {}/{}'.format(epoch + 1, args.epochs)) 73 | print('-' * 10) 74 | if args.alternating or epoch == 0: 75 | model.train(False) 76 | tic = timer() 77 | model.unsup_train_classifier( 78 | data_loader['train'], criterion=criterion, use_cuda=args.gpu) 79 | toc = timer() 80 | print('Last layer trained, elapsed time: {:.2f}s'.format(toc - tic)) 81 | if not args.alternating: 82 | optimizer.param_groups[-1]['weight_decay'] = model.classifier.real_alpha 83 | 84 | for phase in ['train', 'val']: 85 | if phase == 'train': 86 | if lr_scheduler is not None and epoch > 0: 87 | try: 88 | lr_scheduler.step(metrics=epoch_loss) 89 | except: 90 | lr_scheduler.step() 91 | print("current LR: {}".format( 92 | optimizer.param_groups[0]['lr'])) 93 | model.train() 94 | else: 95 | model.eval() 96 | 97 | running_loss = 0.0 98 | running_acc = 0 99 | 100 | tic = timer() 101 | for data, target in data_loader[phase]: 102 | size = data.size(0) 103 | if args.gpu: 104 | data = data.cuda() 105 | target = target.cuda() 106 | 107 | # forward 108 | if phase == 'train': 109 | optimizer.zero_grad() 110 | output = model(data) 111 | loss = criterion(output, target) 112 | pred = output.data.argmax(dim=1) 113 | loss.backward() 114 | optimizer.step() 115 | model.normalize_() 116 | else: 117 | with 
torch.no_grad(): 118 | output = model(data) 119 | loss = criterion(output, target) 120 | pred = output.data.argmax(dim=1) 121 | 122 | running_loss += loss.item() * size 123 | running_acc += torch.sum(pred == target.data).item() 124 | toc = timer() 125 | 126 | epoch_loss = running_loss / len(data_loader[phase].dataset) 127 | epoch_acc = running_acc / len(data_loader[phase].dataset) 128 | 129 | print('{} Loss: {:.4f} Acc: {:.2f}% Elapsed time: {:.2f}s'.format( 130 | phase, epoch_loss, epoch_acc * 100, toc - tic)) 131 | 132 | if phase == 'val' and epoch_acc > best_acc: 133 | best_acc = epoch_acc 134 | best_loss = epoch_loss 135 | best_epoch = epoch 136 | best_weights = copy.deepcopy(model.state_dict()) 137 | print() 138 | 139 | print('Best epoch: {}'.format(best_epoch + 1)) 140 | print('Best val Acc: {:4f}'.format(best_acc)) 141 | print('Best val loss: {:4f}'.format(best_loss)) 142 | model.load_state_dict(best_weights) 143 | 144 | return best_acc 145 | 146 | def main(): 147 | args = load_args() 148 | print(args) 149 | torch.manual_seed(args.seed) 150 | if args.gpu: 151 | torch.cuda.manual_seed_all(args.seed) 152 | 153 | init_dset = create_dataset(args.datapath) 154 | train_dset = create_dataset(args.datapath, dataugmentation=args.augmentation) 155 | print(train_dset.train_data.shape) 156 | 157 | loader_args = {} 158 | if args.gpu: 159 | loader_args = {'pin_memory': True} 160 | init_loader = DataLoader( 161 | init_dset, batch_size=64, shuffle=False, num_workers=2, **loader_args) 162 | train_loader = DataLoader( 163 | train_dset, batch_size=args.batch_size, shuffle=True, num_workers=4, **loader_args) 164 | 165 | model = SUPMODELS[args.model](alpha=args.alpha) 166 | print(model) 167 | nb_params = count_parameters(model) 168 | print('number of paramters: {}'.format(nb_params)) 169 | 170 | test_dset = create_dataset(args.datapath, train=False) 171 | test_loader = DataLoader( 172 | test_dset, batch_size=args.batch_size, shuffle=False, num_workers=2, **loader_args) 173 | 174 
| data_loader = {'init': init_loader, 'train': train_loader, 'val': test_loader} 175 | tic = timer() 176 | score = sup_train(model, data_loader, args) 177 | toc = timer() 178 | training_time = (toc - tic) / 60 179 | print("Final accuracy: {:6.2f}%, elapsed time: {:.2f}min".format(score * 100, training_time)) 180 | 181 | # y_pred, y_true = model.predict(test_loader, use_cuda=args.gpu) 182 | # scores = accuracy(y_pred, y_true, (1,)) 183 | # print(scores) 184 | if args.outpath is not None: 185 | import csv 186 | table = {'acc': score, 'training time': training_time} 187 | with open(args.outpath + '/metric.csv', 'w') as f: 188 | w = csv.DictWriter(f, table.keys()) 189 | w.writeheader() 190 | w.writerow(table) 191 | 192 | torch.save({ 193 | 'args': args, 194 | 'state_dict': model.state_dict()}, 195 | args.outpath + '/model.pkl') 196 | 197 | 198 | 199 | if __name__ == '__main__': 200 | main() 201 | -------------------------------------------------------------------------------- /experiments/cifar10_unsup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | from torch.utils.data import DataLoader 5 | 6 | from ckn.data import create_dataset 7 | from ckn.utils import accuracy 8 | from ckn.models import UnsupCKNetCifar10 9 | 10 | 11 | def load_args(): 12 | parser = argparse.ArgumentParser( 13 | description="unsup CKN for CIFAR10 image classification") 14 | parser.add_argument('--seed', type=int, default=1234) 15 | parser.add_argument('--datapath', type=str, default='../data/cifar-10/cifar_white.mat', 16 | help='path to the dataset') 17 | parser.add_argument('--batch-size', default=128, type=int, help='batch size') 18 | parser.add_argument('--filters', nargs='+', type=int, help='number of filters') 19 | parser.add_argument('--subsamplings', nargs='+', type=int, help='sampling routine') 20 | parser.add_argument('--kernel-sizes', nargs='+', type=int, help='kernel sizes') 21 | 
parser.add_argument( 22 | '--sigma', nargs='+', type=float, default=None, help='parameters for dot-product kernel') 23 | parser.add_argument( 24 | '--sampling-patches', type=int, default=1000000, help='number of subsampled patches for K-means') 25 | parser.add_argument('--cv', action='store_true', 26 | help='if True perform model select with cross validation, else on test set') 27 | args = parser.parse_args() 28 | args.gpu = torch.cuda.is_available() 29 | 30 | nlayers = len(args.filters) 31 | if args.sigma is None: 32 | args.sigma = [0.6] * nlayers 33 | 34 | return args 35 | 36 | 37 | def main(): 38 | args = load_args() 39 | print(args) 40 | torch.manual_seed(args.seed) 41 | if args.gpu: 42 | torch.cuda.manual_seed_all(args.seed) 43 | 44 | train_dset = create_dataset(args.datapath) 45 | print(train_dset.train_data.shape) 46 | 47 | loader_args = {} 48 | if args.gpu: 49 | loader_args = {'pin_memory': True} 50 | data_loader = DataLoader( 51 | train_dset, batch_size=args.batch_size, shuffle=False, num_workers=2, **loader_args) 52 | 53 | model = UnsupCKNetCifar10( 54 | args.filters, args.kernel_sizes, args.subsamplings, args.sigma) 55 | print(model) 56 | 57 | test_dset = create_dataset(args.datapath, train=False) 58 | test_loader = DataLoader( 59 | test_dset, batch_size=args.batch_size, shuffle=False, num_workers=2, **loader_args) 60 | # model.unsup_train( 61 | # data_loader, n_sampling_patches=args.sampling_patches, use_cuda=args.gpu) 62 | if args.cv: 63 | model.unsup_cross_val( 64 | data_loader, test_loader=None, n_sampling_patches=args.sampling_patches, use_cuda=args.gpu) 65 | y_pred, y_true = model.predict(test_loader, use_cuda=args.gpu) 66 | score = accuracy(y_pred, y_true, (1,)) 67 | else: 68 | score = model.unsup_cross_val( 69 | data_loader, test_loader=test_loader, n_sampling_patches=args.sampling_patches, use_cuda=args.gpu) 70 | print(score) 71 | 72 | #test_dset = create_dataset(args.datapath, train=False) 73 | #test_loader = DataLoader( 74 | # test_dset, 
batch_size=args.batch_size, shuffle=False, num_workers=2, **loader_args) 75 | # y_pred, y_true = model.predict(test_loader, use_cuda=args.gpu) 76 | # scores = accuracy(y_pred, y_true, (1,)) 77 | # print(scores) 78 | 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.1 2 | Name: miso_svm 3 | Version: 1.0 4 | Summary: Python interface for MISO SVM classifier 5 | Home-page: UNKNOWN 6 | Author: Ghislain Durif 7 | Author-email: ckn.dev@inria.fr 8 | License: GPLv3 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | Classifier: Development Status :: 4 - Beta 12 | Classifier: Intended Audience :: Science/Research 13 | Classifier: Topic :: Scientific/Engineering :: Mathematics 14 | Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) 15 | Classifier: Programming Language :: Python :: 3 16 | Classifier: Programming Language :: Python :: 3.2 17 | Classifier: Programming Language :: Python :: 3.3 18 | Classifier: Programming Language :: Python :: 3.4 19 | Classifier: Programming Language :: Python :: 3.5 20 | Classifier: Programming Language :: Python :: 3.6 21 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/README.md: -------------------------------------------------------------------------------- 1 | # MISO SVM 2 | 3 | This package implements a SVM (support vector machine) classification procedure 4 | based on the MISO optimization algorithm, which is introduced in [1] (available 5 | at or 6 | ). 7 | 8 | The 'miso_svm' package is based on C++ interfaced codes. All files included in 9 | the 'miso_svm' package ([miso_svm/*] and in particular [miso_svm/miso_svm/*]) 10 | are released under the GPL-v3 license. 
11 | 12 | --- 13 | 14 | # Installation 15 | 16 | This package requires the MKL from Intel (for BLAS and OpenMP). You can get 17 | the MKL by using the Python Anaconda distribution, or you can use your own 18 | MKL license if you have one. 19 | 20 | 21 | ## Prerequisite when using Anaconda 22 | 23 | You can get Anaconda or Miniconda from 24 | or . 25 | 26 | Create a conda virtual environment and install dependencies within it: 27 | ```bash 28 | conda create -n cknenv # if not done yet 29 | source activate cknenv 30 | conda install numpy scipy scikit-learn matplotlib 31 | ``` 32 | 33 | ## Install miso_svm 34 | 35 | * On GNU/Linux and MacOS: 36 | 37 | If using the previously created conda environment: 38 | ```bash 39 | source activate cknenv 40 | ``` 41 | 42 | then 43 | ```bash 44 | git clone https://gitlab.inria.fr/thoth/ckn 45 | cd ckn/miso_svm 46 | python setup.py install 47 | ``` 48 | 49 | OR 50 | ```bash 51 | wget http://pascal.inrialpes.fr/data2/gdurif/miso_svm-1.0.tar.gz 52 | tar zxvf miso_svm-1.0.tar.gz 53 | cd miso_svm-1.0 54 | python setup.py install 55 | ``` 56 | 57 | To specify an installation directory: 58 | ```bash 59 | inst= 60 | PYV=$(python -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)";) 61 | export PYTHONPATH=$inst/lib/python${PYV}/site-packages:$PYTHONPATH 62 | python setup.py install --prefix=$inst 63 | ``` 64 | 65 | 66 | ## When using the official GitLab repository (for developers) 67 | 68 | (on GNU/Linux and MacOS only) 69 | 70 | * To build/install/test the package, see: 71 | 72 | ```bash 73 | ./dev_command.sh help 74 | ``` 75 | 76 | ## Example of use 77 | 78 | See [classification.py](miso_svm/classification.py), 79 | [quick.py](miso_svm/quick.py) or [miso.py](miso_svm/miso.py) 80 | 81 | 82 | --- 83 | 84 | ## References 85 | 86 | [1] Lin, H., Mairal, J., Harchaoui, Z., 2015. A universal catalyst for first-order optimization, in: Advances in Neural Information Processing Systems. pp. 3384–3392. 
87 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/cblas_defvar.h: -------------------------------------------------------------------------------- 1 | #ifndef CBLAS_H 2 | #define CBLAS_H 3 | #include 4 | 5 | /* 6 | * Enumerated and derived types 7 | */ 8 | #define CBLAS_INDEX size_t /* this may vary between platforms */ 9 | 10 | enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; 11 | enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; 12 | enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; 13 | enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; 14 | enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; 15 | 16 | 17 | char CBLAS_TRANSPOSE_CHAR[] = {'N', 'T', 'C'}; 18 | char *cblas_transpose(CBLAS_TRANSPOSE TransA) 19 | { 20 | switch(TransA) 21 | { 22 | case 111: return &CBLAS_TRANSPOSE_CHAR[0]; 23 | case 112: return &CBLAS_TRANSPOSE_CHAR[1]; 24 | case 113: return &CBLAS_TRANSPOSE_CHAR[2]; 25 | } 26 | return NULL; 27 | } 28 | 29 | char CBLAS_UPLO_CHAR[] = {'U', 'L'}; 30 | char *cblas_uplo(CBLAS_UPLO Uplo) 31 | { 32 | switch(Uplo) 33 | { 34 | case 121: return &CBLAS_UPLO_CHAR[0]; 35 | case 122: return &CBLAS_UPLO_CHAR[1]; 36 | } 37 | return NULL; 38 | } 39 | 40 | char CBLAS_DIAG_CHAR[] = {'N', 'U'}; 41 | char *cblas_diag(CBLAS_DIAG Diag) 42 | { 43 | switch(Diag) 44 | { 45 | case 131: return &CBLAS_DIAG_CHAR[0]; 46 | case 132: return &CBLAS_DIAG_CHAR[1]; 47 | } 48 | return NULL; 49 | } 50 | 51 | char CBLAS_SIDE_CHAR[] = {'L', 'R'}; 52 | char *cblas_side(CBLAS_SIDE Side) 53 | { 54 | switch(Side) 55 | { 56 | case 141: return &CBLAS_SIDE_CHAR[0]; 57 | case 142: return &CBLAS_SIDE_CHAR[1]; 58 | } 59 | return NULL; 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/ctypes_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
"linalg.h" 4 | 5 | #include "common.h" 6 | 7 | /** 8 | * From Daan Wynen 9 | */ 10 | 11 | 12 | // check for a condition, and fail with an exception strin set if it is false 13 | #define assert_py_obj(condition, error) if (! (condition) ) { \ 14 | PyErr_SetString(PyExc_TypeError, (error)); \ 15 | return NULL; \ 16 | } 17 | 18 | // the same macro, but for cases where the calling method returns an integer 19 | #define assert_py_int(condition, error) if (! (condition) ) { \ 20 | PyErr_SetString(PyExc_TypeError, (error)); \ 21 | return 0; \ 22 | } 23 | 24 | // and another version that throws the error message as a const char* instead 25 | #define assert_py_throw(condition, error) if (! (condition) ) { \ 26 | throw (error); \ 27 | } 28 | 29 | 30 | 31 | template inline string getTypeName(); 32 | template <> inline string getTypeName() { return "intc"; }; 33 | template <> inline string getTypeName() { return "uint8"; }; 34 | template <> inline string getTypeName() { return "float32"; }; 35 | template <> inline string getTypeName() { return "float64"; }; 36 | 37 | template inline int getTypeNumber(); 38 | template <> inline int getTypeNumber() { return NPY_INT; }; 39 | template <> inline int getTypeNumber() { return NPY_UINT8; }; 40 | template <> inline int getTypeNumber() { return NPY_FLOAT32; }; 41 | template <> inline int getTypeNumber() { return NPY_FLOAT64; }; 42 | 43 | 44 | // these structs hold define the python type objects for Vector, Matrix and Map 45 | // they only hold pointers to the actual C++ objects 46 | // this way, the data does not get deallocated immediately when objects leave 47 | // the scope 48 | template struct VectorWrapper { 49 | PyObject_HEAD; 50 | Vector *obj; 51 | }; 52 | 53 | template struct MatrixWrapper { 54 | PyObject_HEAD; 55 | Matrix *obj; 56 | }; 57 | 58 | template struct MapWrapper { 59 | PyObject_HEAD; 60 | Map *obj; 61 | }; 62 | 63 | 64 | // these are the deallocation methods for she structs defined above 65 | // they'll be linked to 
the destructor hooks of the python objects further down 66 | template 67 | static void _delete_cpp_mat(MatrixWrapper* self){ 68 | if (self && self->obj) { 69 | delete self->obj; 70 | } 71 | Py_TYPE(self)->tp_free((PyObject*)self); 72 | } 73 | 74 | template 75 | static void _delete_cpp_vec(VectorWrapper* self){ 76 | if (self && self->obj) { 77 | delete self->obj; 78 | Py_TYPE(self)->tp_free((PyObject*)self); 79 | } 80 | } 81 | 82 | template 83 | static void _delete_cpp_map(MapWrapper* self){ 84 | if (self && self->obj) { 85 | delete self->obj; 86 | Py_TYPE(self)->tp_free((PyObject*)self); 87 | } 88 | } 89 | 90 | 91 | static PyTypeObject MatrixWrapperType = { 92 | PyVarObject_HEAD_INIT(NULL, 0) 93 | "miso_svm.MatrixWrapper", /*tp_name*/ 94 | sizeof(MatrixWrapper), /*tp_basicsize*/ // FIXME: does this break if using double? 95 | 0, /*tp_itemsize*/ 96 | (destructor)_delete_cpp_mat, /*tp_dealloc*/ // FIXME: does this break if using double? 97 | 0, /*tp_print*/ 98 | 0, /*tp_getattr*/ 99 | 0, /*tp_setattr*/ 100 | 0, /*tp_compare*/ 101 | 0, /*tp_repr*/ 102 | 0, /*tp_as_number*/ 103 | 0, /*tp_as_sequence*/ 104 | 0, /*tp_as_mapping*/ 105 | 0, /*tp_hash */ 106 | 0, /*tp_call*/ 107 | 0, /*tp_str*/ 108 | 0, /*tp_getattro*/ 109 | 0, /*tp_setattro*/ 110 | 0, /*tp_as_buffer*/ 111 | Py_TPFLAGS_DEFAULT, /*tp_flags*/ 112 | "Internal deallocator object for the Matrix class", /* tp_doc */ 113 | }; 114 | 115 | static PyTypeObject VectorWrapperType = { 116 | PyVarObject_HEAD_INIT(NULL, 0) 117 | "miso_svm.VectorWrapper", /*tp_name*/ 118 | sizeof(VectorWrapper), /*tp_basicsize*/ // FIXME: does this break if using double? 119 | 0, /*tp_itemsize*/ 120 | (destructor)_delete_cpp_vec, /*tp_dealloc*/ // FIXME: does this break if using double? 
121 | 0, /*tp_print*/ 122 | 0, /*tp_getattr*/ 123 | 0, /*tp_setattr*/ 124 | 0, /*tp_compare*/ 125 | 0, /*tp_repr*/ 126 | 0, /*tp_as_number*/ 127 | 0, /*tp_as_sequence*/ 128 | 0, /*tp_as_mapping*/ 129 | 0, /*tp_hash */ 130 | 0, /*tp_call*/ 131 | 0, /*tp_str*/ 132 | 0, /*tp_getattro*/ 133 | 0, /*tp_setattro*/ 134 | 0, /*tp_as_buffer*/ 135 | Py_TPFLAGS_DEFAULT, /*tp_flags*/ 136 | "Internal deallocator object for the Vector class", /* tp_doc */ 137 | }; 138 | 139 | static PyTypeObject MapWrapperType = { 140 | PyVarObject_HEAD_INIT(NULL, 0) 141 | "miso_svm.MapWrapper", /*tp_name*/ 142 | sizeof(MapWrapper), /*tp_basicsize*/ // FIXME: does this break if using double? 143 | 0, /*tp_itemsize*/ 144 | (destructor)_delete_cpp_map, /*tp_dealloc*/ //FIXME does this break if using double? 145 | 0, /*tp_print*/ 146 | 0, /*tp_getattr*/ 147 | 0, /*tp_setattr*/ 148 | 0, /*tp_compare*/ 149 | 0, /*tp_repr*/ 150 | 0, /*tp_as_number*/ 151 | 0, /*tp_as_sequence*/ 152 | 0, /*tp_as_mapping*/ 153 | 0, /*tp_hash */ 154 | 0, /*tp_call*/ 155 | 0, /*tp_str*/ 156 | 0, /*tp_getattro*/ 157 | 0, /*tp_setattro*/ 158 | 0, /*tp_as_buffer*/ 159 | Py_TPFLAGS_DEFAULT, /*tp_flags*/ 160 | "Internal deallocator object for the Map class", /* tp_doc */ 161 | }; 162 | 163 | template 164 | inline PyArrayObject* copyMatrix(Matrix* obj) { 165 | std::cout << "matrix data: " << obj->rawX() << std::endl; 166 | int nd=2; 167 | std::cout << "n: " << obj->n() << " m: " << obj->m() << std::endl; 168 | npy_intp dims[2]={obj->n(), obj->m()}; 169 | PyArrayObject* arr=NULL; 170 | arr = (PyArrayObject*)PyArray_EMPTY(nd, dims, getTypeNumber(), 0); 171 | Matrix copymat((T*)PyArray_DATA(arr), dims[1], dims[0]); 172 | std::cout << "numpy array data: " << PyArray_DATA(arr) << std::endl; 173 | if (arr == NULL) goto fail; 174 | copymat.copy(*obj); 175 | return arr; 176 | fail: 177 | delete obj; // FIXME Error Handling!? 
178 | std::cout << "FAIL in copyMatrix" << std::endl; 179 | Py_XDECREF(arr); 180 | return NULL; 181 | } 182 | 183 | 184 | template 185 | inline PyArrayObject* wrapMatrix(Matrix* obj) { 186 | int nd=2; 187 | npy_intp dims[2]={obj->n(), obj->m()}; 188 | PyObject* newobj=NULL; 189 | PyArrayObject* arr=NULL; 190 | void *mymem = (void*)(obj->rawX()); 191 | arr = (PyArrayObject*)PyArray_SimpleNewFromData(nd, dims, getTypeNumber(), mymem); 192 | 193 | npy_intp* strides = PyArray_STRIDES(arr); 194 | for (int idx=0; idx, &MatrixWrapperType); 199 | if (newobj == NULL) goto fail; 200 | ((MatrixWrapper *)newobj)->obj = obj; 201 | PyArray_SetBaseObject((PyArrayObject*)arr, newobj); 202 | return arr; 203 | fail: 204 | delete obj; // FIXME Error Handling!? 205 | std::cout << "FAIL in wrapMatrix" << std::endl; 206 | Py_XDECREF(arr); 207 | return NULL; 208 | } 209 | 210 | template 211 | inline PyArrayObject* wrapVector(Vector* obj) { 212 | int nd=1; 213 | npy_intp dims[1]={obj->n()}; 214 | PyObject* newobj=NULL; 215 | void *mymem = (void*)(obj->rawX()); 216 | PyArrayObject* arr = (PyArrayObject*)PyArray_SimpleNewFromData(nd, dims, getTypeNumber(), mymem); 217 | if (arr == NULL) goto fail; 218 | newobj = (PyObject*)PyObject_New(VectorWrapper, &VectorWrapperType); 219 | if (newobj == NULL) goto fail; 220 | ((VectorWrapper *)newobj)->obj = obj; 221 | PyArray_SetBaseObject((PyArrayObject*)arr, newobj); 222 | return arr; 223 | fail: 224 | delete obj; // FIXME Error Handling!? 
225 | Py_XDECREF(arr); 226 | return NULL; 227 | } 228 | 229 | template 230 | inline PyArrayObject* wrapMap(Map* obj) { 231 | int nd=3; 232 | npy_intp dims[3]={obj->z(), obj->y(), obj->x()}; 233 | PyObject* newobj=NULL; 234 | PyArrayObject* arr=NULL; 235 | void *mymem = (void*)(obj->rawX()); 236 | arr = (PyArrayObject*)PyArray_SimpleNewFromData(nd, dims, getTypeNumber(), mymem); 237 | if (arr == NULL) goto fail; 238 | newobj = (PyObject*)PyObject_New(MapWrapper, &MapWrapperType); 239 | if (newobj == NULL) goto fail; 240 | ((MapWrapper *)newobj)->obj = obj; 241 | PyArray_SetBaseObject((PyArrayObject*)arr, newobj); 242 | return arr; 243 | fail: 244 | delete obj; // FIXME Error Handling!? 245 | Py_XDECREF(arr); 246 | return NULL; 247 | } 248 | 249 | template 250 | static int npyToMatrix(PyArrayObject* array, Matrix& matrix, string obj_name) { 251 | if (array==NULL) { 252 | return 1; 253 | } 254 | if(!(PyArray_NDIM(array) == 2 && 255 | PyArray_TYPE(array) == getTypeNumber() && 256 | (PyArray_FLAGS(array) & NPY_ARRAY_C_CONTIGUOUS))) { 257 | PyErr_SetString(PyExc_TypeError, (obj_name + " should be c-contiguous 2D "+getTypeName()+" array").c_str()); 258 | return 0; 259 | } 260 | 261 | T *rawX = reinterpret_cast(PyArray_DATA(array)); 262 | const npy_intp *shape = PyArray_DIMS(array); 263 | npy_intp n = shape[0]; 264 | npy_intp m = shape[1]; 265 | 266 | matrix.setData(rawX, m, n); 267 | return 1; 268 | } 269 | 270 | template 271 | static int npyToVector(PyArrayObject* array, Vector& matrix, string obj_name) { 272 | if (array==NULL) { 273 | return 1; 274 | } 275 | T *rawX = reinterpret_cast(PyArray_DATA(array)); 276 | const npy_intp *shape = PyArray_DIMS(array); 277 | npy_intp n = shape[0]; 278 | 279 | if(!(PyArray_NDIM(array) == 1 && 280 | PyArray_TYPE(array) == getTypeNumber() && 281 | (PyArray_FLAGS(array) & NPY_ARRAY_C_CONTIGUOUS))) { 282 | PyErr_SetString(PyExc_TypeError, (obj_name + " should be c-contiguous 1D "+getTypeName()+" array").c_str()); 283 | return 0; 284 | } 
285 | matrix.setData(rawX, n); 286 | return 1; 287 | } 288 | 289 | static vector get_array_shape(PyArrayObject* array) { 290 | vector result; 291 | if (array == NULL) { 292 | return result; 293 | } 294 | const int ndim = PyArray_NDIM(array); 295 | const npy_intp* shape = PyArray_DIMS(array); 296 | for (int i = 0; i < ndim; ++i) 297 | result.push_back(shape[i]); 298 | return result; 299 | } 300 | 301 | template 302 | static int npyToMap(PyArrayObject* array, Map& matrix, string obj_name) { 303 | if (array==NULL) { 304 | return 1; 305 | } 306 | const int ndim = PyArray_NDIM(array); 307 | if(ndim != 3) { 308 | PyErr_SetString(PyExc_TypeError, (obj_name + " should have 3 dimensions but has " + to_string(ndim)).c_str()); 309 | return 0; 310 | } 311 | 312 | if (PyArray_TYPE(array) != getTypeNumber()) { 313 | PyErr_SetString(PyExc_TypeError, (obj_name + " has wrong data type.").c_str()); 314 | return 0; 315 | } 316 | if (!(PyArray_FLAGS(array) & NPY_ARRAY_C_CONTIGUOUS)) { 317 | PyErr_SetString(PyExc_TypeError, (obj_name + " is not contiguous.").c_str()); 318 | return 0; 319 | } 320 | 321 | T *rawX = reinterpret_cast(PyArray_DATA(array)); 322 | const npy_intp *shape = PyArray_DIMS(array); 323 | matrix.setData(rawX, shape[2], shape[1], shape[0]); 324 | return 1; 325 | } 326 | 327 | template 328 | static int sequenceToVector(PyObject* seq, std::vector& res) { 329 | if (!PySequence_Check(seq)) { 330 | PyErr_SetString(PyExc_TypeError, "input should be a sequence"); 331 | return 0; 332 | } 333 | 334 | int n = PySequence_Size(seq); 335 | res.resize(n); 336 | 337 | for (int i=0; i 350 | static PyObject* convert_primitive(T i); 351 | 352 | template <> PyObject* convert_primitive(long i){ return PyLong_FromLong(i); } 353 | template <> PyObject* convert_primitive(int i){ return PyLong_FromLong(i); } 354 | 355 | template 356 | static PyObject* vector_to_pylist(vector vec) { 357 | PyObject* result = PyList_New(vec.size()); 358 | for (int i = 0; i < vec.size(); ++i) { 359 | if 
(PyList_SetItem(result, i, convert_primitive(vec[i])) == -1) { 360 | Py_DECREF(result); 361 | return NULL; 362 | } 363 | } 364 | return result; 365 | } 366 | 367 | template 368 | static int npy_list_to_vector(PyObject* list, vector*>& vec, string list_name) { 369 | const int n = PyList_Size(list); 370 | vec.resize(n); 371 | int i; 372 | for (i = 0; i < n; ++i) { 373 | PyArrayObject* arr = reinterpret_cast(PyList_GetItem(list, i)); 374 | if (arr == NULL) 375 | goto fail; 376 | Matrix* mat = new Matrix(); 377 | if(npyToMatrix(arr, *mat, list_name+"["+to_string(i)+"]") == 0) { 378 | delete mat; 379 | goto fail; 380 | } 381 | vec[i] = mat; 382 | } 383 | 384 | return 1; 385 | 386 | fail: 387 | for (int j = 0; j < i; ++j) { 388 | delete vec[j]; 389 | } 390 | return 0; 391 | } 392 | 393 | static PyObject* wrapMatrices(vector *> matrices) { 394 | PyObject* result = PyList_New(matrices.size()); 395 | if (result == NULL) 396 | return NULL; 397 | for (int i = 0; i < matrices.size(); ++i) 398 | PyList_SET_ITEM(result, i, reinterpret_cast(wrapMatrix(matrices[i]))); 399 | return result; 400 | } 401 | 402 | 403 | inline int set_omp_threads(int threads) { 404 | if (threads <= 0) { 405 | threads=1; 406 | #ifdef _OPENMP 407 | threads = MIN(MAX_THREADS, omp_get_num_procs()); 408 | #endif 409 | } 410 | threads=init_omp(threads); 411 | return threads; 412 | } 413 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/list.h: -------------------------------------------------------------------------------- 1 | 2 | /* Software SPAMS v2.1 - Copyright 2009-2011 Julien Mairal 3 | * 4 | * This file is part of SPAMS. 5 | * 6 | * SPAMS is free software: you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation, either version 3 of the License, or 9 | * (at your option) any later version. 
10 | * 11 | * SPAMS is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with SPAMS. If not, see . 18 | */ 19 | 20 | #ifndef LIST_H 21 | #define LIST_H 22 | 23 | template class Element { 24 | public: 25 | Element(T el) { element=el; next=NULL; }; 26 | ~Element() { }; 27 | T element; 28 | Element* next; 29 | }; 30 | 31 | template class ListIterator { 32 | 33 | public: 34 | ListIterator() { _current = NULL; }; 35 | ~ListIterator() { }; 36 | inline void set(Element* elem) { _current = elem; }; 37 | inline T operator*() const { return _current->element; }; 38 | inline bool operator !=(const void* end) const { return _current != end; }; 39 | inline bool operator ==(const void* end) const { return _current == end; }; 40 | inline void operator++() { _current = _current->next; }; 41 | inline Element* current() { return _current; }; 42 | inline T operator->() { return _current->element; }; 43 | private: 44 | Element* _current; 45 | }; 46 | 47 | template class List { 48 | public: 49 | 50 | List() { _first=NULL; _last=NULL; _size=0; _iterator = new ListIterator(); }; 51 | ~List() { 52 | this->clear(); 53 | delete(_iterator); 54 | }; 55 | bool inline empty() const { return _size==0; }; 56 | inline T front() const { 57 | return _first->element; 58 | }; 59 | inline T last() const { 60 | return _last->element; 61 | }; 62 | void inline pop_front() { 63 | Element* fr=_first; 64 | _first=fr->next; 65 | fr->next=NULL; 66 | delete(fr); 67 | --_size; 68 | }; 69 | void inline push_back(T elem) { 70 | if (_first) { 71 | Element* la=_last; 72 | _last=new Element(elem); 73 | la->next=_last; 74 | } else { 75 | _first=new Element(elem); 76 | _last=_first; 77 | } 78 | ++_size; 79 | } 80 | void inline push_front(T 
elem) { 81 | Element* fr=_first; 82 | _first=new Element(elem); 83 | _first->next=fr; 84 | if (!_last) _last=_first; 85 | ++_size; 86 | } 87 | void inline clear() { 88 | ListIterator it = this->begin(); 89 | while (it != this->end()) { 90 | Element* cur = it.current(); 91 | ++it; 92 | delete(cur); 93 | } 94 | _size=0; 95 | _first=NULL; 96 | _last=NULL; 97 | } 98 | void inline remove(T elem) { 99 | if (_first->element == elem) { 100 | Element* el = _first; 101 | _first = _first->next; 102 | delete(el); 103 | } else { 104 | Element* old = _first; 105 | for (ListIterator it = this->begin(); it != this->end(); ++it) { 106 | if (*it == elem) { 107 | Element* el = it.current(); 108 | old->next=el->next; 109 | delete(el); 110 | break; 111 | } 112 | old=it.current(); 113 | } 114 | } 115 | }; 116 | int inline size() const { return _size; }; 117 | inline ListIterator& begin() const { _iterator->set(_first); return *_iterator; }; 118 | inline void* end() const { return NULL; }; 119 | inline void fusion(const List& list) { 120 | for (ListIterator it = list.begin(); it != list.end(); ++it) { 121 | this->push_back(*it); 122 | } 123 | } 124 | inline void reverse(List& list) { 125 | list.clear(); 126 | for (ListIterator it = this->begin(); it != this->end(); ++it) { 127 | list.push_front(*it); 128 | } 129 | } 130 | inline void copy(List& list) { 131 | list.clear(); 132 | for (ListIterator it = this->begin(); it != this->end(); ++it) { 133 | list.push_back(*it); 134 | } 135 | } 136 | void inline print() const { 137 | std::cerr << " print list " << std::endl; 138 | for (ListIterator it = this->begin(); it != this->end(); ++it) { 139 | std::cerr << *it << " "; 140 | } 141 | std::cerr << std::endl; 142 | } 143 | 144 | private: 145 | 146 | ListIterator* _iterator; 147 | Element* _first; 148 | Element* _last; 149 | int _size; 150 | }; 151 | 152 | typedef List list_int; 153 | typedef ListIterator const_iterator_int; 154 | 155 | template class BinaryHeap { 156 | public: 157 | 
BinaryHeap(int size) { _last=-1; _values=new T[size]; _id=new int[size]; _position=new int[size]; _size=size;}; 158 | ~BinaryHeap() { delete[](_values); delete[](_id); delete[](_position); }; 159 | 160 | bool inline is_empty() const { return _last==-1; }; 161 | void inline find_min(int& node, T& val) const { 162 | node=_id[0]; val=_values[node]; }; 163 | void inline insert(const int node, const T val) { 164 | ++_last; 165 | assert(_last < _size); 166 | _values[node]=val; 167 | _position[node]=_last; 168 | _id[_last]=node; 169 | this->siftup(_last); 170 | }; 171 | void inline delete_min() { 172 | _position[_id[_last]]=0; 173 | _id[0]=_id[_last]; 174 | _last--; 175 | this->siftdown(0); 176 | }; 177 | void inline decrease_key(const int node, const T val) { 178 | assert(val <= _values[node]); 179 | _values[node]=val; 180 | this->siftup(_position[node]); 181 | }; 182 | void inline print() const { 183 | for (int i = 0; i<=_last; ++i) { 184 | std::cerr << _id[i] << " "; 185 | } 186 | std::cerr << std::endl; 187 | for (int i = 0; i<=_last; ++i) { 188 | std::cerr << _values[_id[i]] << " "; 189 | } 190 | std::cerr << std::endl; 191 | } 192 | 193 | private: 194 | void inline siftup(const int pos) { 195 | int current_pos=pos; 196 | int parent=(current_pos-1)/2; 197 | while (current_pos != 0 && _values[_id[current_pos]] < _values[_id[parent]]) { 198 | this->swapping(current_pos,parent); 199 | parent=(current_pos-1)/2; 200 | } 201 | }; 202 | void inline siftdown(const int pos) { 203 | int current_pos=pos; 204 | int first_succ=pos+pos+1; 205 | int second_succ=first_succ+1; 206 | bool lop=true; 207 | while (lop) { 208 | if (first_succ == _last) { 209 | if (_values[_id[current_pos]] > _values[_id[first_succ]]) 210 | this->swapping(current_pos,first_succ); 211 | lop=false; 212 | } else if (second_succ <= _last) { 213 | if (_values[_id[first_succ]] > _values[_id[second_succ]]) { 214 | if (_values[_id[current_pos]] > _values[_id[second_succ]]) { 215 | 
this->swapping(current_pos,second_succ); 216 | first_succ=current_pos+current_pos+1; 217 | second_succ=first_succ+1; 218 | } else { 219 | lop=false; 220 | } 221 | } else { 222 | if (_values[_id[current_pos]] > _values[_id[first_succ]]) { 223 | this->swapping(current_pos,first_succ); 224 | first_succ=current_pos+current_pos+1; 225 | second_succ=first_succ+1; 226 | } else { 227 | lop=false; 228 | } 229 | } 230 | } else { 231 | lop=false; 232 | } 233 | } 234 | }; 235 | void inline swapping(int& pos1, int& pos2) { 236 | swap(_position[_id[pos1]],_position[_id[pos2]]); 237 | swap(_id[pos1],_id[pos2]); 238 | swap(pos1,pos2); 239 | }; 240 | 241 | T* _values; 242 | int* _id; 243 | int* _position; 244 | int _last; 245 | int _size; 246 | }; 247 | 248 | 249 | #endif 250 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/misc.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * \file 3 | * toolbox Linalg 4 | * 5 | * by Julien Mairal 6 | * julien.mairal@inria.fr 7 | * 8 | * File misc.h 9 | * \brief Contains miscellaneous functions */ 10 | 11 | 12 | #ifndef MISC_H 13 | #define MISC_H 14 | 15 | #include 16 | #include 17 | #include 18 | #include "utils.h" 19 | 20 | #if defined(_MSC_VER) || defined(_WIN32) || defined(WINDOWS) 21 | #define isnan _isnan 22 | #define isinf !_finite 23 | #endif 24 | 25 | using namespace std; 26 | 27 | 28 | /// a useful debugging function 29 | static inline void stop(); 30 | /// seed for random number generation 31 | static int seed = 0; 32 | /// first random number generator from Numerical Recipe 33 | template static inline T ran1(); 34 | /// standard random number generator 35 | template static inline T ran1b(); 36 | /// random sampling from the normal distribution 37 | template static inline T normalDistrib(); 38 | /// reorganize a sparse table between indices beg and end, 39 | /// using quicksort 40 | template 41 | static void sort(I* irOut, T* 
prOut,I beg, I end); 42 | template 43 | static void quick_sort(I* irOut, T* prOut,const I beg, const I end, const bool incr); 44 | /// template version of the power function 45 | template 46 | T power(const T x, const T y); 47 | /// template version of the fabs function 48 | template 49 | T abs(const T x); 50 | /// template version of the fabs function 51 | template 52 | T sqr(const T x); 53 | template 54 | T sqr_alt(const T x); 55 | /// template version of the fabs function 56 | template 57 | T sqr(const int x) { 58 | return sqr(static_cast(x)); 59 | } 60 | 61 | template 62 | T exp_alt(const T x); 63 | template 64 | T log_alt(const T x); 65 | 66 | /// a useful debugging function 67 | /*static inline void stop() { 68 | std::cout << "Appuyez sur entrée pour continuer..."; 69 | cin.ignore( numeric_limits::max(), '\n' ); 70 | };*/ 71 | static inline void stop() { 72 | printf("Appuyez sur une touche pour continuer\n"); 73 | getchar(); 74 | } 75 | 76 | /// first random number generator from Numerical Recipe 77 | template static inline T ran1() { 78 | const int IA=16807,IM=2147483647,IQ=127773,IR=2836,NTAB=32; 79 | const int NDIV=(1+(IM-1)/NTAB); 80 | const T EPS=3.0e-16,AM=1.0/IM,RNMX=(1.0-EPS); 81 | static int iy=0; 82 | static int iv[NTAB]; 83 | int j,k; 84 | T temp; 85 | 86 | if (seed <= 0 || !iy) { 87 | if (-seed < 1) seed=1; 88 | else seed = -seed; 89 | for (j=NTAB+7;j>=0;j--) { 90 | k=seed/IQ; 91 | seed=IA*(seed-k*IQ)-IR*k; 92 | if (seed < 0) seed += IM; 93 | if (j < NTAB) iv[j] = seed; 94 | } 95 | iy=iv[0]; 96 | } 97 | k=seed/IQ; 98 | seed=IA*(seed-k*IQ)-IR*k; 99 | if (seed < 0) seed += IM; 100 | j=iy/NDIV; 101 | iy=iv[j]; 102 | iv[j] = seed; 103 | if ((temp=AM*iy) > RNMX) return RNMX; 104 | else return temp; 105 | }; 106 | 107 | /// standard random number generator 108 | template T ran1b() { 109 | return static_cast(rand())/RAND_MAX; 110 | } 111 | 112 | /// random sampling from the normal distribution 113 | template 114 | static inline T normalDistrib() { 115 | 
static bool iset = true; 116 | static T gset; 117 | 118 | T fac,rsq,v1,v2; 119 | if (iset) { 120 | do { 121 | v1 = 2.0*ran1()-1.0; 122 | v2 = 2.0*ran1()-1.0; 123 | rsq = v1*v1+v2*v2; 124 | } while (rsq >= 1.0 || rsq == 0.0); 125 | fac = sqrt(-2.0*log(rsq)/rsq); 126 | gset = v1*fac; 127 | iset = false; 128 | return v2*fac; 129 | } else { 130 | iset = true; 131 | return gset; 132 | } 133 | }; 134 | 135 | /// reorganize a sparse table between indices beg and end, 136 | /// using quicksort 137 | template 138 | static void sort(I* irOut, T* prOut,I beg, I end) { 139 | I i; 140 | if (end <= beg) return; 141 | I pivot=beg; 142 | for (i = beg+1; i<=end; ++i) { 143 | if (irOut[i] < irOut[pivot]) { 144 | if (i == pivot+1) { 145 | I tmp = irOut[i]; 146 | T tmpd = prOut[i]; 147 | irOut[i]=irOut[pivot]; 148 | prOut[i]=prOut[pivot]; 149 | irOut[pivot]=tmp; 150 | prOut[pivot]=tmpd; 151 | } else { 152 | I tmp = irOut[pivot+1]; 153 | T tmpd = prOut[pivot+1]; 154 | irOut[pivot+1]=irOut[pivot]; 155 | prOut[pivot+1]=prOut[pivot]; 156 | irOut[pivot]=irOut[i]; 157 | prOut[pivot]=prOut[i]; 158 | irOut[i]=tmp; 159 | prOut[i]=tmpd; 160 | } 161 | ++pivot; 162 | } 163 | } 164 | sort(irOut,prOut,beg,pivot-1); 165 | sort(irOut,prOut,pivot+1,end); 166 | } 167 | template 168 | static void quick_sort(I* irOut, T* prOut,const I beg, const I end, const bool incr) { 169 | if (end <= beg) return; 170 | I pivot=beg; 171 | if (incr) { 172 | const T val_pivot=prOut[pivot]; 173 | const I key_pivot=irOut[pivot]; 174 | for (I i = beg+1; i<=end; ++i) { 175 | if (prOut[i] < val_pivot) { 176 | prOut[pivot]=prOut[i]; 177 | irOut[pivot]=irOut[i]; 178 | prOut[i]=prOut[++pivot]; 179 | irOut[i]=irOut[pivot]; 180 | prOut[pivot]=val_pivot; 181 | irOut[pivot]=key_pivot; 182 | } 183 | } 184 | } else { 185 | const T val_pivot=prOut[pivot]; 186 | const I key_pivot=irOut[pivot]; 187 | for (I i = beg+1; i<=end; ++i) { 188 | if (prOut[i] > val_pivot) { 189 | prOut[pivot]=prOut[i]; 190 | irOut[pivot]=irOut[i]; 191 | 
prOut[i]=prOut[++pivot]; 192 | irOut[i]=irOut[pivot]; 193 | prOut[pivot]=val_pivot; 194 | irOut[pivot]=key_pivot; 195 | } 196 | } 197 | } 198 | quick_sort(irOut,prOut,beg,pivot-1,incr); 199 | quick_sort(irOut,prOut,pivot+1,end,incr); 200 | } 201 | 202 | template 203 | static void quick_sort(T* prOut,const I beg, const I end, const bool incr) { 204 | if (end <= beg) return; 205 | I pivot=beg; 206 | if (incr) { 207 | const T val_pivot=prOut[pivot]; 208 | for (I i = beg+1; i<=end; ++i) { 209 | if (prOut[i] < val_pivot) { 210 | prOut[pivot]=prOut[i]; 211 | prOut[i]=prOut[++pivot]; 212 | prOut[pivot]=val_pivot; 213 | } 214 | } 215 | } else { 216 | const T val_pivot=prOut[pivot]; 217 | for (I i = beg+1; i<=end; ++i) { 218 | if (prOut[i] > val_pivot) { 219 | prOut[pivot]=prOut[i]; 220 | prOut[i]=prOut[++pivot]; 221 | prOut[pivot]=val_pivot; 222 | } 223 | } 224 | } 225 | quick_sort(prOut,beg,pivot-1,incr); 226 | quick_sort(prOut,pivot+1,end,incr); 227 | } 228 | 229 | 230 | /// template version of the power function 231 | template <> 232 | inline double power(const double x, const double y) { 233 | return pow(x,y); 234 | }; 235 | template <> 236 | inline float power(const float x, const float y) { 237 | return powf(x,y); 238 | }; 239 | 240 | /// template version of the fabs function 241 | template <> 242 | inline double abs(const double x) { 243 | return fabs(x); 244 | }; 245 | template <> 246 | inline float abs(const float x) { 247 | return fabsf(x); 248 | }; 249 | 250 | /// template version of the fabs function 251 | template <> 252 | inline double sqr(const double x) { 253 | return sqrt(x); 254 | }; 255 | template <> 256 | inline float sqr(const float x) { 257 | return sqrtf(x); 258 | }; 259 | 260 | template <> 261 | inline double exp_alt(const double x) { 262 | return exp(x); 263 | }; 264 | template <> 265 | inline float exp_alt(const float x) { 266 | return expf(x); 267 | }; 268 | 269 | template <> 270 | inline double log_alt(const double x) { 271 | return log(x); 272 
| }; 273 | template <> 274 | inline float log_alt(const float x) { 275 | return logf(x); 276 | }; 277 | 278 | 279 | template <> 280 | inline double sqr_alt(const double x) { 281 | return sqrt(x); 282 | }; 283 | template <> 284 | inline float sqr_alt(const float x) { 285 | return sqrtf(x); 286 | }; 287 | 288 | static inline int init_omp(const int numThreads) { 289 | int NUM_THREADS; 290 | #ifdef _OPENMP 291 | NUM_THREADS = (numThreads == -1) ? MIN(MAX_THREADS,omp_get_num_procs()) : numThreads; 292 | omp_set_nested(0); 293 | omp_set_dynamic(0); 294 | omp_set_num_threads(NUM_THREADS); 295 | #else 296 | NUM_THREADS = 1; 297 | #endif 298 | return NUM_THREADS; 299 | } 300 | 301 | template 302 | struct Triplet { 303 | T1 x; 304 | T2 z; 305 | T3 s; 306 | }; 307 | 308 | 309 | #endif 310 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/miso.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 3 | #include 4 | #include 5 | #include "cblas_alt_template.h" 6 | 7 | #include "linalg.h" 8 | 9 | #include "ctypes_utils.h" 10 | #include "svm.h" 11 | 12 | #include 13 | using namespace std; 14 | 15 | #define MAKE_INIT_NAME(x) init ## x (void) 16 | #define MODNAME_INIT(s) MAKE_INIT_NAME(s) 17 | 18 | #define STR_VALUE(arg) #arg 19 | #define FUNCTION_NAME(name) STR_VALUE(name) 20 | 21 | #define MODNAME_STR FUNCTION_NAME(MODNAME) 22 | 23 | /* 24 | Get the include directories within python using 25 | 26 | import distutils.sysconfig 27 | print distutils.sysconfig.get_python_inc() 28 | import numpy as np 29 | print np.get_include() 30 | 31 | gcc -fPIC -shared -g -Wall -O3 \ 32 | -I /usr/include/python2.7 -I /usr/lib64/python2.7/site-packages/numpy/core/include \ 33 | mymath.c -o mymath.so 34 | 35 | */ 36 | 37 | 38 | template 39 | bool all_finite(const T* const x, const int n) { 40 | bool finite(true); 41 | #pragma omp parallel for 
shared(finite) 42 | for (int i=0; i Xmat; 95 | Vector yvec; 96 | if (!npyToVector(y, yvec, "y")) return NULL; 97 | if (!npyToMatrix(X, Xmat, "X")) return NULL; 98 | if (max_iter <= 0) { 99 | max_iter = 1000 * Xmat.n(); 100 | if (verbose) 101 | cout << "Setting max_iter to 1000*n = " << max_iter << endl; 102 | } 103 | 104 | assert_py_obj(all_finite(Xmat.rawX(), Xmat.m()*Xmat.n()), "X contains inf or nan values!"); 105 | assert_py_obj(all_finite(yvec.rawX(), yvec.n()), "y contains inf or nan values!"); 106 | 107 | Vector* iter_count = new Vector(); 108 | Vector* primals = new Vector(); 109 | Vector* losses = new Vector(); 110 | 111 | const int num_classes = yvec.maxval()+1; 112 | Matrix* Wmat = new Matrix(Xmat.m(), num_classes); 113 | 114 | threads = set_omp_threads(threads); 115 | 116 | /* actual computation */ 117 | miso_svm_onevsrest(yvec, Xmat, *Wmat, *iter_count, *primals, *losses, lambda, eps, max_iter, accelerated, reweighted, non_uniform, verbose); 118 | 119 | PyObject* PyW = (PyObject*) wrapMatrix(Wmat); 120 | PyObject* PyIterCount = (PyObject*)wrapVector(iter_count); 121 | PyObject* PyPrimals = (PyObject*)wrapVector(primals); 122 | PyObject* PyLosses = (PyObject*)wrapVector(losses); 123 | 124 | return Py_BuildValue("OOOO", PyW, PyIterCount, PyPrimals, PyLosses); 125 | } 126 | 127 | 128 | template 129 | PyArrayObject* new_array(vector shape) { 130 | const int ndim = shape.size(); 131 | PyArrayObject* result = (PyArrayObject *) PyArray_SimpleNew(ndim, shape.data(), getTypeNumber()); 132 | return result; 133 | } 134 | 135 | 136 | static PyMethodDef method_list[] = { 137 | {"miso_one_vs_rest", (PyCFunction)pymiso_miso_one_vs_rest, METH_VARARGS | METH_KEYWORDS, "Train a linear SVM using the MISO algorithm."}, 138 | {NULL, NULL, 0, NULL} /* Sentinel */ 139 | }; 140 | 141 | static struct PyModuleDef misomodule = { 142 | PyModuleDef_HEAD_INIT, 143 | "_miso", /* name of module */ 144 | NULL, /* module documentation, may be NULL */ 145 | -1, /* size of 
per-interpreter state of the module, 146 | or -1 if the module keeps state in global variables. */ 147 | method_list, 148 | NULL//, NULL, NULL, NULL 149 | }; 150 | 151 | PyMODINIT_FUNC 152 | PyInit__miso(void) { 153 | 154 | PyObject* m; 155 | m = PyModule_Create(&misomodule); 156 | assert_py_obj(m!=NULL, "failed to create miso module object"); 157 | 158 | // initialize wrapper classes 159 | MatrixWrapperType.tp_new = PyType_GenericNew; 160 | VectorWrapperType.tp_new = PyType_GenericNew; 161 | MapWrapperType.tp_new = PyType_GenericNew; 162 | assert_py_obj(PyType_Ready(&MapWrapperType) >= 0, 163 | "Map wrapper type failed to initialize"); 164 | assert_py_obj(PyType_Ready(&MatrixWrapperType) >= 0, 165 | "Matrix wrapper type failed to initialize"); 166 | assert_py_obj(PyType_Ready(&VectorWrapperType) >= 0, 167 | "Vector wrapper type failed to initialize"); 168 | 169 | /* required, otherwise numpy functions do not work */ 170 | import_array(); 171 | 172 | Py_INCREF(&MatrixWrapperType); 173 | Py_INCREF(&MapWrapperType); 174 | Py_INCREF(&VectorWrapperType); 175 | PyModule_AddObject(m, "MyDealloc_Type_Mat", (PyObject *)&MatrixWrapperType); 176 | PyModule_AddObject(m, "MyDealloc_Type_Map", (PyObject *)&MapWrapperType); 177 | PyModule_AddObject(m, "MyDealloc_Type_Vec", (PyObject *)&VectorWrapperType); 178 | 179 | return m; 180 | } 181 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/miso_svm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from miso_svm.quick import quick 4 | from miso_svm.miso import MisoClassifier 5 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/miso_svm/classification.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | """run Miso svm classifier on features""" 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | __author__ = "Daan Wynen, THOTH TEAM INRIA Grenoble Alpes" 10 | __copyright__ = "INRIA" 11 | __credits__ = ["Alberto Bietti", "Dexiong Chen", "Ghislain Durif", 12 | "Julien Mairal", "Daan Wynen"] 13 | __license__ = "GPL" 14 | __version__ = "1.0" 15 | __maintainer__ = "Ghislain Durif" 16 | __email__ = "ghislain.durif@inria.fr" 17 | __status__ = "Development" 18 | __date__ = "2017" 19 | 20 | 21 | import numpy as np 22 | 23 | import logging 24 | 25 | from miso_svm.miso import MisoClassifier 26 | 27 | from sklearn import model_selection, metrics 28 | from sklearn.metrics import confusion_matrix 29 | from sklearn.model_selection import StratifiedShuffleSplit 30 | from datetime import datetime 31 | 32 | EPS_NORM = 0.00001 33 | 34 | def run(features_tr, features_te, 35 | labels_tr, labels_te, 36 | out_file=None, threads=0, start_exp=-15, end_exp=15, 37 | add_iter=3, 38 | confusion_matrix=True, do_cv=False, 39 | verbose=True, seed=None): 40 | """run miso svm classification on features and labels 41 | 42 | Args: 43 | features_tr (np.array): Matrix of features (observations in rows) 44 | in training set. 45 | features_te (np.array): Matrix of features (observations in rows) 46 | in test set. 47 | labels_tr (np.array): Matrix of labels (observations in rows) 48 | in training set. 49 | labels_te (np.array): Matrix of labels (observations in rows) 50 | in test set. 51 | out_file (string): File to store final classifier in. Should end 52 | in '.npz' 53 | threads (int): Number of OpenMP threads to use, default is 0. 54 | start_exp (int): Parameter search starting point, default is -15. 55 | end_exp (int): Parameter search end point, default is 15. 56 | add_iter (int): How many iterations to continue before 57 | accepting current best iteration, default is 3. 
58 | confusion_matrix (bool): should confusion matrix be ploted or not. 59 | do_cv (bool): If False, do not do cross validation. Instead, use 60 | test accuracy to select model, default is False. 61 | verbose (int): 0 or 1, indicates verbosity in C++ code, default is 0. 62 | seed (int): Random seed for the SVM, default is None. 63 | 64 | """ 65 | 66 | logging.info('Training feature map sparsity: {:5.2f}'.format(100*((features_tr==0).sum()/features_tr.size))) 67 | labels_tr = labels_tr.astype(np.float32).squeeze() 68 | logging.info('Test feature map sparsity: {:5.2f}'.format(100*((features_te==0).sum()/features_te.size))) 69 | labels_te = labels_te.astype(np.float32).squeeze() 70 | 71 | logging.info("Train shape: {}".format(features_tr.shape)) 72 | logging.info("Test shape: {}".format(features_te.shape)) 73 | 74 | 75 | logging.info('doing normalization') 76 | features_tr -= features_tr.mean(axis=1, keepdims=True) 77 | features_te -= features_te.mean(axis=1, keepdims=True) 78 | features_tr /= np.maximum(EPS_NORM, np.linalg.norm(features_tr, axis=1, keepdims=True)) 79 | features_te /= np.maximum(EPS_NORM, np.linalg.norm(features_te, axis=1, keepdims=True)) 80 | 81 | logging.info('shuffling training data') 82 | shuffle_in_unison_scary(features_tr, labels_tr) 83 | 84 | start_time = datetime.now() 85 | 86 | logging.info('\n\n') 87 | logging.info('==================== START CLASSIFICATION ===================\n') 88 | logging.info(' Starting time: {0}\n'.format(start_time)) 89 | logging.info('=============================================================\n') 90 | 91 | 92 | clf = cv_C_only(features_te, features_tr, labels_te, labels_tr, 93 | start_exp, end_exp, seed, 94 | add_iter, do_cv, verbose, threads) 95 | 96 | predictions_te = clf.predict(features_te) 97 | acc_te = clf.score(features_te, labels_te) 98 | logging.info("\n\n\tBest Acc_test: {:6.2f}%\n".format(100 * acc_te)) 99 | 100 | end_time = datetime.now() 101 | 
logging.info('============================================================\n') 102 | logging.info(' End time: {0}\n'.format(end_time)) 103 | logging.info(' Time taken: {0}\n'.format(end_time - start_time)) 104 | logging.info('============================================================\n') 105 | logging.info('saving classifier to {}'.format(out_file)) 106 | print("{:.2%}".format(acc_te)) # for scripts that expect the score in the last line 107 | 108 | if(out_file is not None): 109 | np.savez(out_file, clf=clf) 110 | 111 | if confusion_matrix: 112 | try: 113 | cm = confusion_matrix(labels_te, predictions_te) 114 | plot_confusion_matrix(cm, list(set(labels_tr))) 115 | plt.show(block=False) 116 | confmat_img_fname = 'confusion_matrix.png' 117 | plt.savefig(confmat_img_fname, bbox_inches='tight') 118 | except: 119 | pass 120 | 121 | 122 | # shuffles two arrays along the first axis, with the same permutation 123 | # taken from http://stackoverflow.com/q/4601373/393885 124 | def shuffle_in_unison_scary(a, b): 125 | rng_state = np.random.get_state() 126 | np.random.shuffle(a) 127 | np.random.set_state(rng_state) 128 | np.random.shuffle(b) 129 | 130 | 131 | def cv_C_only(features_te, features_tr, labels_te, labels_tr, start_exp, end_exp, seed, 132 | add_iter=3, do_cv=True, verbose=0, threads=0, **kwargs): 133 | # manual grid search {{{ 134 | 135 | best_score = -1 136 | best_acc_te = -1 137 | best_c = 0 138 | CV_FOLDS = 5 if do_cv else 1 139 | CV_TEST_PROPORTION = 0.2 140 | N = int(labels_tr.shape[0]*(1-CV_TEST_PROPORTION)) if do_cv else labels_tr.shape[0] 141 | np.random.seed(seed) 142 | splits = StratifiedShuffleSplit(CV_FOLDS, test_size=CV_TEST_PROPORTION)#, random_state=1) 143 | Ctab = np.arange(start_exp, end_exp) 144 | 145 | 146 | iter_since_best = 0 147 | start_time = datetime.now() 148 | last_time = start_time 149 | for exp in Ctab: 150 | Lambda = 1 / (2 * N * 2.0**exp) 151 | 152 | clf2 = MisoClassifier(Lambda=Lambda, 153 | max_iterations=1000*N, 154 | 
verbose=verbose, 155 | threads=threads, 156 | seed=seed) 157 | clf2.fit(features_tr, labels_tr) 158 | acc_tr = clf2.score(features_tr, labels_tr) 159 | acc_te = clf2.score(features_te, labels_te) 160 | 161 | if do_cv: 162 | clf = MisoClassifier(Lambda=Lambda, 163 | max_iterations=1000*N, 164 | verbose=verbose, 165 | threads=threads, 166 | seed=seed) 167 | cv_scores = model_selection.cross_val_score(clf, features_tr, labels_tr, cv=splits.split(features_tr, labels_tr), n_jobs=1) 168 | cv_i = np.mean(cv_scores) 169 | cv_i_std = np.std(cv_scores) 170 | now_time = datetime.now() 171 | logging.info("{:%H:%M:%S}\t{}\tLambda= {:<9.4e} cv={:6.2f}% (std={:6.2f})" 172 | .format(now_time, str(now_time-last_time).split('.')[0], Lambda, cv_i * 100, cv_i_std * 100)) 173 | else: 174 | now_time = datetime.now() 175 | logging.info("{:%H:%M:%S}\t{}\tLambda= {:<9.4e}\t" 176 | .format(now_time, str(now_time-last_time).split('.')[0], Lambda)) 177 | last_time = now_time 178 | 179 | logging.info("\tAcc_train: {:6.2f}% |".format(100 * acc_tr)) 180 | logging.info("\tAcc_test: {:6.2f}% |".format(100 * acc_te)) 181 | 182 | if do_cv: 183 | if cv_i > best_score: 184 | best_score = cv_i 185 | best_c = Lambda 186 | best_clf = clf2 187 | logging.info(' *') 188 | else: 189 | logging.info(' ') 190 | 191 | if acc_te > best_acc_te: 192 | best_acc_te = acc_te 193 | iter_since_best = 0 194 | # if we're not doing CV, use acc_te instead of CV score 195 | if not do_cv: 196 | best_score = acc_te 197 | best_c = Lambda 198 | best_clf = clf2 199 | logging.info(' +') 200 | else: 201 | iter_since_best += 1 202 | if iter_since_best >= add_iter: 203 | break 204 | logging.info(' ') 205 | 206 | # manual grid search }}} 207 | 208 | return best_clf 209 | 210 | 211 | def plot_confusion_matrix(cm, labels, title='Confusion matrix', cmap=plt.cm.Blues): 212 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 213 | plt.title(title) 214 | plt.colorbar() 215 | tick_marks = np.arange(len(labels)) 216 | plt.xticks(tick_marks, 
def heal_nans(arr):
    """Replace NaNs in a 2-D array, in place, by the mean of their column.

    Columns that are entirely NaN are set to 0. If the array contains no
    NaNs at all, the function only logs and returns without touching data.

    Args:
        arr (np.ndarray): 2-D float array; modified in place.
    """
    nan_mask = np.isnan(arr)
    if nan_mask.any():
        logging.info('replacing {} NaN values in array.'.format(nan_mask.sum()))
    else:
        logging.info('no NaNs in array')
        return

    # BUGFIX: `range` instead of the Python-2-only `xrange` — this package
    # declares Python 3 support only, where `xrange` raises NameError.
    for j in range(arr.shape[1]):
        nans = nan_mask[:, j]
        if not nans.any():
            continue
        if nans.all():
            # No finite value to average over: fall back to zero.
            arr[:, j] = 0
            continue
        arr[:, j][nans] = np.mean(arr[:, j][np.logical_not(nans)])
ClassifierMixin): 24 | 25 | def __init__(self, 26 | Lambda=0.01, 27 | eps=1e-4, 28 | max_iterations=None, 29 | accelerated=True, 30 | threads=-1, 31 | verbose=0, 32 | seed=None): 33 | self.Lambda = Lambda 34 | self.eps = eps 35 | self.max_iterations = max_iterations 36 | self.accelerated = accelerated 37 | self.threads = threads 38 | self.verbose = verbose 39 | if seed is not None: 40 | self.seed = seed 41 | else: 42 | # set the seed, so that we can retrieve it later if needed 43 | self.seed = np.random.randint(np.iinfo(np.int32).min, np.iinfo(np.int32).max) 44 | 45 | def fit(self, X, y): 46 | assert X.shape[0] == y.shape[0] 47 | assert len(X.shape) == 2 48 | assert len(y.shape) == 1 49 | assert X.dtype == y.dtype 50 | assert X.dtype == np.float32 # TODO: might want to drop that later 51 | 52 | if self.max_iterations is None: 53 | self.max_iterations = 1000 * X.shape[0] 54 | self.W, self.iter_count, self.primals, self.losses =\ 55 | cmiso.miso_one_vs_rest(X, y, 56 | self.Lambda, self.max_iterations, 57 | eps=self.eps, 58 | accelerated=self.accelerated, 59 | threads=self.threads, 60 | verbose=self.verbose, 61 | seed=self.seed) 62 | self.W = self.W.astype('float32') 63 | self.iter_count = self.iter_count.astype('intc') 64 | self.primals = self.primals.astype('float32') 65 | self.losses = self.losses.astype('float32') 66 | 67 | def predict(self, X): 68 | activations = self.W.dot(X.T) 69 | predictions = np.argmax(activations, axis=0) 70 | return predictions 71 | 72 | 73 | def load_dataset(): 74 | ds = sklearn.datasets.load_digits() 75 | X = ds.data.astype('float32') 76 | X -= X.mean(axis=1, keepdims=True) 77 | X /= np.linalg.norm(X, axis=1, keepdims=True) 78 | Y = ds.target.astype('float32') 79 | return X, Y 80 | 81 | if __name__=='__main__': 82 | import sklearn.datasets 83 | from sklearn import svm, model_selection, metrics 84 | from sklearn.model_selection import StratifiedShuffleSplit 85 | from datetime import datetime 86 | cv_folds = 20 87 | 88 | X, Y = 
def quick(features_tr, features_te,
          labels_tr, labels_te,
          eps=1e-4, threads=0, start_exp=-15, end_exp=15,
          add_iter=3, accelerated=True,
          verbose=True, seed=None):
    """Grid-search the MISO regularization and return the best test accuracy.

    Trains a one-vs-rest MISO SVM for each Lambda = 1 / (2 * N * 2**exp),
    exp in [start_exp, end_exp), after centering and L2-normalizing the
    feature rows in place.

    Args:
        features_tr, features_te (np.ndarray): (n_samples, n_features) feature
            maps; modified in place by the normalization.
        labels_tr, labels_te (np.ndarray): label vectors, cast to float32.
        eps (float): solver tolerance.
        threads (int): thread count for the C++ solver (0 = automatic).
        start_exp, end_exp (int): exponent range of the Lambda grid.
        add_iter (int): early-stopping patience — stop once the best accuracy
            lags more than ``add_iter`` grid steps behind (after 10 steps).
        accelerated (bool): use the accelerated MISO variant.
        verbose (bool): verbosity flag passed to the C++ solver.
        seed (int): RNG seed; drawn at random once if None.

    Returns:
        (float, float): best test accuracy and the Lambda that achieved it.
    """
    labels_tr = labels_tr.astype(np.float32).squeeze()
    labels_te = labels_te.astype(np.float32).squeeze()

    logging.info('doing normalization')
    features_tr -= features_tr.mean(axis=1, keepdims=True)
    features_te -= features_te.mean(axis=1, keepdims=True)
    features_tr /= np.maximum(EPS_NORM, np.linalg.norm(features_tr, axis=1, keepdims=True))
    features_te /= np.maximum(EPS_NORM, np.linalg.norm(features_te, axis=1, keepdims=True))

    Ctab = np.arange(start_exp, end_exp)
    N = labels_tr.shape[0]
    max_iterations = 1000*N

    Lambdas = []
    accuracys = []

    best_acc = 0
    best_acc_i = -1

    for i, exp in enumerate(Ctab):
        if seed is None:
            # Drawn once; later iterations reuse the same seed for comparability.
            seed = np.random.randint(np.iinfo(np.int32).min, np.iinfo(np.int32).max)

        Lambda = 1 / (2 * N * 2.0**exp)
        start = timer()
        W, iter_count, primals, losses =\
            cmiso.miso_one_vs_rest(features_tr, labels_tr,
                                   Lambda,
                                   max_iterations,
                                   eps=eps,
                                   accelerated=accelerated,
                                   threads=threads,
                                   verbose=verbose,
                                   seed=seed)
        end = timer()

        activations = W.dot(features_te.T)
        predictions = np.argmax(activations, axis=0)

        Lambdas.append(Lambda)
        accuracy = 1 - (np.count_nonzero(predictions - labels_te) / labels_te.shape[0])
        accuracys.append(accuracy)

        logging.info("Lambda = {} / acc = {:.4%} / training in {:.4f} sec"
                     .format(Lambda, accuracy, end-start))

        if accuracy > best_acc:
            best_acc = accuracy
            best_acc_i = i
        # BUGFIX: the patience was hard-coded as `i-4`, silently ignoring the
        # `add_iter` parameter; `add_iter + 1` keeps the default (3+1 == 4)
        # behavior while honoring the argument.
        if i >= 10 and best_acc_i <= i - (add_iter + 1):
            break

    print("\n### Best accuracy = {:.4%} for Lambda = {}\n"
          .format(accuracys[best_acc_i], Lambdas[best_acc_i]))

    return accuracys[best_acc_i], Lambdas[best_acc_i]
level=logging.DEBUG) 97 | 98 | def load_dataset(): 99 | ds = sklearn.datasets.load_digits() 100 | X = ds.data.astype('float32') 101 | X -= X.mean(axis=1, keepdims=True) 102 | X /= np.linalg.norm(X, axis=1, keepdims=True) 103 | Y = ds.target.astype('float32') 104 | return X, Y 105 | 106 | X, Y = load_dataset() 107 | N = Y.size 108 | 109 | mask = np.random.choice([False, True], N, p=[0.8, 0.2]) 110 | 111 | Xtr = X[np.logical_not(mask),] 112 | Xte = X[mask,] 113 | 114 | Ytr = Y[np.logical_not(mask),] 115 | Yte = Y[mask,] 116 | 117 | start = timer() 118 | acc, lamb = quick(Xtr.reshape(Xtr.shape[0], -1), 119 | Xte.reshape(Xte.shape[0], -1), 120 | Ytr, Yte, 121 | eps=1e-4, threads=0, start_exp=-15, end_exp=15, 122 | add_iter=3, accelerated=True, 123 | verbose=False, seed=None) 124 | end = timer() 125 | logging.info("Training MISOS SVM in {:.4f} sec" 126 | .format(end - start)) 127 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from distutils.core import setup, Extension 4 | from distutils.sysconfig import get_python_inc 5 | import distutils.util 6 | import numpy 7 | from numpy.distutils.system_info import blas_info 8 | 9 | # includes numpy : package numpy.distutils , numpy.get_include() 10 | # python setup.py build 11 | # python setup.py install --prefix=dist, 12 | incs = ['.'] + [numpy.get_include(), get_python_inc()] + blas_info().get_include_dirs() 13 | 14 | osname = distutils.util.get_platform() 15 | # cc_flags = ['-fPIC', '-fopenmp', '-Wunused-variable', '-m64'] 16 | cc_flags = ['-fPIC', '-Wall', '-fopenmp', '-std=c++11', '-lm', '-Wfatal-errors'] 17 | for _ in numpy.__config__.blas_opt_info.get("extra_compile_args", []): 18 | if _ not in cc_flags: 19 | cc_flags.append(_) 20 | for _ in numpy.__config__.lapack_opt_info.get("extra_compile_args", []): 21 | if _ not in cc_flags: 22 | 
cc_flags.append(_) 23 | 24 | link_flags = ['-fopenmp'] 25 | for _ in numpy.__config__.blas_opt_info.get("extra_link_args", []): 26 | if _ not in link_flags: 27 | link_flags.append(_) 28 | for _ in numpy.__config__.lapack_opt_info.get("extra_link_args", []): 29 | if _ not in link_flags: 30 | link_flags.append(_) 31 | 32 | libs = ['stdc++', 'mkl_rt', 'iomp5'] 33 | libdirs = numpy.distutils.system_info.blas_info().get_lib_dirs() 34 | 35 | miso = Extension( 36 | 'miso_svm._miso', 37 | sources = ['miso.cpp'], 38 | include_dirs = incs, 39 | extra_compile_args = ['-DINT_64BITS', '-DAXPBY', '-DHAVE_MKL'] + cc_flags, 40 | library_dirs = libdirs, 41 | libraries = libs, 42 | extra_link_args = link_flags, 43 | language = 'c++', 44 | depends = ['cblas_alt_template.h', 'cblas_defvar.h', 45 | 'common.h', 'ctypes_utils.h', 'linalg.h', 46 | 'list.h', 'misc.h', 'svm.h', 'utils.h'], 47 | ) 48 | 49 | 50 | setup ( name = 'miso_svm', 51 | version= '1.0', 52 | description='Python interface for MISO SVM classifier', 53 | author = 'Ghislain Durif', 54 | author_email = 'ckn.dev@inria.fr', 55 | url = None, 56 | license='GPLv3', 57 | ext_modules = [miso,], 58 | packages = ['miso_svm'], 59 | 60 | classifiers=[ 61 | # How mature is this project? Common values are 62 | # 3 - Alpha 63 | # 4 - Beta 64 | # 5 - Production/Stable 65 | 'Development Status :: 4 - Beta', 66 | 67 | # Indicate who your project is intended for 68 | 'Intended Audience :: Science/Research', 69 | 'Topic :: Scientific/Engineering :: Mathematics', 70 | 71 | # Pick your license as you wish (should match "license" above) 72 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 73 | 74 | # Specify the Python versions you support here. In particular, ensure 75 | # that you indicate whether you support Python 2, Python 3 or both. 
76 | 'Programming Language :: Python :: 3', 77 | 'Programming Language :: Python :: 3.2', 78 | 'Programming Language :: Python :: 3.3', 79 | 'Programming Language :: Python :: 3.4', 80 | 'Programming Language :: Python :: 3.5', 81 | 'Programming Language :: Python :: 3.6', 82 | ], 83 | 84 | # keywords='optimization', 85 | # install_requires=['numpy', 'scipy', 'scikit-learn'], 86 | ) 87 | -------------------------------------------------------------------------------- /third-party/miso_svm-1.0/svm.h: -------------------------------------------------------------------------------- 1 | #ifndef SVM_H 2 | #define SVM_H 3 | 4 | #include "linalg.h" 5 | 6 | template 7 | T normsq(const Vector& x, const Vector& y) { 8 | return x.nrm2sq()+y.nrm2sq()-2*y.dot(x); 9 | } 10 | 11 | template 12 | void miso_svm_multiclass_accelerated_aux(const Vector& y, const Matrix& X, Matrix& W, Matrix& alpha, Vector& C, const T lambda, const T kappa, const int max_iter, const int loss = 1) { 13 | /// assumes the right relation holds for W 14 | const int n = X.n(); 15 | const int nclasses = W.n(); 16 | Vector xi; 17 | Vector alphai; 18 | Vector diff_alpha; 19 | Vector beta; 20 | for (int ii=0; ii 52 | void miso_svm_multiclass_accelerated(const Vector& y, const Matrix& X, Matrix& W, const T lambda, const T eps, const int epochs, const T L, const int loss = 1) { 53 | Timer time1; 54 | Timer time2; 55 | const int n = X.n(); 56 | const int m = X.m(); 57 | const int nclasses = W.n(); 58 | Vector C(n); 59 | Matrix alpha(nclasses,n); 60 | Matrix Z(m,nclasses); 61 | Matrix diffZ(m,nclasses); 62 | Matrix diffW(m,nclasses); 63 | Z.setZeros(); 64 | diffZ.setZeros(); 65 | diffW.setZeros(); 66 | alpha.setZeros(); 67 | C.setZeros(); 68 | W.setZeros(); 69 | const T kappa = L/n - lambda; 70 | std::cout << "kappa: " << kappa << std::endl; 71 | const T q = lambda/(lambda+kappa); 72 | const T qp = T(0.9)*sqrt(q); 73 | const T alphak = sqrt(q); 74 | const T betak=(T(1.0)-alphak)/(T(1.0)+alphak); 75 | 76 | 
time2.start(); 77 | for (int ii=0; ii 0 && ii % (10) == 0) { 81 | Matrix tmp2; 82 | W.mult(X,tmp2,true,false); 83 | T los = 0; 84 | if (loss == 1) { 85 | for (int jj=0; jj 0) 90 | los += rjk*rjk; 91 | } 92 | } 93 | } 94 | los /= 2; 95 | } else if (loss==2) { 96 | Vector beta; 97 | for (int jj=0; jj 140 | void miso_svm_multiclass_aux(const Vector& y, const Matrix& X, Matrix& W, const T lambda, const T eps, const int epochs, const int loss = 1) { 141 | const int n = X.n(); 142 | const int nclasses = W.n(); 143 | Vector C(n); 144 | Matrix alpha(nclasses,n); 145 | alpha.setZeros(); 146 | C.setZeros(); 147 | W.setZeros(); 148 | 149 | Vector xi; 150 | Vector alphai; 151 | Vector diff_alpha; 152 | Vector beta; 153 | const int max_iter=n*epochs; 154 | for (int ii=0; ii 0 && ii % (10*n) == 0) { 157 | X.mult(alpha,W,false,true,-T(1.0)/(lambda*n)); // to improve numerical stability 158 | Matrix tmp2; 159 | W.mult(X,tmp2,true,false); 160 | T los = 0; 161 | if (loss == 1) { 162 | for (int jj=0; jj 0) 167 | los += rjk*rjk; 168 | } 169 | } 170 | } 171 | los /= 2; 172 | } else if (loss==2) { 173 | Vector beta; 174 | for (int jj=0; jj 220 | void miso_svm_multiclass(const Vector& y, const Matrix& X, Matrix& W, const T lambda, const T eps, const int max_iter, const bool accelerated = false, const int loss = 1) { 221 | const int n = y.n(); 222 | const int p = X.m(); 223 | const int nclasses=y.maxval()+1; 224 | W.resize(p,nclasses); 225 | Vector normX; 226 | X.norm_2sq_cols(normX); 227 | const T R = normX.mean(); 228 | //const T L = 4*normX.mean(); 229 | const T L = 2*(1+sqrt(nclasses))*normX.mean(); 230 | std::cout << "Value of R: " << R << std::endl; 231 | std::cout << "Value of L/mu: " << L/lambda << std::endl; 232 | std::cout << "Problem size: p x n: " << p << " " << n << std::endl; 233 | std::cout << "*********************" << std::endl; 234 | std::cout << "Processes Lambda " << lambda << std::endl; 235 | std::cout << "Eps " << eps << std::endl; 236 | std::cout << "Loss " << loss 
<< std::endl; 237 | if (accelerated && n < L/lambda) { 238 | std::cout << "Accelerated algorithm" << std::endl; 239 | miso_svm_multiclass_accelerated(y,X,W,lambda,eps,max_iter,L,loss); 240 | } else { 241 | miso_svm_multiclass_aux(y,X,W,lambda,eps,max_iter,loss); 242 | } 243 | } 244 | 245 | template 246 | void mult(const Matrix& X, const Vector& ind, const Vector& alpha, Vector& w, const T a, const T b = 0.0) { 247 | w.resize(X.m()); 248 | w.scal(b); 249 | Vector col; 250 | for (int ii=0; ii 257 | void multTrans(const Matrix& X, const Vector& ind, const Vector& w, Vector& tmp) { 258 | tmp.resize(ind.n()); 259 | tmp.setZeros(); 260 | Vector col; 261 | for (int ii=0; ii 268 | void miso_svm_aux(const Vector& y, const Matrix& X, const Vector& indices, Vector& w, const T R, const T lambda, const T eps, const int max_iter, int& num_it,T& primal,T& loss, const int verbose=0) { 269 | const int n = y.n(); 270 | w.setZeros(); 271 | const T L = R+lambda; 272 | const T deltaT = n*MIN(T(1.0)/n,lambda/(2*L)); 273 | Vector xi; 274 | Vector alpha(n); 275 | alpha.setZeros(); 276 | Vector C(n); 277 | C.setZeros(); 278 | Vector tmp; 279 | T dualold=0; 280 | T dual=0; 281 | num_it=0; 282 | for (int ii = 0; ii 0 && (ii % (10*n)) == 0) { 284 | num_it+=10; 285 | if (indices.n() > 0) { 286 | mult(X,indices,alpha,w,T(1.0)/n); 287 | multTrans(X,indices,w,tmp); 288 | } else { 289 | X.mult(alpha,w,T(1.0)/n); // to improve numerical stability 290 | X.multTrans(w,tmp); 291 | } 292 | primal=0; 293 | for (int kk=0; kk 0) { 316 | X.refCol(indices[ind],xi); 317 | } else { 318 | X.refCol(ind,xi); 319 | } 320 | const T beta = yi*xi.dot(w); 321 | const T gamma=MAX(T(1.0)-beta,0); 322 | T newalpha; 323 | C[ind]=(T(1.0)-deltaT)*C[ind]+deltaT*(T(0.5)*gamma*gamma+beta*gamma); 324 | newalpha=(T(1.0)-deltaT)*(alpha[ind])+deltaT*yi*gamma/lambda; 325 | w.add(xi,(newalpha-alpha[ind])/n); 326 | alpha[ind]=newalpha; 327 | } 328 | }; 329 | 330 | template 331 | void miso_svm_onevsrest(const Vector& yAll, const 
Matrix& X, 332 | Matrix& W, Vector& info, Vector& primals, Vector& losses, 333 | const T lambda, const T eps, const int max_iter, 334 | const bool accelerated = false, const int reweighted = 0, const bool non_uniform=true, const int verbose=0) { 335 | const int n = yAll.n(); 336 | const int p = X.m(); 337 | const int nclasses=yAll.maxval()+1; 338 | 339 | info.resize(nclasses); 340 | primals.resize(nclasses); 341 | losses.resize(nclasses); 342 | W.resize(p,nclasses); 343 | 344 | Vector normX; 345 | X.norm_2sq_cols(normX); 346 | const T R = normX.maxval(); 347 | if (verbose) { 348 | if (reweighted) 349 | std::cout << "Reweighted algorithm" << std::endl; 350 | if (non_uniform) 351 | std::cout << "Non-uniform sampling" << std::endl; 352 | std::cout << "Value of R: " << R << std::endl; 353 | 354 | std::cout << "Problem size: p x n: " << p << " " << n << std::endl; 355 | std::cout << "*********************" << std::endl; 356 | std::cout << "Processes Lambda " << lambda << std::endl; 357 | std::cout << "Eps " << eps << std::endl; 358 | } 359 | int jj; 360 | #pragma omp parallel for private(jj) 361 | for (jj = 0; jj w; 363 | W.refCol(jj,w); 364 | int num_it; 365 | T primal; 366 | T loss; 367 | if (non_uniform) { 368 | Vector y(n); 369 | Vector ind; 370 | for (int ii = 0; ii((yAll[ii] - T(jj))) < T(0.1) ? T(1.0) : -T(1.0); 372 | if (accelerated && T(2.0)*normX.mean()/n > lambda) { 373 | nonu_accelerated_miso_svm_aux(y,X,w,normX,lambda,eps,max_iter,num_it,primal,loss, verbose); 374 | } else { 375 | nonu_miso_svm_aux(y,X,w,normX,lambda,eps,max_iter,num_it,primal,loss, verbose); 376 | } 377 | } else { 378 | if (reweighted) { 379 | int npos=0; 380 | for (int ii = 0; ii((yAll[ii] - T(jj))) < T(0.1)) npos++; 382 | const int beta= reweighted==1 ? 
nclasses-2 : static_cast(floor(sqrt(nclasses-2))); 383 | int nn = n + npos*(beta); 384 | Vector ind(nn); 385 | Vector y(nn); 386 | int counter=0; 387 | for (int ii = 0; ii((yAll[ii] - T(jj))) < T(0.1)) { 389 | for (int kk=0; kk lambda) { 399 | accelerated_miso_svm_aux(y,X,ind,w,R,lambda,eps,max_iter,num_it,primal,loss, verbose); 400 | } else { 401 | miso_svm_aux(y,X,ind,w,R,lambda,eps,max_iter,num_it,primal,loss, verbose); 402 | } 403 | } else { 404 | Vector y(n); 405 | Vector ind; 406 | for (int ii = 0; ii((yAll[ii] - T(jj))) < T(0.1) ? T(1.0) : -T(1.0); 408 | if (accelerated && T(2.0)*R/n > lambda) { 409 | accelerated_miso_svm_aux(y,X,ind,w,R,lambda,eps,max_iter,num_it,primal,loss, verbose); 410 | } else { 411 | miso_svm_aux(y,X,ind,w,R,lambda,eps,max_iter,num_it,primal,loss, verbose); 412 | } 413 | } 414 | } 415 | info[jj]=num_it; 416 | primals[jj]=primal; 417 | losses[jj]=loss; 418 | } 419 | if (verbose) { 420 | std::cout << "primal: " << primals.sum()/nclasses << std::endl; 421 | std::cout << "loss: " << losses.sum()/nclasses << std::endl; 422 | } 423 | } 424 | 425 | // template 426 | // void miso_svm(const Vector& y, const Matrix& X, Matrix& W, const Vector& tablambda, const T eps, const int max_iter) { 427 | // const int n = y.n(); 428 | // const int p = X.m(); 429 | // const int nlambda=tablambda.n(); 430 | // W.resize(p,nlambda); 431 | // W.setZeros(); 432 | // Vector normX; 433 | // X.norm_2sq_cols(normX); 434 | // const T R = normX.fmax(); 435 | // 436 | // std::cout << "Problem size: p x n: " << p << " " << n << std::endl; 437 | // for (int jj = 0; jj w; 442 | // W.refCol(jj,w); 443 | // miso_svm_aux(y,X,w,R,lambda,eps,max_iter,loss); 444 | // } 445 | // } 446 | 447 | template 448 | void accelerated_miso_svm_aux(const Vector& y, const Matrix& X, const Vector& indices, Vector& w, const T R, const T lambda, const T eps, const int max_iter, int& num_it, T& primal, T& loss, const int verbose) { 449 | const int n = y.n(); 450 | const int p = X.m(); 451 | 
w.setZeros(); 452 | Vector alpha(n); 453 | alpha.setZeros(); 454 | Vector C(n); 455 | C.setZeros(); 456 | Vector z(p); 457 | z.setZeros(); 458 | Vector zold(p); 459 | zold.setZeros(); 460 | Vector wold(p); 461 | wold.setZeros(); 462 | Vector xtw(n); 463 | xtw.setZeros(); 464 | Vector bestw(p); 465 | bestw.copy(w); 466 | T bestprimal=INFINITY; 467 | T bestloss=INFINITY; 468 | const T kappa = (T(2.0)*R/n-lambda); 469 | const T q = lambda/(lambda+kappa); 470 | const T qp = T(0.9)*sqrt(q); 471 | const T alphak = sqrt(q); 472 | const T betak=(T(1.0)-alphak)/(T(1.0)+alphak); 473 | T epsk=T(1.0); 474 | T gapk=T(1.0); 475 | T gap=T(1.0); 476 | int total_iters=0; 477 | int counter = 1; 478 | T gapold=T(1.0); 479 | for (int ii=0; ii= counter) { 484 | ++counter; 485 | w.copy(z); 486 | w.scal(kappa/(kappa+lambda)); 487 | if (indices.n() > 0) { 488 | mult(X,indices,alpha,w,lambda/(n*(kappa+lambda)),T(1.0)); 489 | } else { 490 | X.mult(alpha,w,lambda/(n*(kappa+lambda)),T(1.0)); 491 | } 492 | } else { 493 | w.add(z,kappa/(kappa+lambda)); 494 | w.add(zold,-kappa/(kappa+lambda)); 495 | } 496 | const T diffNorm = normsq(z,zold); 497 | gapk=(n*(gapk + T(0.5)*(kappa*kappa/(lambda+kappa))*diffNorm)); 498 | int num_iters; 499 | accelerated_miso_svm_aux2(y, X, indices, w, alpha, C, loss, gapk, num_iters, z, kappa, R, lambda, epsk); 500 | total_iters += num_iters; 501 | primal = loss+T(0.5)*lambda*w.nrm2sq(); 502 | if (primal < bestprimal) { 503 | bestw.copy(w); 504 | bestprimal=primal; 505 | bestloss=loss; 506 | } 507 | Vector ws; 508 | ws.copy(w); 509 | ws.scal((kappa+lambda)/lambda); 510 | ws.add(z,-kappa/lambda); 511 | const T dual=C.mean() - T(0.5)*lambda*ws.nrm2sq(); 512 | gap=primal-dual; 513 | if (gap <= eps || total_iters >= max_iter) { 514 | if (verbose) { 515 | #pragma omp critical 516 | { 517 | std::cout << "Iteration " << total_iters << ", inner it: " << ii << ", loss: " << loss << ", primal: " << primal << ", dual: " << dual << ", gap: " << (primal-dual) << std::endl; 518 | 
} 519 | } 520 | break; 521 | } 522 | gapold=gap; 523 | zold.copy(z); 524 | z.copy(w); 525 | z.scal(T(1.0)+betak); 526 | z.add(wold,-betak); 527 | } 528 | w.copy(bestw); 529 | num_it=total_iters/n; 530 | primal=bestprimal; 531 | loss=bestloss; 532 | }; 533 | 534 | 535 | // need to restart ! 536 | template 537 | void accelerated_miso_svm_aux2(const Vector& y, const Matrix& X, const Vector& indices, Vector& w, Vector& alpha, Vector& C, T& loss,T& gap, int& num_iters, const Vector& z, const T kappa, const T R, const T lambda, const T eps) { 538 | const int n = y.n(); 539 | const long long max_iter = MAX(static_cast(floor(log(double(eps)/double(gap))/log(double(1.0)-double(1.0)/n))),n); 540 | Vector tmp; 541 | Vector xi; 542 | for (int ii = 0; ii 0 && (ii % (n)) == 0) { 544 | loss=0; 545 | if (indices.n() > 0) { 546 | multTrans(X,indices,w,tmp); 547 | } else { 548 | X.multTrans(w,tmp); 549 | } 550 | for (int kk=0; kk 0) { 567 | X.refCol(indices[ind],xi); 568 | } else { 569 | X.refCol(ind,xi); 570 | } 571 | const T beta = yi*xi.dot(w); 572 | const T gamma=MAX(T(1.0)-beta,0); 573 | T newalpha; 574 | C[ind]=T(0.5)*gamma*gamma+beta*gamma; 575 | newalpha=yi*gamma/lambda; 576 | if (newalpha != alpha[ind]) 577 | w.add(xi,lambda*(newalpha-alpha[ind])/(n*(lambda+kappa))); 578 | alpha[ind]=newalpha; 579 | if (ii==max_iter-1) { 580 | num_iters=max_iter; 581 | loss=0; 582 | if (indices.n() > 0) { 583 | multTrans(X,indices,w,tmp); 584 | } else { 585 | X.multTrans(w,tmp); 586 | } 587 | for (int kk=0; kk 601 | int nonu_sampling(const Vector& sumpi) { 602 | const T val = static_cast(random())/RAND_MAX; 603 | const int n = sumpi.n(); 604 | if (sumpi[0] >= val) return 0; 605 | int m = 0; 606 | int M = n-1; 607 | while (M > m+1) { 608 | const int mid=(m+M)/2; 609 | if (sumpi[mid] >= val) { 610 | M=mid; 611 | } else { 612 | m=mid; 613 | } 614 | } 615 | return M; 616 | }; 617 | 618 | //nonu_accelerated_miso_svm_aux(y,X,w,normX,lambda,eps,max_iter,num_it,primal,loss); 619 | 620 | template 
621 | void nonu_miso_svm_aux(const Vector& y, const Matrix& X, Vector& w, const Vector& Li, const T lambda, const T eps, const int max_iter, int& num_it,T& primal,T& loss, const int verbose) { 622 | const int n = y.n(); 623 | w.setZeros(); 624 | Vector deltaT(n); 625 | for (int ii=0; ii sumpi(n); 627 | for (int ii=0; ii xi; 632 | Vector alpha(n); 633 | alpha.setZeros(); 634 | Vector C(n); 635 | C.setZeros(); 636 | Vector tmp; 637 | T dualold=0; 638 | T dual=0; 639 | num_it=0; 640 | for (int ii = 0; ii 0 && (ii % (10*n)) == 0) { 642 | num_it+=10; 643 | X.mult(alpha,w,T(1.0)/n); // to improve numerical stability 644 | X.multTrans(w,tmp); 645 | primal=0; 646 | for (int kk=0; kk 680 | void nonu_accelerated_miso_svm_aux(const Vector& y, const Matrix& X, Vector& w, const Vector& Li, const T lambda, const T eps, const int max_iter, int& num_it, T& primal, T& loss, const int verbose) { 681 | const int n = y.n(); 682 | const int p = X.m(); 683 | 684 | w.setZeros(); 685 | Vector alpha(n); 686 | alpha.setZeros(); 687 | Vector C(n); 688 | C.setZeros(); 689 | Vector z(p); 690 | z.setZeros(); 691 | Vector zold(p); 692 | zold.setZeros(); 693 | Vector wold(p); 694 | wold.setZeros(); 695 | Vector bestw(p); 696 | bestw.copy(w); 697 | T bestprimal=INFINITY; 698 | T bestloss=INFINITY; 699 | const T kappa = (T(2.0)*Li.mean()/n-lambda); 700 | const T q = lambda/(lambda+kappa); 701 | const T qp = T(0.9)*sqrt(q); 702 | const T alphak = sqrt(q); 703 | const T betak=(T(1.0)-alphak)/(T(1.0)+alphak); 704 | Vector deltaT(n); 705 | for (int ii=0; ii sumpi(n); 707 | for (int ii=0; ii= counter) { 717 | ++counter; 718 | w.copy(z); 719 | w.scal(kappa/(kappa+lambda)); 720 | X.mult(alpha,w,lambda/(n*(kappa+lambda)),T(1.0)); 721 | } else { 722 | w.add(z,kappa/(kappa+lambda)); 723 | w.add(zold,-kappa/(kappa+lambda)); 724 | } 725 | int num_iters; 726 | nonu_accelerated_miso_svm_aux2(y, X, w, alpha, C, z, kappa, Li, lambda,n,deltaT,sumpi); 727 | total_iters += n; 728 | loss=0; 729 | Vector tmp; 730 | 
X.multTrans(w,tmp); 731 | for (int kk=0; kk ws; 743 | ws.copy(w); 744 | ws.scal((kappa+lambda)/lambda); 745 | ws.add(z,-kappa/lambda); 746 | const T dual=C.mean() - T(0.5)*lambda*ws.nrm2sq(); 747 | gap=primal-dual; 748 | if (gap <= eps || total_iters >= max_iter) { 749 | if (verbose) { 750 | #pragma omp critical 751 | { 752 | std::cout << "Iteration " << total_iters << ", inner it: " << ii << ", loss: " << loss << ", primal: " << primal << ", dual: " << dual << ", gap: " << (primal-dual) << std::endl; 753 | } 754 | } 755 | break; 756 | } 757 | zold.copy(z); 758 | z.copy(w); 759 | z.scal(T(1.0)+betak); 760 | z.add(wold,-betak); 761 | } 762 | w.copy(bestw); 763 | num_it=total_iters/n; 764 | primal=bestprimal; 765 | loss=bestloss; 766 | }; 767 | 768 | 769 | // need to restart ! 770 | template 771 | void nonu_accelerated_miso_svm_aux2(const Vector& y, const Matrix& X, Vector& w, Vector& alpha, Vector& C, const Vector& z, const T kappa, const Vector& Li, const T lambda, const int max_iter, const Vector& deltaT, const Vector& sumpi) { 772 | const int n = y.n(); 773 | Vector tmp; 774 | Vector xi; 775 | for (int ii = 0; ii 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #ifdef _OPENMP 23 | #include 24 | #endif 25 | 26 | // #ifndef MATLAB_MEX_FILE 27 | // typedef int mwSize; 28 | // #endif 29 | 30 | #ifndef MAX_THREADS 31 | #define MAX_THREADS 64 32 | #endif 33 | 34 | // MIN, MAX macros 35 | #define MIN(a,b) (((a) > (b)) ? (b) : (a)) 36 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 37 | #define SIGN(a) (((a) < 0) ? -1.0 : 1.0) 38 | #define ABS(a) (((a) < 0) ? 
-(a) : (a)) 39 | // DEBUG macros 40 | #define PRINT_I(name) printf(#name " : %d\n",name); 41 | #define PRINT_F(name) printf(#name " : %g\n",name); 42 | #define PRINT_S(name) printf("%s\n",name); 43 | #define FLAG(a) printf("flag : %d \n",a); 44 | 45 | // ALGORITHM constants 46 | #define EPSILON 10e-10 47 | #ifndef INFINITY 48 | #define INFINITY 10e20 49 | #endif 50 | #define EPSILON_OMEGA 0.001 51 | #define TOL_CGRAD 10e-6 52 | #define MAX_ITER_CGRAD 40 53 | 54 | 55 | #if defined(_MSC_VER) || defined(_WIN32) || defined(WINDOWS) 56 | 57 | #include 58 | #include 59 | #define random rand 60 | #define srandom srand 61 | 62 | #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) 63 | #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 64 | #else 65 | #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL 66 | #endif 67 | 68 | 69 | struct spams_timezone 70 | { 71 | int tz_minuteswest; /* minutes W of Greenwich */ 72 | int tz_dsttime; /* type of dst correction */ 73 | }; 74 | 75 | int gettimeofday(struct timeval *tv, struct spams_timezone *tz) 76 | { 77 | FILETIME ft; 78 | unsigned __int64 tmpres = 0; 79 | static int tzflag = 0; 80 | 81 | if (NULL != tv) 82 | { 83 | GetSystemTimeAsFileTime(&ft); 84 | 85 | tmpres |= ft.dwHighDateTime; 86 | tmpres <<= 32; 87 | tmpres |= ft.dwLowDateTime; 88 | 89 | tmpres /= 10; /*convert into microseconds*/ 90 | /*converting file time to unix epoch*/ 91 | tmpres -= DELTA_EPOCH_IN_MICROSECS; 92 | tv->tv_sec = (long)(tmpres / 1000000UL); 93 | tv->tv_usec = (long)(tmpres % 1000000UL); 94 | } 95 | 96 | if (NULL != tz) 97 | { 98 | if (!tzflag) 99 | { 100 | _tzset(); 101 | tzflag++; 102 | } 103 | tz->tz_minuteswest = _timezone / 60; 104 | tz->tz_dsttime = _daylight; 105 | } 106 | 107 | return 0; 108 | } 109 | 110 | #else 111 | #include 112 | #endif 113 | 114 | 115 | #include "linalg.h" 116 | 117 | using namespace std; 118 | 119 | /// Class Timer 120 | class Timer { 121 | public: 122 | /// Empty constructor 123 | Timer(); 124 | /// Destructor 125 | 
/// Elapsed seconds accumulated by this timer. If the timer is currently
/// running, the live interval since the last start() is added on top of
/// the stored _cumul; otherwise _cumul alone is returned.
/// Declared const even though it calls gettimeofday(_time2, ...): it writes
/// through the _time2 pointer member, not to the pointer itself, so the
/// bitwise state of the Timer object is unchanged.
double inline Timer::getElapsed() const {
   if (_running) {
      gettimeofday(_time2,NULL);
      // Convert the (sec, usec) delta to fractional seconds.
      return _cumul +
         static_cast<double>((_time2->tv_sec -
                  _time1->tv_sec)*1000000 + _time2->tv_usec-
               _time1->tv_usec)/1000000.0;
   } else {
      return _cumul;
   }
}