├── .gitignore ├── .gitignore~ ├── .idea ├── depth.iml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── Depth Map Prediction from a Single Image using a Multi-Scale Deep Network学习笔记.doc ├── Depth Map Prediction from a Single Image.pdf ├── LICENSE ├── README ├── common ├── __init__.py ├── __init__.pyc ├── configuration.py ├── configuration.pyc ├── imgutil.py ├── imgutil.pyc ├── logutil.py ├── logutil.pyc └── strhist.py ├── demo_nyud_depth_prediction.png ├── demo_nyud_rgb.jpg ├── demo_nyud_rgb1.jpg ├── models ├── .depth.conf.swp ├── .depth.py.swp ├── depth.conf ├── depth.py └── depth.pyc ├── net.py ├── net.pyc ├── pooling.py ├── pooling.pyc ├── test.py ├── theano_test_value_size.patch ├── thutil.py └── thutil.pyc /.gitignore: -------------------------------------------------------------------------------- 1 | weights/ 2 | -------------------------------------------------------------------------------- /.gitignore~: -------------------------------------------------------------------------------- 1 | data/ 2 | caffemodel/ 3 | h5py/ 4 | data2/ 5 | project1_landmark_dectection/train_model/ 6 | -------------------------------------------------------------------------------- /.idea/depth.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 
-------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 82 | 83 | 84 | 90 | 91 | 92 | 93 | 94 | 95 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 130 | 131 | 132 | 133 | 136 | 137 | 140 | 141 | 142 | 143 | 146 | 147 | 150 | 151 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 175 | 176 | 177 | 178 | 194 | 195 | 211 | 212 | 228 | 229 | 245 | 246 | 264 | 265 | 283 | 284 | 304 | 305 | 326 | 327 | 350 | 351 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 1446727746014 389 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 431 | 434 | 435 | 436 | 438 | 439 | 440 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 528 | 529 | 530 | 531 | 532 | 533 | 
-------------------------------------------------------------------------------- /Depth Map Prediction from a Single Image using a Multi-Scale Deep Network学习笔记.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/Depth Map Prediction from a Single Image using a Multi-Scale Deep Network学习笔记.doc -------------------------------------------------------------------------------- /Depth Map Prediction from a Single Image.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/Depth Map Prediction from a Single Image.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. 
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. 
Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. 
Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 
675 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ========================================================================= 2 | Depth Map Prediction from a Single Image using a Multi-Scale Deep Network 3 | ========================================================================= 4 | 5 | Authors: David Eigen, Christian Puhrsch and Rob Fergus 6 | 7 | Email: deigen@cs.nyu.edu, cpuhrsch@nyu.edu, fergus@cs.nyu.edu 8 | 9 | 10 | Requirements 11 | ------------- 12 | 13 | * theano 14 | * numpy, scipy 15 | * PIL or Pillow 16 | 17 | 18 | Running the Demo 19 | ----------------- 20 | 21 | The demo loads the depth prediction network, compiles a theano function for 22 | inference, and infers depth for a single image. To run: 23 | 24 | > THEANO_FLAGS=device=gpu0 python demo_depth.py 25 | 26 | This should create a file called "demo_nyud_depth_prediction.png" with the 27 | predicted depth for the input "demo_nyud_rgb.jpg". (Substitute the gpu you 28 | want to run on for gpu0). 29 | 30 | 31 | 32 | Other Information 33 | ------------------ 34 | 35 | This tree contains code for depth prediction network inference. While there is 36 | some code relating to training, much of the training code including most data 37 | processing is not provided here. We may release this in the future, however. 38 | 39 | While developing this project, we made a few modifications in theano not 40 | currently part of the main codeline. While the above instructions should work 41 | for inference on a current unmodified theano build, it may take up more GPU 42 | memory than needed due to use of test values for shape information. The git 43 | patch file "theano_test_value_size.patch" is also included and might be used to 44 | enable this feature on your own tree. 
45 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/common/__init__.py -------------------------------------------------------------------------------- /common/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/common/__init__.pyc -------------------------------------------------------------------------------- /common/configuration.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | ''' 3 | Copyright (C) 2014 New York University 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
# coding=utf-8
# Copyright (C) 2014 New York University -- GPLv3; see LICENSE for details.
'''
configuration.py

ConfigParser wrapper adding per-option defaults, section fallback lists,
and eval()'d option values with a configurable environment.
'''
import importlib

from ConfigParser import SafeConfigParser, NoOptionError, NoSectionError


def read_config(fn):
    '''Read config file fn and return a _ConfigParser for it.

    The [config] section of the file may declare an "imports" option;
    those modules are imported and become the default environment for
    geteval() expressions.
    '''
    conf = _ConfigParser()
    conf.read(fn)
    conf.set_eval_environ(section='config')
    return conf

# Sentinel meaning "no default supplied"; lets callers pass None (or any
# value) as a legitimate default.
_ERROR = object()


class _ConfigParser(SafeConfigParser):
    '''SafeConfigParser with defaults, section fallback and eval'd options.

    Anywhere a section name is accepted, a tuple/list of section names may
    be given instead; sections are tried in order and the first one that
    defines the option wins.
    '''

    def __init__(self):
        SafeConfigParser.__init__(self)
        self.eval_globals = None
        self.eval_locals = None

    def get_section(self, section):
        '''Return a view object bound to one section of this parser.'''
        return _ConfigSection(self, section)

    def set_eval_environ(self, section=None, globals=None, locals=None):
        '''Set the globals/locals used by geteval(), augmented with any
        modules named in the "imports" option of the given section.'''
        self.eval_globals = globals or {}
        self.eval_locals = locals
        self.eval_globals.update(self._read_eval_env(section))

    def _read_eval_env(self, section):
        '''Import the modules listed in section's "imports" option.

        The option is a comma-separated list of module names, each with an
        optional " as alias".  Returns a dict mapping name -> module.
        '''
        if not section or not self.has_section(section):
            return {}
        mods = self.get(section, 'imports', '')
        eval_env = {}
        for modstr in mods.split(','):
            if ' as ' in modstr:
                (mod, name) = modstr.split(' as ')
            else:
                mod = name = modstr
            eval_env[name.strip()] = importlib.import_module(mod.strip())
        return eval_env

    def get_eval_environ(self, globals, locals):
        '''Fill in globals/locals from the configured defaults if None.'''
        if globals is None:
            globals = self.eval_globals
        if locals is None:
            locals = self.eval_locals
        return (globals, locals)

    def geteval(self, section, option,
                default=_ERROR, globals=None, locals=None):
        '''Return eval() of the option string, with section fallback and
        optional default.  Raises NoOptionError/NoSectionError when the
        option is absent everywhere and no default was given.'''
        (globals, locals) = self.get_eval_environ(globals, locals)
        if isinstance(section, (tuple, list)):
            # Try each section in turn; remember the last failure so it
            # can be re-raised when there is no default.  (Binding the
            # exception explicitly also survives Python 3's deletion of
            # the `except ... as ex` name at block exit.)
            err = None
            for sec in section:
                try:
                    return self.geteval(sec, option, _ERROR, globals, locals)
                except (NoOptionError, NoSectionError) as ex:
                    err = ex
            if default is not _ERROR:
                return default
            raise err
        try:
            return eval(self.get(section, option), globals, locals)
        except NoOptionError:
            if default is not _ERROR:
                return default
            raise

    def __get(self, section, option, default, getf):
        # Shared implementation of get/getint/getfloat/getboolean:
        # handles section fallback lists and default values; getf is the
        # underlying SafeConfigParser accessor.
        if isinstance(section, (tuple, list)):
            err = None
            for sec in section:
                try:
                    return self.__get(sec, option, _ERROR, getf)
                except (NoOptionError, NoSectionError) as ex:
                    err = ex
            if default is not _ERROR:
                return default
            raise err
        try:
            return getf(self, section, option)
        except NoOptionError:
            if default is not _ERROR:
                return default
            raise

    def get(self, section, option, default=_ERROR):
        return self.__get(section, option, default, SafeConfigParser.get)

    def getint(self, section, option, default=_ERROR):
        return self.__get(section, option, default, SafeConfigParser.getint)

    def getfloat(self, section, option, default=_ERROR):
        return self.__get(section, option, default, SafeConfigParser.getfloat)

    def getboolean(self, section, option, default=_ERROR):
        return self.__get(section, option, default,
                          SafeConfigParser.getboolean)


class _ConfigSection(object):
    '''Proxy exposing one section of a _ConfigParser with the same get
    methods, minus the section argument.  May carry its own eval
    environment, falling back to the parent parser's.'''

    def __init__(self, conf, section):
        self.conf = conf
        self.parent = conf
        self.section = section
        self.eval_globals = None
        self.eval_locals = None

    def set_eval_environ(self, section=None, globals=None, locals=None):
        self.eval_globals = globals or {}
        self.eval_locals = locals
        self.eval_globals.update(self.conf._read_eval_env(section))

    def get_eval_environ(self, globals, locals):
        # Per-section environment first, then the parent parser's.
        if globals is None:
            globals = self.eval_globals
        if globals is None:
            globals = self.conf.eval_globals
        if locals is None:
            locals = self.eval_locals
        if locals is None:
            locals = self.conf.eval_locals
        return (globals, locals)

    def geteval(self, option, default=_ERROR, globals=None, locals=None):
        (globals, locals) = self.get_eval_environ(globals, locals)
        try:
            return eval(self.get(option), globals, locals)
        except NoOptionError:
            if default is not _ERROR:
                return default
            raise

    def __getattr__(self, option):
        # Attribute-style access: section.some_option.
        # BUGFIX: the value was previously assigned to a local and never
        # returned, so every attribute lookup silently yielded None.
        return self.conf.get(self.section, option)

    def has_option(self, *args):
        return self.conf.has_option(self.section, *args)

    def get(self, option, default=_ERROR):
        return self.conf.get(self.section, option, default)

    def getint(self, option, default=_ERROR):
        return self.conf.getint(self.section, option, default)

    def getfloat(self, option, default=_ERROR):
        return self.conf.getfloat(self.section, option, default)

    def getboolean(self, option, default=_ERROR):
        return self.conf.getboolean(self.section, option, default)

    def items(self, *args):
        return self.conf.items(self.section, *args)

    def set(self, *args):
        return self.conf.set(self.section, *args)

    def remove_option(self, *args):
        return self.conf.remove_option(self.section, *args)
# Misc image and filter manipulation utilities.
# Author: deigen
# Copyright (C) 2014 New York University -- GPLv3; see LICENSE for details.

import numpy as np


def rot180(x):
    '''Return the 2D matrix x rotated by 180 degrees.'''
    return x[::-1, ::-1]


def scale_values(x, min=None, max=None, center=None):
    '''Scale values of x so min->0 and max->1.

    By default uses min(x) and max(x).  If min or max is supplied,
    clamps x first.  If center is supplied, instead scales values so
    that center->0.5, and [min, max] fit within [0,1] (i.e. scales by
    the max difference from center).
    '''
    min = min if min is not None else np.min(x.flat)
    max = max if max is not None else np.max(x.flat)
    if center is None:
        x = np.maximum(np.minimum(x, max), min)
        return (x - min) / (max - min)
    else:
        x = (x - center) / np.maximum(np.abs(min - center),
                                      np.abs(max - center))
        return 0.5 * (x + 1)


def boxslice(p0, p1):
    '''Given top-left (i0, j0) and bottom-right (i1, j1) corners, return
    array index slices for the box formed by these two points.

    (Rewritten without Python-2-only tuple parameter unpacking; the
    call signature -- two corner pairs -- is unchanged.)
    '''
    (i0, j0) = p0
    (i1, j1) = p1
    return (slice(i0, i1), slice(j0, j1))


def filter_truncate(i, j, xshape, yshape):
    '''Given (i,j) center of filter y placed in x, and shapes (ilen, jlen)
    of image x and filter y, return slices for x and y s.t. y gets
    truncated at x's boundary.

    Example:
        (xbox, ybox) = filter_truncate(i, j, recons.shape, filter.shape)
        recons[xbox] += k * filter[ybox]
    '''
    (xi, xj) = xshape
    (yi, yj) = yshape

    xi0 = i - yi // 2
    xi1 = i + yi // 2 + (int(yi) % 2)
    xj0 = j - yj // 2
    # BUGFIX: odd-size correction previously used yi's parity here,
    # producing a box of the wrong width for filters with one even and
    # one odd dimension.
    xj1 = j + yj // 2 + (int(yj) % 2)
    yi0 = 0
    yi1 = yi
    yj0 = 0
    yj1 = yj

    # Clip each side of the x box at the image border, shrinking the
    # filter box by the same amount.
    if xi0 < 0:
        yi0 -= xi0
        xi0 = 0
    if xi1 > xi:
        yi1 -= (xi1 - xi)
        xi1 = xi
    if xj0 < 0:
        yj0 -= xj0
        xj0 = 0
    if xj1 > xj:
        yj1 -= (xj1 - xj)
        xj1 = xj

    return (boxslice((xi0, xj0), (xi1, xj1)),
            boxslice((yi0, yj0), (yi1, yj1)))


def montage(imgs, layout=None, fill=0, border=0):
    '''Tile the given images together into a single montage image.

    imgs is a sequence of (h, w) or (h, w, c) arrays, all the same shape.
    layout is an optional (nrows, ncols); missing entries are computed to
    make the montage roughly square.  fill is the background value and
    border an optional (bh, bw) (or scalar) gap between tiles.
    '''
    sz = imgs[0].shape
    assert all([sz == x.shape for x in imgs])
    if len(sz) == 3:
        (h, w, c) = sz
    elif len(sz) == 2:
        (h, w) = sz
        c = 1
    else:
        raise ValueError('images must be 2 or 3 dimensional')

    bw = bh = 0
    if border:
        try:
            (bh, bw) = border
        except TypeError:
            bh = bw = int(border)
    nimgs = len(imgs)

    if layout is None:
        (ncols, nrows) = (None, None)
    else:
        (nrows, ncols) = layout

    # Compute any missing layout dimension.  BUGFIX: np.ceil returns
    # floats, which are not valid array dimensions; cast to int.
    if not (nrows and nrows > 0) and not (ncols and ncols > 0):
        if w >= h:
            ncols = int(np.ceil(np.sqrt(nimgs * h / float(w))))
            nrows = int(np.ceil(nimgs / float(ncols)))
        else:
            nrows = int(np.ceil(np.sqrt(nimgs * w / float(h))))
            ncols = int(np.ceil(nimgs / float(nrows)))
    elif not (nrows and nrows > 0):
        nrows = int(np.ceil(nimgs / float(ncols)))
    elif not (ncols and ncols > 0):
        ncols = int(np.ceil(nimgs / float(nrows)))

    mw = w * ncols + bw * (ncols - 1)
    mh = h * nrows + bh * (nrows - 1)
    assert mh * mw >= w * h * nimgs, 'layout not big enough for images'
    M = np.zeros((mh, mw, c))
    M += fill
    i = 0
    j = 0
    for img in imgs:
        M[i:i+h, j:j+w, :] = img.reshape((h, w, c))
        j += w + bw
        if j >= mw:
            i += h + bh
            j = 0
    # BUGFIX: was "len(sz) == 1", which can never hold (sz has 2 or 3
    # entries); 2-D inputs should produce a 2-D montage.
    if len(sz) == 2:
        M = M.reshape((mh, mw))
    return M


def colormap(x, m=None, M=None, center=0, colors=None):
    '''Color a grayscale array (currently red/blue by sign).'''
    if center is None:
        center = 0
    if colors is None:
        colors = np.array(((0, 0.7, 1),
                           (0, 0, 0),
                           (1, 0, 0)),
                          dtype=float)
    if x.shape[-1] == 1:
        x = x[..., 0]
    x = scale_values(x, min=m, max=M, center=center)
    y = np.empty(x.shape + (3,))
    # range() rather than Python-2-only xrange().
    for c in range(3):
        y[..., c] = np.interp(x, (0, 0.5, 1), colors[:, c])
    return y


def chan_to_pix(x, nchan=3, imsize=(1, 1)):
    '''Reshape channel-major data to rows of per-pixel channel values.'''
    return (x.reshape((-1, nchan,) + imsize)
             .transpose((0, 2, 3, 1))
             .reshape((-1, nchan)))


def pix_to_chan(x, nchan=3, imsize=(1, 1)):
    '''Inverse of chan_to_pix: per-pixel rows back to channel-major.'''
    return (x.reshape((-1,) + imsize + (nchan,))
             .transpose((0, 3, 1, 2))
             .reshape((-1, nchan * imsize[0] * imsize[1])))


def bcxy_from_bxyc(im):
    return im.transpose((0, 3, 1, 2))


def bxyc_from_bcxy(im):
    return im.transpose((0, 2, 3, 1))


# NOTE: (3,1,2,0) swaps axes 0 and 3 and is its own inverse, so the two
# conversions below are (correctly) the same permutation.
def bxyc_from_cxyb(im):
    return im.transpose((3, 1, 2, 0))


def cxyb_from_bxyc(im):
    return im.transpose((3, 1, 2, 0))


def filter_montage(imgs, m=None, M=None, center=None):
    '''Montage of filters with shape (nf, nc, h, w): colormapped when
    single-channel, RGB when 3-channel, otherwise one row per filter
    with one image per channel.'''
    (nf, nc) = imgs.shape[:2]

    if nc == 1:
        return montage(
            colormap(bxyc_from_bcxy(imgs), m, M, center),
            border=1,
            fill=0.2)
    elif nc == 3:
        return image_montage(imgs, m, M, center)
    else:
        imgs = imgs.reshape((nf * nc, 1,) + imgs.shape[2:])
        return montage(
            colormap(bxyc_from_bcxy(imgs), m, M, center),
            layout=(nf, nc),
            border=1,
            fill=0.2)


def image_montage(imgs, m=None, M=None, center=None):
    '''Montage of (b, c, h, w) images, value-scaled to [0, 1].'''
    imgs = bxyc_from_bcxy(imgs)
    return montage(
        scale_values(imgs, m, M, center),
        border=1)


def acts_montage(acts, scale=True, nimgs=16, m=None, M=None):
    '''Montage of activation maps, one inner montage per example.'''
    if nimgs:
        acts = acts[:nimgs]
    if len(acts.shape) == 2:
        acts = acts[:, :, np.newaxis, np.newaxis]
    if scale:
        inner_fill = 0.2
        outer_fill = 1.0
    else:
        # Unscaled: pick fills relative to the data range so borders
        # remain visible.
        inner_fill = np.min(acts) + 0.2 * (np.max(acts) - np.min(acts))
        outer_fill = np.max(acts)
    return montage([montage(
                        (scale_values(x, min=m, max=M)
                         if scale
                         else x),
                        border=1,
                        fill=inner_fill)
                    for x in acts],
                   border=3,
                   fill=outer_fill)
# logutil.py -- utilities for logging and tracking experiment runs.
# Copyright (C) 2014 New York University -- GPLv3; see LICENSE for details.

import os
import errno
import time
import logging
import subprocess
import shutil
import numpy as np
from PIL import Image

# Keep a handle to the builtin open(); this module shadows the name with
# an output-dir-relative version below.
from __builtin__ import open as _open

# matplotlib and IPython are optional; the features depending on them
# degrade gracefully when they are absent.
try:
    from matplotlib import pyplot
    _have_plot = True
except ImportError:
    _have_plot = False

try:
    import IPython
    _ipython_app = IPython.Application.instance()
    _ipython_logger = _ipython_app.shell.logger
except (ImportError, AttributeError):
    _ipython_app = None
    _ipython_logger = None


class _Config(object):
    # Module-wide logging configuration flags.
    log_file = True          # also write a log file in the output dir
    log_console = True       # log to the console
    output_dir = None        # current output directory (None = cwd)
    ipython_logfname = None  # where the IPython session is being logged

_config = _Config()

_log = logging.getLogger()
_log.setLevel(logging.INFO)


def _setup_logs():
    '''(Re)install log handlers according to _config, and (re)start
    IPython session logging into the output dir when available.'''
    # setup python logger
    handlers = list(_log.handlers)
    fmt = logging.Formatter('%(asctime)s - %(levelname)s : %(message)s')
    for h in handlers:
        _log.removeHandler(h)
    if _config.log_console:
        h = logging.StreamHandler()
        h.setFormatter(fmt)
        _log.addHandler(h)
    if _config.log_file and _config.output_dir:
        h = logging.FileHandler(filename('log'))
        h.setFormatter(fmt)
        _log.addHandler(h)

    # setup ipython session history
    iplogger = _ipython_logger
    if iplogger:
        if iplogger.log_active and \
           iplogger.logfname != _config.ipython_logfname:
            # user turned on logging to their own file; leave it alone
            _config.ipython_logfname = None
        else:
            if iplogger.log_active:
                iplogger.logstop()
            if _config.output_dir:
                _config.ipython_logfname = filename('ipython_log.py')
                iplogger.logstart(_config.ipython_logfname,
                                  log_output=True,
                                  timestamp=True)

_setup_logs()


class Subdir(object):
    '''
    Atomically swappable/recoverable subdirectory
    '''
    def __init__(self, name):
        self.name = name
        self.current = name
        self.next = self.current + '.next'
        self.recover()

    def create_next(self):
        '''Create the staging ".next" directory (ok if it already exists).'''
        try:
            os.mkdir(filename(self.next))
        except OSError as ex:
            # BUGFIX: os.errno was never a documented alias and is gone
            # in modern Pythons; use the errno module directly.
            if ex.errno != errno.EEXIST:
                raise

    def swap(self):
        '''Atomically promote ".next" to current, keeping the previous
        current as ".prev" until the rename succeeds.'''
        curr = filename(self.current)
        next = curr + '.next'
        prev = curr + '.prev'

        if os.path.exists(prev):
            shutil.rmtree(prev)
        if os.path.exists(curr):
            os.rename(curr, prev)

        os.rename(next, curr)

        try:
            if os.path.exists(prev):
                shutil.rmtree(prev)
        except (OSError, IOError):
            # BUGFIX: Logger.exception() requires a message argument;
            # calling it bare raised TypeError in this error path.
            _log.exception('Error removing prev state dir')

    def recover(self):
        '''If a swap was interrupted after curr was moved aside, restore
        the ".prev" directory as current.'''
        curr = filename(self.current)
        prev = curr + '.prev'
        if not os.path.exists(curr) and os.path.exists(prev):
            _log.info('Recovering state from %s' % prev)
            os.rename(prev, curr)


class consistent_dir(object):
    '''
    Checks a directory remains the same (not swapped) while used and
    between uses.  For use in a with statement.
    '''

    # Maps abs dirname -> inode observed on first use; a later mismatch
    # means the directory was replaced underneath us.
    _dir_inums = {}

    def __init__(self, dirname):
        self.dirname = dirname

    def __enter__(self):
        name = os.path.abspath(self.dirname)
        if name not in self._dir_inums:
            inum = os.stat(name).st_ino
            self._dir_inums[name] = inum

    def __exit__(self, *args):
        name = os.path.abspath(self.dirname)
        inum = os.stat(name).st_ino
        if self._dir_inums[name] != inum:
            raise IOError('Directory changed while reading files: %s'
                          % self.dirname)


def set_output_dir(dirname):
    '''
    Set the current directory for logging and output.
    '''
    assert os.path.exists(dirname)
    _config.output_dir = dirname
    _setup_logs()


def filename(fn):
    '''
    Returns a path for the given filename in the current output directory.
    '''
    if _config.output_dir:
        return os.path.join(_config.output_dir, fn)
    else:
        return fn


def output_dir():
    '''Return the current output directory ("." when unset).'''
    return _config.output_dir if _config.output_dir else '.'


def getLogger():
    '''Return the module's root logger.'''
    return _log


def open(fn, *args, **kwargs):
    '''
    Open a file in the current output directory.
    args same as for builtin open().
    '''
    return _open(filename(fn), *args, **kwargs)


def copy(src, dst=None):
    '''
    Copy a file to the output directory.

    If dst is None, uses basename(src).  Otherwise, dst is the name of
    the file within the current output directory.
    '''
    if dst is None:
        dst = os.path.basename(src)
    dst = filename(dst)
    # Skip the copy when src and dst are already the same file.
    if os.path.realpath(src) != os.path.realpath(dst):
        shutil.copy(src, dst)


def save_image(fn, img, **kwargs):
    '''
    Save an image img to filename fn in the current output dir.
    img is an (h, w, c) array; float images are assumed to be in [0, 1]
    and are scaled to uint8.  kwargs the same as for PIL Image.save().
    '''
    # BUGFIX: convert to ndarray *before* unpacking .shape (a plain
    # list/PIL input has no .shape attribute).
    if not isinstance(img, np.ndarray):
        img = np.array(img)
    (h, w, c) = img.shape
    if c == 1:
        # Grayscale: replicate to 3 channels for saving.
        img = np.concatenate((img,) * 3, axis=2)
    if img.dtype.kind == 'f':
        img = (img * 255).astype('uint8')
    elif img.dtype.kind in ('i', 'u'):
        # BUGFIX: this branch was written as a duplicate "kind == 'f'"
        # test and was unreachable, so integer images raised ValueError.
        img = img.astype('uint8')
    else:
        raise ValueError('bad dtype: %s' % img.dtype)
    i = Image.fromarray(img)
    # BUGFIX: image data is binary; text mode corrupts it on some
    # platforms/Python versions.
    with open(fn, 'wb') as f:
        i.save(f, **kwargs)


def save_fig(fn, *args, **kwargs):
    '''
    Save a matplotlib figure to fn in the current output dir.
    args same as for pyplot.savefig().
    '''
    # Binary mode: savefig emits binary formats (png/pdf/...).
    with open(fn, 'wb') as f:
        pyplot.savefig(f, *args, **kwargs)
225 | ''' 226 | with open(fn, 'w') as f: 227 | pyplot.savefig(f, *args, **kwargs) 228 | 229 | -------------------------------------------------------------------------------- /common/logutil.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/common/logutil.pyc -------------------------------------------------------------------------------- /common/strhist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | strhist.py 3 | 4 | Prints histograms using text. 5 | ''' 6 | ''' 7 | Copyright (C) 2014 New York University 8 | 9 | This program is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with this program. If not, see . 21 | ''' 22 | 23 | import numpy as np 24 | 25 | _strhist_chars = ' .oO@^' 26 | 27 | def _gethist(x, bins, m, M): 28 | x = np.array(x) 29 | if m == None: 30 | m = x.min() 31 | if M == None: 32 | M = x.max() 33 | (h, hbins) = np.histogram(x, bins=bins, range=(m,M)) 34 | h = h.astype(float) 35 | h /= np.sum(h) 36 | return (h, hbins, m, M) 37 | 38 | def hist_chars(x, m=None, M=None, width=50): 39 | ''' 40 | Prints a one-line histogram with one char per bin. The bin count is 41 | quantized into only a few values and scaled to create a visual 42 | representation. Min and max values are displayed on the ends. 
43 | ''' 44 | (h, hbins, m, M) = _gethist(x, width, m, M) 45 | nchars = len(_strhist_chars) 46 | if np.any(h > 0): 47 | hmin = np.min(h) 48 | hmax = np.max(h) 49 | hchar = np.round((nchars-1)*(h - hmin)/(hmax - hmin)) 50 | hstr = ''.join([_strhist_chars[int(i)] for i in hchar]) 51 | else: 52 | hstr = ' ' * width 53 | return '% .5f |%s| %.5f' % (m, hstr, M) 54 | 55 | def hist_bins(x, m=None, M=None, width=50, sep=''): 56 | ''' 57 | Prints a one-line histogram with a percent in each bin. 58 | Min and max values are displayed on the ends. 59 | ''' 60 | w = 7 61 | bins = width / w 62 | (h, hbins, m, M) = _gethist(x, bins, m, M) 63 | hstr = sep.join([str(int(np.round(x*100))).center(w-2) for x in h]) 64 | return '% .2f ||%s|| %.2f' % (m, hstr, M) 65 | 66 | def hist_bars(x, m=None, M=None, bins=10, width=50): 67 | ''' 68 | Prints a histogram with one bin per line. 69 | ''' 70 | (h, hbins, m, M) = _gethist(x, bins, m, M) 71 | barlengths = np.round(width * h / np.maximum(1e-8, np.max(h))) 72 | s = ['% .3f ~ % .3f | %s' % (hbins[i], hbins[i+1], '*' * barlengths[i]) 73 | for i in xrange(len(h))] 74 | return '\n'.join(s) 75 | 76 | strhist = hist_chars 77 | hist = hist_chars 78 | 79 | if __name__ == '__main__': 80 | x = np.random.randn(10000) 81 | for fname in ('hist_chars', 'hist_bins', 'hist_bars'): 82 | print fname 83 | print eval(fname)(x) 84 | print 85 | 86 | -------------------------------------------------------------------------------- /demo_nyud_depth_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/demo_nyud_depth_prediction.png -------------------------------------------------------------------------------- /demo_nyud_rgb.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/demo_nyud_rgb.jpg -------------------------------------------------------------------------------- /demo_nyud_rgb1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/demo_nyud_rgb1.jpg -------------------------------------------------------------------------------- /models/.depth.conf.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/models/.depth.conf.swp -------------------------------------------------------------------------------- /models/.depth.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/models/.depth.py.swp -------------------------------------------------------------------------------- /models/depth.conf: -------------------------------------------------------------------------------- 1 | [config] 2 | imports = numpy as np 3 | 4 | [train] 5 | resumptive = True 6 | learning_rate = 1 7 | bsize = 32 8 | momentum = 0.9 9 | nepochs = 151 10 | evaluate_epochs = 10 11 | save_stats_epochs = 10 12 | checkpoint_all_freq = 50 13 | train_conv = True 14 | 15 | [data] 16 | data_dir = /home/deigen/proj/depth/output/000433-data-all-320x240-normals 17 | local_dir = /scratch/deigen/data/000433-data-all-320x240-normals 18 | depth_space = log 19 | zero_mean_images = False 20 | divide_std_images = False 21 | zero_mean_depths = False 22 | divide_std_depths = True 23 | 24 | [init] 25 | 26 | [load] 27 | 28 | [full1] 29 | type = full 30 | load_key = coarse_stack 31 | noutput = 4096 32 | init_w = lambda shp: 0.01*np.random.randn(*shp) 33 | bias = True 34 | 
weight_decay_w = 0.0001 35 | learning_rate_scale_w = 0.1 36 | learning_rate_scale_b = 0.1 37 | dropout = False 38 | 39 | [full2] 40 | type = full 41 | load_key = coarse_stack 42 | noutput = 4070 43 | output_size = (55, 74) 44 | init_w = lambda shp: 0.01*(np.random.rand(*shp)-0.5) 45 | bias = True 46 | weight_decay_w = 0.0001 47 | learning_rate_scale_w = 0.1 48 | learning_rate_scale_b = 0.1 49 | 50 | [conv_s2_1] 51 | type = conv 52 | load_key = fine_stack 53 | filter_shape = (64,3,9,9) 54 | stride = 2 55 | init_w = lambda shp: 0.001*np.random.randn(*shp) 56 | init_b = 0.0 57 | conv_mode = valid 58 | weight_decay_w = 0.0001 59 | learning_rate_scale_w = 0.001 60 | learning_rate_scale_b = 0.001 61 | 62 | [pool_s2_1] 63 | type = maxpool 64 | poolsize = (3,3) 65 | poolstride = (2,2) 66 | 67 | [conv_s2_2] 68 | type = conv 69 | load_key = fine_stack 70 | filter_shape = (64,64,5,5) 71 | init_w = lambda shp: 0.01*np.random.randn(*shp) 72 | init_b = 0.0 73 | conv_mode = same 74 | weight_decay_w = 0.0001 75 | learning_rate_scale_w = 0.01 76 | learning_rate_scale_b = 0.01 77 | 78 | [conv_s2_3] 79 | type = conv 80 | load_key = fine_stack 81 | filter_shape = (64,1,5,5) 82 | transpose = True 83 | init_w = lambda shp: 0.01*np.random.randn(*shp) 84 | init_b = 0.0 85 | conv_mode = same 86 | weight_decay_w = 0.0001 87 | learning_rate_scale_w = 0.001 88 | learning_rate_scale_b = 0.001 89 | 90 | [imnet_conv1] 91 | type = conv 92 | load_key = imagenet 93 | filter_shape = (96, 3, 11, 11) 94 | stride = 4 95 | conv_mode = valid 96 | init_w = lambda shp: 0.01*np.random.randn(*shp) 97 | learning_rate_scale_w = 0.001 98 | learning_rate_scale_b = 0.001 99 | weight_decay_w = 0.0005 100 | 101 | [imnet_pool1] 102 | type = maxpool 103 | load_key = imagenet 104 | poolsize = (3,3) 105 | poolstride = (2,2) 106 | 107 | [imnet_conv2] 108 | type = conv 109 | load_key = imagenet 110 | filter_shape = (256, 96, 5, 5) 111 | conv_mode = same 112 | stride = 1 113 | init_w = lambda shp: 
0.01*np.random.randn(*shp) 114 | learning_rate_scale_w = 0.001 115 | learning_rate_scale_b = 0.001 116 | weight_decay_w = 0.0005 117 | 118 | [imnet_pool2] 119 | type = maxpool 120 | load_key = imagenet 121 | poolsize = (3,3) 122 | poolstride = (2,2) 123 | 124 | [imnet_conv3] 125 | type = conv 126 | load_key = imagenet 127 | filter_shape = (384, 256, 3, 3) 128 | conv_mode = same 129 | stride = 1 130 | init_w = lambda shp: 0.01*np.random.randn(*shp) 131 | learning_rate_scale_w = 0.001 132 | learning_rate_scale_b = 0.001 133 | weight_decay_w = 0.0005 134 | 135 | [imnet_conv4] 136 | type = conv 137 | load_key = imagenet 138 | filter_shape = (384, 384, 3, 3) 139 | conv_mode = same 140 | stride = 1 141 | init_w = lambda shp: 0.01*np.random.randn(*shp) 142 | learning_rate_scale_w = 0.001 143 | learning_rate_scale_b = 0.001 144 | weight_decay_w = 0.0005 145 | 146 | [imnet_conv5] 147 | type = conv 148 | load_key = imagenet 149 | filter_shape = (256, 384, 3, 3) 150 | conv_mode = same 151 | stride = 1 152 | init_w = lambda shp: 0.01*np.random.randn(*shp) 153 | learning_rate_scale_w = 0.001 154 | learning_rate_scale_b = 0.001 155 | weight_decay_w = 0.0005 156 | 157 | [imnet_pool5] 158 | type = maxpool 159 | load_key = imagenet 160 | poolsize = (3,3) 161 | poolstride = (2,2) 162 | 163 | -------------------------------------------------------------------------------- /models/depth.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | ''' 3 | Copyright (C) 2014 New York University 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | ''' 18 | import os 19 | import time 20 | import numpy as np 21 | import ipdb 22 | 23 | import theano 24 | import theano.tensor as T 25 | 26 | from common import imgutil, logutil 27 | 28 | import matplotlib.pyplot as plt 29 | 30 | import thutil 31 | from thutil import test_shape, theano_function, maximum 32 | 33 | from net import * 34 | from pooling import cmrnorm 35 | 36 | _log = logutil.getLogger() 37 | xx = np.newaxis 38 | 39 | def _image_montage(imgs, min, max): 40 | imgs = imgutil.bxyc_from_bcxy(imgs) 41 | return imgutil.montage( 42 | imgutil.scale_values(imgs, min=min, max=max), 43 | border=1) 44 | 45 | def _depth_montage(depths): 46 | if depths.ndim == 4: 47 | assert depths.shape[1] == 1 48 | depths = depths[:,0,:,:] 49 | #depths = imgutil.scale_values(depths, min=-2.5, max=2.5) 50 | #depths = map(imgutil.scale_values, depths) 51 | masks = [] 52 | for i in xrange(len(depths)): 53 | x = depths[i] 54 | mask = x != x.min() 55 | masks.append(mask) 56 | x = x[mask] 57 | if len(x) == 0: 58 | d = np.zeros_like(depths[i]) 59 | else: 60 | d = imgutil.scale_values(depths[i], min=x.min(), max=x.max()) 61 | depths[i] = d 62 | depths = plt.cm.jet(depths)[...,:3] 63 | for i in xrange(len(depths)): 64 | for c in xrange(3): 65 | depths[i, :, :, c][masks[i] == 0] = 0.2 66 | return imgutil.montage(depths, border=1) 67 | 68 | def _zero_pad_batch(batch, bsize): 69 | assert len(batch) <= bsize 70 | if len(batch) == bsize: 71 | return batch 72 | n = batch.shape[0] 73 | shp = batch.shape[1:] 74 | return np.concatenate((batch, np.zeros((bsize - n,) + shp, 75 | dtype=batch.dtype))) 76 | 77 | class 
machine(Machine): 78 | def __init__(self, conf): 79 | Machine.__init__(self, conf) 80 | 81 | def infer_depth(self, images): 82 | ''' 83 | Infers depth maps for a list of 320x240 images. 84 | images is a nimgs x 240 x 320 x 3 numpy uint8 array. 85 | returns depths (nimgs x 55 x 74) corresponding to the center box 86 | in the original rgb image. 87 | ''' 88 | images = images.transpose((0,3,1,2)) 89 | (nimgs, nc, nh, nw) = images.shape 90 | assert (nc, nh, nw) == (3, 240, 320)#网络的输出图片数据为(1,3, 240, 320) 91 | 92 | (input_h, input_w) = self.input_size#网络输入feature map 图片的大小 93 | (output_h, output_w) = self.output_size#网络输出feature map大小 94 | 95 | bsize = self.bsize 96 | b = 0 97 | 98 | # pred_depth为输出,Tensor 类型变量, 99 | v = self.vars 100 | pred_depth = self.inverse_depth_transform(self.fine.pred_mean) 101 | infer_f = theano.function([v.images], pred_depth) 102 | 103 | depths = np.zeros((nimgs, output_h, output_w), dtype=np.float32) 104 | 105 | # 一张图片的中心 bbox ,(i0, i1)为矩形的左上角、(j0, j1)为矩形的右下角 106 | dh = nh - input_h 107 | dw = nw - input_w 108 | (i0, i1) = (dh/2, nh - dh/2) 109 | (j0, j1) = (dw/2, nw - dw/2) 110 | 111 | # infer depth for images in batches 112 | b = 0 113 | while b < nimgs: 114 | batch = images[b:b+bsize] 115 | n = len(batch) 116 | if n < bsize: 117 | batch = _zero_pad_batch(batch, bsize) 118 | 119 | # crop to network input size 120 | batch = batch[:, :, i0:i1, j0:j1] 121 | 122 | # infer depth with nnet 123 | depths[b:b+n] = infer_f(batch)[:n] 124 | 125 | b += n 126 | 127 | return depths 128 | 129 | def inverse_depth_transform(self, logdepths): 130 | # map network output log depths back to depth 131 | # output bias is init'd with the mean, and output is logdepth / stdev 132 | return T.exp(logdepths * self.meta.logdepths_std) 133 | 134 | def get_predicted_depth_region(self): 135 | ''' 136 | Returns the region of a 320x240 image covered by the predicted 137 | depth map (y0 y1 x0 x1) where y runs the 240-dim and x runs the 320-dim. 
138 | ''' 139 | (orig_h, orig_w) = self.orig_input_size # input before transforms 140 | (input_h, input_w) = self.input_size # input after transforms 141 | dt = self.target_crop # net output size difference from valid convs 142 | off_h = (orig_h - input_h + dt) / 2 143 | off_w = (orig_w - input_w + dt) / 2 144 | return (off_h, off_h + input_h, 145 | off_w, off_w + input_w) 146 | 147 | def define_machine(self): 148 | self.orig_input_size = (240, 320) # 149 | self.input_size = (228, 304) # 采用random crop的方法吗 150 | self.output_size = self.conf.geteval('full2', 'output_size')#获取配置文件中,full2层下的选项output_size 151 | 152 | (input_h, input_w) = self.input_size 153 | (output_h, output_w) = self.output_size 154 | #因为输出与输出的比例是4倍,所以我们需要回溯输入图片对应的区域 155 | self.target_crop = input_h - output_h * 4 156 | assert self.target_crop == input_w - output_w * 4 157 | 158 | self.define_meta() 159 | 160 | # input vars 161 | images = T.tensor4('images') 162 | depths = T.tensor3('depths') 163 | masks = T.tensor3('masks') 164 | 165 | test_values = self.make_test_values() 166 | images.tag.test_value = test_values['images'] 167 | depths.tag.test_value = test_values['depths'] 168 | masks.tag.test_value = test_values['masks'] 169 | 170 | x0 = images 171 | y0 = depths 172 | m0 = masks 173 | 174 | # downsample depth and mask by 4x 175 | m0 = m0[:,1::4,1::4] 176 | y0 = y0[:,1::4,1::4] 177 | #构建网络 178 | # 这一部分的网络是粗网络的前半部分,结构与Alexnet相同。因为文献的部分参数采用的是Alexnet训练好的模型参数,然后在进行fine-tuning 179 | self.define_imagenet_stack(x0) 180 | 181 | # pretrained features are rather large, rescale down to nicer range 182 | imnet_r5 = 0.01 * self.imagenet.r5 183 | imnet_feats = imnet_r5.reshape(( 184 | self.bsize, T.prod(imnet_r5.shape[1:]))) 185 | 186 | # 这一部分的网络是粗网络的后半部分 187 | self.define_coarse_stack(imnet_feats) 188 | 189 | # fine stack 190 | self.define_fine_stack(x0) 191 | 192 | self.vars = MachinePart(locals()) 193 | 194 | def define_meta(self): 195 | ''' 196 | precomputed means and stdev 197 | ''' 198 | # just hardcoding 
for this release, was in meta.mat file 199 | images_mean = 109.31410628 200 | images_std = 76.18328376 201 | images_istd = 1.0 / images_std 202 | depths_mean = 2.53434899 203 | depths_std = 1.22576694 204 | depths_istd = 1.0 / depths_std 205 | logdepths_mean = 0.82473954 206 | logdepths_std = 0.45723134 207 | logdepths_istd = 1.0 / logdepths_std 208 | self.meta = MachinePart(locals()) 209 | 210 | def make_test_values(self): 211 | (input_h, input_w) = self.input_size 212 | (output_h, output_w) = self.output_size 213 | test_images_size = (self.bsize, 3, input_h, input_w) 214 | test_depths_size = (self.bsize, output_h, output_w) 215 | 216 | test_values = {} 217 | test_values['images'] = \ 218 | (255 * np.random.rand(*test_images_size)).astype(np.float32) 219 | test_values['depths'] = \ 220 | np.random.randn(*test_depths_size).astype(np.float32) 221 | test_values['masks'] = \ 222 | np.ones(test_depths_size, dtype=np.float32) 223 | return test_values 224 | #在coarse部分,与Alexnet相同的部分 225 | def define_imagenet_stack(self, x0): 226 | print "create net" 227 | conv1 = self.create_unit('imnet_conv1') 228 | pool1 = self.create_unit('imnet_pool1') 229 | conv2 = self.create_unit('imnet_conv2') 230 | pool2 = self.create_unit('imnet_pool2') 231 | conv3 = self.create_unit('imnet_conv3') 232 | conv4 = self.create_unit('imnet_conv4') 233 | conv5 = self.create_unit('imnet_conv5') 234 | pool5 = self.create_unit('imnet_pool5') 235 | 236 | z1 = conv1.infer(x0 - 128) 237 | (p1, s1) = pool1.infer(z1) 238 | r1 = cmrnorm(relu(p1))#局部对比度归一化层? 
239 | 240 | z2 = conv2.infer(r1) 241 | (p2, s2) = pool2.infer(z2) 242 | r2 = cmrnorm(relu(p2)) 243 | 244 | z3 = conv3.infer(r2) 245 | r3 = relu(z3) 246 | 247 | z4 = conv4.infer(r3) 248 | r4 = relu(z4) 249 | 250 | z5 = conv5.infer(r4) 251 | (p5, s5) = pool5.infer(z5) 252 | r5 = relu(p5) 253 | 254 | 255 | 256 | self.imagenet = MachinePart(locals()) 257 | 258 | def define_coarse_stack(self, imnet_feats): 259 | full1 = self.create_unit('full1', ninput=test_shape(imnet_feats)[1]) 260 | f_1 = relu(full1.infer(imnet_feats)) 261 | f_1_drop = random_zero(f_1, 0.5) 262 | f_1_mean = 0.5 * f_1 263 | 264 | full2 = self.create_unit('full2', ninput=test_shape(f_1_mean)[1]) 265 | 266 | f_2_drop = full2.infer(f_1_drop) 267 | f_2_mean = full2.infer(f_1_mean) 268 | 269 | # prediction 270 | (h, w) = self.output_size 271 | pred_drop = f_2_drop.reshape((self.bsize, h, w)) 272 | pred_mean = f_2_mean.reshape((self.bsize, h, w)) 273 | 274 | self.coarse = MachinePart(locals()) 275 | 276 | def define_fine_stack(self, x0): 277 | # pproc slightly different from imagenet because no cmrnorm 278 | x0_pproc = (x0 - self.meta.images_mean) \ 279 | * self.meta.images_istd 280 | 281 | conv_s2_1 = self.create_unit('conv_s2_1') 282 | z_s2_1 = relu(conv_s2_1.infer(x0_pproc)) 283 | 284 | pool_s2_1 = self.create_unit('pool_s2_1') 285 | (p_s2_1, s_s2_1) = pool_s2_1.infer(z_s2_1) 286 | 287 | # concat input features with coarse prediction 288 | (h, w) = self.output_size 289 | coarse_drop = self.coarse.pred_drop.reshape((self.bsize, 1, h, w)) 290 | coarse_mean = self.coarse.pred_mean.reshape((self.bsize, 1, h, w)) 291 | p_1_concat_drop = T.concatenate( 292 | (coarse_drop, 293 | p_s2_1[:, 1:, :, :]), 294 | axis=1) 295 | p_1_concat_mean = T.concatenate( 296 | (coarse_mean, 297 | p_s2_1[:, 1:, :, :]), 298 | axis=1) 299 | 300 | conv_s2_2 = self.create_unit('conv_s2_2') 301 | z_s2_2_drop = relu(conv_s2_2.infer(p_1_concat_drop)) 302 | z_s2_2_mean = relu(conv_s2_2.infer(p_1_concat_mean)) 303 | 304 | conv_s2_3 = 
self.create_unit('conv_s2_3') 305 | z_s2_3_drop = conv_s2_3.infer(z_s2_2_drop) 306 | z_s2_3_mean = conv_s2_3.infer(z_s2_2_mean) 307 | 308 | # prediction 309 | pred_drop = z_s2_3_drop[:,0,:,:] 310 | pred_mean = z_s2_3_mean[:,0,:,:] 311 | 312 | self.fine = MachinePart(locals()) 313 | #定义损失函数 这个会不会就是文献的创新点呢?缩放不变的损失函数 314 | def define_cost(self, pred, y0, m0): 315 | bsize = self.bsize 316 | npix = int(np.prod(test_shape(y0)[1:])) 317 | y0_target = y0.reshape((self.bsize, npix)) 318 | y0_mask = m0.reshape((self.bsize, npix)) 319 | pred = pred.reshape((self.bsize, npix)) 320 | 321 | p = pred * y0_mask 322 | t = y0_target * y0_mask 323 | 324 | d = (p - t) 325 | 326 | nvalid_pix = T.sum(y0_mask, axis=1) 327 | depth_cost = (T.sum(nvalid_pix * T.sum(d**2, axis=1)) 328 | - 0.5*T.sum(T.sum(d, axis=1)**2)) \ 329 | / T.maximum(T.sum(nvalid_pix**2), 1) 330 | 331 | return depth_cost 332 | 333 | def train(self): 334 | raise NotImplementedError() 335 | -------------------------------------------------------------------------------- /models/depth.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/models/depth.pyc -------------------------------------------------------------------------------- /net.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | ''' 3 | Copyright (C) 2014 New York University 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | ''' 18 | import os 19 | import sys 20 | import time 21 | import numpy as np 22 | import ipdb 23 | import cPickle 24 | 25 | from collections import OrderedDict 26 | 27 | import theano, theano.tensor as T 28 | from theano.tensor.nnet import conv as theano_conv 29 | from theano.tensor.nnet import sigmoid 30 | from theano.tensor import tanh 31 | 32 | from common import imgutil, logutil, configuration 33 | 34 | #import matplotlib.pyplot as plt 35 | 36 | import pooling 37 | import thutil 38 | 39 | from thutil import test_shape, theano_function, maximum 40 | 41 | _log = logutil.getLogger() 42 | 43 | floatX = theano.config.floatX 44 | 45 | theano.config.compute_test_value = 'raise' 46 | theano.config.store_test_value_maxsize = 32 47 | theano.config.on_unused_input = 'ignore' 48 | 49 | # to enable feature not yet in theano main for logicals as float32 on gpu 50 | # theano.config.scalar.logical_op_type = 'same_as_input' 51 | 52 | theano_rng = theano.tensor.shared_randomstreams.RandomStreams() 53 | 54 | xx = np.newaxis 55 | 56 | #网络结构,relu 激活函数 57 | def relu(x): 58 | return maximum(0, x) 59 | #softmat 层 60 | def softmax(x, axis=None): 61 | ''' 62 | Applies softmax to x over the given axis (i.e. exp/sum(exp)). 63 | ''' 64 | if isinstance(axis, int): 65 | m = T.max(x, axis=axis, keepdims=True) 66 | else: 67 | m = T.max(x) 68 | exp_x = T.exp(x - m) 69 | Z = T.sum(exp_x, axis=axis, keepdims=True) 70 | return exp_x / Z 71 | #log softmax 层,输入数据x 72 | def logsoftmax(x, axis=None): 73 | ''' 74 | Applies logsoftmax to x over the given axis (i.e. exp/sum(exp)). 
75 | ''' 76 | if isinstance(axis, int): 77 | m = T.max(x, axis=axis, keepdims=True) 78 | else: 79 | m = T.max(x) 80 | exp_x = T.exp(x - m) 81 | Z = T.sum(exp_x, axis=axis, keepdims=True) 82 | return x - m - T.log(Z) 83 | 84 | _mm_enable_compatibility_padding = True 85 | #卷积层,输入图片数据x,k为滤波器,stride 为卷积跨步 86 | def conv_theano_mm(x, k, border_mode, transpose=False, stride=1): 87 | #输入图片x: (bsize, xchan, h, w) 88 | #k为滤波器:(nfilt, xchan, filt_h, filt_w) 89 | 90 | (xh, xw) = test_shape(x)[-2:] 91 | (kh, kw) = test_shape(k)[-2:] 92 | 93 | if border_mode == 'valid': 94 | pad = (0,0) 95 | elif border_mode == 'same':#卷积后的图片大小与原图片的大小相同,因此左右两边都要加上卷积核宽度的一半 96 | pad = (kh // 2, kw // 2) 97 | elif border_mode == 'full': 98 | pad = (kh - 1, kw - 1) 99 | else: 100 | raise ValueError(border_mode) 101 | 102 | if stride != 1 and not transpose and _mm_enable_compatibility_padding: 103 | print 'True' 104 | # semi-compatibility with cudaconv 105 | # cudaconv strided convs go one filter tile past the end at the 106 | # bottom/right. Get the same size with some extra padding if needed. 107 | # The padding is centered, so this results in up to a half-stride image 108 | # shift to the right, not exactly the same as before. 
109 | if border_mode != 'valid': 110 | raise NotImplementedError() 111 | old_h = np.ceil((xh - kh) / float(stride)) * stride + kh 112 | old_w = np.ceil((xw - kw) / float(stride)) * stride + kw 113 | pad = (int(np.ceil((old_h - xh) / 2.0)), 114 | int(np.ceil((old_w - xw) / 2.0))) 115 | 116 | if transpose: 117 | (ph, pw) = pad 118 | bottom_shape = T.constant(np.array((stride * (xh - 1) - 2*ph + kh, 119 | stride * (xw - 1) - 2*pw + kw))) 120 | res = theano.sandbox.cuda.blas.GpuCorrMM_gradInputs( 121 | pad=pad, 122 | subsample=(stride, stride)) \ 123 | (k, x, shape=bottom_shape) 124 | else: 125 | res = theano.sandbox.cuda.blas.GpuCorrMM( 126 | pad=pad, 127 | subsample=(stride, stride)) \ 128 | (x, k) 129 | return res 130 | conv = conv_theano_mm 131 | 132 | def upsample_bilinear(x, scale): 133 | ''' 134 | Bilinearly upsamples x: 135 | (nimgs, nfeat, h, w) -> (nimgs, nfeat, h*scale, w*scale) 136 | ''' 137 | kx = np.linspace(0, 1, scale + 1)[1:-1] 138 | kx = np.concatenate((kx, [1], kx[::-1])) 139 | ker = kx[xx,:] * kx[:, xx] 140 | ker = T.constant(ker[xx,xx,:,:].astype(np.float32)) 141 | xbatch = x.reshape((x.shape[0] * x.shape[1], 1, x.shape[2], x.shape[3])) 142 | xup = conv(xbatch, ker, 'valid', transpose=True, stride=scale) 143 | return xup.reshape((x.shape[0], x.shape[1], xup.shape[2], xup.shape[3])) 144 | 145 | def filter_transpose(w): 146 | ''' 147 | Transposes and filps a set of filters. 148 | (output_maps, input_maps, h, w) -> (input_maps, output_maps, h, w) 149 | and each filter is rotated by 180deg in (h, w). 
150 | ''' 151 | return w.transpose((1,0,2,3))[:,:,::-1,::-1] 152 | 153 | _conv_mode_transpose = {'valid': 'full', 'full': 'valid', 'same': 'same'} 154 | 155 | def random_zero(x, p): 156 | ''' 157 | Keeps 1-p entries of x and zeros out a random subset with prob p 158 | ''' 159 | return x * theano_rng.binomial(size=x.shape, 160 | n=1, 161 | p=1-p, 162 | dtype=x.dtype) 163 | 164 | def feature_map_vectors(x): 165 | ''' 166 | Transpose/Reshape feature maps into (bsize*ni*nj, #feature maps) 167 | ''' 168 | (bsize, nc, ni, nj) = x.shape 169 | return x.transpose((0,2,3,1)).reshape((bsize*ni*nj, nc)) 170 | 171 | def feature_map_maps(x, xshape): 172 | ''' 173 | Transpose/Reshape feature map vectors back to xshape == (bsize, nc, ni, nj) 174 | ''' 175 | (bsize, nc, ni, nj) = xshape 176 | return x.reshape((bsize, ni, nj, nc)).transpose((0,3,1,2)) 177 | 178 | 179 | ### Machine class for tracking training state etc. ### 180 | 181 | _unit_types = {} 182 | def register_unit_class(cls): 183 | typename = getattr(cls, 'type', cls.__name__.lower()) 184 | _unit_types[typename] = cls 185 | return cls 186 | 187 | class Machine(object): 188 | def __init__(self, conf, state_subdir_name='state', **kwargs): 189 | self.conf = conf 190 | self.bsize = self.conf.getint('train', 'bsize') 191 | self.state_dir = logutil.Subdir(state_subdir_name) 192 | self.units = [] 193 | self.define_machine(**kwargs) 194 | #根据配置参数,创建网络的每一层 195 | def create_unit(self, sec, cls=None, name=None, load_key=None, **kwargs): 196 | conf_sec = self.conf.get_section(sec) 197 | if cls is None: 198 | cls = _unit_types[conf_sec.get('type')] 199 | if name is None: 200 | name = sec 201 | if load_key is None: 202 | load_key = conf_sec.get('load_key', name) 203 | 204 | kwargs['name'] = name 205 | kwargs['load_key'] = load_key 206 | kwargs['machine'] = self 207 | unit = cls(conf_sec, **kwargs) 208 | self.units.append(unit) 209 | return unit 210 | 211 | def define_machine(self): 212 | raise NotImplementedError 213 | 214 | 215 | 
class MachinePart(object): 216 | __slots__ = ('vars',) 217 | 218 | def __init__(self, vars, exclude=('self',)): 219 | self.vars = dict((k,v) for (k,v) in vars.iteritems() 220 | if k not in exclude) 221 | 222 | def __getattr__(self, k): 223 | if k in self.vars: 224 | return self.vars[k] 225 | return self.__getattribute__(k) 226 | 227 | def __getitem__(self, k): 228 | return getattr(self, k) 229 | 230 | def __setattr__(self, k, v): 231 | if k in self.__slots__: 232 | object.__setattr__(self, k, v) 233 | self.vars[k] = v 234 | 235 | def __setitem__(self, k, v): 236 | return setattr(self, k, v) 237 | 238 | 239 | def import_module(mod_file, modpath=''): 240 | import importlib 241 | (fpath, fname) = os.path.split(mod_file) 242 | (modname, ext) = os.path.splitext(fname) 243 | modpath = os.path.join(modpath, fpath) 244 | sys.path.insert(0, modpath) 245 | try: 246 | mod = importlib.import_module(modname, modpath) 247 | finally: 248 | sys.path.remove(modpath) 249 | assert (os.path.realpath(os.path.dirname(mod.__file__)) == 250 | os.path.realpath(modpath)), 'module path does not match' 251 | return mod 252 | 253 | #加载网络模型 254 | def create_machine(module_fn, config_fn, params_dir=None, 255 | edit_conf=None, load_saved_params=True): 256 | 257 | #读取网络结构 配置文件包 ConfigParser 258 | conf = configuration.read_config(config_fn) 259 | 260 | s=conf.sections() 261 | print 'section:',s 262 | conf.set_eval_environ(section='config') 263 | 264 | #加载网络模型训练好的参数 265 | if load_saved_params: 266 | assert params_dir, 'must supply params dir' 267 | if not conf.has_section('load'): 268 | conf.add_section('load') 269 | conf.set('load', 'all', params_dir) 270 | 271 | # user-supplied config edits 272 | if edit_conf: 273 | edit_conf(conf) 274 | 275 | # load definition module 276 | mod = import_module(module_fn) 277 | 278 | # construct machine class 279 | machine = getattr(mod, 'machine')(conf) 280 | 281 | return machine 282 | 283 | 284 | ### Units with parameters and inference methods ### 285 | 286 | 
class Unit(object):
    '''
    Base class for a network layer ("unit"): owns its theano shared
    parameters, knows how to load/save/initialize them, and produces SGD
    updates (momentum, weight decay, clipping) from its config section.

    conf:       this unit's config section
    name:       unit name (also used for the params pickle filename)
    load_key:   key used to look up per-unit dirs in [load]/[init] sections
    machine:    owning Machine (used for the training-state dir)
    tie_params: dict of parameters shared with another unit
    '''
    # NOTE(review): tie_params={} is a mutable default argument; safe only
    # as long as callers never mutate it.
    def __init__(self, conf, name, load_key=None, machine=None, tie_params={}):
        self.conf = conf
        self.name = name
        self.load_key = load_key
        self.params = None
        self.grads = None
        self.constraints = {}
        self.tie_params = tie_params
        self.machine = machine

    def infer(self, x):
        # abstract: compute this unit's output for input x
        raise NotImplementedError

    def add_constraint(self, param, constraint):
        '''Register a post-update constraint for param; constraints compose.'''
        if param in self.constraints:
            prev = self.constraints[param]
            self.constraints[param] = lambda x: constraint(prev(x))
        else:
            self.constraints[param] = constraint

    def _params_filename(self):
        # per-unit pickle filename
        return 'params-%s.pk' % self.name

    def _check_file(self, dir, fn, check_state_dir=True):
        '''Return the first existing path of dir/fn (or dir/state/fn), else None.'''
        if dir is None:
            return None
        fpaths = [os.path.join(dir, fn)]
        if check_state_dir:
            fpaths.append(os.path.join(dir, 'state', fn))
        for fpath in fpaths:
            if os.path.exists(fpath):
                return fpath
        return None

    def init_params(self, *args, **kwargs):
        '''
        Initializes parameters, either from a file or from initialization code
        for the unit.  This looks for parameters to use in the following
        order (highest precedence first):

        * load overrides for debug and interactive sessions
        1. params_file in unit config
        2. load_key in [load] config section
        3. default load dir ("all" in [load] config section)

        * params saved during training, loaded when resuming a run
        4. current training state in output
        5. current output directory

        * initializations, loaded once nothing was found for resuming
        6. load_key in [init] config section
        7. default init dir ("all" in [init] config section)

        * initialize by calling unit init code (since no file was specified)
        8. call unit _init_params()
        '''
        params_dir = None
        params_file = None
        fn = self._params_filename()

        # first check if a file is explicitly specified in unit config
        # if so, use it (even if it doesn't exist -- that case should error)
        case = 'in_config'
        params_file = self.conf.get('params_file', None)

        # if not, look in the dir for the load key specified for this unit
        if self.conf.parent.has_section('load'):
            if params_file is None and self.load_key is not None:
                case = 'load_key'
                params_dir = self.conf.parent.get('load', self.load_key, None)
                params_file = self._check_file(params_dir, fn)

            # then check in the default load dir
            if params_file is None:
                case = 'load_default'
                params_dir = self.conf.parent.get('load', 'all', None)
                params_file = self._check_file(params_dir, fn)

        # check current training state and output dir if the run is resumptive
        if self.conf.parent.getboolean('train', 'resumptive', True):
            if params_file is None:
                case = 'resume_current'
                params_dir = logutil.filename(self.machine.state_dir.current)
                params_file = self._check_file(params_dir, fn,
                                               check_state_dir=0)

            if params_file is None:
                case = 'resume_current'
                params_dir = logutil.filename(logutil.output_dir())
                params_file = self._check_file(params_dir, fn,
                                               check_state_dir=0)

        # next, look for initializations by key, then default init
        if self.conf.parent.has_section('init'):
            if params_file is None and self.load_key is not None:
                case = 'init_key'
                params_dir = self.conf.parent.get('init', self.load_key, None)
                params_file = self._check_file(params_dir, fn)

            if params_file is None:
                case = 'init_default'
                params_dir = self.conf.parent.get('init', 'all', None)
                params_file = self._check_file(params_dir, fn)

        # if we did not find a params file, init with _init_params()
        if params_file is None:
            case = 'none'

        # tied parameters are installed as attributes before loading/init
        kwargs['tie_params'] = self.tie_params
        for (k, x) in self.tie_params.iteritems():
            setattr(self, k, x)

        # if params_file is not None, load parameters from that file
        if params_file is not None:
            assert case != 'none'
            self.load_params(params_file)
            self.loaded = case in ('in_config', 'load_key', 'load_default')
            self.resumed = case in ('resume_current',)
            self.init_from_load = case in ('init_key', 'init_default')
        # otherwise fall back to the unit's own initialization code
        else:
            self.params = []
            self._init_params(*args, **kwargs)
            self.loaded = False
            self.resumed = False
            self.init_from_load = False

    # Save parameters (plus any extra attributes named in attrs) to a pickle.
    def _save_params(self, dir=None, fn=None, attrs=[]):
        if fn is None:
            fn = self._params_filename()
        if dir:
            fn = os.path.join(dir, fn)
        pdict = dict((x, getattr(self, x)) for x in attrs)
        if self.params:
            pdict.update((p.name, p) for p in self.params)
            # 'params' records the ordering of parameter names
            pdict['params'] = [p.name for p in self.params]
        with logutil.open(fn, 'w') as f:
            cPickle.dump(pdict, f, cPickle.HIGHEST_PROTOCOL)

    # Load parameters from a pickle file (a file path, not a directory).
    def _load_params(self, fn):
        _log.info('Loading parameters from %s' % fn)

        with logutil.consistent_dir(os.path.dirname(fn)):
            with open(fn, 'r') as f:
                pdict = cPickle.load(f)
        params = pdict.pop('params', [])

        # every pickled entry becomes an attribute; self.params is rebuilt
        # in the order recorded under 'params'
        for (name, value) in pdict.iteritems():
            setattr(self, name, value)
        self.params = [pdict[x] for x in params]

    save_params = _save_params
    load_params = _load_params

    def get_updates(self, cost, learning_rate, momentum):
        '''
        Build the SGD update dict for this unit's parameters.

        Respects per-unit config options: grad_mult, grad_clip,
        learning_rate, learning_rate_scale[_<param>], weight_decay_<param>,
        plus any registered constraints.  Returns an OrderedDict mapping
        shared variables to their updated expressions.
        '''
        if not self.params:
            self.learning_rate = T.constant(0)
            return {}

        # momentum buffers, one per parameter, created lazily
        if self.grads is None:
            self.grads = [theano.shared(np.zeros_like(p.get_value()))
                          for p in self.params]

        # compute the gradients of the cost with respect to the parameters
        gparams = T.grad(cost, self.params, disconnected_inputs='ignore')
        grad_mult = self.conf.geteval('grad_mult', None)
        if grad_mult is not None:
            grad_mult = T.constant(grad_mult, dtype=floatX)
            gparams = [g * grad_mult for g in gparams]

        clip = self.conf.getfloat('grad_clip', None)
        if clip is not None:
            gparams = [T.clip(g, -clip, clip) for g in gparams]

        self.gparams = gparams

        # generate the list of updates
        gupdates = OrderedDict()
        pupdates = OrderedDict()

        # per-unit learning rate overrides the global one when set
        self.learning_rate = self.conf.getfloat('learning_rate', None)
        if self.learning_rate:
            self.learning_rate = T.constant(self.learning_rate)
        else:
            self.learning_rate = learning_rate
        for (gparam, param, gold) in zip(gparams, self.params, self.grads):
            lrscale = self.conf.getfloat(
                          'learning_rate_scale_%s' % param.name,
                          None)
            if lrscale is None:
                lrscale = self.conf.getfloat('learning_rate_scale', 1.0)
            decay = self.conf.getfloat('weight_decay_%s' % param.name, 0.0)

            lr = self.learning_rate
            if lrscale != 1.0:
                lr *= lrscale

            # L2 weight decay folded into the gradient
            if decay:
                gparam += decay * param

            if momentum:
                gnew = momentum * gold + gparam
                gupdates[gold] = gnew
                pupdates[param] = param - lr * gnew
            else:
                gupdates[gold] = gparam
                pupdates[param] = param - lr * gparam

        # apply update constraints
        for (p, constraint) in self.constraints.iteritems():
            pupdates[p] = constraint(pupdates[p])

        return OrderedDict(gupdates.items() + pupdates.items())

# Max pooling layer.
@register_unit_class
class MaxPool(Unit):
    # Read pooling parameters from the unit's config section.
    # NOTE(review): **kwargs (possibly containing vis_shape) is forwarded
    # unchanged to Unit.__init__, whose signature has no **kwargs -- verify
    # how vis_shape reaches this constructor in practice.
    def __init__(self, conf, **kwargs):
        Unit.__init__(self, conf, **kwargs)
        self.conf = conf
        self.vis_shape = kwargs.get('vis_shape', None)
        self.poolsize = self.conf.geteval('poolsize', None)
        self.poolstride = self.conf.geteval('poolstride', None)

    # Apply max pooling to the input feature maps y.
    def pool(self, y):
        print "pool"
        '''apply pooling to unpooled output'''
        # remember the pre-pooling spatial shape (needed by unpooling)
        if self.vis_shape is None:
            self.vis_shape = test_shape(y)[-2:]
        (p_y, p_inds) = pooling.maxpool2d(y, winsize=self.poolsize,stride=self.poolstride)

        return (p_y, p_inds)

    infer = pool

    # The unpool method below would be used for deconvolution (FCN-style
    # upsampling); it is unused in this paper, hence commented out.
    # def unpool(self, y, inds):
    #     print "unpool"
    #     '''unpool pooled output'''
    #     y = pooling.index_unpool_2d(y, inds,
    #                                 winsize=self.poolsize,
    #                                 stride=self.poolstride,
    #                                 output_shape=self.vis_shape[-2:])
    #     return y


# @register_unit_class
# class SumPool(Unit):
#     def __init__(self, conf, **kwargs):
#         Unit.__init__(self, conf, **kwargs)
#         self.conf = conf
#         self.vis_shape = kwargs.get('vis_shape', None)
#         self.average = self.conf.getboolean('average', False)
#         self.poolsize = self.conf.geteval('poolsize', None)
#         self.poolstride = self.conf.geteval('poolstride', None)
#
#     def pool(self, y):
#         print "unpool"
#         '''apply pooling to unpooled output'''
#         self.vis_shape = self.vis_shape or test_shape(y)[-2:]
#         p_y = pooling.sumpool2d(y, winsize=self.poolsize,
#                                 stride=self.poolstride,
#                                 average=self.average)
#         return p_y
#
#     infer = pool
#
#     def unpool(self, y):
#         print "unpool"
#         '''unpool pooled output'''
#         y = pooling.sum_unpool_2d(y,
#                                   winsize=self.poolsize,
#                                   stride=self.poolstride,
#                                   average=self.average,
#                                   output_shape=self.vis_shape[-2:])
#         return y


# Convolution layer.
@register_unit_class
class Conv(Unit):
    # Read the layer's hyperparameters from the config file.
    def __init__(self, conf, init_W=None, **kwargs):

        Unit.__init__(self, conf, **kwargs)
        self.conf = conf
        assert self.conf.get('type') == 'conv'
        self.filter_shape = self.conf.geteval('filter_shape')
        self.conv_mode = self.conf.get('conv_mode', 'valid')

        self.transpose = self.conf.getboolean('transpose', False)
        self.have_bias = self.conf.getboolean('bias', True)
        self.stride = self.conf.getint('stride', 1)

        self.init_params(init_W)

    # Initialize this layer's parameters (unless tied or loaded from file).
    def _init_params(self, init_W, tie_params):
        (nfilt, fc, fi, fj) = self.filter_shape

        if 'W' not in tie_params:
            if init_W is None:
                w_shape = self.filter_shape
                # init_W in the config evaluates to an initializer function
                init_W = self.conf.geteval('init_W')(w_shape).astype(floatX)
            self.W = theano.shared(value=init_W, name='W')
            self.params.append(self.W)

        if self.have_bias and 'b' not in tie_params:
            init_b = self.conf.geteval('init_b', 0)
            # a transposed conv outputs fc channels instead of nfilt
            nb = nfilt if not self.transpose else fc
            self.b = theano.shared(init_b + np.zeros(nb, dtype=floatX),
                                   name='b')
            self.params.append(self.b)

    # Compute the layer output.
    def infer(self, x):
        (nfilt, fc, fi, fj) = self.filter_shape
        if (fi, fj) == (1, 1):  # 1x1 kernels reduce to a matrix multiply
            W = self.W.reshape((nfilt, fc))
            (bsize, nc, ni, nj) = x.shape
            xvec = x.transpose((1,0,2,3)).reshape((nc, bsize*ni*nj))
            if self.transpose:
                y = T.dot(W.T, xvec)
                y = y.reshape((fc, bsize, ni, nj)).transpose((1,0,2,3))
            else:
                y = T.dot(W, xvec)
                y = y.reshape((nfilt, bsize, ni, nj)).transpose((1,0,2,3))
            y = thutil.gpu_contiguous(y)
        else:  # general convolution
            y = conv(x, self.W, border_mode=self.conv_mode,
                     transpose=self.transpose,
                     stride=self.stride)
        if self.have_bias:
            # broadcast bias over batch and spatial dims
            y += self.b.reshape((1, self.b.shape[0], 1, 1))
        return y

# Fully-connected layer.
@register_unit_class
class Full(Unit):
    # ninput: number of input units; the layer width comes from conf.
    def __init__(self, conf, ninput, init_W=None, **kwargs):
        Unit.__init__(self, conf, **kwargs)
        self.conf = conf
        assert self.conf.get('type') == 'full'

        self.ninput = ninput    # number of inputs
        self.noutput = self.conf.getint('noutput')  # number of outputs
        self.transpose = self.conf.getboolean('transpose', False)
        self.have_bias = self.conf.getboolean('bias', True)

        self.init_params(init_W)

    # Parameter initialization (unless tied or loaded from file).
    def _init_params(self, init_W, tie_params):
        if 'W' not in tie_params:
            if init_W is None:
                w_shape = (self.ninput, self.noutput)
                init_W = self.conf.geteval('init_W')(w_shape).astype(floatX)
            self.W = theano.shared(value=init_W, name='W')
            self.params.append(self.W)

        if self.have_bias and 'b' not in tie_params:
            # a transposed layer maps noutput -> ninput
            nbias = self.noutput if not self.transpose else self.ninput
            init_b = self.conf.geteval('init_b', 0)
            init_b = self.conf.geteval('init_bias', init_b)
            self.bias = theano.shared(init_b + np.zeros(nbias, dtype=floatX),
                                      name='bias')
            self.params.append(self.bias)

    # Compute the layer output: y = x W (+ bias), optionally with W transposed.
    def infer(self, x):
        W = self.W
        if self.transpose:
            W = W.T
        y = T.dot(x, W)
        if self.have_bias:
            y += self.bias.reshape((1, self.bias.size))
        return y

--------------------------------------------------------------------------------
/net.pyc: (binary, omitted)
--------------------------------------------------------------------------------
/pooling.py:
--------------------------------------------------------------------------------
'''
Copyright (C) 2014 New York University

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import numpy as np
import theano
import theano.tensor as T
from theano import Op, Apply
from theano.gradient import DisconnectedType

import thutil
from thutil import test_value, Eval

if thutil.use_gpu:
    import theano.sandbox.cuda
    from theano.sandbox.cuda import GpuOp, gpu_from_host, host_from_gpu, \
        CudaNdarrayType, CudaNdarray


def subsample2d(input, stride=(2,2), output_shape=None, transpose='n'):
    '''
    Strided spatial subsampling of a 4d tensor (bsize, nc, i, j).
    transpose='t' scatters input back onto a zero tensor of output_shape
    (the adjoint of the subsampling).
    '''
    (bsize, ic, ix, iy) = input.shape
    (dx, dy) = stride
    if transpose.lower() == 't':
        if output_shape is None:
            output_shape = (ix * dx, iy * dy)
        out = T.zeros((bsize, ic,) + output_shape, dtype=input.dtype)
        out = T.set_subtensor(out[:, :, ::dx, ::dy], input)
    else:
        out = input[:, :, ::dx, ::dy]
    return out

def maxpool2d(input, winsize, stride=None, input_shape=None):
    '''Max pooling; returns (pooled values, argmax indices within each window).'''
    if input_shape is None:
        # spatial (i, j) shape taken from the theano test value
        input_shape = test_value(input).shape[-2:]
    inds = maxinds_2d(input, winsize, stride, input_shape)
    vals = index_pool_2d(input, inds, winsize, stride, input_shape)
    return (vals, inds)

def maxinds_2d(input, winsize, stride=None, input_shape=None):
    '''Per-window flat argmax indices (see MaxInds2D).'''
    if input_shape is None:
        input_shape = test_value(input).shape[-2:]
    return MaxInds2D(input_shape, winsize, stride)(input)

def index_pool_2d(input, inds, winsize, stride=None,
                  input_shape=None):
    '''Gather one value per window using precomputed indices (IndexPool2D).'''
    if input_shape is None:
        input_shape = test_value(input).shape[-2:]
    return IndexPool2D(input_shape, winsize, stride)(input, inds)

def index_unpool_2d(input, inds, winsize, stride=None,
                    input_shape=None, output_shape=None):
    '''Scatter pooled values back to their argmax positions (IndexUnpool2D).'''
    if input_shape is None:
        input_shape = test_value(input).shape[-2:]
    return IndexUnpool2D(input_shape, winsize, stride,
                         output_shape=output_shape)(input, inds)

def sumpool2d(input, winsize, stride=None, input_shape=None, average=False):
    '''Sum (or average) pooling (SumPool2D).'''
    if input_shape is None:
        input_shape = test_value(input).shape[-2:]
    return SumPool2D(input_shape, winsize, stride, average=average)(input)

def sum_unpool_2d(input, winsize, stride=None,
                  input_shape=None, output_shape=None, average=False):
    '''Broadcast each pooled value back over its window (SumUnpool2D).'''
    if input_shape is None:
        input_shape = test_value(input).shape[-2:]
    return SumUnpool2D(input_shape, winsize, stride, output_shape,
                       average=average)(input)

def maxpool_features(input, winsize):
    '''
    Max pooling across the channel axis in groups of winsize
    (maxout-style), implemented by reshaping channels into a spatial dim.
    '''
    if winsize == 1:
        return (input, T.zeros_like(input))
    (bsize, nc, ni, nj) = input.shape
    inp = input.transpose((0,2,3,1)).reshape((bsize, ni*nj, nc, 1))
    (vals, inds) = maxpool2d(inp, (winsize, 1))
    # NOTE(review): Python 2 integer division intended here (nc / winsize)
    sz = vals.size / (bsize*ni*nj)
    vals = vals.reshape((bsize, ni, nj, sz)).transpose((0,3,1,2))
    return (vals, inds)

def index_unpool_features(input, inds, winsize):
    '''Inverse of maxpool_features using the saved argmax indices.'''
    if winsize == 1:
        return input
    (bsize, nc, ni, nj) = input.shape
    inp = input.transpose((0,2,3,1)).reshape((bsize, ni*nj, nc, 1))
    vals = index_unpool_2d(inp, inds, (winsize,1))
    sz = vals.size / (bsize*ni*nj)
    vals = vals.reshape((bsize, ni, nj, sz)).transpose((0,3,1,2))
    return vals

def cmrnorm(x, winsize=5, scale=0.0001, pow=0.75, input_shape=None):
    '''Cross-map (cross-channel) response normalization (see CMRNorm).'''
    if input_shape is None:
        # NOTE(review): unlike the other wrappers this evaluates x.shape as a
        # theano variable and keeps dims (nc, ni, nj) -- confirm intended.
        input_shape = test_value(x.shape)[1:]
    return CMRNorm(input_shape, winsize, scale, pow, x.dtype)(x)

class PoolOp(Op):
    '''
    Base theano Op for 2d pooling over the last two axes of a 4d tensor.
    Subclasses set is_pooling/is_unpooling and implement perform_pool /
    perform_unpool plus optional C code.  Ops are keyed (__eq__/__hash__)
    on their static shape/window/stride configuration.
    '''
    (is_pooling, is_unpooling) = (True, False)

    def __init__(self, input_shape, winsize, stride,
                 output_shape=None, dtype=None):
        # stride defaults to the window size (non-overlapping windows)
        if stride is None:
            stride = winsize
        self.input_shape = input_shape
        self.winsize = winsize
        self.stride = stride
        if output_shape is None:
            output_shape = self._infer_shape(input_shape)
        self.output_shape = output_shape
        self.output_dtype = dtype
        self._hash_key = (type(self), self.input_shape,
                          self.winsize, self.stride,
                          self.output_shape, self.output_dtype)

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.input_shape == other.input_shape and
                self.winsize == other.winsize and
                self.stride == other.stride and
                self.output_shape == other.output_shape and
                self.output_dtype == other.output_dtype)

    def __hash__(self):
        return hash(self._hash_key)

    def _infer_shape(self, input_shape):
        '''Spatial output shape from input shape, window and stride.'''
        (x, y) = input_shape
        (wx, wy) = self.winsize
        (sx, sy) = self.stride
        if self.is_pooling:
            return ((x-wx)//sx + 1, (y-wy)//sy + 1)
        else:  # unpooling
            return ((x-1)*sx + wx, (y-1)*sy + wy)

    def infer_shape(self, node, input_shapes):
        # keep (bsize, nchan), replace the spatial dims
        s = input_shapes[0][:-2] + self.output_shape
        return (s,)

    def make_node(self, *inputs):
        inputs = tuple(map(T.as_tensor_variable, inputs))
        output = T.tensor4(dtype=(self.output_dtype or inputs[0].dtype))
        return Apply(self, inputs, (output,))

    def perform(self, node, inputs, (output,)):
        '''Python fallback: apply perform_pool to every pooling window.'''
        (bsize, nchan, unpooli, unpoolj) = inputs[0].shape
        (pooli, poolj) = self.output_shape
        (wi, wj) = self.winsize
        (si, sj) = self.stride
        output_dtype = self.output_dtype or node.inputs[0].dtype
        out = output[0]
        # reuse the previous output buffer when shape and dtype still match
        if out is None or out.dtype != output_dtype or \
                out.shape != (bsize, nchan, pooli, poolj):
            out = output[0] = np.empty((bsize, nchan, pooli, poolj),
                                       dtype=output_dtype)
        inp = inputs[0]
        poolfunc = self.perform_pool
        for b in xrange(bsize):
            for c in xrange(nchan):
                x = out[b, c].flat
                x[:] = [poolfunc(inp[b, c, i*si:i*si+wi, j*sj:j*sj+wj],
                                 inputs, b, c, i, j)
                        for i in xrange(pooli)
                        for j in xrange(poolj)]

    def _pool_c_code(self, node, name, input, output, body, sub,
                     output_type=None):
        '''
        Shared C scaffold: allocates the 4d output and iterates every
        (batch, channel, window), splicing the subclass-supplied `body`
        into the innermost loop.
        '''
        (unpooli, unpoolj) = self.input_shape
        (pooli, poolj) = self.output_shape
        (wi, wj) = self.winsize
        (si, sj) = self.stride
        fail = sub['fail']
        if output_type is None:
            output_type = 'PyArray_ObjectType((PyObject*) %s, 0)' % input

        code = '''
        #define MIN(a,b) ((a) < (b) ? (a) : (b))
        int istart, jstart, iend, jend;
        int ind;
        int bsize = PyArray_DIMS(%(input)s)[0];
        int nchan = PyArray_DIMS(%(input)s)[1];
        npy_intp dims[4] = {0, 0, %(pooli)d, %(poolj)d};
        dims[0] = bsize;
        dims[1] = nchan;

        if (PyArray_NDIM(%(input)s) != 4) {
            PyErr_SetString(PyExc_ValueError, "input must be a 4d ndarray");
            %(fail)s;
        }
        Py_XDECREF(%(output)s);
        %(output)s = (PyArrayObject*) PyArray_ZEROS(
            4, dims, %(output_type)s, 0);

        for (int b = 0; b < bsize; ++b) {
            for (int c = 0; c < nchan; ++c) {
                for (int i = 0; i < %(pooli)d; ++i) {
                    istart = i * %(si)d;
                    iend = MIN(istart + %(wi)d, %(unpooli)d);
                    for (int j = 0; j < %(poolj)d; ++j) {
                        jstart = j * %(sj)d;
                        jend = MIN(jstart + %(wj)d, %(unpoolj)d);

                        %(body)s
                    }
                }
            }
        }
        ''' % locals()
        return code

class UnpoolOp(PoolOp):
    '''Base Op for the inverse direction: scatter pooled values into windows.'''
    (is_pooling, is_unpooling) = (False, True)

    def perform(self, node, inputs, (output,)):
        vals = inputs[0]
        (bsize, nchan, pooli, poolj) = vals.shape
        (wi, wj) = self.winsize
        (si, sj) = self.stride
        (unpooli, unpoolj) = self.output_shape
        # output always reallocated and zero-filled; windows may overlap
        out = output[0] = np.zeros((bsize, nchan, unpooli, unpoolj),
                                   dtype=vals.dtype)
        for b in xrange(bsize):
            for c in xrange(nchan):
                for i in xrange(pooli):
                    for j in xrange(poolj):
                        x = out[b, c, i*si:i*si+wi, j*sj:j*sj+wj]
                        self.perform_unpool(x, vals[b,c,i,j],
                                            inputs, b, c, i, j)


class MaxInds2D(PoolOp):
    '''Computes the flat (row-major within-window) argmax index per window.'''
    def perform_pool(self, pool_vals, inputs, b, c, i, j):
        return np.argmax(pool_vals)

    def make_gpu_node(self, input):
        return MaxInds2D_GPU(self.input_shape, self.winsize, self.stride) \
               (input)

    def c_support_code(self):
        # NOTE(review): the C++ template parameter list (e.g. "<typename T>")
        # was lost in text extraction of this dump; restore from upstream.
        code = '''
        template
        inline int _argmax(PyArrayObject *x, int b, int c,
                           int istart, int iend, int jstart, int jend)
        {
            int k = 0, kmax = 0;
            T v, vmax;
            vmax = *(T*) PyArray_GETPTR4(x, b, c, istart, jstart);
            for (int i = istart; i < iend; ++i) {
                for (int j = jstart; j < jend; ++j, ++k) {
                    v = *(T*) PyArray_GETPTR4(x, b, c, i, j);
                    if (v > vmax) {
                        vmax = v;
                        kmax = k;
                    }
                }
            }
            return kmax;
        }
        '''
        return code

    def c_code(self, node, name, (input,), (output,), sub):
        # NOTE(review): output_type is computed here but not forwarded to
        # _pool_c_code (which then derives the type from the input array) --
        # presumably it should be passed as output_type=; verify upstream.
        output_type = {'int32': 'NPY_INT',
                       'float32': 'NPY_FLOAT32',
                       'float64': 'NPY_FLOAT64',
                       }[self.output_dtype or node.inputs[0].dtype]
        body = '''
        int v = _argmax(
            %(input)s, b, c, istart, iend, jstart, jend);
        *(dtype_%(output)s*) PyArray_GETPTR4(%(output)s, b, c, i, j)
            = (dtype_%(output)s) v;
        ''' % locals()
        return self._pool_c_code(node, name, input, output, body, sub)


class IndexPool2D(PoolOp):
    '''Gathers, per window, the element selected by a precomputed index map.'''
    def perform_pool(self, pool_vals, inputs, b, c, i, j):
        # inputs[1] holds the flat within-window indices (from MaxInds2D)
        return pool_vals.flat[int(inputs[1][b,c,i,j])]

    def grad(self, (vals, inds), (dvals,)):
        # gradient scatters dvals back through the same indices;
        # the index input itself has no gradient
        return (IndexUnpool2D(self.output_shape, self.winsize, self.stride,
                              output_shape=self.input_shape)(dvals, inds),
                DisconnectedType()(),)

    def make_gpu_node(self, input, inds):
        return IndexPool2D_GPU(self.input_shape, self.winsize, self.stride) \
               (input, inds)
    def c_support_code(self):
        # NOTE(review): C++ template parameter list lost in text extraction.
        code = '''
        template
        inline T _select_ind(PyArrayObject *x, int b, int c,
                             int istart, int iend, int jstart, int jend,
                             int ind)
        {
            /* decode the flat within-window index into (i, j) */
            int jlen = jend - jstart;
            int i = istart + ind / jlen;
            int j = jstart + ind % jlen;
            return *(T*) PyArray_GETPTR4(x, b, c, i, j);
        }
        '''
        return code

    def c_code(self, node, name, (input, inds), (output,), sub):
        body = '''
        int ind = (int) *(dtype_%(inds)s*)
                  PyArray_GETPTR4(%(inds)s, b, c, i, j);
        dtype_%(input)s v = _select_ind(
            %(input)s, b, c,
            istart, iend, jstart, jend,
            ind);
        *(dtype_%(output)s*)
            PyArray_GETPTR4(%(output)s, b, c, i, j) = v;
        ''' % locals()
        return self._pool_c_code(node, name, input, output, body, sub)


class IndexUnpool2D(UnpoolOp):
    '''Scatter-add each pooled value to its indexed position in the window.'''
    def perform_unpool(self, unpool_vals, pool_val, inputs, b, c, i, j):
        # += because overlapping windows may hit the same output cell
        unpool_vals.flat[int(inputs[1][b,c,i,j])] += pool_val

    def make_gpu_node(self, input, inds):
        return IndexUnpool2D_GPU(self.input_shape, self.winsize, self.stride,
                                 self.output_shape)(input, inds)

    def grad(self, (vals, inds), (doutput,)):
        # gradient gathers back through the same indices
        return (IndexPool2D(self.output_shape, self.winsize, self.stride)
                (doutput, inds),
                DisconnectedType()(),)

    def c_support_code(self):
        # NOTE(review): C++ template parameter list lost in text extraction.
        code = '''
        #define MIN(a,b) ((a) < (b) ? (a) : (b))
        template
        inline void _add_ind(
                PyArrayObject *x, int b, int c,
                int istart, int iend, int jstart, int jend,
                int ind, T val)
        {
            int jlen = jend - jstart;
            int i = istart + ind / jlen;
            int j = jstart + ind % jlen;
            *(T*) PyArray_GETPTR4(x, b, c, i, j) += val;
        }
        '''
        return code

    def c_code(self, node, name, (input, inds), (output,), sub):
        # unpooling needs its own scaffold: the loop runs over the *pooled*
        # shape while writing into the larger unpooled output
        (unpooli, unpoolj) = self.output_shape
        (pooli, poolj) = self.input_shape
        (wi, wj) = self.winsize
        (si, sj) = self.stride
        fail = sub['fail']

        code = '''
        int istart, jstart;
        int ind;
        int bsize = PyArray_DIMS(%(input)s)[0];
        int nchan = PyArray_DIMS(%(input)s)[1];
        npy_intp dims[4] = {0, 0, %(unpooli)d, %(unpoolj)d};
        dims[0] = bsize;
        dims[1] = nchan;
        dtype_%(output)s v;

        if (PyArray_NDIM(%(input)s) != 4) {
            PyErr_SetString(PyExc_ValueError, "input must be a 4d ndarray");
            %(fail)s;
        }
        Py_XDECREF(%(output)s);
        %(output)s = (PyArrayObject*) PyArray_ZEROS(
            4, dims,
            PyArray_ObjectType((PyObject*) %(input)s, 0),
            0);

        for (int b = 0; b < bsize; ++b) {
            for (int c = 0; c < nchan; ++c) {
                for (int i = 0; i < %(pooli)d; ++i) {
                    istart = i * %(si)d;
                    for (int j = 0; j < %(poolj)d; ++j) {
                        jstart = j * %(sj)d;

                        ind = (int) *(dtype_%(inds)s*)
                              PyArray_GETPTR4(%(inds)s, b, c, i, j);
                        v = *(dtype_%(input)s*)
                            PyArray_GETPTR4(%(input)s, b, c, i, j);
                        _add_ind(
                            %(output)s, b, c,
                            istart, MIN(istart + %(wi)d, %(unpooli)d),
                            jstart, MIN(jstart + %(wj)d, %(unpoolj)d),
                            ind, v);
                    }
                }
            }
        }
        ''' % locals()
        return code


class SumPool2D(PoolOp):
    '''Sum pooling; with average=True divides each window sum by its area.'''
    def __init__(self, *args, **kwargs):
        self.average = kwargs.pop('average', False)
        PoolOp.__init__(self, *args, **kwargs)
        # average participates in Op identity
        self._hash_key = self._hash_key + (self.average,)

    def __eq__(self, other):
        return PoolOp.__eq__(self, other) and self.average == other.average

    def perform_pool(self, pool_vals, inputs, b, c, i, j):
        if self.average:
            return np.mean(pool_vals)
        return np.sum(pool_vals)

    def make_gpu_node(self, input):
        return SumPool2D_GPU(self.input_shape, self.winsize, self.stride,
                             average=self.average)(input)

    def grad(self, (vals,), (dvals,)):
        # adjoint of sum-pooling broadcasts the gradient over each window
        return (SumUnpool2D(self.output_shape, self.winsize, self.stride,
                            output_shape=self.input_shape,
                            average=self.average)
                (dvals),
                )

    def c_support_code(self):
        # NOTE(review): C++ template parameter list lost in text extraction;
        # `average` is evidently a template/compile-time parameter here.
        code = '''
        template
        inline T _sum_window(PyArrayObject *x, int b, int c,
                             int istart, int iend, int jstart, int jend)
        {
            T vsum = 0;
            for (int i = istart; i < iend; ++i) {
                for (int j = jstart; j < jend; ++j) {
                    vsum += *(T*) PyArray_GETPTR4(x, b, c, i, j);
                }
            }
            if (average)
                vsum /= (iend - istart) * (jend - jstart);
            return vsum;
        }
        '''
        return code

    def c_code(self, node, name, (input,), (output,), sub):
        # NOTE(review): as in MaxInds2D, output_type is computed but not
        # forwarded to _pool_c_code -- verify against upstream.
        output_type = {'int32': 'NPY_INT',
                       'float32': 'NPY_FLOAT32',
                       'float64': 'NPY_FLOAT64',
                       }[self.output_dtype or node.inputs[0].dtype]
        average = int(self.average)
        body = '''
        dtype_%(input)s v = _sum_window(
            %(input)s, b, c, istart, iend, jstart, jend);
        *(dtype_%(output)s*) PyArray_GETPTR4(%(output)s, b, c, i, j)
            = (dtype_%(output)s) v;
        ''' % locals()
        return self._pool_c_code(node, name, input, output, body, sub)


class SumUnpool2D(UnpoolOp):
    '''Broadcast each pooled value over its window (optionally averaged).'''
    def __init__(self, *args, **kwargs):
        self.average = kwargs.pop('average', False)
        UnpoolOp.__init__(self, *args, **kwargs)
        self._hash_key = self._hash_key + (self.average,)

    def __eq__(self, other):
        return UnpoolOp.__eq__(self, other) and self.average == other.average

    def perform_unpool(self, unpool_vals, pool_val, inputs, b, c, i, j):
        if self.average:
            # spread the value evenly over the window
            unpool_vals += pool_val / float(unpool_vals.size)
        else:
            unpool_vals += pool_val

    def make_gpu_node(self, input):
        return SumUnpool2D_GPU(self.input_shape, self.winsize, self.stride,
                               self.output_shape,
                               average=self.average)(input)

    def grad(self, (vals,), (dvals,)):
        # adjoint of sum-unpooling is sum-pooling
        return (SumPool2D(self.output_shape, self.winsize, self.stride,
                          average=self.average)
                (dvals),
                )

    def c_support_code(self):
        # NOTE(review): C++ template parameter list lost in text extraction.
        code = '''
        #define MIN(a,b) ((a) < (b) ? (a) : (b))
        template
        inline void _add_val_to_window(
                PyArrayObject *x, int b, int c,
                int istart, int iend, int jstart, int jend,
                T val)
        {
            if (average) {
                val /= (T) ((iend - istart) * (jend - jstart));
            }
            for (int i = istart; i < iend; ++i) {
                for (int j = jstart; j < jend; ++j) {
                    *(T*) PyArray_GETPTR4(x, b, c, i, j) += val;
                }
            }
        }
        '''
        return code

    def c_code(self, node, name, (input,), (output,), sub):
        (unpooli, unpoolj) = self.output_shape
        (pooli, poolj) = self.input_shape
        (wi, wj) = self.winsize
        (si, sj) = self.stride
        average = self.average
        fail = sub['fail']

        code = '''
        int istart, jstart;
        int bsize = PyArray_DIMS(%(input)s)[0];
        int nchan = PyArray_DIMS(%(input)s)[1];
        npy_intp dims[4] = {0, 0, %(unpooli)d, %(unpoolj)d};
        dims[0] = bsize;
        dims[1] = nchan;
        dtype_%(output)s v;

        if (PyArray_NDIM(%(input)s) != 4) {
            PyErr_SetString(PyExc_ValueError, "input must be a 4d ndarray");
            %(fail)s;
        }
        Py_XDECREF(%(output)s);
        %(output)s = (PyArrayObject*) PyArray_ZEROS(
            4, dims,
            PyArray_ObjectType((PyObject*) %(input)s, 0),
            0);

        for (int b = 0; b < bsize; ++b) {
            for (int c = 0; c < nchan; ++c) {
                for (int i = 0; i < %(pooli)d; ++i) {
                    istart = i * %(si)d;
                    for (int j = 0; j < %(poolj)d; ++j) {
                        jstart = j * %(sj)d;

                        v = *(dtype_%(input)s*)
                            PyArray_GETPTR4(%(input)s, b, c, i, j);
                        /* add val to all elements in this pooling window */
                        _add_val_to_window(
                            %(output)s, b, c,
                            istart, MIN(istart + %(wi)d, %(unpooli)d),
                            jstart, MIN(jstart + %(wj)d, %(unpoolj)d),
                            v);
                    }
                }
            }
        }
        ''' % locals()
        return code


class CMRNorm(Op):
    '''
    Cross-map response normalization: for each position, sums x**2 over a
    window of `winsize` adjacent channels and computes
        out = x * (2 + scale * sums) ** (-pow)
    (note the constant 2 rather than the AlexNet-style k).
    '''
    def __init__(self, input_shape, winsize, scale, pow, dtype=None):
        self.input_shape = tuple(input_shape)
        self.winsize = winsize
        self.scale = scale
        self.pow = pow
        self.output_dtype = dtype
        # set False to treat the op as identity in the backward pass
        self.enable_grad = True
        self._hash_key = (type(self), self.__class__, self.input_shape,
                          self.winsize, self.scale, self.pow,
                          self.output_dtype)

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.__class__ == other.__class__ and
                self.input_shape == other.input_shape and
                self.winsize == other.winsize and
                self.scale == other.scale and
                self.pow == other.pow and
                self.output_dtype == other.output_dtype)

    def __hash__(self):
        return hash(self._hash_key)

    def make_gpu_node(self, input):
        return CMRNorm_GPU(self.input_shape,
                           self.winsize, self.scale, self.pow,
                           dtype=self.output_dtype)(input)

    def infer_shape(self, node, input_shapes):
        # normalization is elementwise in shape
        return input_shapes

    def make_node(self, *inputs):
        inputs = tuple(map(T.as_tensor_variable, inputs))
        output = T.tensor4(dtype=(self.output_dtype or inputs[0].dtype))
        return Apply(self, inputs, (output,))

    def perform(self, node, (input,), (output,)):
        (bsize, nchan, ni, nj) = input.shape
        output_dtype = self.output_dtype or node.inputs[0].dtype
        out = output[0]
        # NOTE(review): BUG -- the reuse check compares against
        # (bsize, nchan, ni, ni); it should be (bsize, nchan, ni, nj).
        # For non-square inputs this forces a reallocation every call
        # (and could keep a stale square buffer).  Fix upstream.
        if out is None or out.dtype != output_dtype or \
                out.shape != (bsize, nchan, ni, ni):
            out = output[0] = np.empty((bsize, nchan, ni, nj),
                                       dtype=output_dtype)
        x = input
        x2 = x ** 2
        sums = np.zeros_like(x)
        # accumulate x**2 over a sliding window of channels centered
        # (floor) at each channel
        for p in xrange(self.winsize):
            d = p - (self.winsize//2)
            sums[:,max(0,-d):min(nchan,nchan-d),:,:] += \
                x2[:,max(0,d):min(nchan,nchan+d),:,:]
        out[:] = x * ((2 + self.scale * sums) ** (-self.pow))

    def grad(self, (x,), (dy,)):
        if not self.enable_grad:
            # straight-through: pretend the op is the identity
            return [dy]
        return (CMRNormGrad(self.input_shape,
                            self.winsize, self.scale, self.pow,
                            dtype=self.output_dtype)
                (x, self(x), dy),
                )

class CMRNormGrad(CMRNorm):
    '''Backward pass of CMRNorm; inputs are (x, y=CMRNorm(x), dy).'''
    def perform(self, node, (x, y, dy), (output,)):
        (bsize, nchan, ni, nj) = x.shape
        output_dtype = self.output_dtype or node.inputs[0].dtype
        dx = output[0]
        # NOTE(review): same (ni, ni) vs (ni, nj) reuse-check bug as in
        # CMRNorm.perform above.
        if dx is None or dx.dtype != output_dtype or \
                dx.shape != (bsize, nchan, ni, ni):
            dx = output[0] = np.empty((bsize, nchan, ni, nj),
                                      dtype=output_dtype)
        x2 = x ** 2
        sums = np.zeros_like(x)
        for p in xrange(self.winsize):
            d = p - (self.winsize//2)
            sums[:,max(0,-d):min(nchan,nchan-d),:,:] += \
                x2[:,max(0,d):min(nchan,nchan+d),:,:]
        denom = (2 + self.scale * sums) ** (-self.pow)
        a = (-2 * self.scale * self.pow) * y * denom
        dx[:] = 0
        x_dy = x * dy
        for p in xrange(self.winsize):
            d = p - (self.winsize//2)
            # slices of "convolution" window sliding
            lhs = slice(max(0,-d), min(nchan,nchan-d))
            rhs = slice(max(0,d), min(nchan,nchan+d))
            dx[:,lhs,:,:] += x_dy[:,rhs,:,:]
        dx *= a
        dx += dy * denom

    def make_gpu_node(self, *inputs):
        return CMRNormGrad_GPU(self.input_shape,
                               self.winsize, self.scale, self.pow,
                               dtype=self.output_dtype)(*inputs)

    # (next definition truncated in this dump)
    def
infer_shape(self, node, input_shapes): 660 | return (input_shapes[0],) 661 | 662 | def grad(self, inputs, doutputs): 663 | raise NotImplementedError 664 | 665 | 666 | if thutil.use_gpu: 667 | 668 | source_support_defs = ''' 669 | #define MOD % 670 | #define MIN(a,b) ((a) < (b) ? (a) : (b)) 671 | #define IDX4(n1, n2, n3, n4, i1, i2, i3, i4) \\ 672 | ((i1)*(n2)*(n3)*(n4) + (i2)*(n3)*(n4) + (i3)*(n4) + (i4)) 673 | #define IDX3(n1, n2, n3, i1, i2, i3) \\ 674 | ((i1)*(n2)*(n3) + (i2)*(n3) + (i3)) 675 | #define UNRAVEL_IDX4(ind, n1, n2, n3, n4, i1, i2, i3, i4) \\ 676 | { \\ 677 | i1 = (ind) / ((n2)*(n3)*(n4)); \\ 678 | i2 = ((ind) MOD ((n2)*(n3)*(n4))) / ((n3)*(n4)); \\ 679 | i3 = ((ind) MOD ((n3)*(n4))) / (n4); \\ 680 | i4 = ((ind) MOD (n4)); \\ 681 | } 682 | #define SETIF(var, val, cond) \ 683 | (var = (val)*!!(cond) + (var)*!(cond)) 684 | #define DIVUP(x,y) (1 + (((x) - 1) / (y))) 685 | 686 | static void launch_sizes(int nthreads, dim3 &grid_size, dim3 &block_size) 687 | { 688 | static const int min_threads = 16; 689 | static const int max_threads = 256; 690 | static const int max_blocks = 65535; 691 | 692 | int ngroups = (nthreads + min_threads - 1) / min_threads; 693 | 694 | if (ngroups == 1) { 695 | grid_size = dim3(1); 696 | block_size = dim3(min_threads); 697 | } else if (nthreads < max_blocks * min_threads) { 698 | grid_size = dim3(ngroups); 699 | block_size = dim3(min_threads); 700 | } else if (nthreads < max_blocks * max_threads) { 701 | grid_size = dim3(max_blocks); 702 | block_size = dim3((ngroups + max_blocks - 1) 703 | / max_blocks * min_threads); 704 | } else { 705 | grid_size = dim3(max_blocks); 706 | block_size = dim3(max_threads); 707 | } 708 | } 709 | ''' 710 | 711 | class PoolGpuOp(GpuOp): 712 | def make_node(self, *inputs): 713 | output = CudaNdarrayType((False,) * 4)() 714 | return Apply(self, inputs, (output,)) 715 | 716 | def c_support_code(self): 717 | if self.is_pooling: 718 | unpooled_shape = self.input_shape 719 | pooled_shape = 
self.output_shape 720 | pooled_stride_i = 1 721 | pooled_stride_j = 1 722 | else: 723 | unpooled_shape = self.output_shape 724 | pooled_shape = self.input_shape 725 | pooled_stride_i = 'DIVUP(wsize_i, stride_i)' 726 | pooled_stride_j = 'DIVUP(wsize_j, stride_j)' 727 | assert unpooled_shape[0] >= (pooled_shape[0] - 1) * self.stride[0] + self.winsize[0] 728 | assert unpooled_shape[1] >= (pooled_shape[1] - 1) * self.stride[1] + self.winsize[1] 729 | 730 | source = source_support_defs + ''' 731 | 732 | #define unpooled_i %(unpooled_shape[0])d 733 | #define unpooled_j %(unpooled_shape[1])d 734 | #define pooled_i %(pooled_shape[0])d 735 | #define pooled_j %(pooled_shape[1])d 736 | #define wsize_i %(self.winsize[0])d 737 | #define wsize_j %(self.winsize[1])d 738 | #define stride_i %(self.stride[0])d 739 | #define stride_j %(self.stride[1])d 740 | #define pooled_stride_i %(pooled_stride_i)s 741 | #define pooled_stride_j %(pooled_stride_j)s 742 | #define pooled_si DIVUP(pooled_i, pooled_stride_i) 743 | #define pooled_sj DIVUP(pooled_j, pooled_stride_j) 744 | 745 | #define ntiles_per_call (pooled_si * pooled_sj) 746 | static __global__ void %(self.ker_name)s ( %(self.ker_args)s, 747 | float *out, 748 | int nimgs, 749 | int pooled_start_i, 750 | int pooled_start_j, 751 | uint32_t randseed) 752 | { 753 | unsigned total_threads = gridDim.x * blockDim.x; 754 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 755 | int t, tnum, img, tile; 756 | int pi, pj, ui0, uj0; 757 | 758 | %(self.ker_defs)s 759 | 760 | for (t = tid; t < nimgs * ntiles_per_call; t += total_threads) { 761 | tnum = t; /* tile number */ 762 | img = tnum / ntiles_per_call; /* image the tile is in */ 763 | tile = tnum MOD ntiles_per_call; /* tile in image */ 764 | pi = (tile / pooled_sj); /* pooled pixel indices */ 765 | pj = (tile MOD pooled_sj); 766 | pi = pi * pooled_stride_i + pooled_start_i; 767 | pj = pj * pooled_stride_j + pooled_start_j; 768 | if (pi >= pooled_i || pj >= pooled_j) 769 | continue; 770 | ui0 
= pi * stride_i; /* unpooled window top-left pixel */ 771 | uj0 = pj * stride_j; 772 | 773 | %(self.ker_loop_body)s 774 | } 775 | } 776 | 777 | ''' % Eval() 778 | return source 779 | 780 | def c_code(self, node, nodename, inputs, outputs, sub): 781 | (output,) = outputs 782 | 783 | source = ''' 784 | const int *input_dims = CudaNdarray_HOST_DIMS(%(inputs[0])s); 785 | const int *output_dims = %(output)s ? 786 | CudaNdarray_HOST_DIMS(%(output)s) : 787 | NULL; 788 | const int dims[] = { input_dims[0], 789 | input_dims[1], 790 | %(self.output_shape[0])s, 791 | %(self.output_shape[1])s }; 792 | const int nimgs = dims[0] * dims[1]; /* imgs * channels */ 793 | 794 | int ntiles = nimgs * ntiles_per_call; /* one thread per tile */ 795 | dim3 grid_size, block_size; 796 | launch_sizes(ntiles, grid_size, block_size); 797 | 798 | CudaNdarray %(', '.join('*%s_contig' % inp for inp in inputs))s ; 799 | cudaError_t err; 800 | ''' 801 | 802 | source += ''' 803 | if (%(output)s == NULL 804 | || !CudaNdarray_is_c_contiguous(%(output)s) 805 | || %(output)s->nd != 4 806 | || dims[0] != output_dims[0] 807 | || dims[1] != output_dims[1] 808 | || dims[2] != output_dims[2] 809 | || dims[3] != output_dims[3]) { 810 | 811 | Py_XDECREF(%(output)s); 812 | %(output)s = (CudaNdarray*)CudaNdarray_New(); 813 | if (%(output)s == NULL 814 | || CudaNdarray_alloc_contiguous(%(output)s, 4, dims)) { 815 | Py_XDECREF(%(output)s); 816 | %(output)s = NULL; 817 | %(sub['fail'])s; 818 | } 819 | } 820 | 821 | if (%(self.zero_output)d) { 822 | if (cudaMemset( 823 | CudaNdarray_DEV_DATA(%(output)s), 824 | 0, CudaNdarray_SIZE(%(output)s) * sizeof(float)) 825 | != cudaSuccess) { 826 | PyErr_Format(PyExc_MemoryError, 827 | "%(self.ker_name)s: Error in memset"); 828 | Py_XDECREF(%(output)s); 829 | %(output)s = NULL; 830 | %(sub['fail'])s; 831 | } 832 | 833 | } 834 | ''' 835 | 836 | for inp in inputs: 837 | source += ''' 838 | %(inp)s_contig = %(inp)s; 839 | if (!CudaNdarray_is_c_contiguous(%(inp)s)) { 840 | 
%(inp)s_contig = (CudaNdarray*) CudaNdarray_Copy(%(inp)s); 841 | assert(CudaNdarray_is_c_contiguous(%(inp)s_contig)); 842 | } 843 | ''' % Eval() 844 | 845 | source += ''' 846 | /* call kernel once for each offset within the pooled stride */ 847 | for (int j = 0; j < pooled_stride_j; ++j) { 848 | for (int i = 0; i < pooled_stride_i; ++i) { 849 | %(self.ker_name)s <<>> ( 850 | %(', '.join('CudaNdarray_DEV_DATA(%s_contig)' % x 851 | for x in inputs))s, 852 | CudaNdarray_DEV_DATA(%(output)s), 853 | nimgs, 854 | i, j, 855 | rand() 856 | ); 857 | } 858 | } 859 | CNDA_THREAD_SYNC; 860 | ''' 861 | 862 | for inp in inputs: 863 | source += ''' 864 | if (%(inp)s_contig != %(inp)s) { 865 | Py_DECREF(%(inp)s_contig); 866 | } 867 | ''' % Eval() 868 | 869 | source += ''' 870 | err = cudaGetLastError(); 871 | 872 | if (err != cudaSuccess) { 873 | PyErr_Format(PyExc_RuntimeError, 874 | "Cuda error: %%s: %%s", 875 | "%(self.ker_name)s", cudaGetErrorString(err)); 876 | %(sub['fail'])s 877 | } 878 | ''' 879 | 880 | source = source % Eval() 881 | return source 882 | 883 | 884 | class MaxInds2D_GPU(PoolGpuOp, MaxInds2D): 885 | ker_name = 'pool_maxind' 886 | 887 | ker_args = 'float *X' 888 | 889 | ker_defs = ''' 890 | int u, ui, uj, ismax, max_ind; 891 | float val, max_val; 892 | ''' 893 | 894 | zero_output = False 895 | 896 | ker_loop_body = ''' 897 | max_val = -1000000; 898 | for (u = 0; u < wsize_i * wsize_j; ++u) { 899 | ui = ui0 + u / wsize_j; 900 | uj = uj0 + u MOD wsize_j; 901 | if ((ui < unpooled_i) && (uj < unpooled_j)) { 902 | val = X[IDX3(nimgs, unpooled_i, unpooled_j, 903 | img, ui, uj)]; 904 | ismax = (val > max_val); 905 | SETIF(max_val, val, ismax); 906 | SETIF(max_ind, u, ismax); 907 | } 908 | } 909 | 910 | out[IDX3(nimgs, pooled_i, pooled_j, 911 | img, pi, pj)] 912 | = max_ind; 913 | ''' 914 | 915 | def perform(self, node, (input,), (output,)): 916 | output_host = [None] 917 | MaxInds2D.perform(self, node, (np.array(input),), (output_host,)) 918 | output[0] = 
CudaNdarray(output_host[0].astype(np.float32)) 919 | 920 | 921 | class IndexPool2D_GPU(PoolGpuOp, IndexPool2D): 922 | ker_name = 'pool_index' 923 | 924 | ker_args = 'float *input, float *inds' 925 | 926 | ker_defs = ''' 927 | int ui, uj, ind; 928 | float val; 929 | ''' 930 | 931 | zero_output = False 932 | 933 | ker_loop_body = ''' 934 | ind = (int) inds[IDX3(nimgs, pooled_i, pooled_j, 935 | img, pi, pj)]; 936 | 937 | ui = ui0 + ind / wsize_j; 938 | uj = uj0 + ind MOD wsize_j; 939 | val = input[IDX3(nimgs, unpooled_i, unpooled_j, 940 | img, ui, uj)]; 941 | 942 | out[IDX3(nimgs, pooled_i, pooled_j, 943 | img, pi, pj)] = val; 944 | ''' 945 | 946 | def perform(self, node, inputs, (output,)): 947 | output_host = [None] 948 | IndexPool2D.perform(self, node, 949 | map(np.array, inputs), (output_host,)) 950 | output[0] = CudaNdarray(output_host[0].astype(np.float32)) 951 | 952 | 953 | class IndexUnpool2D_GPU(PoolGpuOp, IndexUnpool2D): 954 | ker_name = 'unpool_index' 955 | 956 | ker_args = 'float *input, float *inds' 957 | 958 | ker_defs = ''' 959 | int ui, uj, ind; 960 | float val; 961 | ''' 962 | 963 | zero_output = True 964 | 965 | ker_loop_body = ''' 966 | ind = (int) inds[IDX3(nimgs, pooled_i, pooled_j, 967 | img, pi, pj)]; 968 | val = input[IDX3(nimgs, pooled_i, pooled_j, 969 | img, pi, pj)]; 970 | 971 | ui = ui0 + ind / wsize_j; 972 | uj = uj0 + ind MOD wsize_j; 973 | out[IDX3(nimgs, unpooled_i, unpooled_j, 974 | img, ui, uj)] += val; 975 | ''' 976 | 977 | def perform(self, node, inputs, (output,)): 978 | output_host = [None] 979 | IndexUnpool2D.perform(self, node, 980 | map(np.array, inputs), (output_host,)) 981 | output[0] = CudaNdarray(output_host[0].astype(np.float32)) 982 | 983 | 984 | class SumPool2D_GPU(PoolGpuOp, SumPool2D): 985 | ker_name = 'pool_sum' 986 | 987 | ker_args = 'float *X' 988 | 989 | ker_defs = ''' 990 | int u, ui, uj, usize_i, usize_j; 991 | float vsum; 992 | ''' 993 | 994 | zero_output = False 995 | 996 | def __init__(self, *args, **kwargs): 
997 | super(SumPool2D_GPU, self).__init__(*args, **kwargs) 998 | 999 | self.ker_loop_body = ''' 1000 | vsum = 0; 1001 | for (u = 0; u < wsize_i * wsize_j; ++u) { 1002 | ui = ui0 + u / wsize_j; 1003 | uj = uj0 + u MOD wsize_j; 1004 | if (ui < unpooled_i && uj < unpooled_j) 1005 | vsum += X[IDX3(nimgs, unpooled_i, unpooled_j, 1006 | img, ui, uj)]; 1007 | } 1008 | 1009 | if (%(average)s) { 1010 | usize_i = MIN(ui0 + wsize_i, unpooled_i) - ui0; 1011 | usize_j = MIN(uj0 + wsize_j, unpooled_j) - uj0; 1012 | vsum /= (usize_i * usize_j); 1013 | } 1014 | 1015 | out[IDX3(nimgs, pooled_i, pooled_j, 1016 | img, pi, pj)] 1017 | = vsum; 1018 | ''' % {'average': int(self.average)} 1019 | 1020 | def perform(self, node, (input,), (output,)): 1021 | output_host = [None] 1022 | SumPool2D.perform(self, node, (np.array(input),), (output_host,)) 1023 | output[0] = CudaNdarray(output_host[0].astype(np.float32)) 1024 | 1025 | 1026 | class SumUnpool2D_GPU(PoolGpuOp, SumUnpool2D): 1027 | ker_name = 'unpool_sum' 1028 | 1029 | ker_args = 'float *input' 1030 | 1031 | ker_defs = ''' 1032 | int u, ui, uj, usize_i, usize_j; 1033 | float val; 1034 | ''' 1035 | 1036 | zero_output = True 1037 | 1038 | def __init__(self, *args, **kwargs): 1039 | super(SumUnpool2D_GPU, self).__init__(*args, **kwargs) 1040 | 1041 | self.ker_loop_body = ''' 1042 | val = input[IDX3(nimgs, pooled_i, pooled_j, 1043 | img, pi, pj)]; 1044 | if (%(average)d) { /* average? 
*/ 1045 | usize_i = MIN(ui0 + wsize_i, unpooled_i) - ui0; 1046 | usize_j = MIN(uj0 + wsize_j, unpooled_j) - uj0; 1047 | val /= (float) (usize_i * usize_j); 1048 | } 1049 | 1050 | for (u = 0; u < wsize_i * wsize_j; ++u) { 1051 | ui = ui0 + u / wsize_j; 1052 | uj = uj0 + u MOD wsize_j; 1053 | if (ui < unpooled_i && uj < unpooled_j) 1054 | out[IDX3(nimgs, unpooled_i, unpooled_j, 1055 | img, ui, uj)] += val; 1056 | } 1057 | ''' % {'average': int(self.average)} 1058 | 1059 | def perform(self, node, (input,), (output,)): 1060 | output_host = [None] 1061 | SumUnpool2D.perform(self, node, (np.array(input),), (output_host,)) 1062 | output[0] = CudaNdarray(output_host[0].astype(np.float32)) 1063 | 1064 | 1065 | class CMRNorm_GPU(GpuOp, CMRNorm): 1066 | def __init__(self, *args, **kwargs): 1067 | CMRNorm.__init__(self, *args, **kwargs) 1068 | self._define_kernel_code() 1069 | 1070 | def make_node(self, *inputs): 1071 | output = CudaNdarrayType((False,) * 4)() 1072 | return Apply(self, inputs, (output,)) 1073 | 1074 | def perform(self, node, inputs, (output,)): 1075 | output_host = [None] 1076 | CMRNorm.perform(self, node, 1077 | map(np.array, inputs), (output_host,)) 1078 | output[0] = CudaNdarray(output_host[0].astype(np.float32)) 1079 | 1080 | def c_support_code(self): 1081 | source = source_support_defs + ''' 1082 | 1083 | static __global__ void %(self.ker_name)s ( %(self.ker_args)s, 1084 | float *output, 1085 | int nimgs, 1086 | int nchan, 1087 | int ni, int nj) 1088 | { 1089 | unsigned total_threads = gridDim.x * blockDim.x; 1090 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 1091 | int t, img, chan, i, j; 1092 | 1093 | for (t = tid; t < nchan * ni * nj * nimgs; t += total_threads) { 1094 | UNRAVEL_IDX4(t, nimgs, nchan, ni, nj, 1095 | img, chan, i, j); 1096 | 1097 | %(self.ker_loop_body)s 1098 | } 1099 | } 1100 | 1101 | ''' % Eval() 1102 | return source 1103 | 1104 | zero_output = False 1105 | 1106 | ker_name = 'cmrnorm' 1107 | 1108 | ker_args = 'float *input' 1109 | 
1110 | def _define_kernel_code(self): 1111 | self.ker_loop_body = ''' 1112 | 1113 | int d; 1114 | float sum = 0; 1115 | float x; 1116 | 1117 | for (d = -%(self.winsize / 2)d; d <= %(self.winsize / 2)d; ++d) { 1118 | if (chan + d >= 0 && chan + d < nchan) { 1119 | x = input[IDX4(nimgs, nchan, ni, nj, 1120 | img, chan + d, i, j)]; 1121 | sum += x * x; 1122 | } 1123 | } 1124 | 1125 | x = input[IDX4(nimgs, nchan, ni, nj, 1126 | img, chan, i, j)]; 1127 | output[IDX4(nimgs, nchan, ni, nj, 1128 | img, chan, i, j)] 1129 | = x * __powf(2 + %(self.scale)s * sum, -%(self.pow)s); 1130 | 1131 | ''' % Eval() 1132 | 1133 | def c_code_cache_version(self): 1134 | return (1, hash(self)) 1135 | 1136 | def c_code(self, node, nodename, inputs, outputs, sub): 1137 | (output,) = outputs 1138 | 1139 | source = ''' 1140 | const int *dims = CudaNdarray_HOST_DIMS(%(inputs[0])s); 1141 | const int *output_dims = %(output)s ? 1142 | CudaNdarray_HOST_DIMS(%(output)s) : 1143 | NULL; 1144 | 1145 | const int nchan = dims[0]; 1146 | const int ni = dims[1]; 1147 | const int nj = dims[2]; 1148 | const int nimgs = dims[3]; 1149 | 1150 | int nelems = nimgs * nchan * ni * nj; /* one thread elem */ 1151 | dim3 grid_size, block_size; 1152 | launch_sizes(nelems, grid_size, block_size); 1153 | 1154 | CudaNdarray %(', '.join('*%s_contig' % inp for inp in inputs))s ; 1155 | cudaError_t err; 1156 | ''' 1157 | 1158 | source += ''' 1159 | if (%(output)s == NULL 1160 | || !CudaNdarray_is_c_contiguous(%(output)s) 1161 | || %(output)s->nd != 4 1162 | || dims[0] != output_dims[0] 1163 | || dims[1] != output_dims[1] 1164 | || dims[2] != output_dims[2] 1165 | || dims[3] != output_dims[3]) { 1166 | 1167 | Py_XDECREF(%(output)s); 1168 | %(output)s = (CudaNdarray*)CudaNdarray_New(); 1169 | if (%(output)s == NULL 1170 | || CudaNdarray_alloc_contiguous(%(output)s, 4, dims)) { 1171 | Py_XDECREF(%(output)s); 1172 | %(output)s = NULL; 1173 | %(sub['fail'])s; 1174 | } 1175 | } 1176 | 1177 | if (%(self.zero_output)d) { 1178 | if 
(cudaMemset( 1179 | CudaNdarray_DEV_DATA(%(output)s), 1180 | 0, CudaNdarray_SIZE(%(output)s) * sizeof(float)) 1181 | != cudaSuccess) { 1182 | PyErr_Format(PyExc_MemoryError, 1183 | "%(self.ker_name)s: Error in memset"); 1184 | Py_XDECREF(%(output)s); 1185 | %(output)s = NULL; 1186 | %(sub['fail'])s; 1187 | } 1188 | 1189 | } 1190 | ''' 1191 | 1192 | for inp in inputs: 1193 | source += ''' 1194 | %(inp)s_contig = %(inp)s; 1195 | if (!CudaNdarray_is_c_contiguous(%(inp)s)) { 1196 | %(inp)s_contig = (CudaNdarray*) CudaNdarray_Copy(%(inp)s); 1197 | assert(CudaNdarray_is_c_contiguous(%(inp)s_contig)); 1198 | } 1199 | ''' % Eval() 1200 | 1201 | source += ''' 1202 | %(self.ker_name)s <<>> ( 1203 | %(', '.join('CudaNdarray_DEV_DATA(%s_contig)' % x 1204 | for x in inputs))s, 1205 | CudaNdarray_DEV_DATA(%(output)s), 1206 | nimgs, nchan, ni, nj 1207 | ); 1208 | 1209 | CNDA_THREAD_SYNC; 1210 | ''' 1211 | 1212 | for inp in inputs: 1213 | source += ''' 1214 | if (%(inp)s_contig != %(inp)s) { 1215 | Py_DECREF(%(inp)s_contig); 1216 | } 1217 | ''' % Eval() 1218 | 1219 | source += ''' 1220 | err = cudaGetLastError(); 1221 | 1222 | if (err != cudaSuccess) { 1223 | PyErr_Format(PyExc_RuntimeError, 1224 | "Cuda error: %%s: %%s", 1225 | "%(self.ker_name)s", cudaGetErrorString(err)); 1226 | %(sub['fail'])s 1227 | } 1228 | ''' 1229 | 1230 | source = source % Eval() 1231 | return source 1232 | 1233 | class CMRNormGrad_GPU(CMRNorm_GPU, CMRNormGrad): 1234 | def __init__(self, *args, **kwargs): 1235 | CMRNormGrad.__init__(self, *args, **kwargs) 1236 | self._define_kernel_code() 1237 | 1238 | def perform(self, node, inputs, (output,)): 1239 | output_host = [None] 1240 | CMRNormGrad.perform(self, node, 1241 | map(np.array, inputs), (output_host,)) 1242 | output[0] = CudaNdarray(output_host[0].astype(np.float32)) 1243 | 1244 | zero_output = False 1245 | 1246 | ker_name = 'cmrnormgrad' 1247 | 1248 | ker_args = 'float *input, float *ys, float *dys' 1249 | 1250 | def _define_kernel_code(self): 1251 | 
self.ker_loop_body = ''' 1252 | 1253 | int d; 1254 | float sum = 0; 1255 | float x, denom, a, y, dx, x_d, dy_d; 1256 | 1257 | for (d = -%(self.winsize / 2)d; d <= %(self.winsize / 2)d; ++d) { 1258 | if (chan + d >= 0 && chan + d < nchan) { 1259 | x_d = input[IDX4(nimgs, nchan, ni, nj, 1260 | img, chan + d, i, j)]; 1261 | sum += x_d * x_d; 1262 | } 1263 | } 1264 | 1265 | x = input[IDX4(nimgs, nchan, ni, nj, 1266 | img, chan, i, j)]; 1267 | y = ys[IDX4(nimgs, nchan, ni, nj, 1268 | img, chan, i, j)]; 1269 | 1270 | denom = __powf(2 + %(self.scale)s * sum, -%(self.pow)s); 1271 | a = (-2 * %(self.scale)s * %(self.pow)s) * y * denom; 1272 | 1273 | dx = 0; 1274 | for (d = -%(self.winsize / 2)d; d <= %(self.winsize / 2)d; ++d) { 1275 | if (chan + d >= 0 && chan + d < nchan) { 1276 | x_d = input[IDX4(nimgs, nchan, ni, nj, 1277 | img, chan + d, i, j)]; 1278 | dy_d = dys[IDX4(nimgs, nchan, ni, nj, 1279 | img, chan + d, i, j)]; 1280 | dx += x_d * dy_d; 1281 | } 1282 | } 1283 | 1284 | dx *= a; 1285 | dx += denom * dys[IDX4(nimgs, nchan, ni, nj, 1286 | img, chan, i, j)]; 1287 | 1288 | output[IDX4(nimgs, nchan, ni, nj, 1289 | img, chan, i, j)] = dx; 1290 | 1291 | ''' % Eval() 1292 | 1293 | 1294 | def test_pooling(): 1295 | from theano.tests.unittest_tools import verify_grad 1296 | 1297 | winsize = (5,5) 1298 | stride = (3,3) 1299 | 1300 | xtest = np.random.rand(3,2,16,30) 1301 | xtest = xtest.astype(theano.config.floatX) 1302 | 1303 | x = T.tensor4('x', dtype=theano.config.floatX) 1304 | x.tag.test_value = xtest 1305 | 1306 | # max pool/unpool 1307 | 1308 | xinds = maxinds_2d(x, winsize, stride=stride) 1309 | indf = theano.function([x], xinds, mode='DEBUG_MODE') 1310 | theano.printing.debugprint(indf) 1311 | xinds_val = indf(xtest) 1312 | 1313 | xshape = xtest.shape[-2:] 1314 | 1315 | xmax = index_pool_2d(x, xinds, winsize, stride=stride) 1316 | poolf = theano.function([x], xmax, mode='DEBUG_MODE') 1317 | theano.printing.debugprint(poolf) 1318 | xmax_val = poolf(xtest) 1319 | 1320 
| unpoolf = theano.function([x], index_unpool_2d(xmax, xinds, winsize, 1321 | stride=stride, 1322 | input_shape=xmax_val.shape[-2:], 1323 | output_shape=xshape), 1324 | mode='DEBUG_MODE') 1325 | theano.printing.debugprint(unpoolf) 1326 | ux_val = unpoolf(xtest) 1327 | if stride == winsize: 1328 | assert np.sum(xtest == ux_val) == np.prod(xmax_val.shape) 1329 | 1330 | # sum pool/unpool 1331 | 1332 | xsum = sumpool2d(x, winsize, stride) 1333 | poolf = theano.function([x], xsum, mode='DEBUG_MODE') 1334 | theano.printing.debugprint(poolf) 1335 | xsum_val = poolf(xtest) 1336 | assert xsum_val.shape == xmax_val.shape 1337 | 1338 | xavg = sumpool2d(x, winsize, stride, average=True) 1339 | poolf = theano.function([x], xavg, mode='DEBUG_MODE') 1340 | theano.printing.debugprint(poolf) 1341 | xavg_val = poolf(xtest) 1342 | assert xavg_val.shape == xsum_val.shape 1343 | 1344 | unpoolf = theano.function([x], sum_unpool_2d(xsum, winsize, stride, 1345 | input_shape=xsum_val.shape[-2:], 1346 | output_shape=xshape), 1347 | mode='DEBUG_MODE') 1348 | theano.printing.debugprint(unpoolf) 1349 | ux_val = unpoolf(xtest) 1350 | 1351 | 1352 | T.verify_grad(lambda x: sumpool2d(x, winsize=winsize, stride=stride, 1353 | input_shape=(16,30)), 1354 | (xtest,), 1355 | rng=np.random.RandomState(0)) 1356 | 1357 | T.verify_grad(lambda xsum: sum_unpool_2d(xsum, 1358 | winsize=winsize, stride=stride, 1359 | input_shape=xsum_val.shape[-2:], 1360 | output_shape=xshape), 1361 | (xsum_val,), 1362 | rng=np.random.RandomState(0)) 1363 | 1364 | T.verify_grad(lambda x: sumpool2d(x, winsize=winsize, stride=stride, 1365 | average=True, 1366 | input_shape=(16,30)), 1367 | (xtest,), 1368 | rng=np.random.RandomState(0)) 1369 | 1370 | T.verify_grad(lambda x: index_pool_2d(x, xinds_val, 1371 | winsize=winsize, stride=stride, 1372 | input_shape=(16,30))[0], 1373 | (xtest,), 1374 | rng=np.random.RandomState(0)) 1375 | 1376 | T.verify_grad(lambda xmax: index_unpool_2d(xmax, xinds_val, 1377 | winsize=winsize, 1378 | 
stride=stride, 1379 | input_shape=xmax_val.shape[-2:], 1380 | output_shape=(16,30)), 1381 | (xmax_val,), 1382 | rng=np.random.RandomState(0)) 1383 | 1384 | def test_cmrnorm(): 1385 | from theano.tests.unittest_tools import verify_grad 1386 | 1387 | xtest = np.random.rand(2,8,3,4) 1388 | xtest = xtest.astype(theano.config.floatX) 1389 | 1390 | x = T.tensor4('x', dtype=theano.config.floatX) 1391 | x.tag.test_value = xtest 1392 | 1393 | y = cmrnorm(x, input_shape=xtest.shape[1:]) 1394 | f = theano.function([x], y, mode='DEBUG_MODE') 1395 | f(xtest) 1396 | 1397 | f = theano.function([x], gpu_from_host(T.grad(T.sum(y), wrt=x)), 1398 | mode='DEBUG_MODE') 1399 | f(xtest) 1400 | theano.printing.debugprint(f) 1401 | 1402 | T.verify_grad(lambda x: cmrnorm(x, input_shape=xtest.shape[1:]), 1403 | (xtest,), 1404 | rng=np.random.RandomState(0)) 1405 | 1406 | print 'cmrnorm passed' 1407 | 1408 | if __name__ == '__main__': 1409 | test_pooling() 1410 | test_cmrnorm() 1411 | -------------------------------------------------------------------------------- /pooling.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/pooling.pyc -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | import os 3 | import sys 4 | import numpy as np 5 | 6 | from PIL import Image 7 | 8 | import net 9 | 10 | def main(): 11 | # location of depth module, config and parameters 12 | module_fn = 'models/depth.py' 13 | config_fn = 'models/depth.conf'#网络结构 14 | params_dir = 'weights/depth'#网络相关参数 15 | 16 | # load depth network 17 | machine = net.create_machine(module_fn, config_fn, params_dir) 18 | 19 | # demo image 20 | rgb = Image.open('demo_nyud_rgb.jpg') 21 | rgb = rgb.resize((320, 240), Image.BICUBIC) 22 | 23 | # build depth inference 
function and run 24 | rgb_imgs = np.asarray(rgb).reshape((1, 240, 320, 3)) 25 | pred_depths = machine.infer_depth(rgb_imgs) 26 | 27 | # save prediction 28 | (m, M) = (pred_depths.min(), pred_depths.max()) 29 | depth_img_np = (pred_depths[0] - m) / (M - m) 30 | depth_img = Image.fromarray((255*depth_img_np).astype(np.uint8)) 31 | depth_img.save('demo_nyud_depth_prediction.png') 32 | 33 | 34 | if __name__ == '__main__': 35 | main() -------------------------------------------------------------------------------- /theano_test_value_size.patch: -------------------------------------------------------------------------------- 1 | diff --git a/theano/configdefaults.py b/theano/configdefaults.py 2 | index 58ed2e9..97d6564 100644 3 | --- a/theano/configdefaults.py 4 | +++ b/theano/configdefaults.py 5 | @@ -470,6 +470,14 @@ AddConfigVar('compute_test_value', 6 | EnumStr('off', 'ignore', 'warn', 'raise', 'pdb'), 7 | in_c_key=False) 8 | 9 | +AddConfigVar('store_test_value_maxsize', 10 | + ("Maximum size for test values that are kept. If compute_test_value " 11 | + "is enabled, keeps test values smaller than the given size (in " 12 | + "number of entries). Beyond that, only the shape is stored; " 13 | + "an array with the same shape and type is created on demand, filled " 14 | + "with a single random entry from the array."), 15 | + IntParam(sys.maxint), 16 | + in_c_key=False) 17 | 18 | AddConfigVar('compute_test_value_opt', 19 | ("For debugging Theano optimization only.
20 | diff --git a/theano/gof/op.py b/theano/gof/op.py 21 | index ac85eec..a306077 100644 22 | --- a/theano/gof/op.py 23 | +++ b/theano/gof/op.py 24 | @@ -18,6 +18,7 @@ import numpy 25 | import os 26 | import sys 27 | import warnings 28 | +import numpy 29 | 30 | import theano 31 | from theano import config 32 | @@ -461,6 +462,10 @@ class PureOp(object): 33 | elif isinstance(v, graph.Variable) and hasattr(v.tag, 'test_value'): 34 | # ensure that the test value is correct 35 | return v.type.filter(v.tag.test_value) 36 | + elif isinstance(v, graph.Variable) and hasattr(v.tag, 'test_shape'): 37 | + test_value = numpy.empty(v.tag.test_shape, dtype=v.type.dtype) 38 | + test_value.fill(v.tag.test_value_fill) 39 | + return v.type.filter(test_value, strict=False, allow_downcast=True) 40 | 41 | raise AttributeError('%s has no test value' % v) 42 | 43 | @@ -552,7 +557,14 @@ class PureOp(object): 44 | 45 | # add 'test_value' to output tag, so that downstream ops can use these 46 | # numerical values as inputs to their perform method. 
47 | - output.tag.test_value = storage_map[output][0] 48 | + test_value = storage_map[output][0] 49 | + if not hasattr(test_value, 'size') or \ 50 | + test_value.size < config.store_test_value_maxsize: 51 | + output.tag.test_value = test_value 52 | + elif hasattr(test_value, 'shape'): 53 | + test_value = numpy.asarray(test_value) 54 | + output.tag.test_shape = test_value.shape 55 | + output.tag.test_value_fill = test_value.flat[0] 56 | 57 | if self.default_output is not None: 58 | rval = node.outputs[self.default_output] 59 | -------------------------------------------------------------------------------- /thutil.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2014 New York University 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | import sys 18 | import time 19 | import numpy as np 20 | import operator 21 | import types 22 | import ipdb 23 | import inspect 24 | import traceback 25 | 26 | import theano 27 | import theano.tensor as T 28 | 29 | from theano import Op, Apply 30 | 31 | from theano.tensor.shared_randomstreams import RandomStreams 32 | from theano.tensor.nnet import conv 33 | from theano.gof import local_optimizer 34 | 35 | from common import imgutil, logutil 36 | 37 | _log = logutil.getLogger() 38 | 39 | use_gpu = theano.config.device.startswith('gpu') 40 | 41 | checkgrad = False 42 | 43 | if use_gpu: 44 | from theano.sandbox.cuda import GpuOp, gpu_from_host, host_from_gpu, \ 45 | CudaNdarrayType, CudaNdarray 46 | from theano.sandbox.cuda.basic_ops import gpu_contiguous 47 | 48 | class Eval(object): 49 | def __init__(self, globals=None, locals=None): 50 | self.globals = globals or {} 51 | self.locals = locals or sys._getframe(1).f_locals 52 | 53 | def __getitem__(self, key): 54 | return eval(key, self.globals, self.locals) 55 | 56 | def c_contiguous(x): 57 | if x.is_c_contiguous(): 58 | return x 59 | return x.copy() 60 | 61 | def isvalid(x): 62 | return T.all(T.logical_not(T.logical_or(T.isnan(x), T.isinf(x)))) 63 | 64 | def maximum(x, y): 65 | if checkgrad: 66 | return x + y 67 | return T.maximum(x, y) 68 | 69 | def minimum(x, y): 70 | if checkgrad: 71 | return x + y 72 | return T.minimum(x, y) 73 | 74 | def named(x, name): 75 | x.name = name 76 | return x 77 | 78 | def test_value(x): 79 | if isinstance(x, np.ndarray): 80 | return x 81 | return theano.gof.op.get_test_value(x) 82 | 83 | def test_shape(x): 84 | return tuple(test_value(x.shape)) 85 | 86 | def theano_function(*vars_by_pos, **kwargs): 87 | '''theano function decorator''' 88 | mode = kwargs.pop('mode', 'FAST_RUN') 89 | check_valid = kwargs.pop('check_valid', False) 90 | checks = kwargs.pop('checks', ()) 91 | vars_by_name = kwargs 92 | def compile_func(f): 93 | argnames = 
f.func_code.co_varnames[:f.func_code.co_argcount] 94 | if any([a in vars_by_name for a in argnames[:len(vars_by_pos)]]): 95 | raise ValueError('Argument supplied twice to %s' % f.func_name) 96 | varspec = dict(vars_by_name) 97 | varspec.update(zip(argnames[:len(vars_by_pos)], vars_by_pos)) 98 | argvars = [] 99 | for name in argnames: 100 | spec = varspec[name] 101 | if isinstance(spec, (tuple, list)): 102 | (var, test_val) = spec 103 | else: 104 | var = spec 105 | test_val = None 106 | assert isinstance(var, T.Variable) 107 | var.name = name 108 | if test_val is not None: 109 | var.tag.test_value = test_val 110 | argvars.append(var) 111 | return function(argvars, f(*argvars), 112 | check_valid=check_valid, 113 | checks=checks, 114 | mode=mode) 115 | return compile_func 116 | 117 | def function(inputs, outputs=None, check_valid=False, checks=(), **kwargs): 118 | input_names = None 119 | output_names = None 120 | if isinstance(inputs, dict): 121 | if inputs: 122 | (input_names, inputs) = zip(*inputs.iteritems()) 123 | else: 124 | (input_names, inputs) = ((), ()) 125 | if isinstance(outputs, dict): 126 | if outputs: 127 | (output_names, outputs) = zip(*outputs.iteritems()) 128 | else: 129 | (output_names, outputs) = ((), ()) 130 | 131 | if check_valid or checks: 132 | updates = kwargs.setdefault('updates', {}) 133 | asserts = [assert_(c, 'check failed: %s' % c) for c in checks] 134 | 135 | if check_valid: 136 | if outputs: 137 | if not isinstance(outputs, (list, tuple)): 138 | outputs = [outputs] 139 | asserts += (assert_(isvalid(x), 140 | 'output invalid: %d (%s)' % (i, x.name)) 141 | for (i, x) in enumerate(outputs)) 142 | 143 | if updates: 144 | asserts += (assert_(isvalid(xnew), 145 | 'update invalid: variable %s' % str(x)) 146 | for (x, xnew) in updates.iteritems()) 147 | 148 | checks_passed = theano.shared(np.int8(1), name='checks_passed') 149 | updates[checks_passed] = \ 150 | T.all(T.as_tensor_variable(asserts)).astype('int8') 151 | 152 | f = 
_CheckedFunction(inputs, outputs, **kwargs) 153 | else: 154 | f = theano.function(inputs, outputs, **kwargs) 155 | if hasattr(f.fn, 'clear_storage'): 156 | f.clear_storage = f.fn.clear_storage 157 | else: 158 | _log.warn('Function %s has no clear_storage: disabling', f.fn) 159 | f.clear_storage = lambda: None 160 | 161 | if input_names is not None or output_names is not None: 162 | return NamedInputOutputFunction(input_names, output_names, f) 163 | return f 164 | 165 | class NamedInputOutputFunction(object): 166 | def __init__(self, input_names, output_names, f): 167 | self.input_names = input_names 168 | self.output_names = output_names 169 | self.f = f 170 | 171 | if output_names: 172 | class _NamedOutputs(object): 173 | __slots__ = output_names 174 | 175 | def __init__(self, vals): 176 | [setattr(self, k, v) for (k,v) in zip(self.__slots__, vals)] 177 | 178 | def __eq__(self, other): 179 | return type(self) == type(other) and \ 180 | self.items() == other.items() 181 | 182 | def __getitem__(self, k): 183 | return getattr(self, k) 184 | 185 | def iteritems(self): 186 | return ((s, self[s]) for s in self.__slots__) 187 | 188 | __iter__ = iteritems 189 | 190 | def items(self): 191 | return list(self.iteritems()) 192 | 193 | self._NamedOutputs = _NamedOutputs 194 | 195 | if hasattr(f.fn, 'clear_storage'): 196 | self.clear_storage = f.fn.clear_storage 197 | else: 198 | _log.warn('Function %s has no clear_storage: disabling', f.fn) 199 | self.clear_storage = lambda: None 200 | 201 | def __call__(self, *args, **kwargs): 202 | inputs = args 203 | if self.input_names: 204 | assert not inputs, \ 205 | 'theano function with kw args cannot take positional args' 206 | inputs = [kwargs[k] for k in self.input_names] 207 | 208 | outputs = self.f(*inputs) 209 | 210 | if self.output_names: 211 | outputs = self._NamedOutputs(outputs) 212 | 213 | return outputs 214 | 215 | class _CheckedFunction(object): 216 | def __init__(self, inputs, outputs, **kwargs): 217 | self.f = 
theano.function(inputs, outputs, 218 | inplace_updates=False, 219 | **kwargs) 220 | self.dbg_kwargs = dict(kwargs) 221 | self.dbg_kwargs.update(inputs=inputs, 222 | outputs=outputs, 223 | inplace_updates=False, 224 | mode='DEBUG_MODE') 225 | self.f_dbg = None 226 | self.fn = self.f.fn 227 | self.clear_storage = self.f.fn.clear_storage 228 | 229 | def __call__(self, *args, **kwargs): 230 | try: 231 | return self.f(*args, **kwargs) 232 | except AssertionError: 233 | _log.exception('assertion failed in function %s' % self.f.name) 234 | if self.f_dbg is None: 235 | _log.info('creating debug function for %s' % self.f.name) 236 | self.f_dbg = theano.function(**self.dbg_kwargs) 237 | _log.error('calling debug function for %s' % self.f.name) 238 | self.f_dbg(*args, **kwargs) 239 | _log.error('debug version seems to have passed' % self.f.name) 240 | raise 241 | 242 | class Assert(theano.Op): 243 | view_map = {0: [0]} 244 | 245 | def __init__(self, msg=None): 246 | self.msg = msg 247 | 248 | def __eq__(self, other): 249 | return (type(self) == type(other) and 250 | self.msg == other.msg) 251 | 252 | def __hash__(self): 253 | return reduce(operator.xor, map(hash, (type(self), self.msg))) 254 | 255 | def make_node(self, input): 256 | output = T.as_tensor_variable(input).type() 257 | return theano.Apply(self, (input,), (output,)) 258 | 259 | def make_gpu_node(self, input): 260 | return Assert_GPU(self.msg)(input) 261 | 262 | def infer_shape(self, node, input_shapes): 263 | return input_shapes 264 | 265 | def perform(self, node, (input,), (output,)): 266 | assert np.all(input), self.msg 267 | output[0] = input 268 | 269 | def grad(self, inputs, doutputs): 270 | return (None,) 271 | 272 | def assert_(cond, msg=None): 273 | return Assert(msg)(cond) 274 | 275 | class Constant(theano.Op): 276 | def __init__(self, ninputs): 277 | self.view_map = dict((i,[i]) for i in xrange(ninputs)) 278 | 279 | def __eq__(self, other): 280 | return (type(self) == type(other) and 281 | 
len(self.view_map) == len(other.view_map)) 282 | 283 | def __hash__(self): 284 | return reduce(operator.xor, 285 | map(hash, (type(self), len(self.view_map)))) 286 | 287 | def make_node(self, *inputs): 288 | outputs = tuple([T.as_tensor_variable(inp).type() for inp in inputs]) 289 | return theano.Apply(self, inputs, outputs) 290 | 291 | def make_gpu_node(self, *inputs): 292 | return Constant_GPU(len(inputs))(*inputs) 293 | 294 | def infer_shape(self, node, input_shapes): 295 | return input_shapes 296 | 297 | def perform(self, node, inputs, outputs): 298 | for (inp, out) in zip(inputs, outputs): 299 | out[0] = inp 300 | 301 | def grad(self, inputs, doutputs): 302 | return [T.DisconnectedType()() for _ in inputs] 303 | 304 | def constant(*inputs): 305 | return Constant(len(inputs))(*inputs) 306 | 307 | 308 | class _BreakpointVars(object): 309 | def __init__(self, th_vars, py_vars): 310 | self.th_vars = th_vars 311 | self.py_vars = py_vars 312 | 313 | def __getattr__(self, k): 314 | if k in self.th_vars: 315 | return self.th_vars[k] 316 | if k in self.py_vars: 317 | return self.py_vars[k] 318 | return object.__getattr__(self, k) 319 | 320 | def __repr__(self): 321 | s = [] 322 | s.append('Theano runtime variables:') 323 | s += ('%-16s %s' % (k, str(v.shape)) 324 | for (k, v) in sorted(self.th_vars.items(), key=lambda (k,v): k)) 325 | s.append('') 326 | s.append('Python creation-time variables:') 327 | s.append(', '.join(sorted(self.py_vars.keys()))) 328 | s.append('') 329 | return '\n'.join(s) 330 | 331 | class Breakpoint(theano.Op): 332 | view_map = {0: [0]} 333 | 334 | global_breakpoint_enable = False 335 | 336 | def __init__(self, var_names, cond, tb, py_vars, 337 | breakpoint_grad, is_grad=False): 338 | self.var_names = var_names 339 | self.cond = cond 340 | self.tb = tb 341 | self.py_vars = py_vars 342 | self.nvars = len(var_names) 343 | self.breakpoint_grad = breakpoint_grad 344 | self.is_grad = is_grad 345 | 346 | def __eq__(self, other): 347 | return 
(type(self) == type(other) and 348 | self.var_names == other.var_names and 349 | self.cond == other.cond and 350 | self.tb == other.tb) 351 | 352 | def __hash__(self): 353 | return reduce(operator.xor, map(hash, ( 354 | type(self), self.var_names, self.cond, self.tb))) 355 | 356 | def make_node(self, *inputs): 357 | output = T.as_tensor_variable(inputs[0]).type() 358 | return theano.Apply(self, inputs, (output,)) 359 | 360 | def make_gpu_node(self, *inputs): 361 | return Breakpoint_GPU( 362 | self.var_names, self.cond, self.tb, self.py_vars, 363 | self.breakpoint_grad, self.is_grad)(*inputs) 364 | 365 | def infer_shape(self, node, input_shapes): 366 | return (input_shapes[0],) 367 | 368 | def perform(self, node, inputs, (output,)): 369 | output[0] = inputs[0] 370 | if not Breakpoint.global_breakpoint_enable: 371 | return 372 | x = inputs[0] 373 | if not isinstance(x, np.ndarray): 374 | x = np.array(x) 375 | if self.cond(x): 376 | vars = _BreakpointVars( 377 | dict(zip(self.var_names, map(np.array, inputs[1:]))), 378 | self.py_vars) 379 | if self.is_grad: 380 | place = 'theano gradient eval' 381 | else: 382 | place = 'theano eval' 383 | print >> sys.stderr, 'Breakpoint in %s, created at' % place 384 | print >> sys.stderr, ' ...' 
385 | traceback.print_list(self.tb[-4:], sys.stderr) 386 | ipdb.set_trace() 387 | pass # in theano breakpoint 388 | 389 | def grad(self, inputs, (doutput,)): 390 | if self.breakpoint_grad: 391 | doutput = Breakpoint(self.var_names, self.cond, 392 | self.tb, self.py_vars, True, True) \ 393 | (doutput, *inputs[1:]) 394 | return [doutput] + [T.DisconnectedType()() for _ in xrange(self.nvars)] 395 | 396 | _theano_types = (theano.tensor.basic.TensorConstant, 397 | theano.tensor.basic.TensorVariable, 398 | theano.compile.SharedVariable, 399 | ) 400 | 401 | def is_theano_var(x): 402 | return isinstance(x, _theano_types) 403 | 404 | def breakpoint(output, vars=None, cond=lambda v: True, grad=True): 405 | tb = tuple(traceback.extract_stack()[:-1]) 406 | py_vars = {} 407 | if type(vars) not in (tuple, list, dict, types.NoneType): 408 | raise ValueError('vars keyword arg must be None, dict, list or tuple') 409 | if not isinstance(vars, dict): 410 | frame_locals = inspect.stack()[1][0].f_locals 411 | if vars is not None: 412 | frame_locals = dict((name, val) 413 | for (name, val) in frame_locals.iteritems() 414 | if name in vars or val in vars) 415 | vars = frame_locals 416 | assert isinstance(vars, dict) 417 | th_vars = dict((name, val) for (name, val) in vars.iteritems() 418 | if isinstance(val, _theano_types)) 419 | py_vars = dict((name, val) for (name, val) in vars.iteritems() 420 | if name not in th_vars) 421 | (th_var_names, th_var_vals) = zip(*th_vars.iteritems()) 422 | return Breakpoint(th_var_names, cond, tb, py_vars, grad) \ 423 | (output, *th_var_vals) 424 | 425 | def enable_breakpoints(enable=True): 426 | Breakpoint.global_breakpoint_enable = enable 427 | 428 | def cross(x, y, axis=None): 429 | ndim = x.ndim 430 | assert x.ndim == y.ndim 431 | if axis is None: 432 | axis = ndim - 1 433 | def _getindexslice(a, i): 434 | return a[tuple([slice(i,i+1) if d == axis else slice(None) 435 | for d in xrange(ndim)])] 436 | x0 = _getindexslice(x, 0) 437 | x1 = 
_getindexslice(x, 1) 438 | x2 = _getindexslice(x, 2) 439 | y0 = _getindexslice(y, 0) 440 | y1 = _getindexslice(y, 1) 441 | y2 = _getindexslice(y, 2) 442 | 443 | res = T.concatenate((x1*y2 - x2*y1, 444 | x2*y0 - x0*y2, 445 | x0*y1 - x1*y0), axis=axis) 446 | return res 447 | 448 | 449 | if use_gpu: 450 | 451 | class Constant_GPU(Constant, GpuOp): 452 | def make_node(self, *inputs): 453 | outputs = tuple([inp.type() for inp in inputs]) 454 | return theano.Apply(self, inputs, outputs) 455 | 456 | class Assert_GPU(Assert, GpuOp): 457 | def make_node(self, input): 458 | output = input.type() 459 | return theano.Apply(self, (input,), (output,)) 460 | 461 | def perform(self, node, (input,), (output,)): 462 | assert np.all(np.array(input)) 463 | output[0] = input 464 | 465 | class Breakpoint_GPU(Breakpoint, GpuOp): 466 | def make_node(self, *inputs): 467 | output = inputs[0].type() 468 | return theano.Apply(self, inputs, (output,)) 469 | 470 | @theano.sandbox.cuda.opt.register_opt() 471 | @theano.gof.local_optimizer(None) 472 | def local_gpu_togpu(node): 473 | if node.op == gpu_from_host: 474 | host_input = node.inputs[0] 475 | if host_input.owner and \ 476 | hasattr(host_input.owner.op, 'make_gpu_node'): 477 | try: 478 | gpu_inputs = map(gpu_from_host, host_input.owner.inputs) 479 | except TypeError: 480 | return False 481 | return [host_input.owner.op.make_gpu_node(*gpu_inputs)] 482 | elif hasattr(node.op, 'make_gpu_node') and \ 483 | all([x.owner and x.owner.op == host_from_gpu 484 | for x in node.inputs]): 485 | gpu_inputs = [x.owner.inputs[0] for x in node.inputs] 486 | return [host_from_gpu(node.op.make_gpu_node(*gpu_inputs))] 487 | return False 488 | 489 | @theano.sandbox.cuda.opt.register_opt() 490 | @theano.gof.local_optimizer([Breakpoint]) 491 | def local_gpu_togpu_breakpoint(node): 492 | if isinstance(node.op, Breakpoint): 493 | result_input = node.inputs[0] 494 | if result_input.owner and result_input.owner.op == host_from_gpu: 495 | gpu_inputs = 
[x.owner.inputs[0] 496 | if x.owner and x.owner.op == host_from_gpu 497 | else x 498 | for x in node.inputs] 499 | return [host_from_gpu(node.op.make_gpu_node(*gpu_inputs))] 500 | return False 501 | 502 | -------------------------------------------------------------------------------- /thutil.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hjimce/Depth-Map-Prediction/fea99a9b52648820c6c8dd0374b9b06117a5124b/thutil.pyc --------------------------------------------------------------------------------