├── LICENSE ├── README.md ├── data ├── dictionary_and_semantic_hierarchy.txt ├── download_urls_multithreading.py ├── download_urls_multithreading.sh ├── im_list_for_classification.txt ├── image_lists │ ├── train_im_list_tiny_1.txt │ └── train_im_list_tiny_2.txt ├── imagenet2012_dictionary.txt ├── images │ ├── im_0.jpg │ ├── im_1.jpg │ ├── im_2.jpg │ └── im_3.jpg ├── tfrecord.py ├── tfrecord.sh ├── tfrecords │ ├── 0.tfrecords │ └── 1.tfrecords ├── train_im_list_tiny.txt └── train_urls_tiny.txt ├── data_processing ├── __init__.py ├── dataset.py └── image_preprocessing.py ├── example ├── extract_feature.sh ├── finetune.sh ├── image_classification.sh └── train.sh ├── extract_feature.py ├── finetune.py ├── flags.py ├── git_images ├── hist_num_annotations.png └── num_images_per_class.png ├── image_classification.py ├── models ├── __init__.py ├── __init__.pyc ├── resnet.py └── resnet.pyc └── train.py /LICENSE: -------------------------------------------------------------------------------- 1 | Tencent is pleased to support the open source community by making Tencent ML-Images available. 2 | 3 | Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 4 | 5 | If you have downloaded a copy of the Tencent ML-Images source code from Tencent, please note that Tencent ML-Images source code is licensed under the BSD 3-Clause License, except for the third-party components or materials listed below which are subject to different license terms. Your integration of Tencent ML-Images into your own projects may require compliance with the BSD 3-Clause License, as well as the other licenses applicable to the third-party components or materials included within Tencent ML-Images. 6 | In addition, please note that the annotations of Tencent ML-Images are licensed under the Creative Commons Attribution 4.0 International Public License (CC BY 4.0), https://creativecommons.org/licenses/by/4.0/. 
7 | The copies of the BSD 3-Clause License and the Creative Commons Attribution 4.0 International Public License are included in this file. 8 | 9 | Other dependencies and licenses: 10 | 11 | Open Source Software Licensed Under the Apache License, Version 2.0: 12 | ---------------------------------------------------------------------------------------- 13 | 1. TensorFlow 1.6.0 14 | Copyright 2018 The TensorFlow Authors. All rights reserved. 15 | 16 | 2. Open Images Dataset V3 17 | Copyright 2016 The Open Images Authors. All rights reserved. 18 | 19 | 20 | Terms of the Apache License, Version 2.0: 21 | -------------------------------------------------------------------- 22 | Apache License 23 | 24 | Version 2.0, January 2004 25 | 26 | http://www.apache.org/licenses/ 27 | 28 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 29 | 30 | 1. Definitions. 31 | 32 | “License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 33 | 34 | “Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 35 | 36 | “Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 37 | 38 | “You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License. 39 | 40 | “Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
41 | 42 | “Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 43 | 44 | “Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 45 | 46 | “Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 47 | 48 | “Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.” 49 | 50 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 51 | 52 | 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 53 | 54 | 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 55 | 56 | 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: 57 | 58 | a) You must give any other recipients of the Work or Derivative Works a copy of this License; and 59 | 60 | b) You must cause any modified files to carry prominent notices stating that You changed the files; and 61 | 62 | c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and 63 | 64 | d) If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. 65 | 66 | You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 67 | 68 | 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 69 | 70 | 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 71 | 72 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 73 | 74 | 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 75 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 76 | 77 | END OF TERMS AND CONDITIONS 78 | 79 | APPENDIX: How to apply the Apache License to your work 80 | To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
81 | 82 | Copyright [yyyy] [name of copyright owner] 83 | 84 | Licensed under the Apache License, Version 2.0 (the "License"); 85 | you may not use this file except in compliance with the License. 86 | You may obtain a copy of the License at 87 | http://www.apache.org/licenses/LICENSE-2.0 88 | Unless required by applicable law or agreed to in writing, software 89 | distributed under the License is distributed on an "AS IS" BASIS, 90 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 91 | See the License for the specific language governing permissions and 92 | limitations under the License. 93 | 94 | 95 | 96 | Open Source Software Licensed Under the Python Software Foundation License Version 2: 97 | ---------------------------------------------------------------------------------------- 98 | 1. Python 2.7.12 99 | Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 100 | 2012, 2013, 2014, 2015, 2016 Python Software Foundation. All rights reserved. 101 | 102 | Copyright (c) 2000 BeOpen.com. 103 | All rights reserved. 104 | 105 | Copyright (c) 1995-2001 Corporation for National Research Initiatives. 106 | All rights reserved. 107 | 108 | Copyright (c) 1991-1995 Stichting Mathematisch Centrum. 109 | All rights reserved. 110 | 111 | 112 | Terms of the Python Software Foundation License Version 2: 113 | --------------------------------------------------- 114 | A. HISTORY OF THE SOFTWARE 115 | ========================== 116 | Python was created in the early 1990s by Guido van Rossum at Stichting 117 | Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands 118 | as a successor of a language called ABC. Guido remains Python's 119 | principal author, although it includes many contributions from others. 
120 | 121 | In 1995, Guido continued his work on Python at the Corporation for 122 | National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) 123 | in Reston, Virginia where he released several versions of the 124 | software. 125 | 126 | In May 2000, Guido and the Python core development team moved to 127 | BeOpen.com to form the BeOpen PythonLabs team. In October of the same 128 | year, the PythonLabs team moved to Digital Creations (now Zope 129 | Corporation, see http://www.zope.com). In 2001, the Python Software 130 | Foundation (PSF, see http://www.python.org/psf/) was formed, a 131 | non-profit organization created specifically to own Python-related 132 | Intellectual Property. Zope Corporation is a sponsoring member of 133 | the PSF. 134 | 135 | All Python releases are Open Source (see http://www.opensource.org for 136 | the Open Source Definition). Historically, most, but not all, Python 137 | releases have also been GPL-compatible; the table below summarizes 138 | the various releases. 139 | 140 | Release Derived from Year Owner GPL-compatible? (1) 141 | 0.9.0 thru 1.2 1991-1995 CWI yes 142 | 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 143 | 1.6 1.5.2 2000 CNRI no 144 | 2.0 1.6 2000 BeOpen.com no 145 | 1.6.1 1.6 2001 CNRI yes (2) 146 | 2.1 2.0+1.6.1 2001 PSF no 147 | 2.0.1 2.0+1.6.1 2001 PSF yes 148 | 2.1.1 2.1+2.0.1 2001 PSF yes 149 | 2.1.2 2.1.1 2002 PSF yes 150 | 2.1.3 2.1.2 2002 PSF yes 151 | 2.2 and above 2.1.1 2001-now PSF yes 152 | 153 | Footnotes: 154 | 155 | (1) GPL-compatible doesn't mean that we're distributing Python under 156 | the GPL. All Python licenses, unlike the GPL, let you distribute 157 | a modified version without making your changes open source. The 158 | GPL-compatible licenses make it possible to combine Python with 159 | other software that is released under the GPL; the others don't. 160 | 161 | (2) According to Richard Stallman, 1.6.1 is not GPL-compatible, 162 | because its license has a choice of law clause. 
According to 163 | CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 164 | is "not incompatible" with the GPL. 165 | 166 | Thanks to the many outside volunteers who have worked under Guido's 167 | direction to make these releases possible. 168 | 169 | B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON 170 | =============================================================== 171 | PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 172 | -------------------------------------------- 173 | 174 | 1. This LICENSE AGREEMENT is between the Python Software Foundation 175 | ("PSF"), and the Individual or Organization ("Licensee") accessing and 176 | otherwise using this software ("Python") in source or binary form and 177 | its associated documentation. 178 | 179 | 2. Subject to the terms and conditions of this License Agreement, PSF hereby 180 | grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, 181 | analyze, test, perform and/or display publicly, prepare derivative works, 182 | distribute, and otherwise use Python alone or in any derivative version, 183 | provided, however, that PSF's License Agreement and PSF's notice of copyright, 184 | i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 185 | 2011, 2012, 2013, 2014, 2015 Python Software Foundation; All Rights Reserved" 186 | are retained in Python alone or in any derivative version prepared by Licensee. 187 | 188 | 3. In the event Licensee prepares a derivative work that is based on 189 | or incorporates Python or any part thereof, and wants to make 190 | the derivative work available to others as provided herein, then 191 | Licensee hereby agrees to include in any such work a brief summary of 192 | the changes made to Python. 193 | 194 | 4. PSF is making Python available to Licensee on an "AS IS" 195 | basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 196 | IMPLIED. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 197 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR 198 | FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT 199 | INFRINGE ANY THIRD PARTY RIGHTS. 200 | 201 | 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 202 | FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 203 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, 204 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 205 | 206 | 6. This License Agreement will automatically terminate upon a material 207 | breach of its terms and conditions. 208 | 209 | 7. Nothing in this License Agreement shall be deemed to create any 210 | relationship of agency, partnership, or joint venture between PSF and 211 | Licensee. This License Agreement does not grant permission to use PSF 212 | trademarks or trade name in a trademark sense to endorse or promote 213 | products or services of Licensee, or any third party. 214 | 215 | 8. By copying, installing or otherwise using Python, Licensee 216 | agrees to be bound by the terms and conditions of this License 217 | Agreement. 218 | 219 | BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 220 | ------------------------------------------- 221 | BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 222 | 223 | 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an 224 | office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the 225 | Individual or Organization ("Licensee") accessing and otherwise using 226 | this software in source or binary form and its associated 227 | documentation ("the Software"). 228 | 229 | 2. 
Subject to the terms and conditions of this BeOpen Python License 230 | Agreement, BeOpen hereby grants Licensee a non-exclusive, 231 | royalty-free, world-wide license to reproduce, analyze, test, perform 232 | and/or display publicly, prepare derivative works, distribute, and 233 | otherwise use the Software alone or in any derivative version, 234 | provided, however, that the BeOpen Python License is retained in the 235 | Software, alone or in any derivative version prepared by Licensee. 236 | 237 | 3. BeOpen is making the Software available to Licensee on an "AS IS" 238 | basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 239 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND 240 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR 241 | FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE 242 | WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 243 | 244 | 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 245 | SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR 246 | LOSS AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR 247 | ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 248 | 249 | 5. This License Agreement will automatically terminate upon a material 250 | breach of its terms and conditions. 251 | 252 | 6. This License Agreement shall be governed by and interpreted in all 253 | respects by the law of the State of California, excluding conflict of 254 | law provisions. Nothing in this License Agreement shall be deemed to 255 | create any relationship of agency, partnership, or joint venture 256 | between BeOpen and Licensee. This License Agreement does not grant 257 | permission to use BeOpen trademarks or trade names in a trademark 258 | sense to endorse or promote products or services of Licensee, or any 259 | third party. 
As an exception, the "BeOpen Python" logos available at 260 | http://www.pythonlabs.com/logos.html may be used according to the 261 | permissions granted on that web page. 262 | 263 | 7. By copying, installing or otherwise using the software, Licensee 264 | agrees to be bound by the terms and conditions of this License 265 | Agreement. 266 | 267 | 268 | CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 269 | --------------------------------------- 270 | 1. This LICENSE AGREEMENT is between the Corporation for National 271 | Research Initiatives, having an office at 1895 Preston White Drive, 272 | Reston, VA 20191 ("CNRI"), and the Individual or Organization 273 | ("Licensee") accessing and otherwise using Python 1.6.1 software in 274 | source or binary form and its associated documentation. 275 | 276 | 2. Subject to the terms and conditions of this License Agreement, CNRI 277 | hereby grants Licensee a nonexclusive, royalty-free, world-wide 278 | license to reproduce, analyze, test, perform and/or display publicly, 279 | prepare derivative works, distribute, and otherwise use Python 1.6.1 280 | alone or in any derivative version, provided, however, that CNRI's 281 | License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 282 | 1995-2001 Corporation for National Research Initiatives; All Rights 283 | Reserved" are retained in Python 1.6.1 alone or in any derivative 284 | version prepared by Licensee. Alternately, in lieu of CNRI's License 285 | Agreement, Licensee may substitute the following text (omitting the 286 | quotes): "Python 1.6.1 is made available subject to the terms and 287 | conditions in CNRI's License Agreement. This Agreement together with 288 | Python 1.6.1 may be located on the Internet using the following 289 | unique, persistent identifier (known as a handle): 1895.22/1013. This 290 | Agreement may also be obtained from a proxy server on the Internet 291 | using the following URL: http://hdl.handle.net/1895.22/1013". 292 | 293 | 3. 
In the event Licensee prepares a derivative work that is based on 294 | or incorporates Python 1.6.1 or any part thereof, and wants to make 295 | the derivative work available to others as provided herein, then 296 | Licensee hereby agrees to include in any such work a brief summary of 297 | the changes made to Python 1.6.1. 298 | 299 | 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" 300 | basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 301 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND 302 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR 303 | FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL 304 | NOT INFRINGE ANY THIRD PARTY RIGHTS. 305 | 306 | 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 307 | 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 308 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, 309 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 310 | 311 | 6. This License Agreement will automatically terminate upon a material 312 | breach of its terms and conditions. 313 | 314 | 7. This License Agreement shall be governed by the federal 315 | intellectual property law of the United States, including without 316 | limitation the federal copyright law, and, to the extent such 317 | U.S. federal law does not apply, by the law of the Commonwealth of 318 | Virginia, excluding Virginia's conflict of law provisions. 319 | Notwithstanding the foregoing, with regard to derivative works based 320 | on Python 1.6.1 that incorporate non-separable material that was 321 | previously distributed under the GNU General Public License (GPL), the 322 | law of the Commonwealth of Virginia shall govern this License 323 | Agreement only as to issues arising under or with respect to 324 | Paragraphs 4, 5, and 7 of this License Agreement. 
Nothing in this 325 | License Agreement shall be deemed to create any relationship of 326 | agency, partnership, or joint venture between CNRI and Licensee. This 327 | License Agreement does not grant permission to use CNRI trademarks or 328 | trade name in a trademark sense to endorse or promote products or 329 | services of Licensee, or any third party. 330 | 331 | 8. By clicking on the "ACCEPT" button where indicated, or by copying, 332 | installing or otherwise using Python 1.6.1, Licensee agrees to be 333 | bound by the terms and conditions of this License Agreement. 334 | 335 | ACCEPT 336 | 337 | 338 | CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 339 | -------------------------------------------------- 340 | Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, 341 | The Netherlands. All rights reserved. 342 | 343 | Permission to use, copy, modify, and distribute this software and its 344 | documentation for any purpose and without fee is hereby granted, 345 | provided that the above copyright notice appear in all copies and that 346 | both that copyright notice and this permission notice appear in 347 | supporting documentation, and that the name of Stichting Mathematisch 348 | Centrum or CWI not be used in advertising or publicity pertaining to 349 | distribution of the software without specific, written prior 350 | permission. 351 | 352 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH 353 | REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF 354 | MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH 355 | CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES 356 | OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 357 | PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 358 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 359 | PERFORMANCE OF THIS SOFTWARE. 
360 | 361 | 362 | 363 | 364 | Materials Licensed Under the Creative Commons Attribution 4.0 International Public License (CC BY 4.0): 365 | The below materials in this distribution may have been modified by THL A29 Limited (“Tencent Modifications”). All Tencent Modifications are Copyright (C) 2018 THL A29 Limited. 366 | ---------------------------------------------------------------------------------------- 367 | 1. Open Images Dataset V3 368 | The annotations of Open Images Dataset are licensed by Google Inc. under CC BY 4.0 license. 369 | Copyright 2016 The Open Images Authors. All rights reserved. 370 | 371 | 372 | Terms of the Creative Commons Attribution 4.0 International Public License (CC BY 4.0): 373 | -------------------------------------------------------------------- 374 | https://creativecommons.org/licenses/by/4.0/ 375 | 376 | 377 | 378 | Creative Commons Attribution 4.0 International Public License 379 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. 380 | Section 1 – Definitions. 381 | a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. 
For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. 382 | b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. 383 | c. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. 384 | d. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 385 | e. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. 386 | f. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. 387 | g. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 388 | h. Licensor means the individual(s) or entity(ies) granting rights under this Public License. 389 | i. 
Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. 390 | j. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. 391 | k. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. 392 | Section 2 – Scope. 393 | a. License grant. 394 | 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: 395 | A. reproduce and Share the Licensed Material, in whole or in part; and 396 | B. produce, reproduce, and Share Adapted Material. 397 | 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 398 | 3. Term. The term of this Public License is specified in Section 6(a). 399 | 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. 
The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 400 | 5. Downstream recipients. 401 | A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. 402 | B. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 403 | 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). 404 | b. Other rights. 405 | 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 406 | 2. Patent and trademark rights are not licensed under this Public License. 407 | 3. 
To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. 408 | Section 3 – License Conditions. 409 | Your exercise of the Licensed Rights is expressly made subject to the following conditions. 410 | a. Attribution. 411 | 1. If You Share the Licensed Material (including in modified form), You must: 412 | A. retain the following if it is supplied by the Licensor with the Licensed Material: 413 | i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); 414 | ii. a copyright notice; 415 | iii. a notice that refers to this Public License; 416 | iv. a notice that refers to the disclaimer of warranties; 417 | v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; 418 | B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and 419 | C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 420 | 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 421 | 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 422 | 4. 
If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. 423 | Section 4 – Sui Generis Database Rights. 424 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: 425 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; 426 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and 427 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. 428 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 429 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 430 | a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. 431 | b. 
To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. 432 | c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. 433 | Section 6 – Term and Termination. 434 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. 435 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 436 | 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 437 | 2. upon express reinstatement by the Licensor. 438 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. 439 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. 440 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. 441 | Section 7 – Other Terms and Conditions. 442 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 
443 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. 444 | Section 8 – Interpretation. 445 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. 446 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. 447 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 448 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | Terms of the BSD 3-Clause License: 457 | -------------------------------------------------------------------- 458 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 459 | 460 |  Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 461 |  Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
462 |  Neither the name of [copyright holder] nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 463 | 464 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 465 | 466 | 467 | 468 | 469 | 470 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tencent ML-Images 2 | 3 | This repository introduces the open-source project dubbed **Tencent ML-Images**, which publishes 4 | 5 | * **ML-Images**: the largest open-source multi-label image database, including 17,609,752 training and 88,739 validation image URLs, which are annotated with up to 11,166 categories 6 | * **Resnet-101 model**: it is pre-trained on ML-Images, and achieves the top-1 accuracy 80.73% on ImageNet via transfer learning 7 | 8 | ## Updates 9 | 10 | * [2019/12/26] Our manuscript of this open-source project has been accepted to IEEE Access ([Journal](https://ieeexplore.ieee.org/document/8918053/authors#authors), [ArXiv](https://arxiv.org/abs/1901.01703)). It presents more details of the database, the loss function, the training algorithm, and more experimental results. 
11 | * [2018/12/19] We simplify the procedure of downloading images. Please see [Download Images](#download-images). 12 | 13 | 14 | 15 | # Contents 16 | 17 | * [Dependencies](#dependencies) 18 | 19 | * [Data](#data) 20 | * [Image Source](#image-source) 21 | * [Download Images](#download-images) 22 | * [Download Images from ImageNet](#download-images-from-imagenet) 23 | * [Download Images from Open Images](#download-images-from-open-images) 24 | * [Semantic Hierarchy](#semantic-hierarchy) 25 | * [Annotations](#annotations) 26 | * [Statistics](#statistics) 27 | 28 | * [Train](#train) 29 | * [Prepare the TFRecord File](#prepare-the-tfrecord-file) 30 | * [Pretrain on ML-Images](#pretrain-on-ml-images) 31 | * [Finetune on ImageNet](#finetune-on-imagenet) 32 | * [Checkpoints](#checkpoints) 33 | * [Single-Label Image Classification](#single-label-image-classification) 34 | * [Feature Extraction](#feature-extraction) 35 | 36 | 37 | * [Results](#results) 38 | 39 | * [Copyright](#copyright) 40 | 41 | * [Citation](#citation) 42 | 43 | 44 | # [Dependencies](#dependencies) 45 | * Linux 46 | * [Python 2.7](https://www.python.org/) 47 | * [Tensorflow >= 1.6.0](https://www.tensorflow.org/install/) 48 | 49 | # [Data](#data) 50 | [[back to top](#)] 51 | 52 | ### [Image Source](#image-source) 53 | [[back to top](#)] 54 | 55 | 56 | The image URLs of ML-Images are collected from [ImageNet](http://www.image-net.org/) and [Open Images](https://github.com/openimages/dataset). 57 | Specifically, 58 | * Part 1: From the whole database of ImageNet, we adopt 10,706,941 training and 50,000 validation image URLs, covering 10,032 categories. 59 | * Part 2: From Open Images, we adopt 6,902,811 training and 38,739 validation image URLs, covering 1,134 unique categories (note that some other categories are merged with their synonymous categories from ImageNet). 60 | 61 | Finally, ML-Images includes 17,609,752 training and 88,739 validation image URLs, covering 11,166 categories. 
62 | 63 | 70 | 74 | 75 | 78 | 79 | ### [Download Images](#download-images) 80 | [[back to top](#)] 81 | 82 | Due to the copyright, we cannot provide the original images directly. However, one can obtain all images of our database using the following files: 83 | * train_image_id_from_imagenet.txt ([Link1](https://drive.google.com/file/d/1-7x4wPa764MJkjhhNj0PWPhgwMJOXziA/view?usp=sharing), [Link2](https://pan.baidu.com/s/1oUfIMCHj1wyz0ywuSn1iEQ)) 84 | * val_image_id_from_imagenet.txt ([Link1](https://drive.google.com/file/d/1-1x1vJFZGesz-5R2W8DLWHaVEbIPjuJs/view?usp=sharing), [Link2](https://pan.baidu.com/s/10prwZcHstYA8ppyXxbEbXA)) 85 | * train_urls_from_openimages.txt ([Link1](https://drive.google.com/file/d/1__HFVimF5yUwlyEjaUoSmBBfRQKJTYKW/view?usp=sharing), [Link2]( https://pan.baidu.com/s/1jjkaLu5JiHV6D0qyWXSxMA )) 86 | * val_urls_from_openimages.txt ([Link1](https://drive.google.com/file/d/1JkTcEEkB1zYI6NtAM-vXpsv7uDZ3glEz/view?usp=sharing), [Link2](https://pan.baidu.com/s/1F8mk58IGj9BP0-HSF-M9aw)) 87 | 88 | 89 | 90 | #### [Download Images from ImageNet](#download-images-from-imagenet) 91 | We find that massive urls provided by ImageNet have expired (please check the file `List of all image URLs of Fall 2011 Release` at http://image-net.org/download-imageurls). Thus, here we provide the original image IDs of ImageNet used in our database. One can obtain the training/validation images of our database through the following steps: 92 | * Download the whole database of [ImageNet](http://image-net.org/download-images) 93 | * Extract the training/validation images using the image IDs in `train_image_id_from_imagenet.txt` and `val_image_id_from_imagenet.txt` 94 | 95 | The format of `train_image_id_from_imagenet.txt` is as follows: 96 | ``` 97 | ... 98 | n04310904/n04310904_8388.JPEG 2367:1 2172:1 1831:1 1054:1 1041:1 865:1 2:1 99 | n11753700/n11753700_1897.JPEG 5725:1 5619:1 5191:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 100 | ... 
101 | ``` 102 | As shown above, one image corresponds to one row. The first term is the original image ID of ImageNet. The followed terms separated by space are the annotations. For example, "2367:1" indicates class 2367 and its confidence 1. Note that the class index starts from 0, and you can find the class name from the file [data/dictionary_and_semantic_hierarchy.txt](data/dictionary_and_semantic_hierarchy.txt). 103 | 104 | **NOTE**: We find that there are some repeated URLs in `List of all image URLs of Fall 2011 Release` of ImageNet, i.e., the image corresponding to one URL may be stored in multiple sub-folders with different image IDs. We manually check a few repeated images, and find the reason is that one image annotated with a child class may also be annotated with its parent class, then it is saved to two sub-folders with different image IDs. To the best of our knowledge, this point has never been claimed in ImageNet or any other place. If one want to use ImageNet, this point should be noticed. 105 | Due to that, there are also a few repeated images in our database, but our training is not significantly influenced. In future, we will update the database by removing the repeated images. 106 | 107 | 108 | #### [Download Images from Open Images](#download-images-from-open-images) 109 | 110 | 114 | 115 | The images from Open Images can be downloaded using URLs. 116 | The format of `train_urls_from_openimages.txt` is as follows: 117 | ``` 118 | ... 119 | https://c4.staticflickr.com/8/7239/6997334729_e5fb3938b1_o.jpg 3:1 5193:0.9 5851:0.9 9413:1 9416:1 120 | https://c2.staticflickr.com/4/3035/3033882900_a9a4263c55_o.jpg 1053:0.8 1193:0.8 1379:0.8 121 | ... 122 | ``` 123 | As shown above, one image corresponds to one row. The first term is the image URL. The followed terms separated by space are the annotations. For example, "5193:0.9" indicates class 5193 and its confidence 0.9. 
124 | 125 | ##### Download Images using URLs 126 | We also provide the code to download images using URLs. 127 | As `train_urls_from_openimages.txt` is very large, here we provide a tiny file [train_urls_tiny.txt](data/train_urls_tiny.txt) to demonstrate the downloading procedure. 128 | ``` 129 | cd data 130 | ./download_urls_multithreading.sh 131 | ``` 132 | A sub-folder `data/images` will be generated to save the downloaded jpeg images, as well as a file `train_im_list_tiny.txt` to save the image list and the corresponding annotations. 133 | 134 | 135 | 136 | 137 | ### [Semantic Hierarchy](#semantic-hierarchy) 138 | [[back to top](#)] 139 | 140 | We build the semantic hierarchy of 11,166 categories, according to [WordNet](https://wordnet.princeton.edu/). 141 | The direct parent categories of each class can be found from the file [data/dictionary_and_semantic_hierarchy.txt](data/dictionary_and_semantic_hierarchy.txt). The whole semantic hierarchy includes 4 independent trees, of which 142 | the root nodes are `thing`, `matter`, `object, physical object` and `atmospheric phenomenon`, respectively. 143 | The length of the longest semantic path from root to leaf nodes is 16, and the average length is 7.47. 144 | 145 | ### [Annotations](#annotations) 146 | [[back to top](#)] 147 | 148 | Since the image URLs of ML-Images are collected from ImageNet and Open Images, the annotations of ML-Images are constructed based on the 149 | original annotations from ImageNet and Open Images. Note that the original annotations from Open Images are licensed by Google Inc. under [CC BY-4.0](https://creativecommons.org/licenses/by/4.0/). Specifically, we conduct the following steps to construct the new annotations of ML-Images. 150 | * For the 6,902,811 training URLs from Open Images, we remove the annotated tags that are out of the remaining 1,134 categories. 
151 | * According to the constructed [semantic hierarchy](data/dictionary_and_semantic_hierarchy.txt) of 11,166 categories, we augment the annotations of all URLs of ML-Images following the cateria that if one URL is annotated with category i, then all ancestor categories will also be annotated to this URL. 152 | * We train a ResNet-101 model based on the 6,902,811 training URLs from Open Images, with 1,134 outputs. Using this ResNet-101 model, we predict the tags from 1,134 categories for the 10,756,941 single-annotated image URLs from ImageNet. Consequently, we obtain a normalized co-occurrence matrix between 10,032 categories from ImageNet and 1,134 categories from Open Images. We can determine the strongly co-occurrenced pairs of categories. For example, category i and j are strongly co-occurrenced; then, if one image is annotated with category i, then category j should also be annotated. 153 | 154 | The annotations of all URLs in ML-Images are stored in `train_urls.txt` and `val_urls.txt`. 155 | 156 | 160 | 161 | ### [Statistics](#statistics) 162 | [[back to top](#)] 163 | 164 | The main statistics of ML-Images are summarized in ML-Images. 165 | 166 | 167 | | # Train images | # Validation images | # Classes | # Trainable Classes | # Avg tags per image | # Avg images per class | 168 | | :-------------: |:--------------------:| :--------:| :-----------------: |:-------------------:| :---------------------:| 169 | | 17,609,752 | 88,739 | 11,166 | 10,505 | 8.72 | 13,843 | 170 | 171 | Note: *Trainable class* indicates the class that has over 100 train images. 172 | 173 |
174 | 175 | The number of images per class and the histogram of the number of annotations in training set are shown in the following figures. 176 | 177 | GitHub GitHub 178 | 179 | 180 | # [Train](#train) 181 | [[back to top](#)] 182 | 183 | 184 | 188 | 189 | 198 | 199 | 202 | 203 | 212 | 225 | 226 | 231 | 232 | 233 | ### [Prepare the TFRecord File](#prepare-tfrecord) 234 | [[back to top](#)] 235 | 236 | Here we generate the tfrecords using the multithreading module. One should firstly split the file `train_im_list_tiny.txt` into multiple smaller files, and save them into the sub-folder `data/image_lists/`. 237 | ``` 238 | cd data 239 | ./tfrecord.sh 240 | ``` 241 | Multiple tfrecords (named like `x.tfrecords`) will saved to `data/tfrecords/`. 242 | 243 | ### [Pretrain on ML-Images](#pretrain-on-ml-images) 244 | [[back to top](#)] 245 | 246 | Before training, one should move the train and validation tfrecords to `data/ml-images/train` and `data/ml-images/val`, respectively. 247 | Then, 248 | ``` 249 | ./example/train.sh 250 | ``` 251 | **Note**: Here we only provide the training code in the single node single GPU framework, while our actual training on ML-Images is based on an internal distributed training framework (not released yet). One could modify the training code to the distributed framework following [distributed tensorFlow](https://www.tensorflow.org/deploy/distributed). 252 | 253 | ### [Finetune on ImageNet](#finetune-on-imagenet) 254 | [[back to top](#)] 255 | 256 | One should firstly download the ImageNet (ILSVRC2012) database, then prepare the tfrecord file using [tfrecord.sh](example/tfrecord.sh). 257 | Then, you can finetune the ResNet-101 model on ImageNet as follows, with the checkpoint pre-trained on ML-Images. 
258 | ``` 259 | ./example/finetune.sh 260 | ``` 261 | 262 | ### [Checkpoints](#checkpoints) 263 | [[back to top](#)] 264 | 265 | * ckpt-resnet101-mlimages ([link1](https://drive.google.com/open?id=1FKkw2HD0jrCJKOM_kpyOvZ_m_YPA9tdV), [link2](https://pan.baidu.com/s/1166673BNWuIeWxD7lf6RNA)): pretrained on ML-Images 266 | * ckpt-resnet101-mlimages-imagenet ([link1](https://drive.google.com/open?id=1wIhRemoPxTw7uDz-TlwfYJsOR2usb2kg), [link2](https://pan.baidu.com/s/1UE7gavcVznYVA5NZ-GFAvg)): pretrained on ML-Images and finetuned on ImageNet (ILSVRC2012) 267 | 268 | Please download above two checkpoints and move them into the folder `checkpoints/`, if you want to extract features using them. 269 | 270 | ### [Single-Label Image Classification](#single-label-image-classification) 271 | 272 | Here we provide a demo for single-label image-classification, using the checkpoint `ckpt-resnet101-mlimages-imagenet` downloaded above. 273 | ``` 274 | ./example/image_classification.sh 275 | ``` 276 | The prediction will be saved to `label_pred.txt`. If one wants to recognize other images, `data/im_list_for_classification.txt` should be modified to include the path of these images. 277 | 278 | ### [Feature Extraction](#feature-extraction) 279 | [[back to top](#)] 280 | 281 | ``` 282 | ./example/extract_feature.sh 283 | ``` 284 | 285 | 286 | # [Results](#results) 287 | [[back to top](#)] 288 | 289 | The retults of different ResNet-101 checkpoints on the validation set of ImageNet (ILSVRC2012) are summarized in the following table. 290 | 291 | 292 | | Checkpoints | Train and finetune setting | Top-1 acc
on Val 224
| Top-5 acc
on Val 224
| Top-1 acc
on Val 299
| Top-5 acc
on Val 299
| 293 | :------------- |:--------------------| :--------:| :-----------------: |:------------------:| :-------------------:| 294 | [MSRA ResNet-101](https://github.com/KaimingHe/deep-residual-networks) | train on ImageNet | 76.4 | 92.9 | -- | -- | 295 | [Google ResNet-101 ckpt1](https://arxiv.org/abs/1707.02968) | train on ImageNet, 299 x 299 | -- | -- | 77.5 | 93.9 | 296 | Our ResNet-101 ckpt1 | train on ImageNet | 77.8 | 93.9 | 79.0 | 94.5 | 297 | [Google ResNet-101 ckpt2](https://arxiv.org/abs/1707.02968) | Pretrain on JFT-300M, finetune on ImageNet, 299 x 299 | -- | -- | 79.2 | 94.7 | 298 | Our ResNet-101 ckpt2 | Pretrain on ML-Images, finetune on ImageNet | **78.8** | **94.5** | 79.5 | 94.9 | 299 | Our ResNet-101 ckpt3 | Pretrain on ML-Images, finetune on ImageNet 224 to 299 | 78.3 | 94.2 | **80.73** | **95.5** | 300 | Our ResNet-101 ckpt4 | Pretrain on ML-Images, finetune on ImageNet 299 x 299 | 75.8 | 92.7 | 79.6 | 94.6 | 301 | 302 | Note: 303 | * if not specified, the image size in training/finetuning is 224 x 224. 304 | * *finetune on ImageNet from 224 to 299* means that the image size in early epochs of finetuning is 224 x 224, then 299 x 299 in late epochs. 305 | * *Top-1 acc on Val 224* indicates the top-1 accuracy on 224 x 224 validation images. 306 | 307 | 308 | 309 | 310 | # [Copyright](#copyright) 311 | [[back to top](#)] 312 | 313 | The annotations of images are licensed by Tencent under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license. 314 | The contents of this repository, including the codes, documents and checkpoints, are released under an [BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause) license. Please refer to [LICENSE](LICENSE) for more details. 315 | 316 | If there is any concern about the copyright of any image used in this project, please [email us](mailto:wubaoyuan1987@gmail.com). 
317 | 318 | # [Citation](#citation) 319 | [[back to top](#)] 320 | 321 | If any content of this project is utilized in your work (such as data, checkpoint, code, or the proposed loss or training algorithm), please cite the following manuscript. 322 | ``` 323 | @article{tencent-ml-images-2019, 324 | title={Tencent ML-Images: A Large-Scale Multi-Label Image Database for Visual Representation Learning}, 325 | author={Wu, Baoyuan and Chen, Weidong and Fan, Yanbo and Zhang, Yong and Hou, Jinlong and Liu, Jie and Zhang, Tong}, 326 | journal={IEEE Access}, 327 | volume={7}, 328 | year={2019} 329 | } 330 | ``` 331 | 332 | 333 | -------------------------------------------------------------------------------- /data/download_urls_multithreading.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Tencent is pleased to support the open source community by making Tencent ML-Images available. 4 | Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 5 | Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 6 | https://opensource.org/licenses/BSD-3-Clause 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8 | See the License for the specific language governing permissions and limitations under the License. 
9 | """ 10 | 11 | import os 12 | import sys 13 | import urllib 14 | import argparse 15 | import threading,signal 16 | import time 17 | import socket 18 | socket.setdefaulttimeout(10.0) 19 | 20 | def downloadImg(start, end, url_list, save_dir): 21 | global record,count,count_invalid,is_exit 22 | im_names = [] 23 | with open(url_list, 'r') as url_f: 24 | for line in url_f.readlines()[start:end]: 25 | sp = line.rstrip('\n').split('\t') 26 | url = sp[0] 27 | url_list = url.split('/') 28 | im_name = url_list[-2] + '_' + url_list[-1] 29 | try: 30 | urllib.urlretrieve(url, os.path.join(save_dir, im_name)) 31 | record += 1 32 | im_file_Record.write(im_name + '\t' + '\t'.join(sp[1:]) + '\n') 33 | print('url = {} is finished and {} imgs have been downloaded of all {} imgs'.format(url, record, count)) 34 | except IOError as e: 35 | print ("The url:{} is ***INVALID***".format(url)) 36 | invalid_file.write(url + '\n') 37 | count_invalid += 1 38 | 39 | if __name__ == "__main__": 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument('--url_list', type=str, help='the url list file') 42 | parser.add_argument('--im_list', type=str, default='img.txt',help='the image list file') 43 | parser.add_argument('--num_threads', type=int, default=8, help='the num of processing') 44 | parser.add_argument('--save_dir', type=str, default='./images', help='the directory to save images') 45 | args = parser.parse_args() 46 | 47 | url_list = args.url_list 48 | im_list = args.im_list 49 | num_threads = args.num_threads 50 | save_dir = args.save_dir 51 | # create savedir 52 | if not os.path.isdir(save_dir): 53 | os.mkdir(save_dir) 54 | 55 | count = 0 # the num of urls 56 | count_invalid = 0 # the num of invalid urls 57 | record = 0 58 | with open(url_list,'r') as f: 59 | for line in f: 60 | count += 1 61 | part = int(count/num_threads) 62 | with open(im_list, 'w') as im_file_Record,open('invalid_url.txt','w') as invalid_file: # record the downloaded imgs 63 | thread_list = [] 64 | for i in 
range(num_threads): 65 | if(i == num_threads-1): 66 | t = threading.Thread(target = downloadImg, kwargs={'start':i*part, 'end':count, 'url_list':url_list, 'save_dir':save_dir}) 67 | else: 68 | t = threading.Thread(target = downloadImg, kwargs={'start':i*part, 'end':(i+1)*part, 'url_list':url_list, 'save_dir':save_dir}) 69 | t.setDaemon(True) 70 | thread_list.append(t) 71 | t.start() 72 | 73 | for i in range(num_threads): 74 | try: 75 | while thread_list[i].isAlive(): 76 | pass 77 | except KeyboardInterrupt: 78 | break 79 | 80 | if count_invalid==0: 81 | print ("all {} imgs have been downloaded!".format(count)) 82 | else: 83 | print("{}/{} imgs have been downloaded, {} URLs are invalid".format(count-count_invalid, count, count_invalid)) 84 | -------------------------------------------------------------------------------- /data/download_urls_multithreading.sh: -------------------------------------------------------------------------------- 1 | python2.7 ./download_urls_multithreading.py --url_list=train_urls_tiny.txt --im_list=train_im_list_tiny.txt --num_threads=20 --save_dir='./images' 2 | -------------------------------------------------------------------------------- /data/im_list_for_classification.txt: -------------------------------------------------------------------------------- 1 | data/images/im_0.jpg 2 | data/images/im_1.jpg 3 | data/images/im_2.jpg 4 | data/images/im_3.jpg 5 | -------------------------------------------------------------------------------- /data/image_lists/train_im_list_tiny_1.txt: -------------------------------------------------------------------------------- 1 | 17382017900_f7152f6e1c_o.jpg 940:0.8 860:0.8 17:1 1:1 2 | 3672185768_21abfdbdf1_o.jpg 5205:0.9 5188:0.9 5173:0.9 5170:0.9 3 | 74229536_063452179d_o.jpg 9416:1.0 9413:1.0 3:1.0 4 | 333886038_e01496fe70_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 8556:1 5851:1 5193:1 5181:1 5173:1 8582:1 4821:1 4781:1 4767:1 4765:1 1067:1 1041:1 9147:1 9133:1 5 | 
8439238840_265104e92b_o.jpg 1552:0.9 1521:0.9 6 | 134423290_0297a1edf0_o.jpg 11089:1 282:1 274:1 16:1 1:1 7 | 6211955974_43e1bbdd9d_o.jpg 10942:0.8 2563:0.8 8 | 435489099_f4a01d6a3b_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 8556:1 4767:1 4765:1 1067:1 1041:1 9 | 3713857698_ce1ac4f0d4_o.jpg 6470:0.8 5174:0.8 8274:0.9 8205:0.9 10 | 2938752319_e66de877cb_o.jpg 11124:0.8 11123:0.8 19:1.0 14:1.0 1:1.0 3:0.9 10973:0.8 5:0.8 13:0.8 11 | 8138634055_b12eb5a88f_o.jpg 17:0.8 1:0.8 3634:0.9 1836:0.9 12 | 3313422599_b50fec0c7a_o.jpg 3761:0.9 3657:0.9 13 | 1557324960_1ae893fed8_o.jpg 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 4129:1 4132:1 14 | 14331938228_80fd35917d_o.jpg 11124:0.8 11123:0.8 913:0.8 885:0.8 15 | 6071452334_57111b2799_o.jpg 4767:0.9 4765:0.9 16 | 20207897114_fabf17ef03_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 11060:0.9 3:0.9 19:1.0 14:1.0 1:1.0 10973:0.8 5:0.8 4767:1 4765:1 1067:1 1041:1 17 | 2572916531_38735defe1_o.jpg 4063:0.9 1837:0.9 4132:1 4097:1 4089:1 1054:1 1041:1 865:1 2:1 18 | 331204556_044215e293_o.jpg 11003:0.8 4650:0.8 4666:0.8 19 | 306206679_ac2e2d3dd7_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 20 | 3185906982_2734e3ea8e_o.jpg 5177:0.9 5170:0.9 8623:0.9 8493:0.9 9204:1 9198:1 9170:1 8585:1 1042:1 865:1 2:1 21 | 10164826235_a5533c7bdb_o.jpg 1053:0.9 1041:0.9 22 | 11587568003_c9da58b833_o.jpg 5177:0.9 5170:0.9 8623:0.9 8493:0.9 9204:1 9198:1 9170:1 8585:1 1042:1 865:1 2:1 23 | 4505160846_b3797e2559_o.jpg 5851:0.9 5193:0.9 11091:0.8 11090:0.8 11057:0.8 9305:0.8 11157:0.8 5173:0.9 5170:0.9 24 | 16218058281_fe038ddbb3_o.jpg 8666:0.9 8512:0.9 25 | 8423073355_a4547b54aa_o.jpg 913:0.9 885:0.9 8636:0.8 8629:0.8 26 | 8195413631_db78ce5a8e_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 1674:0.9 1205:0.9 27 | 75954166_76b4358319_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 8582:1 4767:1 4765:1 1067:1 1041:1 28 | 8470961329_f084fa4552_o.jpg 2434:0.9 2429:0.9 1832:0.9 29 | 2448038014_fb88b0faeb_o.jpg 11114:0.8 1084:0.8 
11124:0.8 11123:0.8 30 | 5839657249_5c2cc9926d_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 5177:1 4767:1 4765:1 1067:1 1041:1 31 | 7155811268_3f3386d274_o.jpg 11165:1 2643:1 2563:1 1833:1 1054:1 1041:1 865:1 2:1 1187:1 1053:1 19:1 14:1 1:1 32 | 2336677673_5cf456df93_o.jpg 4679:0.8 1062:0.8 33 | 15850638445_04d563e775_o.jpg 11057:0.8 9305:0.8 34 | 8247858038_4aaf68da28_o.jpg 3:0.9 1193:0.8 1053:0.8 35 | 4246479857_5a0fa1930c_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10968:0.9 20:0.9 3:0.9 11057:0.8 9305:0.8 9416:1 9413:1 36 | 17268618631_a35e47c7d3_o.jpg 19:1 14:1 1:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 11105:0.8 4678:0.8 11147:0.8 1142:0.8 9486:0.8 884:0.8 10207:1 3802:1 3780:1 3762:1 3657:1 1836:1 4686:0.8 4679:0.8 1193:1 1053:1 1062:0.9 37 | 5715926508_bb4fe6d4bc_o.jpg 1193:1 1053:1 1041:1 865:1 2:1 38 | 262201507_1454c50290_o.jpg 3805:1 3797:1 3780:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 3802:1 2944:1 2913:1 2896:1 2577:1 1833:1 39 | 4083648900_067cd150d7_o.jpg 9303:0.8 9300:0.8 40 | 21827691670_2103ff9714_o.jpg 1332:0.8 1314:0.8 1053:0.9 1041:0.9 41 | 8121198037_063687512f_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 42 | 6914737323_89438b4958_o.jpg 10942:0.8 2563:0.8 43 | 520809355_40f9d2f09e_o.jpg 9363:0.8 9303:0.8 5173:0.9 5170:0.9 44 | 4583003907_f4c04e43ef_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10942:0.8 2563:0.8 2644:0.8 45 | 15706263331_cae9be57b2_o.jpg 3802:1.0 3780:1.0 3762:1.0 3657:1.0 1836:1.0 1054:1.0 1041:1.0 865:1.0 2:1.0 4063:1.0 1837:1.0 46 | 5650341228_5c8f2d74c5_o.jpg 3611:0.8 1835:0.8 47 | 4081065718_b70e87bdc6_o.jpg 5769:0.8 5665:0.8 1332:0.9 1314:0.9 1340:0.8 48 | 23398906_fae4940560_o.jpg 3802:1 3780:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 49 | 16424257390_2e946aed5f_o.jpg 10949:1 21:1 15:1 1:1 289:1 17:1 3634:0.9 1836:0.9 50 | 7598082136_1d9c366bf3_o.jpg 3845:0.8 3802:0.8 2944:1 2913:1 2896:1 2577:1 1833:1 
1054:1 1041:1 865:1 2:1 4063:1 1837:1 3780:1 3762:1 3657:1 1836:1 51 | -------------------------------------------------------------------------------- /data/image_lists/train_im_list_tiny_2.txt: -------------------------------------------------------------------------------- 1 | 5059479200_857af7e475_o.jpg 5:0.8 0:0.8 10983:0.8 10974:0.8 962:0.8 863:0.8 2 | 3246667026_5cfa58453c_o.jpg 2569:0.9 1833:0.9 2684:0.9 2655:0.9 2617:0.8 2691:0.8 3 | 4727699820_0527f2ee2a_o.jpg 5851:0.8 5193:0.8 17:1 1:1 5173:0.8 5170:0.8 4 | 3806645650_396b51ddc8_o.jpg 904:1 893:1 890:1 884:1 870:1 859:1 2:1 5 | 15185915283_5486c284d8_o.jpg 899:0.9 890:0.9 6 | 9577264333_488917c90e_o.jpg 858:0.8 2:0.8 1332:0.8 1314:0.8 7 | 5831701088_0cd275facd_o.jpg 5851:0.8 5193:0.8 11141:0.8 11140:0.8 4679:0.9 1062:0.9 8 | 2719045377_bbd3fb7c77_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 8556:1 4063:1 1837:1 1054:1 1041:1 4767:1 4765:1 1067:1 9 | 5144818210_3ff147c4f5_o.jpg 5851:0.9 5193:0.9 11141:0.8 11140:0.8 11143:1.0 884:1.0 870:1.0 859:1.0 2:1.0 10 | 2898506763_9cc1e3afde_o.jpg 4063:1 1837:1 1054:1 1041:1 865:1 2:1 10207:1 3802:1 3780:1 3762:1 3657:1 1836:1 11 | 8133093285_2949e63a08_o.jpg 5851:0.9 5193:0.9 867:0.8 858:0.8 5:1 0:1 11110:0.8 9969:0.8 10991:0.8 10985:0.9 10976:0.9 10981:0.9 10974:0.9 5173:0.8 5170:0.8 9483:0.9 884:0.9 11143:1.0 870:1.0 859:1.0 2:1.0 12 | 19997213482_a5c47a049c_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 858:0.8 11157:1 9305:1 9300:1 9289:1 1043:1 11143:1.0 884:1.0 870:1.0 859:1.0 13 | 4915052210_a4ef8792a7_o.jpg 1193:0.8 1053:0.8 14 | 4593035529_543a9de68e_o.jpg 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 5:1 0:1 4129:1 4132:1 15 | 1548058686_fc8798b3fd_o.jpg 4063:1 1837:1 1054:1 1041:1 865:1 2:1 1050:1 8766:1 8531:1 5177:1 5170:1 1042:1 3797:1 3780:1 3762:1 3657:1 1836:1 16 | 6229581102_e8a6a51db8_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 4127:1 4102:1 4096:1 4089:1 4063:1 1837:1 1054:1 1041:1 17 | 4551712992_0dff6d87fc_o.jpg 1378:0.9 
1193:0.9 1053:0.8 18 | 4144284900_6ebd34882f_o.jpg 5173:0.9 5170:0.9 19 | 3912307968_5881f36683_o.jpg 11091:0.9 11090:0.9 9483:0.9 884:0.9 20 | 32066458_bc255c1de9_o.jpg 1378:0.8 1193:0.8 1053:0.9 21 | 2613610603_81dd2ce03d_o.jpg 9416:0.8 9413:0.8 11124:0.8 11123:0.8 22 | 5048972174_88a85f9fd0_o.jpg 11162:0.8 11161:0.8 2944:0.8 2913:0.8 23 | 15446852851_5e61761d18_o.jpg 5177:0.9 5170:0.9 24 | 16771865540_6bbbeb1545_o.jpg 1162:0.8 1053:0.8 19:1 14:1 1:1 5:1 0:1 3:0.9 10986:0.8 10976:0.8 10974:1 25 | 2459138484_72ee7e0f82_o.jpg 1353:0.9 1193:0.9 1281:0.9 1178:0.9 3:0.9 9486:0.8 884:0.8 11002:0.8 885:0.8 1332:0.8 1314:0.8 913:0.8 1379:0.8 26 | 1583067562_bb24f98be0_o.jpg 5177:0.9 5170:0.9 27 | 3941211541_85119b5dca_o.jpg 2684:1 2655:1 2569:1 1833:1 1054:1 1041:1 865:1 2:1 2080:1 2058:1 1905:1 1829:1 2610:1 28 | 16400759764_07256cb031_o.jpg 961:0.8 863:0.8 998:0.8 958:0.8 19:1.0 14:1.0 1:1.0 1021:0.9 962:0.9 9290:0.8 1043:0.8 10973:0.9 5:0.9 13:0.8 29 | 19447434033_7820f80b7b_o.jpg 9290:0.8 1043:0.8 30 | 5740477627_dd000e359f_o.jpg 11147:0.8 1142:0.8 858:0.9 2:0.9 10946:0.8 1047:0.8 31 | 3659209345_c6b0a58628_o.jpg 5177:0.9 5170:0.9 32 | 16792450152_16fcdbbd2b_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 4063:1 1837:1 1054:1 1674:0.8 1205:0.8 3802:1 3780:1 3762:1 3657:1 1836:1 1715:1 1700:1 1206:1 9416:1 9413:1 3:1 33 | 15519586065_580457550f_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 19:1.0 14:1.0 1:1.0 11124:0.8 11123:0.8 5769:0.8 5665:0.8 3:0.9 34 | 4467043072_0d48d262df_o.jpg 942:0.8 860:0.8 10946:0.8 1047:0.8 1053:0.9 1041:0.9 4679:0.9 1062:0.9 35 | 15111560160_ffbea0a25b_o.jpg 19:1.0 14:1.0 1:1.0 5:1 0:1 3:0.9 10974:1 4132:1 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 36 | 12727073325_3ba4d64a21_o.jpg 4063:0.9 1837:0.9 4129:0.9 4097:0.9 4184:0.8 4181:0.8 4173:0.9 4132:0.9 1380:1 1193:1 1053:1 1041:1 865:1 2:1 4089:1 1054:1 37 | 4183920312_fda4530fb6_o.jpg 11129:0.9 11127:0.9 38 | 
8911845093_eef6449d70_o.jpg 4096:0.9 4089:0.9 4116:0.8 4107:0.8 4100:1.0 4063:1.0 1837:1.0 1054:1.0 1041:1.0 865:1.0 2:1.0 39 | 6805430694_5ea42cf2d1_o.jpg 11129:0.9 11127:0.9 1193:0.8 1053:0.8 40 | 12574918044_6ce025c238_o.jpg 1193:0.8 1053:0.8 41 | 6848997330_fe724e87c9_o.jpg 714:1 597:1 292:1 17:1 1:1 3634:0.9 1836:0.9 9308:1 9302:1 9300:1 9289:1 1043:1 865:1 2:1 24:0.8 21:0.8 602:0.9 844:0.8 42 | 121144061_6cf80b5e86_o.jpg 10996:0.8 8735:0.8 8727:0.9 2569:1 1833:1 1054:1 1041:1 865:1 2:1 5177:1 5170:1 1042:1 2664:1 2653:1 43 | 8106214778_cb9b9b4f1a_o.jpg 5177:0.9 5170:0.9 425:1 289:1 17:1 1:1 10956:0.9 10955:0.9 8623:1 8493:1 1042:1 865:1 2:1 44 | 4934246137_610e420517_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 11063:0.8 11058:0.8 9416:1 9413:1 3:1 45 | 14614550836_c3c9e41c20_o.jpg 5173:0.9 5170:0.9 46 | 4257610875_9c3c940d33_o.jpg 3:0.9 942:0.9 860:0.9 4679:0.8 1062:0.8 47 | 12285712405_290c0dc702_o.jpg 3781:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:0.9 1837:0.9 48 | 7222487914_847ab61301_o.jpg 5173:0.8 5170:0.8 3916:1 3666:1 1836:1 1054:1 1041:1 865:1 2:1 49 | 14883330897_052967e5d7_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 961:0.9 863:0.9 998:0.8 958:0.8 1021:0.8 962:0.8 9290:0.8 1043:0.8 964:0.8 50 | 8789780074_c70f8ebef7_o.jpg 17:0.8 1:0.8 51 | -------------------------------------------------------------------------------- /data/imagenet2012_dictionary.txt: -------------------------------------------------------------------------------- 1 | 0 tench Tinca tinca 2 | 1 goldfish Carassius auratus 3 | 2 great white shark white shark man-eater man-eating shark Carcharodon carcharias 4 | 3 tiger shark Galeocerdo cuvieri 5 | 4 hammerhead hammerhead shark 6 | 5 electric ray crampfish numbfish torpedo 7 | 6 stingray 8 | 7 cock 9 | 8 hen 10 | 9 ostrich Struthio camelus 11 | 10 brambling Fringilla montifringilla 12 | 11 goldfinch Carduelis carduelis 13 | 12 house finch linnet Carpodacus mexicanus 14 | 13 junco snowbird 15 | 14 
indigo bunting indigo finch indigo bird Passerina cyanea 16 | 15 robin American robin Turdus migratorius 17 | 16 bulbul 18 | 17 jay 19 | 18 magpie 20 | 19 chickadee 21 | 20 water ouzel dipper 22 | 21 kite 23 | 22 bald eagle American eagle Haliaeetus leucocephalus 24 | 23 vulture 25 | 24 great grey owl great gray owl Strix nebulosa 26 | 25 European fire salamander Salamandra salamandra 27 | 26 common newt Triturus vulgaris 28 | 27 eft 29 | 28 spotted salamander Ambystoma maculatum 30 | 29 axolotl mud puppy Ambystoma mexicanum 31 | 30 bullfrog Rana catesbeiana 32 | 31 tree frog tree-frog 33 | 32 tailed frog bell toad ribbed toad tailed toad Ascaphus trui 34 | 33 loggerhead loggerhead turtle Caretta caretta 35 | 34 leatherback turtle leatherback leathery turtle Dermochelys coriacea 36 | 35 mud turtle 37 | 36 terrapin 38 | 37 box turtle box tortoise 39 | 38 banded gecko 40 | 39 common iguana iguana Iguana iguana 41 | 40 American chameleon anole Anolis carolinensis 42 | 41 whiptail whiptail lizard 43 | 42 agama 44 | 43 frilled lizard Chlamydosaurus kingi 45 | 44 alligator lizard 46 | 45 Gila monster Heloderma suspectum 47 | 46 green lizard Lacerta viridis 48 | 47 African chameleon Chamaeleo chamaeleon 49 | 48 Komodo dragon Komodo lizard dragon lizard giant lizard Varanus komodoensis 50 | 49 African crocodile Nile crocodile Crocodylus niloticus 51 | 50 American alligator Alligator mississipiensis 52 | 51 triceratops 53 | 52 thunder snake worm snake Carphophis amoenus 54 | 53 ringneck snake ring-necked snake ring snake 55 | 54 hognose snake puff adder sand viper 56 | 55 green snake grass snake 57 | 56 king snake kingsnake 58 | 57 garter snake grass snake 59 | 58 water snake 60 | 59 vine snake 61 | 60 night snake Hypsiglena torquata 62 | 61 boa constrictor Constrictor constrictor 63 | 62 rock python rock snake Python sebae 64 | 63 Indian cobra Naja naja 65 | 64 green mamba 66 | 65 sea snake 67 | 66 horned viper cerastes sand viper horned asp Cerastes cornutus 68 | 67 
diamondback diamondback rattlesnake Crotalus adamanteus 69 | 68 sidewinder horned rattlesnake Crotalus cerastes 70 | 69 trilobite 71 | 70 harvestman daddy longlegs Phalangium opilio 72 | 71 scorpion 73 | 72 black and gold garden spider Argiope aurantia 74 | 73 barn spider Araneus cavaticus 75 | 74 garden spider Aranea diademata 76 | 75 black widow Latrodectus mactans 77 | 76 tarantula 78 | 77 wolf spider hunting spider 79 | 78 tick 80 | 79 centipede 81 | 80 black grouse 82 | 81 ptarmigan 83 | 82 ruffed grouse partridge Bonasa umbellus 84 | 83 prairie chicken prairie grouse prairie fowl 85 | 84 peacock 86 | 85 quail 87 | 86 partridge 88 | 87 African grey African gray Psittacus erithacus 89 | 88 macaw 90 | 89 sulphur-crested cockatoo Kakatoe galerita Cacatua galerita 91 | 90 lorikeet 92 | 91 coucal 93 | 92 bee eater 94 | 93 hornbill 95 | 94 hummingbird 96 | 95 jacamar 97 | 96 toucan 98 | 97 drake 99 | 98 red-breasted merganser Mergus serrator 100 | 99 goose 101 | 100 black swan Cygnus atratus 102 | 101 tusker 103 | 102 echidna spiny anteater anteater 104 | 103 platypus duckbill duckbilled platypus duck-billed platypus Ornithorhynchus anatinus 105 | 104 wallaby brush kangaroo 106 | 105 koala koala bear kangaroo bear native bear Phascolarctos cinereus 107 | 106 wombat 108 | 107 jellyfish 109 | 108 sea anemone anemone 110 | 109 brain coral 111 | 110 flatworm platyhelminth 112 | 111 nematode nematode worm roundworm 113 | 112 conch 114 | 113 snail 115 | 114 slug 116 | 115 sea slug nudibranch 117 | 116 chiton coat-of-mail shell sea cradle polyplacophore 118 | 117 chambered nautilus pearly nautilus nautilus 119 | 118 Dungeness crab Cancer magister 120 | 119 rock crab Cancer irroratus 121 | 120 fiddler crab 122 | 121 king crab Alaska crab Alaskan king crab Alaska king crab Paralithodes camtschatica 123 | 122 American lobster Northern lobster Maine lobster Homarus americanus 124 | 123 spiny lobster langouste rock lobster crawfish crayfish sea crawfish 125 | 124 crayfish 
crawfish crawdad crawdaddy 126 | 125 hermit crab 127 | 126 isopod 128 | 127 white stork Ciconia ciconia 129 | 128 black stork Ciconia nigra 130 | 129 spoonbill 131 | 130 flamingo 132 | 131 little blue heron Egretta caerulea 133 | 132 American egret great white heron Egretta albus 134 | 133 bittern 135 | 134 crane 136 | 135 limpkin Aramus pictus 137 | 136 European gallinule Porphyrio porphyrio 138 | 137 American coot marsh hen mud hen water hen Fulica americana 139 | 138 bustard 140 | 139 ruddy turnstone Arenaria interpres 141 | 140 red-backed sandpiper dunlin Erolia alpina 142 | 141 redshank Tringa totanus 143 | 142 dowitcher 144 | 143 oystercatcher oyster catcher 145 | 144 pelican 146 | 145 king penguin Aptenodytes patagonica 147 | 146 albatross mollymawk 148 | 147 grey whale gray whale devilfish Eschrichtius gibbosus Eschrichtius robustus 149 | 148 killer whale killer orca grampus sea wolf Orcinus orca 150 | 149 dugong Dugong dugon 151 | 150 sea lion 152 | 151 Chihuahua 153 | 152 Japanese spaniel 154 | 153 Maltese dog Maltese terrier Maltese 155 | 154 Pekinese Pekingese Peke 156 | 155 Shih-Tzu 157 | 156 Blenheim spaniel 158 | 157 papillon 159 | 158 toy terrier 160 | 159 Rhodesian ridgeback 161 | 160 Afghan hound Afghan 162 | 161 basset basset hound 163 | 162 beagle 164 | 163 bloodhound sleuthhound 165 | 164 bluetick 166 | 165 black-and-tan coonhound 167 | 166 Walker hound Walker foxhound 168 | 167 English foxhound 169 | 168 redbone 170 | 169 borzoi Russian wolfhound 171 | 170 Irish wolfhound 172 | 171 Italian greyhound 173 | 172 whippet 174 | 173 Ibizan hound Ibizan Podenco 175 | 174 Norwegian elkhound elkhound 176 | 175 otterhound otter hound 177 | 176 Saluki gazelle hound 178 | 177 Scottish deerhound deerhound 179 | 178 Weimaraner 180 | 179 Staffordshire bullterrier Staffordshire bull terrier 181 | 180 American Staffordshire terrier Staffordshire terrier American pit bull terrier pit bull terrier 182 | 181 Bedlington terrier 183 | 182 Border terrier 184 | 183 
Kerry blue terrier 185 | 184 Irish terrier 186 | 185 Norfolk terrier 187 | 186 Norwich terrier 188 | 187 Yorkshire terrier 189 | 188 wire-haired fox terrier 190 | 189 Lakeland terrier 191 | 190 Sealyham terrier Sealyham 192 | 191 Airedale Airedale terrier 193 | 192 cairn cairn terrier 194 | 193 Australian terrier 195 | 194 Dandie Dinmont Dandie Dinmont terrier 196 | 195 Boston bull Boston terrier 197 | 196 miniature schnauzer 198 | 197 giant schnauzer 199 | 198 standard schnauzer 200 | 199 Scotch terrier Scottish terrier Scottie 201 | 200 Tibetan terrier chrysanthemum dog 202 | 201 silky terrier Sydney silky 203 | 202 soft-coated wheaten terrier 204 | 203 West Highland white terrier 205 | 204 Lhasa Lhasa apso 206 | 205 flat-coated retriever 207 | 206 curly-coated retriever 208 | 207 golden retriever 209 | 208 Labrador retriever 210 | 209 Chesapeake Bay retriever 211 | 210 German short-haired pointer 212 | 211 vizsla Hungarian pointer 213 | 212 English setter 214 | 213 Irish setter red setter 215 | 214 Gordon setter 216 | 215 Brittany spaniel 217 | 216 clumber clumber spaniel 218 | 217 English springer English springer spaniel 219 | 218 Welsh springer spaniel 220 | 219 cocker spaniel English cocker spaniel cocker 221 | 220 Sussex spaniel 222 | 221 Irish water spaniel 223 | 222 kuvasz 224 | 223 schipperke 225 | 224 groenendael 226 | 225 malinois 227 | 226 briard 228 | 227 kelpie 229 | 228 komondor 230 | 229 Old English sheepdog bobtail 231 | 230 Shetland sheepdog Shetland sheep dog Shetland 232 | 231 collie 233 | 232 Border collie 234 | 233 Bouvier des Flandres Bouviers des Flandres 235 | 234 Rottweiler 236 | 235 German shepherd German shepherd dog German police dog alsatian 237 | 236 Doberman Doberman pinscher 238 | 237 miniature pinscher 239 | 238 Greater Swiss Mountain dog 240 | 239 Bernese mountain dog 241 | 240 Appenzeller 242 | 241 EntleBucher 243 | 242 boxer 244 | 243 bull mastiff 245 | 244 Tibetan mastiff 246 | 245 French bulldog 247 | 246 Great Dane 248 | 
247 Saint Bernard St Bernard 249 | 248 Eskimo dog husky 250 | 249 malamute malemute Alaskan malamute 251 | 250 Siberian husky 252 | 251 dalmatian coach dog carriage dog 253 | 252 affenpinscher monkey pinscher monkey dog 254 | 253 basenji 255 | 254 pug pug-dog 256 | 255 Leonberg 257 | 256 Newfoundland Newfoundland dog 258 | 257 Great Pyrenees 259 | 258 Samoyed Samoyede 260 | 259 Pomeranian 261 | 260 chow chow chow 262 | 261 keeshond 263 | 262 Brabancon griffon 264 | 263 Pembroke Pembroke Welsh corgi 265 | 264 Cardigan Cardigan Welsh corgi 266 | 265 toy poodle 267 | 266 miniature poodle 268 | 267 standard poodle 269 | 268 Mexican hairless 270 | 269 timber wolf grey wolf gray wolf Canis lupus 271 | 270 white wolf Arctic wolf Canis lupus tundrarum 272 | 271 red wolf maned wolf Canis rufus Canis niger 273 | 272 coyote prairie wolf brush wolf Canis latrans 274 | 273 dingo warrigal warragal Canis dingo 275 | 274 dhole Cuon alpinus 276 | 275 African hunting dog hyena dog Cape hunting dog Lycaon pictus 277 | 276 hyena hyaena 278 | 277 red fox Vulpes vulpes 279 | 278 kit fox Vulpes macrotis 280 | 279 Arctic fox white fox Alopex lagopus 281 | 280 grey fox gray fox Urocyon cinereoargenteus 282 | 281 tabby tabby cat 283 | 282 tiger cat 284 | 283 Persian cat 285 | 284 Siamese cat Siamese 286 | 285 Egyptian cat 287 | 286 cougar puma catamount mountain lion painter panther Felis concolor 288 | 287 lynx catamount 289 | 288 leopard Panthera pardus 290 | 289 snow leopard ounce Panthera uncia 291 | 290 jaguar panther Panthera onca Felis onca 292 | 291 lion king of beasts Panthera leo 293 | 292 tiger Panthera tigris 294 | 293 cheetah chetah Acinonyx jubatus 295 | 294 brown bear bruin Ursus arctos 296 | 295 American black bear black bear Ursus americanus Euarctos americanus 297 | 296 ice bear polar bear Ursus Maritimus Thalarctos maritimus 298 | 297 sloth bear Melursus ursinus Ursus ursinus 299 | 298 mongoose 300 | 299 meerkat mierkat 301 | 300 tiger beetle 302 | 301 ladybug ladybeetle 
lady beetle ladybird ladybird beetle 303 | 302 ground beetle carabid beetle 304 | 303 long-horned beetle longicorn longicorn beetle 305 | 304 leaf beetle chrysomelid 306 | 305 dung beetle 307 | 306 rhinoceros beetle 308 | 307 weevil 309 | 308 fly 310 | 309 bee 311 | 310 ant emmet pismire 312 | 311 grasshopper hopper 313 | 312 cricket 314 | 313 walking stick walkingstick stick insect 315 | 314 cockroach roach 316 | 315 mantis mantid 317 | 316 cicada cicala 318 | 317 leafhopper 319 | 318 lacewing lacewing fly 320 | 319 "dragonfly darning needle devils darning needle sewing needle snake feeder snake doctor mosquito hawk skeeter hawk" 321 | 320 damselfly 322 | 321 admiral 323 | 322 ringlet ringlet butterfly 324 | 323 monarch monarch butterfly milkweed butterfly Danaus plexippus 325 | 324 cabbage butterfly 326 | 325 sulphur butterfly sulfur butterfly 327 | 326 lycaenid lycaenid butterfly 328 | 327 starfish sea star 329 | 328 sea urchin 330 | 329 sea cucumber holothurian 331 | 330 wood rabbit cottontail cottontail rabbit 332 | 331 hare 333 | 332 Angora Angora rabbit 334 | 333 hamster 335 | 334 porcupine hedgehog 336 | 335 fox squirrel eastern fox squirrel Sciurus niger 337 | 336 marmot 338 | 337 beaver 339 | 338 guinea pig Cavia cobaya 340 | 339 sorrel 341 | 340 zebra 342 | 341 hog pig grunter squealer Sus scrofa 343 | 342 wild boar boar Sus scrofa 344 | 343 warthog 345 | 344 hippopotamus hippo river horse Hippopotamus amphibius 346 | 345 ox 347 | 346 water buffalo water ox Asiatic buffalo Bubalus bubalis 348 | 347 bison 349 | 348 ram tup 350 | 349 bighorn bighorn sheep cimarron Rocky Mountain bighorn Rocky Mountain sheep Ovis canadensis 351 | 350 ibex Capra ibex 352 | 351 hartebeest 353 | 352 impala Aepyceros melampus 354 | 353 gazelle 355 | 354 Arabian camel dromedary Camelus dromedarius 356 | 355 llama 357 | 356 weasel 358 | 357 mink 359 | 358 polecat fitch foulmart foumart Mustela putorius 360 | 359 black-footed ferret ferret Mustela nigripes 361 | 360 otter 362 | 
361 skunk polecat wood pussy 363 | 362 badger 364 | 363 armadillo 365 | 364 three-toed sloth ai Bradypus tridactylus 366 | 365 orangutan orang orangutang Pongo pygmaeus 367 | 366 gorilla Gorilla gorilla 368 | 367 chimpanzee chimp Pan troglodytes 369 | 368 gibbon Hylobates lar 370 | 369 siamang Hylobates syndactylus Symphalangus syndactylus 371 | 370 guenon guenon monkey 372 | 371 patas hussar monkey Erythrocebus patas 373 | 372 baboon 374 | 373 macaque 375 | 374 langur 376 | 375 colobus colobus monkey 377 | 376 proboscis monkey Nasalis larvatus 378 | 377 marmoset 379 | 378 capuchin ringtail Cebus capucinus 380 | 379 howler monkey howler 381 | 380 titi titi monkey 382 | 381 spider monkey Ateles geoffroyi 383 | 382 squirrel monkey Saimiri sciureus 384 | 383 Madagascar cat ring-tailed lemur Lemur catta 385 | 384 indri indris Indri indri Indri brevicaudatus 386 | 385 Indian elephant Elephas maximus 387 | 386 African elephant Loxodonta africana 388 | 387 lesser panda red panda panda bear cat cat bear Ailurus fulgens 389 | 388 giant panda panda panda bear coon bear Ailuropoda melanoleuca 390 | 389 barracouta snoek 391 | 390 eel 392 | 391 coho cohoe coho salmon blue jack silver salmon Oncorhynchus kisutch 393 | 392 rock beauty Holocanthus tricolor 394 | 393 anemone fish 395 | 394 sturgeon 396 | 395 gar garfish garpike billfish Lepisosteus osseus 397 | 396 lionfish 398 | 397 puffer pufferfish blowfish globefish 399 | 398 abacus 400 | 399 abaya 401 | 400 "academic gown academic robe judges robe" 402 | 401 accordion piano accordion squeeze box 403 | 402 acoustic guitar 404 | 403 aircraft carrier carrier flattop attack aircraft carrier 405 | 404 airliner 406 | 405 airship dirigible 407 | 406 altar 408 | 407 ambulance 409 | 408 amphibian amphibious vehicle 410 | 409 analog clock 411 | 410 apiary bee house 412 | 411 apron 413 | 412 ashcan trash can garbage can wastebin ash bin ash-bin ashbin dustbin trash barrel trash bin 414 | 413 assault rifle assault gun 415 | 414 backpack 
back pack knapsack packsack rucksack haversack 416 | 415 bakery bakeshop bakehouse 417 | 416 balance beam beam 418 | 417 balloon 419 | 418 ballpoint ballpoint pen ballpen Biro 420 | 419 Band Aid 421 | 420 banjo 422 | 421 bannister banister balustrade balusters handrail 423 | 422 barbell 424 | 423 barber chair 425 | 424 barbershop 426 | 425 barn 427 | 426 barometer 428 | 427 barrel cask 429 | 428 barrow garden cart lawn cart wheelbarrow 430 | 429 baseball 431 | 430 basketball 432 | 431 bassinet 433 | 432 bassoon 434 | 433 bathing cap swimming cap 435 | 434 bath towel 436 | 435 bathtub bathing tub bath tub 437 | 436 beach wagon station wagon wagon estate car beach waggon station waggon waggon 438 | 437 beacon lighthouse beacon light pharos 439 | 438 beaker 440 | 439 bearskin busby shako 441 | 440 beer bottle 442 | 441 beer glass 443 | 442 bell cote bell cot 444 | 443 bib 445 | 444 bicycle-built-for-two tandem bicycle tandem 446 | 445 bikini two-piece 447 | 446 binder ring-binder 448 | 447 binoculars field glasses opera glasses 449 | 448 birdhouse 450 | 449 boathouse 451 | 450 bobsled bobsleigh bob 452 | 451 bolo tie bolo bola tie bola 453 | 452 bonnet poke bonnet 454 | 453 bookcase 455 | 454 bookshop bookstore bookstall 456 | 455 bottlecap 457 | 456 bow 458 | 457 bow tie bow-tie bowtie 459 | 458 brass memorial tablet plaque 460 | 459 brassiere bra bandeau 461 | 460 breakwater groin groyne mole bulwark seawall jetty 462 | 461 breastplate aegis egis 463 | 462 broom 464 | 463 bucket pail 465 | 464 buckle 466 | 465 bulletproof vest 467 | 466 bullet train bullet 468 | 467 butcher shop meat market 469 | 468 cab hack taxi taxicab 470 | 469 caldron cauldron 471 | 470 candle taper wax light 472 | 471 cannon 473 | 472 canoe 474 | 473 can opener tin opener 475 | 474 cardigan 476 | 475 car mirror 477 | 476 carousel carrousel merry-go-round roundabout whirligig 478 | 477 "carpenters kit tool kit" 479 | 478 carton 480 | 479 car wheel 481 | 480 cash machine cash dispenser automated 
teller machine automatic teller machine automated teller automatic teller ATM 482 | 481 cassette 483 | 482 cassette player 484 | 483 castle 485 | 484 catamaran 486 | 485 CD player 487 | 486 cello violoncello 488 | 487 cellular telephone cellular phone cellphone cell mobile phone 489 | 488 chain 490 | 489 chainlink fence 491 | 490 chain mail ring mail mail chain armor chain armour ring armor ring armour 492 | 491 chain saw chainsaw 493 | 492 chest 494 | 493 chiffonier commode 495 | 494 chime bell gong 496 | 495 china cabinet china closet 497 | 496 Christmas stocking 498 | 497 church church building 499 | 498 cinema movie theater movie theatre movie house picture palace 500 | 499 cleaver meat cleaver chopper 501 | 500 cliff dwelling 502 | 501 cloak 503 | 502 clog geta patten sabot 504 | 503 cocktail shaker 505 | 504 coffee mug 506 | 505 coffeepot 507 | 506 coil spiral volute whorl helix 508 | 507 combination lock 509 | 508 computer keyboard keypad 510 | 509 confectionery confectionary candy store 511 | 510 container ship containership container vessel 512 | 511 convertible 513 | 512 corkscrew bottle screw 514 | 513 cornet horn trumpet trump 515 | 514 cowboy boot 516 | 515 cowboy hat ten-gallon hat 517 | 516 cradle 518 | 517 crane 519 | 518 crash helmet 520 | 519 crate 521 | 520 crib cot 522 | 521 Crock Pot 523 | 522 croquet ball 524 | 523 crutch 525 | 524 cuirass 526 | 525 dam dike dyke 527 | 526 desk 528 | 527 desktop computer 529 | 528 dial telephone dial phone 530 | 529 diaper nappy napkin 531 | 530 digital clock 532 | 531 digital watch 533 | 532 dining table board 534 | 533 dishrag dishcloth 535 | 534 dishwasher dish washer dishwashing machine 536 | 535 disk brake disc brake 537 | 536 dock dockage docking facility 538 | 537 dogsled dog sled dog sleigh 539 | 538 dome 540 | 539 doormat welcome mat 541 | 540 drilling platform offshore rig 542 | 541 drum membranophone tympan 543 | 542 drumstick 544 | 543 dumbbell 545 | 544 Dutch oven 546 | 545 electric fan blower 547 
| 546 electric guitar 548 | 547 electric locomotive 549 | 548 entertainment center 550 | 549 envelope 551 | 550 espresso maker 552 | 551 face powder 553 | 552 feather boa boa 554 | 553 file file cabinet filing cabinet 555 | 554 fireboat 556 | 555 fire engine fire truck 557 | 556 fire screen fireguard 558 | 557 flagpole flagstaff 559 | 558 flute transverse flute 560 | 559 folding chair 561 | 560 football helmet 562 | 561 forklift 563 | 562 fountain 564 | 563 fountain pen 565 | 564 four-poster 566 | 565 freight car 567 | 566 French horn horn 568 | 567 frying pan frypan skillet 569 | 568 fur coat 570 | 569 garbage truck dustcart 571 | 570 gasmask respirator gas helmet 572 | 571 gas pump gasoline pump petrol pump island dispenser 573 | 572 goblet 574 | 573 go-kart 575 | 574 golf ball 576 | 575 golfcart golf cart 577 | 576 gondola 578 | 577 gong tam-tam 579 | 578 gown 580 | 579 grand piano grand 581 | 580 greenhouse nursery glasshouse 582 | 581 grille radiator grille 583 | 582 grocery store grocery food market market 584 | 583 guillotine 585 | 584 hair slide 586 | 585 hair spray 587 | 586 half track 588 | 587 hammer 589 | 588 hamper 590 | 589 hand blower blow dryer blow drier hair dryer hair drier 591 | 590 hand-held computer hand-held microcomputer 592 | 591 handkerchief hankie hanky hankey 593 | 592 hard disc hard disk fixed disk 594 | 593 harmonica mouth organ harp mouth harp 595 | 594 harp 596 | 595 harvester reaper 597 | 596 hatchet 598 | 597 holster 599 | 598 home theater home theatre 600 | 599 honeycomb 601 | 600 hook claw 602 | 601 hoopskirt crinoline 603 | 602 horizontal bar high bar 604 | 603 horse cart horse-cart 605 | 604 hourglass 606 | 605 iPod 607 | 606 iron smoothing iron 608 | 607 "jack-o-lantern" 609 | 608 jean blue jean denim 610 | 609 jeep landrover 611 | 610 jersey T-shirt tee shirt 612 | 611 jigsaw puzzle 613 | 612 jinrikisha ricksha rickshaw 614 | 613 joystick 615 | 614 kimono 616 | 615 knee pad 617 | 616 knot 618 | 617 lab coat laboratory coat 
619 | 618 ladle 620 | 619 lampshade lamp shade 621 | 620 laptop laptop computer 622 | 621 lawn mower mower 623 | 622 lens cap lens cover 624 | 623 letter opener paper knife paperknife 625 | 624 library 626 | 625 lifeboat 627 | 626 lighter light igniter ignitor 628 | 627 limousine limo 629 | 628 liner ocean liner 630 | 629 lipstick lip rouge 631 | 630 Loafer 632 | 631 lotion 633 | 632 loudspeaker speaker speaker unit loudspeaker system speaker system 634 | 633 "loupe jewelers loupe" 635 | 634 lumbermill sawmill 636 | 635 magnetic compass 637 | 636 mailbag postbag 638 | 637 mailbox letter box 639 | 638 maillot 640 | 639 maillot tank suit 641 | 640 manhole cover 642 | 641 maraca 643 | 642 marimba xylophone 644 | 643 mask 645 | 644 matchstick 646 | 645 maypole 647 | 646 maze labyrinth 648 | 647 measuring cup 649 | 648 medicine chest medicine cabinet 650 | 649 megalith megalithic structure 651 | 650 microphone mike 652 | 651 microwave microwave oven 653 | 652 military uniform 654 | 653 milk can 655 | 654 minibus 656 | 655 miniskirt mini 657 | 656 minivan 658 | 657 missile 659 | 658 mitten 660 | 659 mixing bowl 661 | 660 mobile home manufactured home 662 | 661 Model T 663 | 662 modem 664 | 663 monastery 665 | 664 monitor 666 | 665 moped 667 | 666 mortar 668 | 667 mortarboard 669 | 668 mosque 670 | 669 mosquito net 671 | 670 motor scooter scooter 672 | 671 mountain bike all-terrain bike off-roader 673 | 672 mountain tent 674 | 673 mouse computer mouse 675 | 674 mousetrap 676 | 675 moving van 677 | 676 muzzle 678 | 677 nail 679 | 678 neck brace 680 | 679 necklace 681 | 680 nipple 682 | 681 notebook notebook computer 683 | 682 obelisk 684 | 683 oboe hautboy hautbois 685 | 684 ocarina sweet potato 686 | 685 odometer hodometer mileometer milometer 687 | 686 oil filter 688 | 687 organ pipe organ 689 | 688 oscilloscope scope cathode-ray oscilloscope CRO 690 | 689 overskirt 691 | 690 oxcart 692 | 691 oxygen mask 693 | 692 packet 694 | 693 paddle boat paddle 695 | 694 paddlewheel 
paddle wheel 696 | 695 padlock 697 | 696 paintbrush 698 | 697 "pajama pyjama pjs jammies" 699 | 698 palace 700 | 699 panpipe pandean pipe syrinx 701 | 700 paper towel 702 | 701 parachute chute 703 | 702 parallel bars bars 704 | 703 park bench 705 | 704 parking meter 706 | 705 passenger car coach carriage 707 | 706 patio terrace 708 | 707 pay-phone pay-station 709 | 708 pedestal plinth footstall 710 | 709 pencil box pencil case 711 | 710 pencil sharpener 712 | 711 perfume essence 713 | 712 Petri dish 714 | 713 photocopier 715 | 714 pick plectrum plectron 716 | 715 pickelhaube 717 | 716 picket fence paling 718 | 717 pickup pickup truck 719 | 718 pier 720 | 719 piggy bank penny bank 721 | 720 pill bottle 722 | 721 pillow 723 | 722 ping-pong ball 724 | 723 pinwheel 725 | 724 pirate pirate ship 726 | 725 pitcher ewer 727 | 726 "plane carpenters plane woodworking plane" 728 | 727 planetarium 729 | 728 plastic bag 730 | 729 plate rack 731 | 730 plow plough 732 | 731 "plunger plumbers helper" 733 | 732 Polaroid camera Polaroid Land camera 734 | 733 pole 735 | 734 police van police wagon paddy wagon patrol wagon wagon black Maria 736 | 735 poncho 737 | 736 pool table billiard table snooker table 738 | 737 pop bottle soda bottle 739 | 738 pot flowerpot 740 | 739 "potters wheel" 741 | 740 power drill 742 | 741 prayer rug prayer mat 743 | 742 printer 744 | 743 prison prison house 745 | 744 projectile missile 746 | 745 projector 747 | 746 puck hockey puck 748 | 747 punching bag punch bag punching ball punchball 749 | 748 purse 750 | 749 quill quill pen 751 | 750 quilt comforter comfort puff 752 | 751 racer race car racing car 753 | 752 racket racquet 754 | 753 radiator 755 | 754 radio wireless 756 | 755 radio telescope radio reflector 757 | 756 rain barrel 758 | 757 recreational vehicle RV R.V. 
759 | 758 reel 760 | 759 reflex camera 761 | 760 refrigerator icebox 762 | 761 remote control remote 763 | 762 restaurant eating house eating place eatery 764 | 763 revolver six-gun six-shooter 765 | 764 rifle 766 | 765 rocking chair rocker 767 | 766 rotisserie 768 | 767 rubber eraser rubber pencil eraser 769 | 768 rugby ball 770 | 769 rule ruler 771 | 770 running shoe 772 | 771 safe 773 | 772 safety pin 774 | 773 saltshaker salt shaker 775 | 774 sandal 776 | 775 sarong 777 | 776 sax saxophone 778 | 777 scabbard 779 | 778 scale weighing machine 780 | 779 school bus 781 | 780 schooner 782 | 781 scoreboard 783 | 782 screen CRT screen 784 | 783 screw 785 | 784 screwdriver 786 | 785 seat belt seatbelt 787 | 786 sewing machine 788 | 787 shield buckler 789 | 788 shoe shop shoe-shop shoe store 790 | 789 shoji 791 | 790 shopping basket 792 | 791 shopping cart 793 | 792 shovel 794 | 793 shower cap 795 | 794 shower curtain 796 | 795 ski 797 | 796 ski mask 798 | 797 sleeping bag 799 | 798 slide rule slipstick 800 | 799 sliding door 801 | 800 slot one-armed bandit 802 | 801 snorkel 803 | 802 snowmobile 804 | 803 snowplow snowplough 805 | 804 soap dispenser 806 | 805 soccer ball 807 | 806 sock 808 | 807 solar dish solar collector solar furnace 809 | 808 sombrero 810 | 809 soup bowl 811 | 810 space bar 812 | 811 space heater 813 | 812 space shuttle 814 | 813 spatula 815 | 814 speedboat 816 | 815 "spider web spiders web" 817 | 816 spindle 818 | 817 sports car sport car 819 | 818 spotlight spot 820 | 819 stage 821 | 820 steam locomotive 822 | 821 steel arch bridge 823 | 822 steel drum 824 | 823 stethoscope 825 | 824 stole 826 | 825 stone wall 827 | 826 stopwatch stop watch 828 | 827 stove 829 | 828 strainer 830 | 829 streetcar tram tramcar trolley trolley car 831 | 830 stretcher 832 | 831 studio couch day bed 833 | 832 stupa tope 834 | 833 submarine pigboat sub U-boat 835 | 834 suit suit of clothes 836 | 835 sundial 837 | 836 sunglass 838 | 837 sunglasses dark glasses shades 839 | 
838 sunscreen sunblock sun blocker 840 | 839 suspension bridge 841 | 840 swab swob mop 842 | 841 sweatshirt 843 | 842 swimming trunks bathing trunks 844 | 843 swing 845 | 844 switch electric switch electrical switch 846 | 845 syringe 847 | 846 table lamp 848 | 847 tank army tank armored combat vehicle armoured combat vehicle 849 | 848 tape player 850 | 849 teapot 851 | 850 teddy teddy bear 852 | 851 television television system 853 | 852 tennis ball 854 | 853 thatch thatched roof 855 | 854 theater curtain theatre curtain 856 | 855 thimble 857 | 856 thresher thrasher threshing machine 858 | 857 throne 859 | 858 tile roof 860 | 859 toaster 861 | 860 tobacco shop tobacconist shop tobacconist 862 | 861 toilet seat 863 | 862 torch 864 | 863 totem pole 865 | 864 tow truck tow car wrecker 866 | 865 toyshop 867 | 866 tractor 868 | 867 trailer truck tractor trailer trucking rig rig articulated lorry semi 869 | 868 tray 870 | 869 trench coat 871 | 870 tricycle trike velocipede 872 | 871 trimaran 873 | 872 tripod 874 | 873 triumphal arch 875 | 874 trolleybus trolley coach trackless trolley 876 | 875 trombone 877 | 876 tub vat 878 | 877 turnstile 879 | 878 typewriter keyboard 880 | 879 umbrella 881 | 880 unicycle monocycle 882 | 881 upright upright piano 883 | 882 vacuum vacuum cleaner 884 | 883 vase 885 | 884 vault 886 | 885 velvet 887 | 886 vending machine 888 | 887 vestment 889 | 888 viaduct 890 | 889 violin fiddle 891 | 890 volleyball 892 | 891 waffle iron 893 | 892 wall clock 894 | 893 wallet billfold notecase pocketbook 895 | 894 wardrobe closet press 896 | 895 warplane military plane 897 | 896 washbasin handbasin washbowl lavabo wash-hand basin 898 | 897 washer automatic washer washing machine 899 | 898 water bottle 900 | 899 water jug 901 | 900 water tower 902 | 901 whiskey jug 903 | 902 whistle 904 | 903 wig 905 | 904 window screen 906 | 905 window shade 907 | 906 Windsor tie 908 | 907 wine bottle 909 | 908 wing 910 | 909 wok 911 | 910 wooden spoon 912 | 911 wool 
woolen woollen 913 | 912 worm fence snake fence snake-rail fence Virginia fence 914 | 913 wreck 915 | 914 yawl 916 | 915 yurt 917 | 916 web site website internet site site 918 | 917 comic book 919 | 918 crossword puzzle crossword 920 | 919 street sign 921 | 920 traffic light traffic signal stoplight 922 | 921 book jacket dust cover dust jacket dust wrapper 923 | 922 menu 924 | 923 plate 925 | 924 guacamole 926 | 925 consomme 927 | 926 hot pot hotpot 928 | 927 trifle 929 | 928 ice cream icecream 930 | 929 ice lolly lolly lollipop popsicle 931 | 930 French loaf 932 | 931 bagel beigel 933 | 932 pretzel 934 | 933 cheeseburger 935 | 934 hotdog hot dog red hot 936 | 935 mashed potato 937 | 936 head cabbage 938 | 937 broccoli 939 | 938 cauliflower 940 | 939 zucchini courgette 941 | 940 spaghetti squash 942 | 941 acorn squash 943 | 942 butternut squash 944 | 943 cucumber cuke 945 | 944 artichoke globe artichoke 946 | 945 bell pepper 947 | 946 cardoon 948 | 947 mushroom 949 | 948 Granny Smith 950 | 949 strawberry 951 | 950 orange 952 | 951 lemon 953 | 952 fig 954 | 953 pineapple ananas 955 | 954 banana 956 | 955 jackfruit jak jack 957 | 956 custard apple 958 | 957 pomegranate 959 | 958 hay 960 | 959 carbonara 961 | 960 chocolate sauce chocolate syrup 962 | 961 dough 963 | 962 meat loaf meatloaf 964 | 963 pizza pizza pie 965 | 964 potpie 966 | 965 burrito 967 | 966 red wine 968 | 967 espresso 969 | 968 cup 970 | 969 eggnog 971 | 970 alp 972 | 971 bubble 973 | 972 cliff drop drop-off 974 | 973 coral reef 975 | 974 geyser 976 | 975 lakeside lakeshore 977 | 976 promontory headland head foreland 978 | 977 sandbar sand bar 979 | 978 seashore coast seacoast sea-coast 980 | 979 valley vale 981 | 980 volcano 982 | 981 ballplayer baseball player 983 | 982 groom bridegroom 984 | 983 scuba diver 985 | 984 rapeseed 986 | 985 daisy 987 | 986 "yellow ladys slipper yellow lady-slipper Cypripedium calceolus Cypripedium parviflorum" 988 | 987 corn 989 | 988 acorn 990 | 989 hip rose hip 
rosehip 991 | 990 buckeye horse chestnut conker 992 | 991 coral fungus 993 | 992 agaric 994 | 993 gyromitra 995 | 994 stinkhorn carrion fungus 996 | 995 earthstar 997 | 996 hen-of-the-woods hen of the woods Polyporus frondosus Grifola frondosa 998 | 997 bolete 999 | 998 ear spike capitulum 1000 | 999 toilet tissue toilet paper bathroom tissue 1001 | -------------------------------------------------------------------------------- /data/images/im_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data/images/im_0.jpg -------------------------------------------------------------------------------- /data/images/im_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data/images/im_1.jpg -------------------------------------------------------------------------------- /data/images/im_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data/images/im_2.jpg -------------------------------------------------------------------------------- /data/images/im_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data/images/im_3.jpg -------------------------------------------------------------------------------- /data/tfrecord.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import os 4 | import tensorflow as tf 5 | import numpy as np 6 | import imghdr 7 | import threading 8 | import argparse 9 | parser = argparse.ArgumentParser() 10 | 
parser.add_argument("-idx","--indexs", type=str, default="", help="dirs contains train index files") 11 | parser.add_argument("-tfs", "--tfrecords", type=str, default="", help="dirs contains train tfrecords") 12 | parser.add_argument("-im", "--images", type=str, default="", help="the path contains the raw images") 13 | parser.add_argument("-cls", "--num_class", type=int, default=0, help="class label number") 14 | parser.add_argument("-one", "--one_hot", type=bool, default=True, help="indicates the format of label fields in tfrecords") 15 | parser.add_argument("-sidx", "--start_index", type=int, default=0, help="the start number of train tfrecord files") 16 | args = parser.parse_args() 17 | 18 | def _int64_feature(value): 19 | """Wrapper for inserting int64 features into Example proto.""" 20 | if not isinstance(value, list): 21 | value = [value] 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 23 | 24 | def _float_feature(value): 25 | """Wrapper for inserting float features into Example proto.""" 26 | if not isinstance(value, list): 27 | value = [value] 28 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 29 | 30 | def _bytes_feature(value): 31 | """Wrapper for inserting bytes features into Example proto.""" 32 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 33 | 34 | class ImageCoder(object): 35 | """Helper class that provides TensorFlow image coding utilities.""" 36 | def __init__(self): 37 | # Create a single Session to run all image coding calls. 38 | self._sess = tf.Session() 39 | 40 | # Initializes function that converts PNG to JPEG data. 41 | self._png_data = tf.placeholder(dtype=tf.string) 42 | image = tf.image.decode_png(self._png_data, channels=3) 43 | self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) 44 | 45 | # Initializes function that decodes RGB JPEG data. 
46 | self._decode_jpeg_data = tf.placeholder(dtype=tf.string) 47 | self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) 48 | 49 | def png_to_jpeg(self, image_data): 50 | return self._sess.run(self._png_to_jpeg, 51 | feed_dict={self._png_data: image_data}) 52 | 53 | def decode_jpeg(self, image_data): 54 | image = self._sess.run(self._decode_jpeg, 55 | feed_dict={self._decode_jpeg_data: image_data}) 56 | assert len(image.shape) == 3 57 | assert image.shape[2] == 3 58 | return image 59 | 60 | def _is_png(filename): 61 | return (imghdr.what(filename)=='png') 62 | 63 | def _is_jpeg(filename): 64 | return (imghdr.what(filename)=='jpeg') 65 | 66 | def _process_image(filename, coder): 67 | """Process a single image file.""" 68 | with tf.gfile.FastGFile(filename, 'rb') as f: 69 | image_data = f.read() 70 | if not _is_jpeg(filename): 71 | if _is_png(filename): 72 | print('Converting PNG to JPEG for %s' % filename) 73 | image_data = coder.png_to_jpeg(image_data) 74 | else: 75 | try: 76 | image = coder.decode_jpeg(image_data) 77 | assert len(image.shape) == 3 78 | height = image.shape[0] 79 | width = image.shape[1] 80 | assert image.shape[2] == 3 81 | return image_data, height, width 82 | except: 83 | print('Cannot converted type %s' % imghdr.what(filename)) 84 | return [], 0, 0 85 | 86 | image = coder.decode_jpeg(image_data) 87 | assert len(image.shape) == 3 88 | height = image.shape[0] 89 | width = image.shape[1] 90 | assert image.shape[2] == 3 91 | 92 | return image_data, height, width 93 | 94 | def _save_one(train_txt, tfrecord_name, label_num, one_hot): 95 | writer = tf.python_io.TFRecordWriter(tfrecord_name) 96 | with tf.Session() as sess: 97 | coder = ImageCoder() 98 | with open(train_txt, 'r') as lines: 99 | for line in lines: 100 | sp = line.rstrip("\n").split() 101 | imgf = os.path.join(args.images, sp[0]) 102 | print(imgf) 103 | img, height, width = _process_image(imgf, coder) 104 | if height*width==0: 105 | continue 106 | 107 | if one_hot: 
108 | label = np.zeros([label_num,], dtype=np.float32) 109 | for i in range(1, len(sp)): 110 | if len(sp[i].split(":"))==2: 111 | label[int(sp[i].split(":")[0])] = float(sp[i].split(":")[1]) 112 | else: 113 | label[int(sp[i].split(":")[0])] = 1.0 114 | example = tf.train.Example(features=tf.train.Features(feature={ 115 | 'width': _int64_feature(width), 116 | 'height': _int64_feature(height), 117 | 'image': _bytes_feature(tf.compat.as_bytes(img)), 118 | 'label': _bytes_feature(tf.compat.as_bytes(label.tostring())), 119 | 'name': _bytes_feature(sp[0]) 120 | })) 121 | writer.write(example.SerializeToString()) 122 | 123 | else: 124 | label = int(sp[1]) 125 | example = tf.train.Example(features=tf.train.Features(feature={ 126 | 'width': _int64_feature(width), 127 | 'height': _int64_feature(height), 128 | 'image': _bytes_feature(tf.compat.as_bytes(img)), 129 | 'label': _int64_feature(label), 130 | 'name': _bytes_feature(sp[0]) 131 | })) 132 | writer.write(example.SerializeToString()) 133 | writer.close() 134 | 135 | def _save(): 136 | files = os.listdir(args.indexs) 137 | coord = tf.train.Coordinator() 138 | threads = [] 139 | 140 | i = args.start_index 141 | for idxf in files: 142 | threads.append( 143 | threading.Thread(target=_save_one, 144 | args=(os.path.join(args.indexs, idxf), 145 | os.path.join(args.tfrecords, str(i) + ".tfrecords"), 146 | args.num_class, args.one_hot) 147 | ) 148 | ) 149 | i = i+1 150 | 151 | i=0 152 | thread = [] 153 | for t in threads: 154 | if i==32: 155 | for ct in thread: 156 | ct.start() 157 | coord.join(thread) 158 | i = 0 159 | thread = [t] 160 | else: 161 | thread.append(t) 162 | i += 1 163 | 164 | for ct in thread: 165 | ct.start() 166 | coord.join(thread) 167 | 168 | if __name__=='__main__': 169 | _save() 170 | -------------------------------------------------------------------------------- /data/tfrecord.sh: -------------------------------------------------------------------------------- 1 | echo "Generating tfrecords ..." 
2 | 3 | ./tfrecord.py -idx image_lists/ -tfs tfrecords/ -im images/ -cls 11166 -one True 4 | -------------------------------------------------------------------------------- /data/tfrecords/0.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data/tfrecords/0.tfrecords -------------------------------------------------------------------------------- /data/tfrecords/1.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data/tfrecords/1.tfrecords -------------------------------------------------------------------------------- /data/train_im_list_tiny.txt: -------------------------------------------------------------------------------- 1 | 17382017900_f7152f6e1c_o.jpg 940:0.8 860:0.8 17:1 1:1 2 | 3672185768_21abfdbdf1_o.jpg 5205:0.9 5188:0.9 5173:0.9 5170:0.9 3 | 74229536_063452179d_o.jpg 9416:1.0 9413:1.0 3:1.0 4 | 333886038_e01496fe70_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 8556:1 5851:1 5193:1 5181:1 5173:1 8582:1 4821:1 4781:1 4767:1 4765:1 1067:1 1041:1 9147:1 9133:1 5 | 8439238840_265104e92b_o.jpg 1552:0.9 1521:0.9 6 | 134423290_0297a1edf0_o.jpg 11089:1 282:1 274:1 16:1 1:1 7 | 6211955974_43e1bbdd9d_o.jpg 10942:0.8 2563:0.8 8 | 435489099_f4a01d6a3b_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 8556:1 4767:1 4765:1 1067:1 1041:1 9 | 3713857698_ce1ac4f0d4_o.jpg 6470:0.8 5174:0.8 8274:0.9 8205:0.9 10 | 2938752319_e66de877cb_o.jpg 11124:0.8 11123:0.8 19:1.0 14:1.0 1:1.0 3:0.9 10973:0.8 5:0.8 13:0.8 11 | 8138634055_b12eb5a88f_o.jpg 17:0.8 1:0.8 3634:0.9 1836:0.9 12 | 3313422599_b50fec0c7a_o.jpg 3761:0.9 3657:0.9 13 | 1557324960_1ae893fed8_o.jpg 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 4129:1 4132:1 14 | 14331938228_80fd35917d_o.jpg 11124:0.8 
11123:0.8 913:0.8 885:0.8 15 | 6071452334_57111b2799_o.jpg 4767:0.9 4765:0.9 16 | 20207897114_fabf17ef03_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 11060:0.9 3:0.9 19:1.0 14:1.0 1:1.0 10973:0.8 5:0.8 4767:1 4765:1 1067:1 1041:1 17 | 2572916531_38735defe1_o.jpg 4063:0.9 1837:0.9 4132:1 4097:1 4089:1 1054:1 1041:1 865:1 2:1 18 | 331204556_044215e293_o.jpg 11003:0.8 4650:0.8 4666:0.8 19 | 306206679_ac2e2d3dd7_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 20 | 3185906982_2734e3ea8e_o.jpg 5177:0.9 5170:0.9 8623:0.9 8493:0.9 9204:1 9198:1 9170:1 8585:1 1042:1 865:1 2:1 21 | 10164826235_a5533c7bdb_o.jpg 1053:0.9 1041:0.9 22 | 11587568003_c9da58b833_o.jpg 5177:0.9 5170:0.9 8623:0.9 8493:0.9 9204:1 9198:1 9170:1 8585:1 1042:1 865:1 2:1 23 | 4505160846_b3797e2559_o.jpg 5851:0.9 5193:0.9 11091:0.8 11090:0.8 11057:0.8 9305:0.8 11157:0.8 5173:0.9 5170:0.9 24 | 16218058281_fe038ddbb3_o.jpg 8666:0.9 8512:0.9 25 | 8423073355_a4547b54aa_o.jpg 913:0.9 885:0.9 8636:0.8 8629:0.8 26 | 8195413631_db78ce5a8e_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 1674:0.9 1205:0.9 27 | 75954166_76b4358319_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 8582:1 4767:1 4765:1 1067:1 1041:1 28 | 8470961329_f084fa4552_o.jpg 2434:0.9 2429:0.9 1832:0.9 29 | 2448038014_fb88b0faeb_o.jpg 11114:0.8 1084:0.8 11124:0.8 11123:0.8 30 | 5839657249_5c2cc9926d_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 5177:1 4767:1 4765:1 1067:1 1041:1 31 | 7155811268_3f3386d274_o.jpg 11165:1 2643:1 2563:1 1833:1 1054:1 1041:1 865:1 2:1 1187:1 1053:1 19:1 14:1 1:1 32 | 2336677673_5cf456df93_o.jpg 4679:0.8 1062:0.8 33 | 15850638445_04d563e775_o.jpg 11057:0.8 9305:0.8 34 | 8247858038_4aaf68da28_o.jpg 3:0.9 1193:0.8 1053:0.8 35 | 4246479857_5a0fa1930c_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10968:0.9 20:0.9 3:0.9 11057:0.8 9305:0.8 9416:1 9413:1 36 | 17268618631_a35e47c7d3_o.jpg 19:1 14:1 1:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 11105:0.8 4678:0.8 11147:0.8 1142:0.8 9486:0.8 
884:0.8 10207:1 3802:1 3780:1 3762:1 3657:1 1836:1 4686:0.8 4679:0.8 1193:1 1053:1 1062:0.9 37 | 5715926508_bb4fe6d4bc_o.jpg 1193:1 1053:1 1041:1 865:1 2:1 38 | 262201507_1454c50290_o.jpg 3805:1 3797:1 3780:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 3802:1 2944:1 2913:1 2896:1 2577:1 1833:1 39 | 4083648900_067cd150d7_o.jpg 9303:0.8 9300:0.8 40 | 21827691670_2103ff9714_o.jpg 1332:0.8 1314:0.8 1053:0.9 1041:0.9 41 | 8121198037_063687512f_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 42 | 6914737323_89438b4958_o.jpg 10942:0.8 2563:0.8 43 | 520809355_40f9d2f09e_o.jpg 9363:0.8 9303:0.8 5173:0.9 5170:0.9 44 | 4583003907_f4c04e43ef_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10942:0.8 2563:0.8 2644:0.8 45 | 15706263331_cae9be57b2_o.jpg 3802:1.0 3780:1.0 3762:1.0 3657:1.0 1836:1.0 1054:1.0 1041:1.0 865:1.0 2:1.0 4063:1.0 1837:1.0 46 | 5650341228_5c8f2d74c5_o.jpg 3611:0.8 1835:0.8 47 | 4081065718_b70e87bdc6_o.jpg 5769:0.8 5665:0.8 1332:0.9 1314:0.9 1340:0.8 48 | 23398906_fae4940560_o.jpg 3802:1 3780:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 49 | 16424257390_2e946aed5f_o.jpg 10949:1 21:1 15:1 1:1 289:1 17:1 3634:0.9 1836:0.9 50 | 7598082136_1d9c366bf3_o.jpg 3845:0.8 3802:0.8 2944:1 2913:1 2896:1 2577:1 1833:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 3780:1 3762:1 3657:1 1836:1 51 | 5059479200_857af7e475_o.jpg 5:0.8 0:0.8 10983:0.8 10974:0.8 962:0.8 863:0.8 52 | 3246667026_5cfa58453c_o.jpg 2569:0.9 1833:0.9 2684:0.9 2655:0.9 2617:0.8 2691:0.8 53 | 4727699820_0527f2ee2a_o.jpg 5851:0.8 5193:0.8 17:1 1:1 5173:0.8 5170:0.8 54 | 3806645650_396b51ddc8_o.jpg 904:1 893:1 890:1 884:1 870:1 859:1 2:1 55 | 15185915283_5486c284d8_o.jpg 899:0.9 890:0.9 56 | 9577264333_488917c90e_o.jpg 858:0.8 2:0.8 1332:0.8 1314:0.8 57 | 5831701088_0cd275facd_o.jpg 5851:0.8 5193:0.8 11141:0.8 11140:0.8 4679:0.9 1062:0.9 58 | 2719045377_bbd3fb7c77_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 8556:1 4063:1 1837:1 1054:1 1041:1 4767:1 4765:1 1067:1 59 | 
5144818210_3ff147c4f5_o.jpg 5851:0.9 5193:0.9 11141:0.8 11140:0.8 11143:1.0 884:1.0 870:1.0 859:1.0 2:1.0 60 | 2898506763_9cc1e3afde_o.jpg 4063:1 1837:1 1054:1 1041:1 865:1 2:1 10207:1 3802:1 3780:1 3762:1 3657:1 1836:1 61 | 8133093285_2949e63a08_o.jpg 5851:0.9 5193:0.9 867:0.8 858:0.8 5:1 0:1 11110:0.8 9969:0.8 10991:0.8 10985:0.9 10976:0.9 10981:0.9 10974:0.9 5173:0.8 5170:0.8 9483:0.9 884:0.9 11143:1.0 870:1.0 859:1.0 2:1.0 62 | 19997213482_a5c47a049c_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 858:0.8 11157:1 9305:1 9300:1 9289:1 1043:1 11143:1.0 884:1.0 870:1.0 859:1.0 63 | 4915052210_a4ef8792a7_o.jpg 1193:0.8 1053:0.8 64 | 4593035529_543a9de68e_o.jpg 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 5:1 0:1 4129:1 4132:1 65 | 1548058686_fc8798b3fd_o.jpg 4063:1 1837:1 1054:1 1041:1 865:1 2:1 1050:1 8766:1 8531:1 5177:1 5170:1 1042:1 3797:1 3780:1 3762:1 3657:1 1836:1 66 | 6229581102_e8a6a51db8_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 4127:1 4102:1 4096:1 4089:1 4063:1 1837:1 1054:1 1041:1 67 | 4551712992_0dff6d87fc_o.jpg 1378:0.9 1193:0.9 1053:0.8 68 | 4144284900_6ebd34882f_o.jpg 5173:0.9 5170:0.9 69 | 3912307968_5881f36683_o.jpg 11091:0.9 11090:0.9 9483:0.9 884:0.9 70 | 32066458_bc255c1de9_o.jpg 1378:0.8 1193:0.8 1053:0.9 71 | 2613610603_81dd2ce03d_o.jpg 9416:0.8 9413:0.8 11124:0.8 11123:0.8 72 | 5048972174_88a85f9fd0_o.jpg 11162:0.8 11161:0.8 2944:0.8 2913:0.8 73 | 15446852851_5e61761d18_o.jpg 5177:0.9 5170:0.9 74 | 16771865540_6bbbeb1545_o.jpg 1162:0.8 1053:0.8 19:1 14:1 1:1 5:1 0:1 3:0.9 10986:0.8 10976:0.8 10974:1 75 | 2459138484_72ee7e0f82_o.jpg 1353:0.9 1193:0.9 1281:0.9 1178:0.9 3:0.9 9486:0.8 884:0.8 11002:0.8 885:0.8 1332:0.8 1314:0.8 913:0.8 1379:0.8 76 | 1583067562_bb24f98be0_o.jpg 5177:0.9 5170:0.9 77 | 3941211541_85119b5dca_o.jpg 2684:1 2655:1 2569:1 1833:1 1054:1 1041:1 865:1 2:1 2080:1 2058:1 1905:1 1829:1 2610:1 78 | 16400759764_07256cb031_o.jpg 961:0.8 863:0.8 998:0.8 958:0.8 19:1.0 14:1.0 1:1.0 1021:0.9 
962:0.9 9290:0.8 1043:0.8 10973:0.9 5:0.9 13:0.8 79 | 19447434033_7820f80b7b_o.jpg 9290:0.8 1043:0.8 80 | 5740477627_dd000e359f_o.jpg 11147:0.8 1142:0.8 858:0.9 2:0.9 10946:0.8 1047:0.8 81 | 3659209345_c6b0a58628_o.jpg 5177:0.9 5170:0.9 82 | 16792450152_16fcdbbd2b_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 4063:1 1837:1 1054:1 1674:0.8 1205:0.8 3802:1 3780:1 3762:1 3657:1 1836:1 1715:1 1700:1 1206:1 9416:1 9413:1 3:1 83 | 15519586065_580457550f_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 19:1.0 14:1.0 1:1.0 11124:0.8 11123:0.8 5769:0.8 5665:0.8 3:0.9 84 | 4467043072_0d48d262df_o.jpg 942:0.8 860:0.8 10946:0.8 1047:0.8 1053:0.9 1041:0.9 4679:0.9 1062:0.9 85 | 15111560160_ffbea0a25b_o.jpg 19:1.0 14:1.0 1:1.0 5:1 0:1 3:0.9 10974:1 4132:1 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 86 | 12727073325_3ba4d64a21_o.jpg 4063:0.9 1837:0.9 4129:0.9 4097:0.9 4184:0.8 4181:0.8 4173:0.9 4132:0.9 1380:1 1193:1 1053:1 1041:1 865:1 2:1 4089:1 1054:1 87 | 4183920312_fda4530fb6_o.jpg 11129:0.9 11127:0.9 88 | 8911845093_eef6449d70_o.jpg 4096:0.9 4089:0.9 4116:0.8 4107:0.8 4100:1.0 4063:1.0 1837:1.0 1054:1.0 1041:1.0 865:1.0 2:1.0 89 | 6805430694_5ea42cf2d1_o.jpg 11129:0.9 11127:0.9 1193:0.8 1053:0.8 90 | 12574918044_6ce025c238_o.jpg 1193:0.8 1053:0.8 91 | 6848997330_fe724e87c9_o.jpg 714:1 597:1 292:1 17:1 1:1 3634:0.9 1836:0.9 9308:1 9302:1 9300:1 9289:1 1043:1 865:1 2:1 24:0.8 21:0.8 602:0.9 844:0.8 92 | 121144061_6cf80b5e86_o.jpg 10996:0.8 8735:0.8 8727:0.9 2569:1 1833:1 1054:1 1041:1 865:1 2:1 5177:1 5170:1 1042:1 2664:1 2653:1 93 | 8106214778_cb9b9b4f1a_o.jpg 5177:0.9 5170:0.9 425:1 289:1 17:1 1:1 10956:0.9 10955:0.9 8623:1 8493:1 1042:1 865:1 2:1 94 | 4934246137_610e420517_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 11063:0.8 11058:0.8 9416:1 9413:1 3:1 95 | 14614550836_c3c9e41c20_o.jpg 5173:0.9 5170:0.9 96 | 4257610875_9c3c940d33_o.jpg 3:0.9 942:0.9 860:0.9 4679:0.8 1062:0.8 97 | 
12285712405_290c0dc702_o.jpg 3781:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:0.9 1837:0.9 98 | 7222487914_847ab61301_o.jpg 5173:0.8 5170:0.8 3916:1 3666:1 1836:1 1054:1 1041:1 865:1 2:1 99 | 14883330897_052967e5d7_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 961:0.9 863:0.9 998:0.8 958:0.8 1021:0.8 962:0.8 9290:0.8 1043:0.8 964:0.8 100 | 8789780074_c70f8ebef7_o.jpg 17:0.8 1:0.8 101 | -------------------------------------------------------------------------------- /data/train_urls_tiny.txt: -------------------------------------------------------------------------------- 1 | https://farm2.staticflickr.com/2034/1557324960_1ae893fed8_o.jpg 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 4129:1 4132:1 2 | https://c3.staticflickr.com/7/6080/6071452334_57111b2799_o.jpg 4767:0.9 4765:0.9 3 | https://c1.staticflickr.com/4/3593/5715926508_bb4fe6d4bc_o.jpg 1193:1 1053:1 1041:1 865:1 2:1 4 | https://farm4.staticflickr.com/99/262201507_1454c50290_o.jpg 3805:1 3797:1 3780:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 3802:1 2944:1 2913:1 2896:1 2577:1 1833:1 5 | https://c7.staticflickr.com/3/2725/4083648900_067cd150d7_o.jpg 9303:0.8 9300:0.8 6 | https://c8.staticflickr.com/1/128/333886038_e01496fe70_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 8556:1 5851:1 5193:1 5181:1 5173:1 8582:1 4821:1 4781:1 4767:1 4765:1 1067:1 1041:1 9147:1 9133:1 7 | https://c2.staticflickr.com/1/587/20207897114_fabf17ef03_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 11060:0.9 3:0.9 19:1.0 14:1.0 1:1.0 10973:0.8 5:0.8 4767:1 4765:1 1067:1 1041:1 8 | https://farm2.staticflickr.com/7532/16218058281_fe038ddbb3_o.jpg 8666:0.9 8512:0.9 9 | https://farm1.staticflickr.com/7655/16400759764_07256cb031_o.jpg 961:0.8 863:0.8 998:0.8 958:0.8 19:1.0 14:1.0 1:1.0 1021:0.9 962:0.9 9290:0.8 1043:0.8 10973:0.9 5:0.9 13:0.8 10 | https://farm8.staticflickr.com/3796/19447434033_7820f80b7b_o.jpg 9290:0.8 1043:0.8 11 | 
https://c7.staticflickr.com/1/42/74229536_063452179d_o.jpg 9416:1.0 9413:1.0 3:1.0 12 | https://farm8.staticflickr.com/8044/8138634055_b12eb5a88f_o.jpg 17:0.8 1:0.8 3634:0.9 1836:0.9 13 | https://farm7.staticflickr.com/2378/2572916531_38735defe1_o.jpg 4063:0.9 1837:0.9 4132:1 4097:1 4089:1 1054:1 1041:1 865:1 2:1 14 | https://farm8.staticflickr.com/8106/8470961329_f084fa4552_o.jpg 2434:0.9 2429:0.9 1832:0.9 15 | https://farm4.staticflickr.com/8593/15850638445_04d563e775_o.jpg 11057:0.8 9305:0.8 16 | https://farm3.staticflickr.com/3393/3185906982_2734e3ea8e_o.jpg 5177:0.9 5170:0.9 8623:0.9 8493:0.9 9204:1 9198:1 9170:1 8585:1 1042:1 865:1 2:1 17 | https://c1.staticflickr.com/5/4018/4583003907_f4c04e43ef_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10942:0.8 2563:0.8 2644:0.8 18 | https://c6.staticflickr.com/3/2542/4081065718_b70e87bdc6_o.jpg 5769:0.8 5665:0.8 1332:0.9 1314:0.9 1340:0.8 19 | https://c5.staticflickr.com/9/8623/16424257390_2e946aed5f_o.jpg 10949:1 21:1 15:1 1:1 289:1 17:1 3634:0.9 1836:0.9 20 | https://c5.staticflickr.com/2/1386/5144818210_3ff147c4f5_o.jpg 5851:0.9 5193:0.9 11141:0.8 11140:0.8 11143:1.0 884:1.0 870:1.0 859:1.0 2:1.0 21 | https://c4.staticflickr.com/6/5828/21827691670_2103ff9714_o.jpg 1332:0.8 1314:0.8 1053:0.9 1041:0.9 22 | https://c8.staticflickr.com/8/7199/6914737323_89438b4958_o.jpg 10942:0.8 2563:0.8 23 | https://c1.staticflickr.com/6/5146/5650341228_5c8f2d74c5_o.jpg 3611:0.8 1835:0.8 24 | https://farm8.staticflickr.com/3866/14883330897_052967e5d7_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 961:0.9 863:0.9 998:0.8 958:0.8 1021:0.8 962:0.8 9290:0.8 1043:0.8 964:0.8 25 | https://farm8.staticflickr.com/3685/8789780074_c70f8ebef7_o.jpg 17:0.8 1:0.8 26 | https://farm3.staticflickr.com/7590/16792450152_16fcdbbd2b_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 4063:1 1837:1 1054:1 1674:0.8 1205:0.8 3802:1 3780:1 3762:1 3657:1 1836:1 1715:1 1700:1 1206:1 9416:1 9413:1 3:1 27 
| https://c1.staticflickr.com/3/2716/4467043072_0d48d262df_o.jpg 942:0.8 860:0.8 10946:0.8 1047:0.8 1053:0.9 1041:0.9 4679:0.9 1062:0.9 28 | https://c7.staticflickr.com/4/3874/15111560160_ffbea0a25b_o.jpg 19:1.0 14:1.0 1:1.0 5:1 0:1 3:0.9 10974:1 4132:1 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 29 | https://c6.staticflickr.com/8/7429/12727073325_3ba4d64a21_o.jpg 4063:0.9 1837:0.9 4129:0.9 4097:0.9 4184:0.8 4181:0.8 4173:0.9 4132:0.9 1380:1 1193:1 1053:1 1041:1 865:1 2:1 4089:1 1054:1 30 | https://c4.staticflickr.com/4/3686/8911845093_eef6449d70_o.jpg 4096:0.9 4089:0.9 4116:0.8 4107:0.8 4100:1.0 4063:1.0 1837:1.0 1054:1.0 1041:1.0 865:1.0 2:1.0 31 | https://c5.staticflickr.com/4/3862/14331938228_80fd35917d_o.jpg 11124:0.8 11123:0.8 913:0.8 885:0.8 32 | https://farm6.staticflickr.com/4009/4246479857_5a0fa1930c_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10968:0.9 20:0.9 3:0.9 11057:0.8 9305:0.8 9416:1 9413:1 33 | https://farm5.staticflickr.com/192/520809355_40f9d2f09e_o.jpg 9363:0.8 9303:0.8 5173:0.9 5170:0.9 34 | https://farm3.staticflickr.com/17/23398906_fae4940560_o.jpg 3802:1 3780:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 35 | https://farm7.staticflickr.com/4060/4257610875_9c3c940d33_o.jpg 3:0.9 942:0.9 860:0.9 4679:0.8 1062:0.8 36 | https://farm1.staticflickr.com/2365/2448038014_fb88b0faeb_o.jpg 11114:0.8 1084:0.8 11124:0.8 11123:0.8 37 | https://c3.staticflickr.com/8/7469/15185915283_5486c284d8_o.jpg 899:0.9 890:0.9 38 | https://c2.staticflickr.com/6/5071/5831701088_0cd275facd_o.jpg 5851:0.8 5193:0.8 11141:0.8 11140:0.8 4679:0.9 1062:0.9 39 | https://farm6.staticflickr.com/4058/4551712992_0dff6d87fc_o.jpg 1378:0.9 1193:0.9 1053:0.8 40 | https://farm1.staticflickr.com/3282/2459138484_72ee7e0f82_o.jpg 1353:0.9 1193:0.9 1281:0.9 1178:0.9 3:0.9 9486:0.8 884:0.8 11002:0.8 885:0.8 1332:0.8 1314:0.8 913:0.8 1379:0.8 41 | https://farm6.staticflickr.com/119/306206679_ac2e2d3dd7_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 42 | 
https://c6.staticflickr.com/4/3756/11587568003_c9da58b833_o.jpg 5177:0.9 5170:0.9 8623:0.9 8493:0.9 9204:1 9198:1 9170:1 8585:1 1042:1 865:1 2:1 43 | https://farm7.staticflickr.com/36/75954166_76b4358319_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 8582:1 4767:1 4765:1 1067:1 1041:1 44 | https://farm6.staticflickr.com/8477/8247858038_4aaf68da28_o.jpg 3:0.9 1193:0.8 1053:0.8 45 | https://c7.staticflickr.com/1/334/19997213482_a5c47a049c_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 858:0.8 11157:1 9305:1 9300:1 9289:1 1043:1 11143:1.0 884:1.0 870:1.0 859:1.0 46 | https://farm2.staticflickr.com/8355/8439238840_265104e92b_o.jpg 1552:0.9 1521:0.9 47 | https://farm3.staticflickr.com/3173/2719045377_bbd3fb7c77_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 8556:1 4063:1 1837:1 1054:1 1041:1 4767:1 4765:1 1067:1 48 | https://farm3.staticflickr.com/2546/3912307968_5881f36683_o.jpg 11091:0.9 11090:0.9 9483:0.9 884:0.9 49 | https://c7.staticflickr.com/4/3051/2613610603_81dd2ce03d_o.jpg 9416:0.8 9413:0.8 11124:0.8 11123:0.8 50 | https://farm8.staticflickr.com/7592/16771865540_6bbbeb1545_o.jpg 1162:0.8 1053:0.8 19:1 14:1 1:1 5:1 0:1 3:0.9 10986:0.8 10976:0.8 10974:1 51 | https://c2.staticflickr.com/4/3404/3672185768_21abfdbdf1_o.jpg 5205:0.9 5188:0.9 5173:0.9 5170:0.9 52 | https://c1.staticflickr.com/3/2023/2938752319_e66de877cb_o.jpg 11124:0.8 11123:0.8 19:1.0 14:1.0 1:1.0 3:0.9 10973:0.8 5:0.8 13:0.8 53 | https://farm2.staticflickr.com/7423/10164826235_a5533c7bdb_o.jpg 1053:0.9 1041:0.9 54 | https://c7.staticflickr.com/3/2642/5839657249_5c2cc9926d_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 5177:1 4767:1 4765:1 1067:1 1041:1 55 | https://c4.staticflickr.com/6/5316/7155811268_3f3386d274_o.jpg 11165:1 2643:1 2563:1 1833:1 1054:1 1041:1 865:1 2:1 1187:1 1053:1 19:1 14:1 1:1 56 | https://farm8.staticflickr.com/3280/2336677673_5cf456df93_o.jpg 4679:0.8 1062:0.8 57 | https://c1.staticflickr.com/9/8779/17268618631_a35e47c7d3_o.jpg 19:1 14:1 1:1 4063:1 
1837:1 1054:1 1041:1 865:1 2:1 11105:0.8 4678:0.8 11147:0.8 1142:0.8 9486:0.8 884:0.8 10207:1 3802:1 3780:1 3762:1 3657:1 1836:1 4686:0.8 4679:0.8 1193:1 1053:1 1062:0.9 58 | https://c3.staticflickr.com/9/8463/8121198037_063687512f_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 59 | https://farm1.staticflickr.com/2056/5740477627_dd000e359f_o.jpg 11147:0.8 1142:0.8 858:0.9 2:0.9 10946:0.8 1047:0.8 60 | https://farm7.staticflickr.com/2925/14614550836_c3c9e41c20_o.jpg 5173:0.9 5170:0.9 61 | https://c6.staticflickr.com/9/8015/7598082136_1d9c366bf3_o.jpg 3845:0.8 3802:0.8 2944:1 2913:1 2896:1 2577:1 1833:1 1054:1 1041:1 865:1 2:1 4063:1 1837:1 3780:1 3762:1 3657:1 1836:1 62 | https://farm4.staticflickr.com/4153/5059479200_857af7e475_o.jpg 5:0.8 0:0.8 10983:0.8 10974:0.8 962:0.8 863:0.8 63 | https://c2.staticflickr.com/2/1343/4727699820_0527f2ee2a_o.jpg 5851:0.8 5193:0.8 17:1 1:1 5173:0.8 5170:0.8 64 | https://c2.staticflickr.com/4/3557/3806645650_396b51ddc8_o.jpg 904:1 893:1 890:1 884:1 870:1 859:1 2:1 65 | https://c8.staticflickr.com/3/2846/9577264333_488917c90e_o.jpg 858:0.8 2:0.8 1332:0.8 1314:0.8 66 | https://farm2.staticflickr.com/3940/15706263331_cae9be57b2_o.jpg 3802:1.0 3780:1.0 3762:1.0 3657:1.0 1836:1.0 1054:1.0 1041:1.0 865:1.0 2:1.0 4063:1.0 1837:1.0 67 | https://farm5.staticflickr.com/3117/3246667026_5cfa58453c_o.jpg 2569:0.9 1833:0.9 2684:0.9 2655:0.9 2617:0.8 2691:0.8 68 | https://farm7.staticflickr.com/4123/4915052210_a4ef8792a7_o.jpg 1193:0.8 1053:0.8 69 | https://c5.staticflickr.com/3/2055/1548058686_fc8798b3fd_o.jpg 4063:1 1837:1 1054:1 1041:1 865:1 2:1 1050:1 8766:1 8531:1 5177:1 5170:1 1042:1 3797:1 3780:1 3762:1 3657:1 1836:1 70 | https://farm7.staticflickr.com/6111/6229581102_e8a6a51db8_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 4127:1 4102:1 4096:1 4089:1 4063:1 1837:1 1054:1 1041:1 71 | https://farm1.staticflickr.com/4109/5048972174_88a85f9fd0_o.jpg 11162:0.8 11161:0.8 2944:0.8 2913:0.8 72 | 
https://farm2.staticflickr.com/2481/3659209345_c6b0a58628_o.jpg 5177:0.9 5170:0.9 73 | https://c1.staticflickr.com/3/2507/4183920312_fda4530fb6_o.jpg 11129:0.9 11127:0.9 74 | https://c5.staticflickr.com/8/7054/6805430694_5ea42cf2d1_o.jpg 11129:0.9 11127:0.9 1193:0.8 1053:0.8 75 | https://farm1.staticflickr.com/2806/12285712405_290c0dc702_o.jpg 3781:1 3762:1 3657:1 1836:1 1054:1 1041:1 865:1 2:1 4063:0.9 1837:0.9 76 | https://c6.staticflickr.com/6/5598/15446852851_5e61761d18_o.jpg 5177:0.9 5170:0.9 77 | https://c7.staticflickr.com/3/2169/1583067562_bb24f98be0_o.jpg 5177:0.9 5170:0.9 78 | https://c8.staticflickr.com/3/2584/3941211541_85119b5dca_o.jpg 2684:1 2655:1 2569:1 1833:1 1054:1 1041:1 865:1 2:1 2080:1 2058:1 1905:1 1829:1 2610:1 79 | https://c4.staticflickr.com/7/6049/6848997330_fe724e87c9_o.jpg 714:1 597:1 292:1 17:1 1:1 3634:0.9 1836:0.9 9308:1 9302:1 9300:1 9289:1 1043:1 865:1 2:1 24:0.8 21:0.8 602:0.9 844:0.8 80 | https://farm3.staticflickr.com/49/121144061_6cf80b5e86_o.jpg 10996:0.8 8735:0.8 8727:0.9 2569:1 1833:1 1054:1 1041:1 865:1 2:1 5177:1 5170:1 1042:1 2664:1 2653:1 81 | https://c5.staticflickr.com/9/8186/8133093285_2949e63a08_o.jpg 5851:0.9 5193:0.9 867:0.8 858:0.8 5:1 0:1 11110:0.8 9969:0.8 10991:0.8 10985:0.9 10976:0.9 10981:0.9 10974:0.9 5173:0.8 5170:0.8 9483:0.9 884:0.9 11143:1.0 870:1.0 859:1.0 2:1.0 82 | https://c7.staticflickr.com/2/1031/4593035529_543a9de68e_o.jpg 4097:1 4089:1 4063:1 1837:1 1054:1 1041:1 865:1 2:1 5:1 0:1 4129:1 4132:1 83 | https://farm6.staticflickr.com/2775/4144284900_6ebd34882f_o.jpg 5173:0.9 5170:0.9 84 | https://c3.staticflickr.com/1/23/32066458_bc255c1de9_o.jpg 1378:0.8 1193:0.8 1053:0.9 85 | https://c1.staticflickr.com/6/5197/7222487914_847ab61301_o.jpg 5173:0.8 5170:0.8 3916:1 3666:1 1836:1 1054:1 1041:1 865:1 2:1 86 | https://c1.staticflickr.com/1/130/331204556_044215e293_o.jpg 11003:0.8 4650:0.8 4666:0.8 87 | https://farm2.staticflickr.com/8487/8195413631_db78ce5a8e_o.jpg 1353:1 1193:1 1053:1 1041:1 865:1 2:1 
1674:0.9 1205:0.9 88 | https://farm1.staticflickr.com/3099/2898506763_9cc1e3afde_o.jpg 4063:1 1837:1 1054:1 1041:1 865:1 2:1 10207:1 3802:1 3780:1 3762:1 3657:1 1836:1 89 | https://farm8.staticflickr.com/5602/15519586065_580457550f_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 19:1.0 14:1.0 1:1.0 11124:0.8 11123:0.8 5769:0.8 5665:0.8 3:0.9 90 | https://c6.staticflickr.com/3/2868/12574918044_6ce025c238_o.jpg 1193:0.8 1053:0.8 91 | https://farm8.staticflickr.com/3512/3313422599_b50fec0c7a_o.jpg 3761:0.9 3657:0.9 92 | https://farm5.staticflickr.com/4059/4505160846_b3797e2559_o.jpg 5851:0.9 5193:0.9 11091:0.8 11090:0.8 11057:0.8 9305:0.8 11157:0.8 5173:0.9 5170:0.9 93 | https://c5.staticflickr.com/9/8510/8423073355_a4547b54aa_o.jpg 913:0.9 885:0.9 8636:0.8 8629:0.8 94 | https://farm6.staticflickr.com/8045/8106214778_cb9b9b4f1a_o.jpg 5177:0.9 5170:0.9 425:1 289:1 17:1 1:1 10956:0.9 10955:0.9 8623:1 8493:1 1042:1 865:1 2:1 95 | https://farm4.staticflickr.com/4134/4934246137_610e420517_o.jpg 5851:1 5193:1 5181:1 5173:1 5170:1 1042:1 865:1 2:1 11063:0.8 11058:0.8 9416:1 9413:1 3:1 96 | https://c4.staticflickr.com/9/8726/17382017900_f7152f6e1c_o.jpg 940:0.8 860:0.8 17:1 1:1 97 | https://c1.staticflickr.com/1/52/134423290_0297a1edf0_o.jpg 11089:1 282:1 274:1 16:1 1:1 98 | https://c2.staticflickr.com/7/6097/6211955974_43e1bbdd9d_o.jpg 10942:0.8 2563:0.8 99 | https://c6.staticflickr.com/1/179/435489099_f4a01d6a3b_o.jpg 5177:1 5170:1 1042:1 865:1 2:1 10964:1 10954:1 4:1 0:1 8556:1 4767:1 4765:1 1067:1 1041:1 100 | https://c8.staticflickr.com/3/2628/3713857698_ce1ac4f0d4_o.jpg 6470:0.8 5174:0.8 8274:0.9 8205:0.9 101 | -------------------------------------------------------------------------------- /data_processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/data_processing/__init__.py 
class Dataset(object):
    """Points to a set of sharded `.tfrecords` files under `data_dir`.

    Methods:
      data_files: returns a python list of all (sharded) data set files.
      reader: returns a reader for a single entry from the data set.
    """

    def __init__(self, data_dir, worker_hosts=None, task_id=0, use_split=False,
                 record_pattern='*tfrecords'):
        """Initialize dataset with the path to the data.

        Args:
          data_dir: directory containing the `.tfrecords` shards.
          worker_hosts: list of worker host strings; only its length is used,
            to spread shards across workers when `use_split` is True.
          task_id: index of this worker inside `worker_hosts`.
          use_split: when True, keep only the shards assigned to this worker.
          record_pattern: glob pattern matching the shard file names.
        """
        self.data_dir = data_dir
        # BUG FIX: the original signature used the mutable default
        # `worker_hosts=[]`; use a None sentinel instead (same behavior).
        self.worker_hosts = [] if worker_hosts is None else worker_hosts
        self.task_id = task_id
        self.use_split = use_split
        self.record_pattern = record_pattern

    def data_filter(self, file_name):
        """Return True when shard `file_name` belongs to this worker.

        Shards are named `<index>.tfrecords`; shard i is assigned to worker
        `i % len(worker_hosts)`.
        """
        idx = int(file_name.split('/')[-1].split('.tfrecords')[0])
        return (idx % len(self.worker_hosts) == self.task_id)

    def data_files(self):
        """Returns a python list of all (sharded) data files.

        Returns:
          python list of all (sharded) data set files.
        Raises:
          ValueError: if there are no data files in the data dir.
        """
        tf_record_pattern = os.path.join(self.data_dir, self.record_pattern)
        data_files = tf.gfile.Glob(tf_record_pattern)
        # BUG FIX: in Python 3 `filter` returns a lazy iterator, which is
        # always truthy (so the empty check below could never fire) and has
        # no len(); materialize it as a list.
        if self.use_split:
            data_files = list(filter(self.data_filter, data_files))
        if not data_files:
            # BUG FIX: the docstring promises ValueError, but the original
            # printed a message and called exit(-1); raise as documented so
            # callers can handle the error.
            raise ValueError('No files found in data dir %s' % self.data_dir)
        tf.logging.info('[%s] Worker[%d/%d] Files[%d] TrainDir[%s]' %
                        (datetime.now(), self.task_id, len(self.worker_hosts),
                         len(data_files), self.data_dir))
        return data_files

    def reader(self):
        """Return a reader for a single entry from the data set.

        See io_ops.py for details of the Reader class.
        Returns:
          Reader object that reads the data set.
        """
        return tf.TFRecordReader()
def rotate_image(image, thread_id=0, scope=None):
    """Randomly rotate `image` by up to +/-45 degrees.

    Rotation is applied only when thread_id == 0; thread_id is drawn
    uniformly from {0, 1, 2, 3}, so roughly 1/4 of the training images
    are rotated.

    Args:
      image: Tensor containing a single image.
      thread_id: preprocessing thread ID.
      scope: Optional scope for name_scope.
    Returns:
      rotated image
    """
    with tf.name_scope(name=scope, default_name='rotate_image'):
        limit = 45 * math.pi / 180
        angle = tf.random_uniform([], minval=-limit, maxval=limit,
                                  dtype=tf.float32, name="angle")
        is_rotation_thread = tf.equal(thread_id, tf.constant(0, dtype=tf.int32))
        return tf.cond(is_rotation_thread,
                       lambda: tf.contrib.image.rotate(image, angle),
                       lambda: image)
def distort_color(image, thread_id=0, scope=None):
    """Apply random color distortion to `image`.

    thread_id is drawn uniformly from {0, 1, 2, 3}; color distortion is
    applied only when thread_id is 0 or 1, so about half of the training
    images are distorted.

    Args:
      image: Tensor containing a single image.
      thread_id: preprocessing thread ID.
      scope: Optional scope for name_scope.
    Returns:
      color-distorted image
    """
    with tf.name_scope(name=scope, default_name='distort_color'):
        def _ordering_for_thread0(img):
            # brightness -> saturation -> hue -> contrast
            img = tf.image.random_brightness(img, max_delta=32. / 255.)
            img = tf.image.random_saturation(img, lower=0.5, upper=1.5)
            img = tf.image.random_hue(img, max_delta=0.2)
            img = tf.image.random_contrast(img, lower=0.5, upper=1.5)
            return img

        def _ordering_for_thread1(img):
            # brightness -> contrast -> saturation -> hue
            img = tf.image.random_brightness(img, max_delta=32. / 255.)
            img = tf.image.random_contrast(img, lower=0.5, upper=1.5)
            img = tf.image.random_saturation(img, lower=0.5, upper=1.5)
            img = tf.image.random_hue(img, max_delta=0.2)
            return img

        image = tf.cond(
            tf.equal(thread_id, tf.constant(0, dtype=tf.int32)),
            lambda: _ordering_for_thread0(image),
            lambda: image
        )
        image = tf.cond(
            tf.equal(thread_id, tf.constant(1, dtype=tf.int32)),
            lambda: _ordering_for_thread1(image),
            lambda: image
        )
        # The random_* ops do not necessarily keep values inside [0, 1].
        return tf.clip_by_value(image, 0.0, 1.0)
def distort_image(image, height, width, object_cover, area_cover, bbox, thread_id=0, scope=None):
    """Distort one image for training a network.

    Args:
      image: Tensor containing a single image.
      height: integer, output image height.
      width: integer, output image width.
      object_cover: float, min_object_covered for the sampled crop.
      area_cover: float, lower bound of the crop area range.
      bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
        where each coordinate is [0, 1) and the coordinates are arranged
        as [ymin, xmin, ymax, xmax].
      thread_id: integer indicating the preprocessing thread.
      scope: Optional scope for name_scope.
    Returns:
      3-D float Tensor of the distorted image used for training.
    """
    with tf.name_scope(name=scope, default_name='distort_image'):
        # Sample a crop window constrained by the supplied bounding boxes.
        crop_begin, crop_size, _ = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=object_cover,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[area_cover, 1.0],
            max_attempts=100,
            use_image_if_no_bounding_boxes=True)
        distorted = tf.slice(image, crop_begin, crop_size)

        # Resize the crop to the network input resolution.
        distorted = tf.image.resize_images(distorted, [height, width])
        distorted.set_shape([height, width, 3])

        # Horizontal flip on roughly half of the training images.
        distorted = tf.image.random_flip_left_right(distorted)

        # Random rotation (applied for thread_id == 0 only).
        distorted = rotate_image(distorted, thread_id)

        # Random color distortion (applied for thread_id 0 or 1).
        distorted = distort_color(distorted, thread_id)
        return distorted
def image_preprocessing(image, output_height, output_width, object_cover, area_cover, train, bbox):
    """Decode and preprocess one image for evaluation or training.

    Args:
      image: Tensor containing a single image, values in [0, 1).
      output_height: integer, output image height.
      output_width: integer, output image width.
      object_cover: float, min_object_covered for the training-time crop.
      area_cover: float, lower bound of the training-time crop area range.
      train: boolean; True applies training distortions, False applies the
        central eval crop.
      bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
        where each coordinate is [0, 1) and the coordinates are arranged as
        [ymin, xmin, ymax, xmax].
    Returns:
      3-D float Tensor containing an appropriately scaled image in [-1, 1].
    Raises:
      ValueError: if user does not provide bounding box
    """
    if train:
        # BUG FIX: `maxval` of tf.random_uniform is EXCLUSIVE, so the
        # original maxval=3 only ever produced thread_id in {0, 1, 2},
        # while rotate_image/distort_color document thread_id as uniform
        # over {0, 1, 2, 3} (rotation on 1/4 of images, color distortion
        # on 1/2).  maxval=4 restores the documented distribution.
        thread_id = tf.random_uniform([], minval=0, maxval=4, dtype=tf.int32, name="thread_id")
        image = distort_image(image, output_height, output_width, object_cover, area_cover, bbox, thread_id)
    else:
        image = eval_image(image, output_height, output_width)

    # Finally, rescale to [-1,1] instead of [0, 1)
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    image = tf.reshape(image, shape=[output_height, output_width, 3])
    return image
finetune.py \ 4 | --mode=train \ 5 | --class_num=1000 \ 6 | --data_dir=./data/imagenet \ 7 | --num_gpus=4 \ 8 | --batch_size=64 \ 9 | --max_iter=600000 \ 10 | --lr=0.1 \ 11 | --lr_decay_step=150000 \ 12 | --lr_decay_factor=0.1 \ 13 | --weight_decay_rate=0.0001 \ 14 | --optimizer='mom' \ 15 | --batch_norm_elipson=1e-5 \ 16 | --resnet_size=101 \ 17 | --prof_interval=500 \ 18 | --log_interval=5000 \ 19 | --snapshot=5000 \ 20 | --model_dir=./out/checkpoint/ \ 21 | --log_dir=./out/log/ \ 22 | --image_size=224 \ 23 | --FixBlock2=True \ 24 | --restore=True \ 25 | --data_format='NCHW' \ 26 | --pretrain_ckpt="./out/checkpoint/model.ckpt" 27 | -------------------------------------------------------------------------------- /example/image_classification.sh: -------------------------------------------------------------------------------- 1 | python2.7 image_classification.py \ 2 | --images=data/im_list_for_classification.txt \ 3 | --top_k_pred=5 \ 4 | --model_dir=checkpoints/resnet.ckpt \ 5 | --dictionary=data/imagenet2012_dictionary.txt 6 | -------------------------------------------------------------------------------- /example/train.sh: -------------------------------------------------------------------------------- 1 | 2 | set -x 3 | 4 | # Parameters for the training 5 | PYTHON=/usr/bin/python 6 | DATASET_DIR=./data/ml-images 7 | WITH_BBOX=FALSE 8 | IMG_SIZE=224 9 | CLASSNUM=11166 10 | RESNET=101 11 | MASK_THRES=0.7 12 | NEG_SELECT=0.1 13 | BATCHSIZE=1 14 | SNAPSHOT=4400 15 | BATCHNORM_DECAY=0.997 16 | BATCHNORM_EPS=1e-5 17 | LR=0.08 18 | LR_DECAY_STEP=110000 19 | LR_DECAY_FACTOR=0.1 20 | WEIGHT_DECAY=0.0001 21 | WARMUP=35200 22 | LR_WARMUP=0.01 23 | LR_WARMUP_DECAY_STEP=4400 24 | LR_WARMUP_DECAY_FACTOR=1.297 25 | MAXIER=440000 26 | DATA_FORMAT='NCHW' 27 | LOG_INTERVAL=100 28 | LOG_DIR="./out/log" 29 | if [[ ! 
-d $LOG_DIR ]]; then 30 | mkdir -p $LOG_DIR 31 | fi 32 | 33 | $PYTHON train.py \ 34 | --data_dir=${DATASET_DIR} \ 35 | --model_dir=./out/checkpoint/imagenet/resnet_model_${NODE_NUM}node_${GPU_NUM}gpu \ 36 | --tmp_model_dir=./out/tmp/imagenet/resnet_model_${NODE_NUM}node_${GPU_NUM}gpu \ 37 | --image_size=${IMG_SIZE} \ 38 | --class_num=${CLASSNUM} \ 39 | --resnet_size=${RESNET} \ 40 | --mask_thres=${MASK_THRES} \ 41 | --neg_select=${NEG_SELECT} \ 42 | --batch_size=${BATCHSIZE} \ 43 | --with_bbox=${WITH_BBOX} \ 44 | --batch_norm_decay=${BATCHNORM_DECAY} \ 45 | --batch_norm_epsilon=${BATCHNORM_EPS} \ 46 | --lr=${LR} \ 47 | --lr_decay_step=${LR_DECAY_STEP} \ 48 | --lr_decay_factor=${LR_DECAY_FACTOR} \ 49 | --weight_decay=${WEIGHT_DECAY} \ 50 | --max_iter=${MAXIER} \ 51 | --snapshot=${SNAPSHOT} \ 52 | --warmup=${WARMUP} \ 53 | --lr_warmup=${LR_WARMUP} \ 54 | --lr_warmup_decay_step=${LR_WARMUP_DECAY_STEP} \ 55 | --lr_warmup_decay_factor=${LR_WARMUP_DECAY_FACTOR} \ 56 | --log_interval=${LOG_INTERVAL} \ 57 | --data_format=${DATA_FORMAT} 2>&1 | tee ${LOG_DIR}/Node${NODE_NUM}_GPU${GPU_NUM}.log 58 | -------------------------------------------------------------------------------- /extract_feature.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | Tencent is pleased to support the open source community by making Tencent ML-Images available. 4 | Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 5 | Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 6 | https://opensource.org/licenses/BSD-3-Clause 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def preprocess(img, type="center"):
    """Crop an image to 224x224 patch(es) for the network input.

    The image is first resized so its smallest side is 256, then cropped.

    Args:
      img: a 3-D numpy array (H, W, C), BGR as loaded by cv2.
      type: crop method, one of [ center | 10crop ].
    Returns:
      numpy array of shape (1, 224, 224, 3) for 'center' or
      (10, 224, 224, 3) for '10crop', scaled to [-1, 1] and RGB order.
    Raises:
      ValueError: if `type` is not a supported crop method.
    """
    # Resize so the smallest side becomes 256, preserving aspect ratio.
    raw_h = float(img.shape[0])
    raw_w = float(img.shape[1])
    new_h = 256.0
    new_w = 256.0
    if raw_h <= raw_w:
        new_w = (raw_w / raw_h) * new_h
    else:
        new_h = (raw_h / raw_w) * new_w
    img = cv.resize(img, (int(new_w), int(new_h)))

    center_y = int((new_h - 224) / 2)
    center_x = int((new_w - 224) / 2)
    if type == 'center':
        imgs = np.zeros((1, 224, 224, 3))
        imgs[0, ...] = img[center_y:center_y + 224, center_x:center_x + 224]
    elif type == '10crop':
        # Four corners + center, then the same five crops on the mirror.
        imgs = np.zeros((10, 224, 224, 3))
        offsets = [(0, 0),
                   (0, int(new_w - 224)),
                   (int(new_h - 224), 0),
                   (int(new_h - 224), int(new_w - 224)),
                   (center_y, center_x)]
        for i, (oy, ox) in enumerate(offsets):
            imgs[i, ...] = img[oy:oy + 224, ox:ox + 224]
        img = cv.flip(img, 1)
        for i, (oy, ox) in enumerate(offsets):
            imgs[i + 5, ...] = img[oy:oy + 224, ox:ox + 224]
    else:
        raise ValueError("Type not support")

    # Rescale to [-1, 1] and swap BGR -> RGB channel order.
    imgs = ((imgs / 255.0) - 0.5) * 2.0
    imgs = imgs[..., ::-1]
    return imgs
# build model
images = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
net = resnet.ResNet(images, is_training=False)
net.build_model()

logits = net.logit
feat = net.feat

# restore model
saver = tf.train.Saver(tf.global_variables())
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(FLAGS.visiable_gpu)
config.log_device_placement = False
sess = tf.Session(config=config)

# load trained model
saver.restore(sess, FLAGS.pretrain_ckpt)

# inference on net; each input line is "<image path> <label> ..."
types = 'center'
ffeat = open(FLAGS.result, 'w')
with open(FLAGS.images, 'r') as lines:
    for line in lines:
        sp = line.rstrip('\n').split(' ')
        raw_img = cv.imread(sp[0])
        # BUG FIX: the original test `type(raw_img)==None` is always False
        # (type() never returns None), so unreadable images were never
        # skipped and crashed later in preprocess.  cv.imread returns None
        # on failure; test for that identity instead.
        if raw_img is None or raw_img.data is None:
            print("open pic " + sp[0] + " failed")
            continue
        imgs = preprocess(raw_img, types)
        feats = sess.run(feat, {images: imgs})
        feats = np.squeeze(feats[0])
        if types == '10crop':
            # Average the features over the ten crops.
            feats = np.mean(feats, axis=0)
        print('feature-length:{}, feature={}'.format(len(feats), feats))
        ffeat.write(sp[0] + "\t" + sp[1] + "\t" + " ".join([str(x) for x in list(feats)]) + '\n')
ffeat.close()
def assign_weights_from_cp(cpk_path, sess, scope):
    '''
    Restore variables from the checkpoint at `cpk_path` into `sess`.

    Skips optimizer state (`Momentum`), the step counter (`global_step`)
    and the classification head (`logits`), so the backbone can be
    finetuned on a data set with a different number of classes.

    Args:
      cpk_path: path to the pretrained checkpoint.
      sess: an active tf.Session into which weights are assigned.
      scope: variable scope under which the target variables live.
    '''
    reader = tf.train.NewCheckpointReader(cpk_path)
    # FIX: removed dead locals (`temp`, `lines`, `i`) — the debug string
    # was parsed but never used.
    var_to_shape_map = reader.get_variable_to_shape_map()
    for key in var_to_shape_map:
        with tf.variable_scope(scope, reuse=True):
            try:
                if key.find(r'global_step') != -1 or key.find(r'Momentum') != -1 or key.find(r'logits') != -1:
                    print("do not need restore from ckpt key:%s" % key)
                    continue
                var = tf.get_variable(key)
                sess.run(var.assign(reader.get_tensor(key)))
                print("restore from ckpt key:%s" % key)
            except ValueError:
                # Variable missing or shape mismatch in the current graph.
                print("can not restore from ckpt key:%s" % key)
def record_parser_fn(value, is_training):
    """Parse one serialized Example into an (image, one-hot label) pair."""
    feature_spec = {
        'width': tf.FixedLenFeature([], dtype=tf.int64, default_value=0),
        'height': tf.FixedLenFeature([], dtype=tf.int64, default_value=0),
        'image': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'label': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'name': tf.FixedLenFeature([], dtype=tf.string, default_value='')
    }

    example = tf.parse_single_example(value, feature_spec)

    # Decode the encoded image bytes and convert to floats in [0, 1).
    decoded = tf.image.decode_image(tf.reshape(example['image'], shape=[]),
                                    FLAGS.image_channels)
    decoded = tf.image.convert_image_dtype(decoded, dtype=tf.float32)

    # Empty bounding-box tensor of shape [1, 0, 4]: no boxes supplied, so the
    # distorted crop sampler falls back to using the whole image.
    empty_bbox = tf.concat(axis=0, values=[ [[]], [[]], [[]], [[]] ])
    empty_bbox = tf.transpose(tf.expand_dims(empty_bbox, 0), [0, 2, 1])

    processed = image_preprocess.preprocess_image(
        image=decoded,
        output_height=FLAGS.image_size,
        output_width=FLAGS.image_size,
        object_cover=0.0,
        area_cover=0.05,
        is_training=is_training,
        bbox=empty_bbox)

    class_id = tf.cast(tf.reshape(example['label'], shape=[]), dtype=tf.int32)
    one_hot_label = tf.one_hot(class_id, FLAGS.class_num)

    return processed, one_hot_label
def average_gradients(tower_grads):
    """ Calculate the average gradient of shared variables across all towers.

    Args:
      tower_grads: list (one entry per tower) of lists of (gradient, variable)
        pairs as returned by optimizer.compute_gradients.
    Returns:
      list of (averaged_gradient, variable) pairs over all towers.
    """
    averaged = []
    # zip(*...) regroups the per-tower lists by variable.
    for grads_and_vars in zip(*tower_grads):
        stacked = tf.concat(
            axis=0,
            values=[tf.expand_dims(g, 0) for g, _ in grads_and_vars])
        # Average over the 'tower' dimension.
        mean_grad = tf.reduce_mean(stacked, 0)
        # The variable object is shared across towers; take the first one.
        shared_var = grads_and_vars[0][1]
        averaged.append((mean_grad, shared_var))
    return averaged
136 | """Input function which provides batches for train or eval.""" 137 | worker_num = 1 138 | num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus 139 | batch_size = FLAGS.batch_size * FLAGS.num_gpus 140 | print('batch_size={}'.format(batch_size)) 141 | dataset = tf.data.Dataset.from_tensor_slices(train_dataset.data_files()) 142 | dataset = dataset.shuffle(buffer_size=FLAGS.file_shuffle_buffer, seed=worker_num) 143 | dataset = dataset.flat_map(tf.data.TFRecordDataset) 144 | dataset = dataset.map(lambda value: record_parser_fn(value, is_training), 145 | num_parallel_calls=num_preprocess_threads) 146 | dataset = dataset.prefetch(batch_size) 147 | 148 | if is_training: 149 | # When choosing shuffle buffer sizes, larger sizes result in better 150 | # randomness, while smaller sizes have better performance. 151 | # dataset = dataset.shuffle(buffer_size=_SHUFFLE_BUFFER, seed=worker_id) 152 | dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer) 153 | # We call repeat after shuffling, rather than before, to prevent separate 154 | # epochs from blending together. 
155 | dataset = dataset.repeat() 156 | 157 | dataset = dataset.batch(batch_size) 158 | iterator = dataset.make_one_shot_iterator() 159 | images, labels = iterator.get_next() 160 | 161 | images_splits = tf.split(images, FLAGS.num_gpus, 0) 162 | labels_splits = tf.split(labels, FLAGS.num_gpus, 0) 163 | summaries = tf.get_collection(tf.GraphKeys.SUMMARIES) 164 | 165 | # Calculate the gradients for each model tower 166 | Loss = None 167 | tower_grads = [] 168 | 169 | #building graphs 170 | with tf.variable_scope(tf.get_variable_scope()): 171 | print("Building graph ...", file=sys.stderr) 172 | for i in xrange(FLAGS.num_gpus): 173 | with tf.device("/gpu:%d" % i): 174 | with tf.name_scope("%s_%d" % ("tower", i)) as scope: 175 | # Build graph 176 | model, Loss = tower_model(images_splits[i], labels_splits[i]) 177 | # Reuse variables for the next tower 178 | tf.get_variable_scope().reuse_variables() 179 | 180 | # Get finetune variables 181 | finetune_vars = [] 182 | if FLAGS.FixBlock2: 183 | finetune_vars = [v for v in tf.trainable_variables() 184 | if v.name.find(r"stages_2") != -1 or 185 | v.name.find(r"stages_3") != -1 or 186 | v.name.find(r"global_pool") != -1 or 187 | v.name.find(r"logits") != -1] 188 | else: 189 | finetune_vars = tf.trainable_variables() 190 | 191 | # Only the summaries from the final tower are retained 192 | summary = tf.get_collection(tf.GraphKeys.SUMMARIES, scope=scope) 193 | grads = optimizer.compute_gradients(Loss, var_list=finetune_vars) 194 | tower_grads.append(grads) 195 | 196 | print("Build Graph (%s/%s)" % (i+1, FLAGS.num_gpus), file=sys.stderr) 197 | summaries.append(summary) 198 | summaries.append(tf.summary.scalar('learning_rate', lr)) 199 | 200 | # Build train op, 201 | grads = average_gradients(tower_grads) 202 | apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) 203 | 204 | ############## 205 | batchnorm_updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 206 | train_op = tf.group(apply_gradient_op, 
batchnorm_updates_op) 207 | 208 | # Build Session : may conf carefully 209 | sess = tf.Session(config=tf.ConfigProto( allow_soft_placement=True, log_device_placement=False)) 210 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.max_to_keep) 211 | summary_op = tf.summary.merge(summaries) 212 | summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) 213 | 214 | # Initialize Model 215 | if FLAGS.restore: 216 | print("Restoring checkpoint from %s" % FLAGS.pretrain_ckpt, file=sys.stderr) 217 | sess.run(tf.global_variables_initializer()) 218 | sess.run(tf.local_variables_initializer()) 219 | 220 | #restore from existing ckpts 221 | assign_weights_from_cp(FLAGS.pretrain_ckpt, sess, tf.get_variable_scope()) 222 | 223 | else: 224 | print("Run global_variables_initializer ..", file=sys.stderr) 225 | sess.run(tf.global_variables_initializer()) 226 | sess.run(tf.local_variables_initializer()) 227 | 228 | sys.stdout.write("---------------Trainging Begin---------------\n") 229 | 230 | batch_duration = 0.0 231 | # Initial queue runner 232 | coord = tf.train.Coordinator() 233 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 234 | 235 | # Start train iter 236 | step = sess.run(global_step) 237 | i=0 238 | while i <= FLAGS.max_iter: 239 | # profile log 240 | if i > 0 and i % FLAGS.prof_interval == 0: 241 | print("%s: step %d, iteration %d, %.2f sec/batch" % 242 | (datetime.now(), step, i, batch_duration)) 243 | 244 | # log 245 | if i > 0 and i % FLAGS.log_interval == 0: 246 | summary_str = sess.run(summary_op) 247 | summary_writer.add_summary(summary_str, i) 248 | 249 | # checkpoint 250 | if i > 0 and i % FLAGS.snapshot == 0: 251 | if not os.path.exists(FLAGS.model_dir): 252 | os.mkdir(FLAGS.model_dir) 253 | ckpt_path = os.path.join(FLAGS.model_dir, "resnet.ckpt") 254 | saver.save(sess, ckpt_path, global_step=global_step) 255 | 256 | # train 257 | batch_start = time.time() 258 | _, step, loss = sess.run([train_op, global_step, Loss]) 259 | 
batch_duration = time.time() - batch_start 260 | i = i + 1 261 | print("%s: step %d, iteration %d, train loss %.2f " % (datetime.now(), step, i, loss)) 262 | coord.request_stop() 263 | 264 | def main(_): 265 | train_dataset = file_db.Dataset(os.path.join(FLAGS.data_dir, 'train')) 266 | train(train_dataset, is_training=(FLAGS.mode == tf.estimator.ModeKeys.TRAIN)) 267 | 268 | if __name__ == "__main__": 269 | tf.app.run() 270 | -------------------------------------------------------------------------------- /flags.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | Tencent is pleased to support the open source community by making Tencent ML-Images available. 4 | Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 5 | Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 6 | https://opensource.org/licenses/BSD-3-Clause 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | """ 9 | 10 | import tensorflow as tf 11 | FLAGS = tf.app.flags.FLAGS 12 | 13 | """Global Options 14 | """ 15 | tf.app.flags.DEFINE_string('mode', 'train', 16 | "run coder in tain or validation mode") 17 | tf.app.flags.DEFINE_integer('max_to_keep', 200, 18 | "save checkpoint here") 19 | 20 | 21 | """Data Options 22 | """ 23 | tf.app.flags.DEFINE_string('data_dir', './data/train/', 24 | "Path to the data TFRecord of Example protos. 
Should save in train and val") 25 | tf.app.flags.DEFINE_integer('batch_size', 512, 26 | "Number of images to process in a batch.") 27 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, 28 | "Number of preprocessing threads per tower. Please make this a multiple of 4") 29 | tf.app.flags.DEFINE_integer('file_shuffle_buffer', 1500, 30 | "buffer size for file names") 31 | tf.app.flags.DEFINE_integer('shuffle_buffer', 2048, 32 | "buffer size for samples") 33 | tf.app.flags.DEFINE_boolean('with_bbox', True, 34 | "whether use bbox in train set") 35 | 36 | """Model Options 37 | """ 38 | tf.app.flags.DEFINE_integer('class_num', 1000, 39 | "distinct class number") 40 | tf.app.flags.DEFINE_integer('resnet_size', 101, 41 | "resnet block layer number [ 18, 34, 50, 101, 152, 200 ]") 42 | tf.app.flags.DEFINE_string('data_format', 'channels_first', 43 | "data format for the input and output data [ channels_first | channels_last ]") 44 | tf.app.flags.DEFINE_integer('image_size', 224, 45 | "default image size for model input layer") 46 | tf.app.flags.DEFINE_integer('image_channels', 3, 47 | "default image channels for model input layer") 48 | tf.app.flags.DEFINE_float('batch_norm_decay', 0.997, 49 | "use for batch normal moving avg") 50 | tf.app.flags.DEFINE_float('batch_norm_epsilon', 1e-5, 51 | "use for batch normal layer, for avoid divide by zero") 52 | tf.app.flags.DEFINE_float('mask_thres', 0.7, 53 | "mask thres for balance pos neg") 54 | tf.app.flags.DEFINE_float('neg_select', 0.3, 55 | "how many class within only negtive samples in a batch select to learn") 56 | 57 | """Train Options 58 | """ 59 | tf.app.flags.DEFINE_boolean('restore', False, 60 | "whether to restore weights from pretrained checkpoint.") 61 | tf.app.flags.DEFINE_integer('num_gpus', 1, 62 | "How many GPUs to use.") 63 | tf.app.flags.DEFINE_string('optimizer','mom', 64 | "optimation algorthm") 65 | tf.app.flags.DEFINE_float('opt_momentum', 0.9, 66 | "moment during learing") 67 | 
tf.app.flags.DEFINE_float('lr', 0.1, 68 | "Initial learning rate.") 69 | tf.app.flags.DEFINE_integer('lr_decay_step', 0, 70 | "Iterations after which learning rate decays.") 71 | tf.app.flags.DEFINE_float('lr_decay_factor', 0.1, 72 | "Learning rate decay factor.") 73 | tf.app.flags.DEFINE_float('weight_decay', 0.0001, 74 | "Tainable Weight l2 loss factor.") 75 | tf.app.flags.DEFINE_integer('warmup', 0, 76 | "Steps when stop warmup, need when use distributed learning") 77 | tf.app.flags.DEFINE_float('lr_warmup', 0.1, 78 | "Initial warmup learning rate, need when use distributed learning") 79 | tf.app.flags.DEFINE_integer('lr_warmup_decay_step', 0, 80 | "Iterations after which learning rate decays, need when use distributed learning") 81 | tf.app.flags.DEFINE_float('lr_warmup_decay_factor', 1.414, 82 | "Warmup learning rate decay factor, need when use distributed learning") 83 | tf.app.flags.DEFINE_integer('max_iter', 1000000, 84 | "max iter number for stopping.-1 forever") 85 | tf.app.flags.DEFINE_integer('test_interval', 0, 86 | "iterations interval for evluate model") 87 | tf.app.flags.DEFINE_integer('test_iter', 0, 88 | "iterations for evluate model") 89 | tf.app.flags.DEFINE_integer('prof_interval', 10, 90 | "iterations for print training time cost") 91 | tf.app.flags.DEFINE_integer('log_interval', 0, 92 | "iterations for print summery log") 93 | tf.app.flags.DEFINE_string('log_dir', './out/log/', 94 | "Directory where to write event logs") 95 | tf.app.flags.DEFINE_string('model_dir', './out/checkpoint/', 96 | "path for saving learned tf model") 97 | tf.app.flags.DEFINE_string('tmp_model_dir', './out/tmp/checkpoint/', 98 | "The directory where the temporary model will be stored") 99 | tf.app.flags.DEFINE_integer('snapshot', 0, 100 | "Iteration for saving model snapshot") 101 | tf.app.flags.DEFINE_integer('epoch_iter', 0, 102 | "Iteration for epoch ") 103 | tf.app.flags.DEFINE_float('drop_rate', 0.5, 104 | "DropOut rate") 105 | 
tf.app.flags.DEFINE_integer('random_seed', 1234, 106 | "Random sedd for neigitive class selected") 107 | tf.app.flags.DEFINE_string('pretrain_ckpt', '', 108 | 'pretrain checkpoint file') 109 | tf.app.flags.DEFINE_boolean('FixBlock2', False, 110 | 'whether to fix the first two block, used for fintuning') 111 | 112 | 113 | """eval options 114 | """ 115 | tf.app.flags.DEFINE_integer('visiable_gpu', 0, 116 | "wihch gpu can use") 117 | tf.app.flags.DEFINE_string('piclist', '', 118 | "eval picture list") 119 | tf.app.flags.DEFINE_integer('interval', 32, 120 | "eval chekpoint interval") 121 | tf.app.flags.DEFINE_integer('start', 0, 122 | "the start index of ckpts") 123 | -------------------------------------------------------------------------------- /git_images/hist_num_annotations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/git_images/hist_num_annotations.png -------------------------------------------------------------------------------- /git_images/num_images_per_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/git_images/num_images_per_class.png -------------------------------------------------------------------------------- /image_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | Tencent is pleased to support the open source community by making Tencent ML-Images available. 4 | Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 5 | Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at 6 | https://opensource.org/licenses/BSD-3-Clause 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | """ 9 | 10 | """Use the saved checkpoint to run single-label image classification""" 11 | 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import numpy as np 18 | import cv2 as cv 19 | import tensorflow as tf 20 | from models import resnet as resnet 21 | from flags import FLAGS 22 | 23 | tf.app.flags.DEFINE_string("result", "label_pred.txt", 24 | "file name to save predictions") 25 | tf.app.flags.DEFINE_string("images", "", 26 | "contains image path per line per image") 27 | tf.app.flags.DEFINE_integer("top_k_pred", 5, 28 | "the top-k predictions") 29 | tf.app.flags.DEFINE_string("dictionary", "", 30 | "the class dictionary of imagenet-2012") 31 | 32 | def _load_dictionary(dict_file): 33 | dictionary = dict() 34 | with open(dict_file, 'r') as lines: 35 | for line in lines: 36 | sp = line.rstrip('\n').split('\t') 37 | idx, name = sp[0], sp[1] 38 | dictionary[idx] = name 39 | return dictionary 40 | 41 | def preprocess(img): 42 | rawH = float(img.shape[0]) 43 | rawW = float(img.shape[1]) 44 | newH = 256.0 45 | newW = 256.0 46 | test_crop = 224.0 47 | 48 | if rawH <= rawW: 49 | newW = (rawW/rawH) * newH 50 | else: 51 | newH = (rawH/rawW) * newW 52 | img = cv.resize(img, (int(newW), int(newH))) 53 | img = img[int((newH-test_crop)/2):int((newH-test_crop)/2)+int(test_crop),int((newW-test_crop)/2):int((newW-test_crop)/2)+int(test_crop)] 54 | img = ((img/255.0) - 0.5) * 2.0 55 | img = img[...,::-1] 56 | return img 57 | 58 | # build model 59 | images = 
tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3]) 60 | net = resnet.ResNet(images, is_training=False) 61 | net.build_model() 62 | 63 | logit = net.logit 64 | prob = tf.nn.softmax(logit) 65 | prob_topk, pred_topk = tf.nn.top_k(prob, k=FLAGS.top_k_pred) 66 | 67 | # restore model 68 | config = tf.ConfigProto() 69 | config.gpu_options.allow_growth = True 70 | config.gpu_options.visible_device_list = str(FLAGS.visiable_gpu) 71 | config.log_device_placement=False 72 | sess = tf.Session(config=config) 73 | saver = tf.train.Saver(tf.global_variables()) 74 | saver.restore(sess, FLAGS.model_dir) 75 | 76 | dictionary = _load_dictionary(FLAGS.dictionary) 77 | 78 | # inference 79 | types= 'center'#'10crop' 80 | orig_stdout = sys.stdout 81 | f = open(FLAGS.result, 'w') 82 | sys.stdout = f 83 | with open(FLAGS.images, 'r') as lines: 84 | for line in lines: 85 | sp = line.rstrip('\n').split('\t') 86 | raw_img = cv.imread(sp[0]) 87 | if type(raw_img)==None or raw_img.data==None : 88 | print("open pic " + sp[0] + " failed") 89 | continue 90 | #imgs = preprocess(raw_img, types) 91 | img = preprocess(raw_img) 92 | logits, probs_topk, preds_topk = sess.run([logit, prob_topk, pred_topk], 93 | {images:np.expand_dims(img, axis=0)}) 94 | probs_topk = np.squeeze(probs_topk) 95 | preds_topk = np.squeeze(preds_topk) 96 | names_topk = [dictionary[str(i)] for i in preds_topk] 97 | print('+++ the predictions of {} is:'.format(sp[0])) 98 | for i, pred in enumerate(preds_topk): 99 | print('%d %s: %.3f' % (pred, names_topk[i], probs_topk[i])) 100 | sys.stdout = orig_stdout 101 | f.close() 102 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/models/__init__.py -------------------------------------------------------------------------------- 
/models/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/models/__init__.pyc -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | """ResNet model 2 | Related papers: 3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 4 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 5 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 6 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 7 | """ 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import sys 13 | sys.path.insert(0, '../') 14 | from flags import FLAGS 15 | import tensorflow as tf 16 | 17 | class ResNet(object): 18 | def __init__(self, images, is_training): 19 | """Net constructor 20 | Args: 21 | images: 4-D Tensor of images with Shape [batch_size, image_size, image_size, 3] 22 | is_training: bool, used in batch normalization 23 | Return: 24 | A wrapper For building model 25 | """ 26 | self.is_training = is_training 27 | self.filters = [256, 512, 1024, 2048] # feature map size for each stages 28 | self.strides = [2, 2, 2, 2] # conv strides for each stages's first block 29 | if FLAGS.resnet_size == 50: # resnet size paramters 30 | self.stages = [3, 4, 6, 3] 31 | elif FLAGS.resnet_size == 101: 32 | self.stages = [3, 4, 23, 3] 33 | elif FLAGS.resnet_size == 152: 34 | self.stages = [3, 8, 36, 3] 35 | else: 36 | raise ValueError('resnet_size %d Not implement:' % FLAGS.resnet_size) 37 | self.data_format = FLAGS.data_format 38 | self.num_classes = FLAGS.class_num 39 | self.images = images 40 | if self.data_format == "NCHW": 41 | self.images = tf.transpose(images, [0, 3, 1, 2]) 42 | 43 | 44 | def build_model(self): 45 | # Initial 
net 46 | with tf.variable_scope('init'): 47 | x = self.images 48 | x = self._pre_padding_conv('init_conv', x, 7, 64, 2) 49 | 50 | # 4 stages 51 | for i in range(0, len(self.stages)): 52 | with tf.variable_scope('stages_%d_block_%d' % (i,0)): 53 | x = self._bottleneck_residual( 54 | x, 55 | self.filters[i], 56 | self.strides[i], 57 | 'conv', 58 | self.is_training) 59 | for j in range(1, self.stages[i]): 60 | with tf.variable_scope('stages_%d_block_%d' % (i,j)): 61 | x = self._bottleneck_residual( 62 | x, 63 | self.filters[i], 64 | 1, 65 | 'identity', 66 | self.is_training) 67 | 68 | # class wise avg pool 69 | with tf.variable_scope('global_pool'): 70 | x = self._batch_norm('bn', x, self.is_training) 71 | x = self._relu(x) 72 | x = self._global_avg_pool(x) 73 | 74 | # extract features 75 | self.feat=x 76 | 77 | # logits 78 | with tf.variable_scope("logits"): 79 | self.logit = self._fully_connected(x, out_dim=self.num_classes) 80 | 81 | return self.logit 82 | 83 | def _bottleneck_residual(self, x, out_channel, strides, _type, is_training): 84 | """Residual Block 85 | Args: 86 | x : A 4-D tensor 87 | out_channels : out feature map size of residual block 88 | strides : conv strides of block 89 | _type: short cut type, 'conv' or 'identity' 90 | is_training : A Boolean for whether the model is in training or inference mdoel 91 | """ 92 | # short cut 93 | orig_x = x 94 | if _type=='conv': 95 | orig_x = self._batch_norm('conv1_b1_bn', orig_x, is_training) 96 | orig_x = self._relu(orig_x) 97 | orig_x = self._pre_padding_conv('conv1_b1', orig_x, 1, out_channel, strides) 98 | 99 | # bottleneck_residual_block 100 | x = self._batch_norm('conv1_b2_bn', x, is_training) 101 | x = self._relu(x) 102 | x = self._pre_padding_conv('conv1_b2', x, 1, out_channel/4, 1) 103 | x = self._batch_norm('conv2_b2_bn', x, is_training) 104 | x = self._relu(x) 105 | x = self._pre_padding_conv('conv2_b2', x, 3, out_channel/4, strides) 106 | x = self._batch_norm('conv3_b2_bn', x, is_training) 107 | x = 
self._relu(x) 108 | x = self._pre_padding_conv('conv3_b2', x, 1, out_channel, 1) 109 | 110 | # sum 111 | return x + orig_x 112 | 113 | def _batch_norm(self, name, x, is_training=True): 114 | """Batch normalization. 115 | Considering the performance, we use batch_normalization in contrib/layers/python/layers/layers.py 116 | instead of tf.nn.batch_normalization and set fused=True 117 | Args: 118 | x: input tensor 119 | is_training: Whether to return the output in training mode or in inference mode, use the argment 120 | in finetune 121 | """ 122 | with tf.variable_scope(name): 123 | return tf.layers.batch_normalization( 124 | inputs=x, 125 | axis=1 if self.data_format == 'NCHW' else 3, 126 | momentum = FLAGS.batch_norm_decay, 127 | epsilon = FLAGS.batch_norm_epsilon, 128 | center=True, 129 | scale=True, 130 | training=is_training, 131 | fused=True 132 | ) 133 | 134 | def _pre_padding(self, x, kernel_size): 135 | """Padding Based On Kernel_size""" 136 | pad_total = kernel_size - 1 137 | pad_beg = pad_total // 2 138 | pad_end = pad_total - pad_beg 139 | if self.data_format == 'NCHW': 140 | x = tf.pad(x, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]]) 141 | else: 142 | x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) 143 | return x 144 | 145 | def _pre_padding_conv(self, name, x, kernel_size, out_channels, strides, bias=False): 146 | """Convolution 147 | As the way of padding in conv is depended on input size and kernel size, which is very different with caffe 148 | So we will do pre-padding to Align the padding operation. 
149 | Args: 150 | x : A 4-D tensor 151 | kernel_size : size of kernel, here we just use square conv kernel 152 | out_channels : out feature map size 153 | strides : conv stride 154 | bias : bias may always be false 155 | """ 156 | if strides > 1: 157 | x = self._pre_padding(x, kernel_size) 158 | with tf.variable_scope(name): 159 | return tf.layers.conv2d( 160 | inputs = x, 161 | filters = out_channels, 162 | kernel_size=kernel_size, 163 | strides=strides, 164 | padding=('SAME' if strides == 1 else 'VALID'), 165 | use_bias=bias, 166 | kernel_initializer=tf.variance_scaling_initializer(), 167 | data_format= 'channels_first' if self.data_format == 'NCHW' else 'channels_last') 168 | 169 | def _relu(self, x, leakiness=0.0): 170 | """ 171 | Relu. With optical leakiness support 172 | Note: if leakiness set zero, we will use tf.nn.relu for concern about performance 173 | Args: 174 | x : A 4-D tensor 175 | leakiness : slope when x < 0 176 | """ 177 | if leakiness==0.0: 178 | return tf.nn.relu(x) 179 | else: 180 | return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu') 181 | 182 | def _global_avg_pool(self, x): 183 | """ 184 | Global Average Pool, for concern about performance we use tf.reduce_mean 185 | instead of tf.layers.average_pooling2d 186 | Args: 187 | x: 4-D Tensor 188 | """ 189 | assert x.get_shape().ndims == 4 190 | axes = [2, 3] if self.data_format == 'NCHW' else [1, 2] 191 | return tf.reduce_mean(x, axes, keep_dims=True) 192 | 193 | def _fully_connected(self, x, out_dim): 194 | """ 195 | As tf.layers.dense need 2-D tensor, reshape it first 196 | Args: 197 | x : 4-D Tensor 198 | out_dim : dimensionality of the output space. 
199 | """ 200 | assert x.get_shape().ndims == 4 201 | axes = 1 if self.data_format == 'NCHW' else -1 202 | x = tf.reshape(x, shape=[-1, x.get_shape()[axes]]) 203 | return tf.layers.dense(x, units = out_dim) 204 | -------------------------------------------------------------------------------- /models/resnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/tencent-ml-images/182631879cdb3d44d594d13d3f29a98bf7acdf81/models/resnet.pyc -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tencent is pleased to support the open source community by making Tencent ML-Images available. 3 | Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. 4 | Licensed under the BSD 3-Clause License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 5 | https://opensource.org/licenses/BSD-3-Clause 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
7 | """ 8 | 9 | 10 | """Runs a ResNet model on the ImageNet dataset.""" 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import os 16 | import sys 17 | import math 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from data_processing import dataset as file_db 22 | from data_processing import image_preprocessing as image_preprocess 23 | from models import resnet as resnet 24 | from flags import FLAGS 25 | 26 | def record_parser_fn(value, is_training): 27 | """Parse an image record from `value`.""" 28 | keys_to_features = { 29 | 'width': tf.FixedLenFeature([], dtype=tf.int64, default_value=0), 30 | 'height': tf.FixedLenFeature([], dtype=tf.int64, default_value=0), 31 | 'image': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 32 | 'label': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 33 | 'name': tf.FixedLenFeature([], dtype=tf.string, default_value='') 34 | } 35 | 36 | parsed = tf.parse_single_example(value, keys_to_features) 37 | 38 | image = tf.image.decode_image(tf.reshape(parsed['image'], shape=[]), 39 | FLAGS.image_channels) 40 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 41 | 42 | bbox = tf.concat(axis=0, values=[ [[]], [[]], [[]], [[]] ]) 43 | bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1]) 44 | image = image_preprocess.preprocess_image( 45 | image=image, 46 | output_height=FLAGS.image_size, 47 | output_width=FLAGS.image_size, 48 | object_cover=0.7, 49 | area_cover=0.7, 50 | is_training=is_training, 51 | bbox=bbox) 52 | 53 | label = tf.reshape(tf.decode_raw(parsed['label'], tf.float32), shape=[FLAGS.class_num,]) 54 | 55 | return image, label 56 | 57 | def input_fn(is_training, data_dir, batch_size, num_epochs=1): 58 | """Input function which provides batches for train or eval.""" 59 | dataset = None 60 | if is_training: 61 | dataset = file_db.Dataset(os.path.join(data_dir, 'train')) 62 | else: 63 | dataset = 
file_db.Dataset(os.path.join(data_dir, 'val')) 64 | 65 | worker_id = 0 66 | worker_num = 1 67 | 68 | dataset = tf.data.Dataset.from_tensor_slices(dataset.data_files()) 69 | 70 | # divide the dataset 71 | if is_training: 72 | dataset = dataset.shuffle(buffer_size=FLAGS.file_shuffle_buffer, seed=worker_num) 73 | dataset = dataset.shard(worker_num, worker_id) 74 | 75 | dataset = dataset.flat_map(tf.data.TFRecordDataset) 76 | dataset = dataset.map(lambda value: record_parser_fn(value, is_training), 77 | num_parallel_calls=5) 78 | dataset = dataset.prefetch(batch_size) 79 | 80 | if is_training: 81 | # When choosing shuffle buffer sizes, larger sizes result in better 82 | # randomness, while smaller sizes have better performance. 83 | # dataset = dataset.shuffle(buffer_size=_SHUFFLE_BUFFER, seed=worker_id) 84 | dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer) 85 | 86 | # We call repeat after shuffling, rather than before, to prevent separate 87 | # epochs from blending together. 88 | dataset = dataset.repeat() 89 | 90 | dataset = dataset.batch(batch_size) 91 | iterator = dataset.make_one_shot_iterator() 92 | images, labels = iterator.get_next() 93 | return images, labels 94 | 95 | def resnet_model_fn(features, labels, mode, params): 96 | """Our model_fn for ResNet to be used with our Estimator.""" 97 | tf.summary.image('images', features, max_outputs=6) 98 | 99 | # build model 100 | net = resnet.ResNet(features, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) 101 | logits = net.build_model() 102 | predictions = { 103 | 'classes': tf.argmax(logits, axis=1), 104 | 'probabilities': tf.nn.softmax(logits, name='softmax_tensor') 105 | } 106 | 107 | if mode == tf.estimator.ModeKeys.PREDICT: 108 | return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) 109 | 110 | # Calculate loss, which includes softmax cross entropy and L2 regularization. 111 | # a. 
get loss coeficiente 112 | pos_mask = tf.reduce_sum( 113 | tf.cast( 114 | tf.greater_equal( 115 | labels, tf.fill(tf.shape(labels), FLAGS.mask_thres)), 116 | tf.float32), 117 | 0) 118 | pos_curr_count = tf.cast(tf.greater( pos_mask, 0), tf.float32) 119 | neg_curr_count = tf.cast(tf.less_equal(pos_mask, 0), tf.float32) 120 | pos_count = tf.Variable(tf.zeros(shape=[FLAGS.class_num,]), trainable=False) 121 | neg_count = tf.Variable(tf.zeros(shape=[FLAGS.class_num,]), trainable=False) 122 | neg_select = tf.cast( 123 | tf.less_equal( 124 | tf.random_uniform( 125 | shape=[FLAGS.class_num,], 126 | minval=0, maxval=1, 127 | seed = FLAGS.random_seed), 128 | FLAGS.neg_select), 129 | tf.float32) 130 | tf.summary.histogram('pos_curr_count', pos_curr_count) 131 | tf.summary.histogram('neg_curr_count', neg_curr_count) 132 | tf.summary.histogram('neg_select', neg_select) 133 | with tf.control_dependencies([pos_curr_count, neg_curr_count, neg_select]): 134 | pos_count = tf.assign_sub( 135 | tf.assign_add(pos_count, pos_curr_count), 136 | tf.multiply(pos_count, neg_curr_count)) 137 | neg_count = tf.assign_sub( 138 | tf.assign_add(neg_count, tf.multiply(neg_curr_count, neg_select)), 139 | tf.multiply(neg_count, pos_curr_count)) 140 | tf.summary.histogram('pos_count', pos_count) 141 | tf.summary.histogram('neg_count', neg_count) 142 | pos_loss_coef = -1 * (tf.log((0.01 + pos_count)/10)/tf.log(10.0)) 143 | pos_loss_coef = tf.where( 144 | tf.greater(pos_loss_coef, tf.fill(tf.shape(pos_loss_coef), 0.01)), 145 | pos_loss_coef, 146 | tf.fill(tf.shape(pos_loss_coef), 0.01)) 147 | pos_loss_coef = tf.multiply(pos_loss_coef, pos_curr_count) 148 | tf.summary.histogram('pos_loss_coef', pos_loss_coef) 149 | neg_loss_coef = -1 * (tf.log((8 + neg_count)/10)/tf.log(10.0)) 150 | neg_loss_coef = tf.where( 151 | tf.greater(neg_loss_coef, tf.fill(tf.shape(neg_loss_coef), 0.01)), 152 | neg_loss_coef, 153 | tf.fill(tf.shape(neg_loss_coef), 0.001)) 154 | neg_loss_coef = tf.multiply(neg_loss_coef, 
tf.multiply(neg_curr_count, neg_select)) 155 | tf.summary.histogram('neg_loss_coef', neg_loss_coef) 156 | loss_coef = tf.add(pos_loss_coef, neg_loss_coef) 157 | tf.summary.histogram('loss_coef', loss_coef) 158 | 159 | # b. get non-negative mask 160 | non_neg_mask = tf.fill(tf.shape(labels), -1.0, name='non_neg') 161 | non_neg_mask = tf.cast(tf.not_equal(labels, non_neg_mask), tf.float32) 162 | tf.summary.histogram('non_neg', non_neg_mask) 163 | 164 | # cal loss 165 | cross_entropy = tf.nn.weighted_cross_entropy_with_logits( 166 | logits=logits, targets=labels, pos_weight=12, name='sigmod_cross_entropy') 167 | tf.summary.histogram('sigmod_ce', cross_entropy) 168 | cross_entropy_cost = tf.reduce_sum(tf.reduce_mean(cross_entropy * non_neg_mask, axis=0) * loss_coef) 169 | 170 | # Create a tensor named cross_entropy for logging purposes. 171 | tf.identity(cross_entropy_cost, name='cross_entropy') 172 | tf.summary.scalar('cross_entropy', cross_entropy_cost) 173 | 174 | # Add weight decay to the loss. We exclude the batch norm variables because 175 | # doing so leads to a small improvement in accuracy. 176 | loss = cross_entropy_cost + FLAGS.weight_decay * tf.add_n( 177 | [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'batch_normalization' not in v.name]) 178 | 179 | if mode == tf.estimator.ModeKeys.TRAIN: 180 | # Scale the learning rate linearly with the batch size. When the batch size 181 | # is 256, the learning rate should be 0.1. 
182 | lr_warmup = FLAGS.lr_warmup 183 | warmup_step = FLAGS.warmup 184 | warmup_decay_step = FLAGS.lr_warmup_decay_step 185 | warmup_decay_factor = FLAGS.lr_warmup_decay_factor 186 | global_step = tf.train.get_or_create_global_step() 187 | boundaries = [ 188 | int(FLAGS.lr_decay_step * epoch) for epoch in [1, 2, 3, 4]] 189 | values = [ 190 | FLAGS.lr * decay for decay in [1, 0.1, 0.01, 1e-3, 1e-4]] 191 | learning_rate = tf.train.piecewise_constant( 192 | tf.cast(global_step, tf.int32), boundaries, values) 193 | 194 | # Linear Scaling Rule and Gradual Warmup 195 | lr = tf.cond( 196 | global_step < warmup_step, 197 | lambda: tf.train.exponential_decay( 198 | lr_warmup, 199 | global_step, 200 | warmup_decay_step, 201 | warmup_decay_factor, 202 | staircase=True 203 | ), 204 | lambda: learning_rate 205 | ) 206 | 207 | # Create a tensor named learning_rate for logging purposes. 208 | tf.identity(lr, name='learning_rate') 209 | tf.summary.scalar('learning_rate', lr) 210 | 211 | optimizer = tf.train.MomentumOptimizer( 212 | learning_rate=lr, 213 | momentum=FLAGS.opt_momentum) 214 | 215 | # Batch norm requires update_ops to be added as a train_op dependency. 216 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 217 | with tf.control_dependencies(update_ops): 218 | train_op = optimizer.minimize(loss, global_step) 219 | else: 220 | train_op = None 221 | 222 | # Build evaluate metrics 223 | accuracy = tf.metrics.accuracy( 224 | tf.argmax(labels, axis=1), predictions['classes']) 225 | metrics = {'accuracy': accuracy} 226 | tf.identity(accuracy[1], name='train_accuracy') 227 | tf.summary.scalar('train_accuracy', accuracy[1]) 228 | 229 | return tf.estimator.EstimatorSpec( 230 | mode=mode, 231 | predictions=predictions, 232 | loss=loss, 233 | train_op=train_op, 234 | eval_metric_ops=metrics) 235 | 236 | def main(_): 237 | # Using the Winograd non-fused algorithms provides a small performance boost. 
238 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 239 | 240 | config = tf.ConfigProto() 241 | config.gpu_options.allow_growth = True 242 | config.gpu_options.visible_device_list = str(FLAGS.visiable_gpu) 243 | 244 | model_path = FLAGS.model_dir 245 | max_ckp_num = (FLAGS.max_to_keep) 246 | run_config = tf.estimator.RunConfig(save_checkpoints_steps=FLAGS.snapshot, 247 | keep_checkpoint_max=max_ckp_num, 248 | session_config=config, 249 | save_summary_steps=FLAGS.log_interval) 250 | resnet_classifier = tf.estimator.Estimator( 251 | model_fn=resnet_model_fn, 252 | model_dir=model_path, 253 | config=run_config, 254 | params={ 255 | 'resnet_size': FLAGS.resnet_size, 256 | 'data_format': FLAGS.data_format, 257 | 'batch_size': FLAGS.batch_size, 258 | } 259 | ) 260 | tensors_to_log = { 261 | 'learning_rate': 'learning_rate', 262 | 'cross_entropy': 'cross_entropy', 263 | 'train_accuracy': 'train_accuracy' 264 | } 265 | 266 | logging_hook = tf.train.LoggingTensorHook( 267 | tensors=tensors_to_log, every_n_iter=FLAGS.log_interval, at_end=True) 268 | 269 | print('Total run steps = {}'.format(FLAGS.max_iter)) 270 | hook_list = [logging_hook] 271 | resnet_classifier.train( 272 | input_fn=lambda: input_fn(True, FLAGS.data_dir, FLAGS.batch_size), 273 | steps=FLAGS.max_iter, 274 | hooks=hook_list 275 | ) 276 | 277 | if __name__ == '__main__': 278 | tf.logging.set_verbosity(tf.logging.INFO) 279 | tf.app.run() 280 | --------------------------------------------------------------------------------