├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Docker ├── Dockerfile ├── README.md ├── build.sh └── run.sh ├── LICENSE ├── README.md ├── __init__.py ├── clustering.py ├── download_model.sh ├── eval_linear.py ├── eval_linear.sh ├── eval_retrieval.py ├── eval_retrieval.sh ├── eval_voc_classif.py ├── eval_voc_classif.sh ├── eval_voc_classif_all.sh ├── eval_voc_classif_fc6_8.sh ├── main.py ├── main.sh ├── models ├── __init__.py ├── alexnet.py └── vgg16.py ├── util.py └── visu ├── activ-retrieval.py ├── activ-retrieval.sh ├── gradient_ascent.py └── gradient_ascent.sh /.gitignore: -------------------------------------------------------------------------------- 1 | **/.*.swp 2 | *.ipynb 3 | **/*.pyc 4 | *.pyc 5 | .ipynb_checkpoints 6 | third-parties/ 7 | third-parties/* 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | In the context of this project, we do not expect pull requests. 4 | If you find a bug, or would like to suggest an improvement, please open an issue. 5 | -------------------------------------------------------------------------------- /Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | #FROM ubuntu:14.04 2 | #FROM anibali/pytorch:cuda-8.0 3 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 4 | 5 | WORKDIR /usr/src/app 6 | 7 | RUN apt-get update && apt-get install -y \ 8 | python2.7 \ 9 | python-pip \ 10 | git \ 11 | vim \ 12 | wget \ 13 | curl \ 14 | cmake \ 15 | build-essential 16 | 17 | RUN curl -o miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh &&\ 18 | chmod +x miniconda.sh &&\ 19 | ./miniconda.sh -b -p /opt/conda &&\ 20 | rm miniconda.sh 21 | 22 | RUN export PATH=$PATH:/opt/conda/bin &&\ 23 | conda create -n pytorch27 python=2 24 | 25 | # Make RUN commands use the new environment: 26 | SHELL ["conda", "run", "-n", "pytorch27", "/bin/bash", "-c"] 27 | ENV PATH="/opt/conda/bin:${PATH}" 28 | 29 | RUN export CMAKE_PREFIX_PATH=/opt/conda/ &&\ 30 | conda install numpy mkl setuptools cmake cffi scikit-learn &&\ 31 | apt-get -y install gcc libblas-dev liblapack-dev &&\ 32 | conda install -c soumith magma-cuda80 &&\ 33 | pip install torchvision==0.1.8 34 | 35 | RUN wget https://github.com/pytorch/pytorch/archive/v0.1.8.tar.gz &&\ 36 | tar -xzf v0.1.8.tar.gz &&\ 37 | rm v0.1.8.tar.gz &&\ 38 | cd pytorch-0.1.8 &&\ 39 | pip install -r requirements.txt &&\ 40 | python setup.py install 41 | 42 | RUN wget https://github.com/facebookresearch/faiss/archive/v1.3.0.tar.gz &&\ 43 | tar -xzf v1.3.0.tar.gz &&\ 44 | rm v1.3.0.tar.gz &&\ 45 | cd faiss-1.3.0 &&\ 46 | ./configure &&\ 47 | make &&\ 48 | make install &&\ 49 | cd gpu &&\ 50 | make -j &&\ 51 | cd ../python &&\ 52 | make _swigfaiss_gpu.so &&\ 53 | cd ../ &&\ 54 | make py &&\ 55 | conda init bash 56 | 57 | RUN pip install bpython future 58 | RUN echo "conda activate pytorch27" >> /root/.bashrc 59 | 60 | ENV 
PYTHONPATH="/usr/src/app/faiss-1.3.0/python/:${PYTHONPATH}" 61 | COPY . . 62 | -------------------------------------------------------------------------------- /Docker/README.md: -------------------------------------------------------------------------------- 1 | # Installation Using Docker 2 | 3 | 1. Run `bash build.sh` 4 | 1. Run `bash run.sh` 5 | -------------------------------------------------------------------------------- /Docker/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build . -t torch 4 | -------------------------------------------------------------------------------- /Docker/run.sh: -------------------------------------------------------------------------------- 1 | docker run --gpus all -it --name deepcluster torch /bin/bash 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. 
Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | Section 1 -- Definitions. 71 | 72 | a. Adapted Material means material subject to Copyright and Similar 73 | Rights that is derived from or based upon the Licensed Material 74 | and in which the Licensed Material is translated, altered, 75 | arranged, transformed, or otherwise modified in a manner requiring 76 | permission under the Copyright and Similar Rights held by the 77 | Licensor. For purposes of this Public License, where the Licensed 78 | Material is a musical work, performance, or sound recording, 79 | Adapted Material is always produced where the Licensed Material is 80 | synched in timed relation with a moving image. 81 | 82 | b. Adapter's License means the license You apply to Your Copyright 83 | and Similar Rights in Your contributions to Adapted Material in 84 | accordance with the terms and conditions of this Public License. 85 | 86 | c. Copyright and Similar Rights means copyright and/or similar rights 87 | closely related to copyright including, without limitation, 88 | performance, broadcast, sound recording, and Sui Generis Database 89 | Rights, without regard to how the rights are labeled or 90 | categorized. For purposes of this Public License, the rights 91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 92 | Rights. 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. 
Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. NonCommercial means not primarily intended for or directed towards 116 | commercial advantage or monetary compensation. For purposes of 117 | this Public License, the exchange of the Licensed Material for 118 | other material subject to Copyright and Similar Rights by digital 119 | file-sharing or similar means is NonCommercial provided there is 120 | no payment of monetary compensation in connection with the 121 | exchange. 122 | 123 | j. Share means to provide material to the public by any means or 124 | process that requires permission under the Licensed Rights, such 125 | as reproduction, public display, public performance, distribution, 126 | dissemination, communication, or importation, and to make material 127 | available to the public including in ways that members of the 128 | public may access the material from a place and at a time 129 | individually chosen by them. 130 | 131 | k. Sui Generis Database Rights means rights other than copyright 132 | resulting from Directive 96/9/EC of the European Parliament and of 133 | the Council of 11 March 1996 on the legal protection of databases, 134 | as amended and/or succeeded, as well as other essentially 135 | equivalent rights anywhere in the world. 136 | 137 | l. You means the individual or entity exercising the Licensed Rights 138 | under this Public License. Your has a corresponding meaning. 139 | 140 | Section 2 -- Scope. 141 | 142 | a. License grant. 143 | 144 | 1. Subject to the terms and conditions of this Public License, 145 | the Licensor hereby grants You a worldwide, royalty-free, 146 | non-sublicensable, non-exclusive, irrevocable license to 147 | exercise the Licensed Rights in the Licensed Material to: 148 | 149 | a. reproduce and Share the Licensed Material, in whole or 150 | in part, for NonCommercial purposes only; and 151 | 152 | b. produce, reproduce, and Share Adapted Material for 153 | NonCommercial purposes only. 154 | 155 | 2. Exceptions and Limitations. For the avoidance of doubt, where 156 | Exceptions and Limitations apply to Your use, this Public 157 | License does not apply, and You do not need to comply with 158 | its terms and conditions. 159 | 160 | 3. Term. The term of this Public License is specified in Section 161 | 6(a). 162 | 163 | 4. Media and formats; technical modifications allowed. The 164 | Licensor authorizes You to exercise the Licensed Rights in 165 | all media and formats whether now known or hereafter created, 166 | and to make technical modifications necessary to do so. The 167 | Licensor waives and/or agrees not to assert any right or 168 | authority to forbid You from making technical modifications 169 | necessary to exercise the Licensed Rights, including 170 | technical modifications necessary to circumvent Effective 171 | Technological Measures. For purposes of this Public License, 172 | simply making modifications authorized by this Section 2(a) 173 | (4) never produces Adapted Material. 174 | 175 | 5. Downstream recipients. 176 | 177 | a. Offer from the Licensor -- Licensed Material. 
Every 178 | recipient of the Licensed Material automatically 179 | receives an offer from the Licensor to exercise the 180 | Licensed Rights under the terms and conditions of this 181 | Public License. 182 | 183 | b. No downstream restrictions. You may not offer or impose 184 | any additional or different terms or conditions on, or 185 | apply any Effective Technological Measures to, the 186 | Licensed Material if doing so restricts exercise of the 187 | Licensed Rights by any recipient of the Licensed 188 | Material. 189 | 190 | 6. No endorsement. Nothing in this Public License constitutes or 191 | may be construed as permission to assert or imply that You 192 | are, or that Your use of the Licensed Material is, connected 193 | with, or sponsored, endorsed, or granted official status by, 194 | the Licensor or others designated to receive attribution as 195 | provided in Section 3(a)(1)(A)(i). 196 | 197 | b. Other rights. 198 | 199 | 1. Moral rights, such as the right of integrity, are not 200 | licensed under this Public License, nor are publicity, 201 | privacy, and/or other similar personality rights; however, to 202 | the extent possible, the Licensor waives and/or agrees not to 203 | assert any such rights held by the Licensor to the limited 204 | extent necessary to allow You to exercise the Licensed 205 | Rights, but not otherwise. 206 | 207 | 2. Patent and trademark rights are not licensed under this 208 | Public License. 209 | 210 | 3. To the extent possible, the Licensor waives any right to 211 | collect royalties from You for the exercise of the Licensed 212 | Rights, whether directly or through a collecting society 213 | under any voluntary or waivable statutory or compulsory 214 | licensing scheme. In all other cases the Licensor expressly 215 | reserves any right to collect such royalties, including when 216 | the Licensed Material is used other than for NonCommercial 217 | purposes. 218 | 219 | Section 3 -- License Conditions. 220 | 221 | Your exercise of the Licensed Rights is expressly made subject to the 222 | following conditions. 223 | 224 | a. Attribution. 225 | 226 | 1. If You Share the Licensed Material (including in modified 227 | form), You must: 228 | 229 | a. retain the following if it is supplied by the Licensor 230 | with the Licensed Material: 231 | 232 | i. identification of the creator(s) of the Licensed 233 | Material and any others designated to receive 234 | attribution, in any reasonable manner requested by 235 | the Licensor (including by pseudonym if 236 | designated); 237 | 238 | ii. a copyright notice; 239 | 240 | iii. a notice that refers to this Public License; 241 | 242 | iv. a notice that refers to the disclaimer of 243 | warranties; 244 | 245 | v. a URI or hyperlink to the Licensed Material to the 246 | extent reasonably practicable; 247 | 248 | b. indicate if You modified the Licensed Material and 249 | retain an indication of any previous modifications; and 250 | 251 | c. indicate the Licensed Material is licensed under this 252 | Public License, and include the text of, or the URI or 253 | hyperlink to, this Public License. 254 | 255 | 2. You may satisfy the conditions in Section 3(a)(1) in any 256 | reasonable manner based on the medium, means, and context in 257 | which You Share the Licensed Material. For example, it may be 258 | reasonable to satisfy the conditions by providing a URI or 259 | hyperlink to a resource that includes the required 260 | information. 261 | 262 | 3. 
If requested by the Licensor, You must remove any of the 263 | information required by Section 3(a)(1)(A) to the extent 264 | reasonably practicable. 265 | 266 | 4. If You Share Adapted Material You produce, the Adapter's 267 | License You apply must not prevent recipients of the Adapted 268 | Material from complying with this Public License. 269 | 270 | Section 4 -- Sui Generis Database Rights. 271 | 272 | Where the Licensed Rights include Sui Generis Database Rights that 273 | apply to Your use of the Licensed Material: 274 | 275 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 276 | to extract, reuse, reproduce, and Share all or a substantial 277 | portion of the contents of the database for NonCommercial purposes 278 | only; 279 | 280 | b. if You include all or a substantial portion of the database 281 | contents in a database in which You have Sui Generis Database 282 | Rights, then the database in which You have Sui Generis Database 283 | Rights (but not its individual contents) is Adapted Material; and 284 | 285 | c. You must comply with the conditions in Section 3(a) if You Share 286 | all or a substantial portion of the contents of the database. 287 | 288 | For the avoidance of doubt, this Section 4 supplements and does not 289 | replace Your obligations under this Public License where the Licensed 290 | Rights include other Copyright and Similar Rights. 291 | 292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 293 | 294 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 304 | 305 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 314 | 315 | c. The disclaimer of warranties and limitation of liability provided 316 | above shall be interpreted in a manner that, to the extent 317 | possible, most closely approximates an absolute disclaimer and 318 | waiver of all liability. 319 | 320 | Section 6 -- Term and Termination. 321 | 322 | a. This Public License applies for the term of the Copyright and 323 | Similar Rights licensed here. However, if You fail to comply with 324 | this Public License, then Your rights under this Public License 325 | terminate automatically. 326 | 327 | b. Where Your right to use the Licensed Material has terminated under 328 | Section 6(a), it reinstates: 329 | 330 | 1. 
automatically as of the date the violation is cured, provided 331 | it is cured within 30 days of Your discovery of the 332 | violation; or 333 | 334 | 2. upon express reinstatement by the Licensor. 335 | 336 | For the avoidance of doubt, this Section 6(b) does not affect any 337 | right the Licensor may have to seek remedies for Your violations 338 | of this Public License. 339 | 340 | c. For the avoidance of doubt, the Licensor may also offer the 341 | Licensed Material under separate terms or conditions or stop 342 | distributing the Licensed Material at any time; however, doing so 343 | will not terminate this Public License. 344 | 345 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 346 | License. 347 | 348 | Section 7 -- Other Terms and Conditions. 349 | 350 | a. The Licensor shall not be bound by any additional or different 351 | terms or conditions communicated by You unless expressly agreed. 352 | 353 | b. Any arrangements, understandings, or agreements regarding the 354 | Licensed Material not stated herein are separate from and 355 | independent of the terms and conditions of this Public License. 356 | 357 | Section 8 -- Interpretation. 358 | 359 | a. For the avoidance of doubt, this Public License does not, and 360 | shall not be interpreted to, reduce, limit, restrict, or impose 361 | conditions on any use of the Licensed Material that could lawfully 362 | be made without permission under this Public License. 363 | 364 | b. To the extent possible, if any provision of this Public License is 365 | deemed unenforceable, it shall be automatically reformed to the 366 | minimum extent necessary to make it enforceable. If the provision 367 | cannot be reformed, it shall be severed from this Public License 368 | without affecting the enforceability of the remaining terms and 369 | conditions. 370 | 371 | c. No term or condition of this Public License will be waived and no 372 | failure to comply consented to unless expressly agreed to by the 373 | Licensor. 374 | 375 | d. Nothing in this Public License constitutes or may be interpreted 376 | as a limitation upon, or waiver of, any privileges and immunities 377 | that apply to the Licensor or You, including from the legal 378 | processes of any jurisdiction or authority. 379 | 380 | ======================================================================= 381 | 382 | Creative Commons is not a party to its public 383 | licenses. Notwithstanding, Creative Commons may elect to apply one of 384 | its public licenses to material it publishes and in those instances 385 | will be considered the “Licensor.” The text of the Creative Commons 386 | public licenses is dedicated to the public domain under the CC0 Public 387 | Domain Dedication. Except for the limited purpose of indicating that 388 | material is shared under a Creative Commons public license or as 389 | otherwise permitted by the Creative Commons policies published at 390 | creativecommons.org/policies, Creative Commons does not authorize the 391 | use of the trademark "Creative Commons" or any other trademark or logo 392 | of Creative Commons without its prior written consent including, 393 | without limitation, in connection with any unauthorized modifications 394 | to any of its public licenses or any other arrangements, 395 | understandings, or agreements concerning use of licensed material. For 396 | the avoidance of doubt, this paragraph does not form part of the 397 | public licenses. 398 | 399 | Creative Commons may be contacted at creativecommons.org. 
400 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Clustering for Unsupervised Learning of Visual Features 2 | 3 | ## News 4 | We release [paper](https://arxiv.org/abs/2006.09882) and [code](https://github.com/facebookresearch/swav) for SwAV, our new self-supervised method. 5 | SwAV pushes self-supervised learning to only 1.2% away from supervised learning on ImageNet with a ResNet-50! 6 | It combines online clustering with a multi-crop data augmentation. 7 | 8 | We also present DeepCluster-v2, which is an improved version of DeepCluster (ResNet-50, better data augmentation, cosine learning rate schedule, MLP projection head, use of centroids, ...). 9 | Check out [DeepCluster-v2 code](https://github.com/facebookresearch/swav/blob/master/main_deepclusterv2.py). 10 | 11 | ## DeepCluster 12 | This code implements the unsupervised training of convolutional neural networks, or convnets, as described in the paper [Deep Clustering for Unsupervised Learning of Visual Features](https://arxiv.org/abs/1807.05520). 13 | 14 | Moreover, we provide the evaluation protocol codes we used in the paper: 15 | * Pascal VOC classification 16 | * Linear classification on activations 17 | * Instance-level image retrieval 18 | 19 | Finally, this code also includes a visualisation module that allows you to visually assess the quality of the learned features. 20 | 21 | ## Requirements 22 | 23 | - a Python 2.7 installation 24 | - the SciPy and scikit-learn packages 25 | - a PyTorch install, version 0.1.8 ([pytorch.org](http://pytorch.org)) 26 | - CUDA 8.0 27 | - a Faiss install ([Faiss](https://github.com/facebookresearch/faiss)) 28 | - the ImageNet dataset (which can be automatically downloaded by recent versions of [torchvision](https://pytorch.org/docs/stable/torchvision/datasets.html#imagenet)) 29 | 30 | ## Pre-trained models 31 | We provide pre-trained models with AlexNet and VGG-16 architectures, available for download. 32 | * The models in Caffe format expect BGR inputs that range in [0, 255]. You do not need to subtract the per-color-channel mean image since the preprocessing of the data is already included in our released models. 33 | * The models in PyTorch format expect RGB inputs that range in [0, 1]. You should preprocess your data before passing it to the released models by normalizing it: ```mean_rgb = [0.485, 0.456, 0.406]```; ```std_rgb = [0.229, 0.224, 0.225]``` 34 | Note that in all our released models, Sobel filters are computed within the models as two convolutional layers (greyscale + Sobel filters). 35 | 36 | You can download all variants by running 37 | ``` 38 | $ ./download_model.sh 39 | ``` 40 | This will fetch the models into `${HOME}/deepcluster_models` by default. 41 | You can change that path by editing the `MODELROOT` variable in the script.
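For the PyTorch checkpoints, the expected preprocessing can be written as a standard torchvision pipeline. The snippet below is only an illustrative sketch reusing the normalization constants quoted above; Sobel filtering is deliberately absent since the released models compute it internally. (With the old torchvision pinned in the requirements, `transforms.Scale` plays the role of `transforms.Resize`.)

```python
import torchvision.transforms as transforms

# Illustrative preprocessing for the released PyTorch models:
# RGB input scaled to [0, 1], then normalized with the statistics above.
# Sobel filtering is NOT applied here -- the released checkpoints compute it
# internally as two extra convolutional layers (greyscale + Sobel).
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),  # converts a PIL image to an RGB tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```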
42 | Direct download links are provided here: 43 | * [AlexNet-PyTorch](https://dl.fbaipublicfiles.com/deepcluster/alexnet/checkpoint.pth.tar) 44 | * [AlexNet-prototxt](https://dl.fbaipublicfiles.com/deepcluster/alexnet/model.prototxt) + [AlexNet-caffemodel](https://dl.fbaipublicfiles.com/deepcluster/alexnet/model.caffemodel) 45 | * [VGG16-PyTorch](https://dl.fbaipublicfiles.com/deepcluster/vgg16/checkpoint.pth.tar) 46 | * [VGG16-prototxt](https://dl.fbaipublicfiles.com/deepcluster/vgg16/model.prototxt) + [VGG16-caffemodel](https://dl.fbaipublicfiles.com/deepcluster/vgg16/model.caffemodel) 47 | 48 | We also provide the last epoch cluster assignments for these models. After downloading, open the file with Python 2: 49 | ``` 50 | import pickle 51 | with open("./alexnet_cluster_assignment.pickle", "rb") as f: 52 | b = pickle.load(f) 53 | ``` 54 | If you're a Python 3 user, specify ```encoding='latin1'``` in the load function. 55 | Each file is a list of (image path, cluster_index) tuples. 56 | * [AlexNet-clusters](https://dl.fbaipublicfiles.com/deepcluster/alexnet/alexnet_cluster_assignment.pickle) 57 | * [VGG16-clusters](https://dl.fbaipublicfiles.com/deepcluster/vgg16/vgg16_cluster_assignment.pickle) 58 | 59 | Finally, we release the features extracted with the DeepCluster model on the ImageNet dataset. 60 | These features have dimension 4096 and correspond to a forward pass through the model up to the penultimate convolutional layer (just before the last ReLU). 61 | If you plan to cluster the features, don't forget to normalize and reduce/whiten them. 62 | * [AlexNet-imnetfeatures](https://dl.fbaipublicfiles.com/deepcluster/alexnet/alexnet_features.pkl) 63 | * [VGG16-imnetfeatures](https://dl.fbaipublicfiles.com/deepcluster/vgg16/vgg16_features.pkl) 64 | 65 | ## Running the unsupervised training 66 | 67 | Unsupervised training can be launched by running: 68 | ``` 69 | $ ./main.sh 70 | ``` 71 | Please provide the path to the data folder: 72 | ``` 73 | DIR=/datasets01/imagenet_full_size/061417/train 74 | ``` 75 | To train an AlexNet network, specify `ARCH=alexnet`, whereas to train a VGG-16 convnet use `ARCH=vgg16`. 76 | 77 | You can also specify where you want to save the clustering logs and checkpoints using: 78 | ``` 79 | EXP=exp 80 | ``` 81 | 82 | During training, models are saved every n iterations (set using the `--checkpoints` flag), and can be found, for instance, in `${EXP}/checkpoints/checkpoint_0.pth.tar`. 83 | A log of the cluster assignments at each epoch can be found in the pickle file `${EXP}/clusters`.
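For reference, `main.sh` launches the training script `main.py`; a typical direct invocation mirroring the defaults documented below might look like this (the `${DIR}` and `${EXP}` paths are placeholders to fill in):

```
python main.py --exp ${EXP} --arch alexnet --sobel --verbose \
    --clustering Kmeans --k 10000 --lr 0.05 --wd -5 --reassign 1 \
    --batch 256 --epochs 200 --workers 4 ${DIR}
```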
84 | 85 | 86 | Full documentation of the unsupervised training code `main.py`: 87 | ``` 88 | usage: main.py [-h] [--arch ARCH] [--sobel] [--clustering {Kmeans,PIC}] 89 | [--nmb_cluster NMB_CLUSTER] [--lr LR] [--wd WD] 90 | [--reassign REASSIGN] [--workers WORKERS] [--epochs EPOCHS] 91 | [--start_epoch START_EPOCH] [--batch BATCH] 92 | [--momentum MOMENTUM] [--resume PATH] 93 | [--checkpoints CHECKPOINTS] [--seed SEED] [--exp EXP] 94 | [--verbose] 95 | DIR 96 | 97 | PyTorch Implementation of DeepCluster 98 | 99 | positional arguments: 100 | DIR path to dataset 101 | 102 | optional arguments: 103 | -h, --help show this help message and exit 104 | --arch ARCH, -a ARCH CNN architecture (default: alexnet) 105 | --sobel Sobel filtering 106 | --clustering {Kmeans,PIC} 107 | clustering algorithm (default: Kmeans) 108 | --nmb_cluster NMB_CLUSTER, --k NMB_CLUSTER 109 | number of cluster for k-means (default: 10000) 110 | --lr LR learning rate (default: 0.05) 111 | --wd WD weight decay pow (default: -5) 112 | --reassign REASSIGN how many epochs of training between two consecutive 113 | reassignments of clusters (default: 1) 114 | --workers WORKERS number of data loading workers (default: 4) 115 | --epochs EPOCHS number of total epochs to run (default: 200) 116 | --start_epoch START_EPOCH 117 | manual epoch number (useful on restarts) (default: 0) 118 | --batch BATCH mini-batch size (default: 256) 119 | --momentum MOMENTUM momentum (default: 0.9) 120 | --resume PATH path to checkpoint (default: None) 121 | --checkpoints CHECKPOINTS 122 | how many iterations between two checkpoints (default: 123 | 25000) 124 | --seed SEED random seed (default: 31) 125 | --exp EXP path to exp folder 126 | --verbose chatty 127 | ``` 128 | 129 | 130 | ## Evaluation protocols 131 | 132 | ### Pascal VOC 133 | 134 | To run the classification task with fine-tuning, launch: 135 | ``` 136 | ./eval_voc_classif_all.sh 137 | ``` 138 | and without fine-tuning: 139 | ``` 140 | ./eval_voc_classif_fc6_8.sh 141 | ``` 142 | 143 | Both these scripts download [this code](https://github.com/philkr/voc-classification). 144 | You need to download the [VOC 2007 dataset](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/). Then, in both the `./eval_voc_classif_all.sh` and `./eval_voc_classif_fc6_8.sh` scripts, set the path `CAFFE` to point to the caffe branch, and `VOC` to point to the Pascal VOC directory. 145 | In `PROTO` and `MODEL`, indicate respectively the path to the prototxt file and the path to the weights of the model to evaluate. 146 | The flag `--train-from` lets you indicate the separation between the frozen and trainable layers. 147 | 148 | We implemented [voc classification](https://github.com/facebookresearch/deepcluster/blob/master/eval_voc_classif.py) with PyTorch. 149 | 150 | Erratum: When training the MLP only (fc6-8), the scaling parameters of the batch-norm layers in the whole network are trained. 151 | With these parameters frozen, we get 70.4 mAP.
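The erratum above concerns the affine (scale/shift) parameters of the batch-norm layers. As a rough PyTorch sketch of the intended setup, and not the exact code behind the 70.4 mAP figure, freezing those parameters before training fc6-8 could look like this:

```python
import torch.nn as nn

def freeze_batchnorm_params(model):
    """Illustrative sketch: freeze the affine parameters (and running
    statistics) of every batch-norm layer in the network."""
    for m in model.modules():
        if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
            m.eval()  # keep running mean/var fixed (re-apply after any model.train() call)
            if m.affine:
                m.weight.requires_grad = False
                m.bias.requires_grad = False
```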
152 | 153 | ### Linear classification on activations 154 | 155 | You can run these transfer tasks using: 156 | ``` 157 | $ ./eval_linear.sh 158 | ``` 159 | 160 | You need to specify the path to the supervised data (ImageNet or Places): 161 | ``` 162 | DATA=/datasets01/imagenet_full_size/061417/ 163 | ``` 164 | the path of your model: 165 | ``` 166 | MODEL=/private/home/mathilde/deepcluster/checkpoint.pth.tar 167 | ``` 168 | and the convolutional layer on top of which to train the classifier: 169 | ``` 170 | CONV=3 171 | ``` 172 | 173 | You can specify where you want to save the output of this experiment (checkpoints and best models) with 174 | ``` 175 | EXP=exp 176 | ``` 177 | 178 | Full documentation for this task: 179 | ``` 180 | usage: eval_linear.py [-h] [--data DATA] [--model MODEL] [--conv {1,2,3,4,5}] 181 | [--tencrops] [--exp EXP] [--workers WORKERS] 182 | [--epochs EPOCHS] [--batch_size BATCH_SIZE] [--lr LR] 183 | [--momentum MOMENTUM] [--weight_decay WEIGHT_DECAY] 184 | [--seed SEED] [--verbose] 185 | 186 | Train linear classifier on top of frozen convolutional layers of an AlexNet. 187 | 188 | optional arguments: 189 | -h, --help show this help message and exit 190 | --data DATA path to dataset 191 | --model MODEL path to model 192 | --conv {1,2,3,4,5} on top of which convolutional layer train logistic 193 | regression 194 | --tencrops validation accuracy averaged over 10 crops 195 | --exp EXP exp folder 196 | --workers WORKERS number of data loading workers (default: 4) 197 | --epochs EPOCHS number of total epochs to run (default: 90) 198 | --batch_size BATCH_SIZE 199 | mini-batch size (default: 256) 200 | --lr LR learning rate 201 | --momentum MOMENTUM momentum (default: 0.9) 202 | --weight_decay WEIGHT_DECAY, --wd WEIGHT_DECAY 203 | weight decay pow (default: -4) 204 | --seed SEED random seed 205 | --verbose chatty 206 | ``` 207 | 208 | ### Instance-level image retrieval 209 | 210 | You can run the instance-level image retrieval transfer task using: 211 | ``` 212 | ./eval_retrieval.sh 213 | ``` 214 | 215 | ## Visualisation 216 | 217 | We provide two standard visualisation methods presented in our paper. 218 | 219 | ### Filter visualisation with gradient ascent 220 | 221 | First, it is possible to learn an input image that maximizes the activation of a given filter. We follow the process 222 | described by [Yosinski et al.](https://arxiv.org/abs/1506.06579) with a cross-entropy function between the target 223 | filter and the other filters in the same layer. 224 | From the visu folder you can run 225 | ``` 226 | ./gradient_ascent.sh 227 | ``` 228 | You will need to specify the model path ```MODEL```, the architecture of your model ```ARCH```, the path of the folder in which you want to save the synthetic images ```EXP```, and the convolutional layer to consider ```CONV```.
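For example, these fields might be set as follows (the model path matches the default location used by `download_model.sh`; adjust to your setup):

```
MODEL="${HOME}/deepcluster_models/alexnet/checkpoint.pth.tar"
ARCH="alexnet"
EXP="${HOME}/deepcluster_exp/gradient_ascent"
CONV=5
```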
229 | 230 | Full documentation: 231 | ``` 232 | usage: gradient_ascent.py [-h] [--model MODEL] [--arch {alexnet,vgg16}] 233 | [--conv CONV] [--exp EXP] [--lr LR] [--wd WD] 234 | [--sig SIG] [--step STEP] [--niter NITER] 235 | [--idim IDIM] 236 | 237 | Gradient ascent visualisation 238 | 239 | optional arguments: 240 | -h, --help show this help message and exit 241 | --model MODEL Model 242 | --arch {alexnet,vgg16} 243 | arch 244 | --conv CONV convolutional layer 245 | --exp EXP path to res 246 | --lr LR learning rate (default: 3) 247 | --wd WD weight decay (default: 10^-5) 248 | --sig SIG gaussian blur (default: 0.3) 249 | --step STEP number of iter between gaussian blurs (default: 5) 250 | --niter NITER total number of iterations (default: 1000) 251 | --idim IDIM size of input image (default: 224) 252 | ``` 253 | I recommend you play with the hyper-parameters to find a regime where the visualisations are good. 254 | For example, with the pre-trained DeepCluster AlexNet, for conv1 using a learning rate of 3 and 30,000 iterations works well. 255 | For conv5, using a learning rate of 30 and 3,000 iterations gives nice images with the other parameters set to their default values. 256 | 257 | ### Top 9 maximally activated images in a dataset 258 | 259 | Finally, we provide code to retrieve images in a dataset that maximally activate a given filter in the convnet. 260 | From the visu folder, after having changed the fields ```MODEL```, ```EXP```, ```CONV``` and ```DATA```, run 261 | ``` 262 | ./activ-retrieval.sh 263 | ``` 264 | 265 | ## DeeperCluster 266 | 267 | We have proposed another unsupervised feature learning paper at ICCV 2019. 268 | We have shown that unsupervised learning can be used to pre-train convnets, leading to a boost in performance on ImageNet classification. 269 | We achieve that by scaling DeepCluster to 96M images and mixing it with RotNet self-supervision. 270 | Check out the [paper](https://arxiv.org/abs/1905.01278) and [code](https://github.com/facebookresearch/DeeperCluster). 271 | 272 | ## License 273 | 274 | You may find out more about the license [here](https://github.com/facebookresearch/deepcluster/blob/master/LICENSE). 275 | 276 | ## Reference 277 | 278 | If you use this code, please cite the following paper: 279 | 280 | Mathilde Caron, Piotr Bojanowski, Armand Joulin, and Matthijs Douze. "Deep Clustering for Unsupervised Learning of Visual Features." Proc. ECCV (2018). 281 | 282 | ``` 283 | @InProceedings{caron2018deep, 284 | title={Deep Clustering for Unsupervised Learning of Visual Features}, 285 | author={Caron, Mathilde and Bojanowski, Piotr and Joulin, Armand and Douze, Matthijs}, 286 | booktitle={European Conference on Computer Vision}, 287 | year={2018}, 288 | } 289 | ``` 290 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/deepcluster/2d1927e8e3dd272329e879e510fbbdf1b1d02d17/__init__.py -------------------------------------------------------------------------------- /clustering.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | # 7 | import time 8 | 9 | import faiss 10 | import numpy as np 11 | from PIL import Image 12 | from PIL import ImageFile 13 | from scipy.sparse import csr_matrix, find 14 | import torch 15 | import torch.utils.data as data 16 | import torchvision.transforms as transforms 17 | 18 | ImageFile.LOAD_TRUNCATED_IMAGES = True 19 | 20 | __all__ = ['PIC', 'Kmeans', 'cluster_assign', 'arrange_clustering'] 21 | 22 | 23 | def pil_loader(path): 24 | """Loads an image. 25 | Args: 26 | path (string): path to image file 27 | Returns: 28 | Image 29 | """ 30 | with open(path, 'rb') as f: 31 | img = Image.open(f) 32 | return img.convert('RGB') 33 | 34 | 35 | class ReassignedDataset(data.Dataset): 36 | """A dataset where the new images labels are given in argument. 37 | Args: 38 | image_indexes (list): list of data indexes 39 | pseudolabels (list): list of labels for each data 40 | dataset (list): list of tuples with paths to images 41 | transform (callable, optional): a function/transform that takes in 42 | an PIL image and returns a 43 | transformed version 44 | """ 45 | 46 | def __init__(self, image_indexes, pseudolabels, dataset, transform=None): 47 | self.imgs = self.make_dataset(image_indexes, pseudolabels, dataset) 48 | self.transform = transform 49 | 50 | def make_dataset(self, image_indexes, pseudolabels, dataset): 51 | label_to_idx = {label: idx for idx, label in enumerate(set(pseudolabels))} 52 | images = [] 53 | for j, idx in enumerate(image_indexes): 54 | path = dataset[idx][0] 55 | pseudolabel = label_to_idx[pseudolabels[j]] 56 | images.append((path, pseudolabel)) 57 | return images 58 | 59 | def __getitem__(self, index): 60 | """ 61 | Args: 62 | index (int): index of data 63 | Returns: 64 | tuple: (image, pseudolabel) where pseudolabel is the cluster of index datapoint 65 | """ 66 | path, pseudolabel = self.imgs[index] 67 | img = pil_loader(path) 68 | if self.transform is not None: 69 | img = self.transform(img) 70 | return img, pseudolabel 71 | 72 | def __len__(self): 73 | return len(self.imgs) 74 | 75 | 76 | def preprocess_features(npdata, pca=256): 77 | """Preprocess an array of features. 78 | Args: 79 | npdata (np.array N * ndim): features to preprocess 80 | pca (int): dim of output 81 | Returns: 82 | np.array of dim N * pca: data PCA-reduced, whitened and L2-normalized 83 | """ 84 | _, ndim = npdata.shape 85 | npdata = npdata.astype('float32') 86 | 87 | # Apply PCA-whitening with Faiss 88 | mat = faiss.PCAMatrix (ndim, pca, eigen_power=-0.5) 89 | mat.train(npdata) 90 | assert mat.is_trained 91 | npdata = mat.apply_py(npdata) 92 | 93 | # L2 normalization 94 | row_sums = np.linalg.norm(npdata, axis=1) 95 | npdata = npdata / row_sums[:, np.newaxis] 96 | 97 | return npdata 98 | 99 | 100 | def make_graph(xb, nnn): 101 | """Builds a graph of nearest neighbors. 
102 | Args: 103 | xb (np.array): data 104 | nnn (int): number of nearest neighbors 105 | Returns: 106 | list: for each data the list of ids to its nnn nearest neighbors 107 | list: for each data the list of distances to its nnn NN 108 | """ 109 | N, dim = xb.shape 110 | 111 | # we need only a StandardGpuResources per GPU 112 | res = faiss.StandardGpuResources() 113 | 114 | # L2 115 | flat_config = faiss.GpuIndexFlatConfig() 116 | flat_config.device = int(torch.cuda.device_count()) - 1 117 | index = faiss.GpuIndexFlatL2(res, dim, flat_config) 118 | index.add(xb) 119 | D, I = index.search(xb, nnn + 1) 120 | return I, D 121 | 122 | 123 | def cluster_assign(images_lists, dataset): 124 | """Creates a dataset from clustering, with clusters as labels. 125 | Args: 126 | images_lists (list of list): for each cluster, the list of image indexes 127 | belonging to this cluster 128 | dataset (list): initial dataset 129 | Returns: 130 | ReassignedDataset(torch.utils.data.Dataset): a dataset with clusters as 131 | labels 132 | """ 133 | assert images_lists is not None 134 | pseudolabels = [] 135 | image_indexes = [] 136 | for cluster, images in enumerate(images_lists): 137 | image_indexes.extend(images) 138 | pseudolabels.extend([cluster] * len(images)) 139 | 140 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 141 | std=[0.229, 0.224, 0.225]) 142 | t = transforms.Compose([transforms.RandomResizedCrop(224), 143 | transforms.RandomHorizontalFlip(), 144 | transforms.ToTensor(), 145 | normalize]) 146 | 147 | return ReassignedDataset(image_indexes, pseudolabels, dataset, t) 148 | 149 | 150 | def run_kmeans(x, nmb_clusters, verbose=False): 151 | """Runs kmeans on 1 GPU. 152 | Args: 153 | x: data 154 | nmb_clusters (int): number of clusters 155 | Returns: 156 | list: ids of data in each cluster 157 | """ 158 | n_data, d = x.shape 159 | 160 | # faiss implementation of k-means 161 | clus = faiss.Clustering(d, nmb_clusters) 162 | 163 | # Change faiss seed at each k-means so that the randomly picked 164 | # initialization centroids do not correspond to the same feature ids 165 | # from an epoch to another. 166 | clus.seed = np.random.randint(1234) 167 | 168 | clus.niter = 20 169 | clus.max_points_per_centroid = 10000000 170 | res = faiss.StandardGpuResources() 171 | flat_config = faiss.GpuIndexFlatConfig() 172 | flat_config.useFloat16 = False 173 | flat_config.device = 0 174 | index = faiss.GpuIndexFlatL2(res, d, flat_config) 175 | 176 | # perform the training 177 | clus.train(x, index) 178 | _, I = index.search(x, 1) 179 | losses = faiss.vector_to_array(clus.obj) 180 | if verbose: 181 | print('k-means loss evolution: {0}'.format(losses)) 182 | 183 | return [int(n[0]) for n in I], losses[-1] 184 | 185 | 186 | def arrange_clustering(images_lists): 187 | pseudolabels = [] 188 | image_indexes = [] 189 | for cluster, images in enumerate(images_lists): 190 | image_indexes.extend(images) 191 | pseudolabels.extend([cluster] * len(images)) 192 | indexes = np.argsort(image_indexes) 193 | return np.asarray(pseudolabels)[indexes] 194 | 195 | 196 | class Kmeans(object): 197 | def __init__(self, k): 198 | self.k = k 199 | 200 | def cluster(self, data, verbose=False): 201 | """Performs k-means clustering. 
202 | Args: 203 | x_data (np.array N * dim): data to cluster 204 | """ 205 | end = time.time() 206 | 207 | # PCA-reducing, whitening and L2-normalization 208 | xb = preprocess_features(data) 209 | 210 | # cluster the data 211 | I, loss = run_kmeans(xb, self.k, verbose) 212 | self.images_lists = [[] for i in range(self.k)] 213 | for i in range(len(data)): 214 | self.images_lists[I[i]].append(i) 215 | 216 | if verbose: 217 | print('k-means time: {0:.0f} s'.format(time.time() - end)) 218 | 219 | return loss 220 | 221 | 222 | def make_adjacencyW(I, D, sigma): 223 | """Create adjacency matrix with a Gaussian kernel. 224 | Args: 225 | I (numpy array): for each vertex the ids to its nnn linked vertices 226 | + first column of identity. 227 | D (numpy array): for each data the l2 distances to its nnn linked vertices 228 | + first column of zeros. 229 | sigma (float): Bandwidth of the Gaussian kernel. 230 | 231 | Returns: 232 | csr_matrix: affinity matrix of the graph. 233 | """ 234 | V, k = I.shape 235 | k = k - 1 236 | indices = np.reshape(np.delete(I, 0, 1), (1, -1)) 237 | indptr = np.multiply(k, np.arange(V + 1)) 238 | 239 | def exp_ker(d): 240 | return np.exp(-d / sigma**2) 241 | 242 | exp_ker = np.vectorize(exp_ker) 243 | res_D = exp_ker(D) 244 | data = np.reshape(np.delete(res_D, 0, 1), (1, -1)) 245 | adj_matrix = csr_matrix((data[0], indices[0], indptr), shape=(V, V)) 246 | return adj_matrix 247 | 248 | 249 | def run_pic(I, D, sigma, alpha): 250 | """Run PIC algorithm""" 251 | a = make_adjacencyW(I, D, sigma) 252 | graph = a + a.transpose() 253 | cgraph = graph 254 | nim = graph.shape[0] 255 | 256 | W = graph 257 | t0 = time.time() 258 | 259 | v0 = np.ones(nim) / nim 260 | 261 | # power iterations 262 | v = v0.astype('float32') 263 | 264 | t0 = time.time() 265 | dt = 0 266 | for i in range(200): 267 | vnext = np.zeros(nim, dtype='float32') 268 | 269 | vnext = vnext + W.transpose().dot(v) 270 | 271 | vnext = alpha * vnext + (1 - alpha) / nim 272 | # L1 normalize 273 | vnext /= vnext.sum() 274 | v = vnext 275 | 276 | if i == 200 - 1: 277 | clust = find_maxima_cluster(W, v) 278 | 279 | return [int(i) for i in clust] 280 | 281 | 282 | def find_maxima_cluster(W, v): 283 | n, m = W.shape 284 | assert (n == m) 285 | assign = np.zeros(n) 286 | # for each node 287 | pointers = list(range(n)) 288 | for i in range(n): 289 | best_vi = 0 290 | l0 = W.indptr[i] 291 | l1 = W.indptr[i + 1] 292 | for l in range(l0, l1): 293 | j = W.indices[l] 294 | vi = W.data[l] * (v[j] - v[i]) 295 | if vi > best_vi: 296 | best_vi = vi 297 | pointers[i] = j 298 | n_clus = 0 299 | cluster_ids = -1 * np.ones(n) 300 | for i in range(n): 301 | if pointers[i] == i: 302 | cluster_ids[i] = n_clus 303 | n_clus = n_clus + 1 304 | for i in range(n): 305 | # go from pointers to pointers starting from i until reached a local optim 306 | current_node = i 307 | while pointers[current_node] != current_node: 308 | current_node = pointers[current_node] 309 | 310 | assign[i] = cluster_ids[current_node] 311 | assert (assign[i] >= 0) 312 | return assign 313 | 314 | 315 | class PIC(object): 316 | """Class to perform Power Iteration Clustering on a graph of nearest neighbors. 
317 | Args: 318 | args: for consistency with k-means init 319 | sigma (float): bandwidth of the Gaussian kernel (default 0.2) 320 | nnn (int): number of nearest neighbors (default 5) 321 | alpha (float): parameter in PIC (default 0.001) 322 | distribute_singletons (bool): If True, reassign each singleton to 323 | the cluster of its closest non 324 | singleton nearest neighbors (up to nnn 325 | nearest neighbors). 326 | Attributes: 327 | images_lists (list of list): for each cluster, the list of image indexes 328 | belonging to this cluster 329 | """ 330 | 331 | def __init__(self, args=None, sigma=0.2, nnn=5, alpha=0.001, distribute_singletons=True): 332 | self.sigma = sigma 333 | self.alpha = alpha 334 | self.nnn = nnn 335 | self.distribute_singletons = distribute_singletons 336 | 337 | def cluster(self, data, verbose=False): 338 | end = time.time() 339 | 340 | # preprocess the data 341 | xb = preprocess_features(data) 342 | 343 | # construct nnn graph 344 | I, D = make_graph(xb, self.nnn) 345 | 346 | # run PIC 347 | clust = run_pic(I, D, self.sigma, self.alpha) 348 | images_lists = {} 349 | for h in set(clust): 350 | images_lists[h] = [] 351 | for data, c in enumerate(clust): 352 | images_lists[c].append(data) 353 | 354 | # allocate singletons to clusters of their closest NN not singleton 355 | if self.distribute_singletons: 356 | clust_NN = {} 357 | for i in images_lists: 358 | # if singleton 359 | if len(images_lists[i]) == 1: 360 | s = images_lists[i][0] 361 | # for NN 362 | for n in I[s, 1:]: 363 | # if NN is not a singleton 364 | if not len(images_lists[clust[n]]) == 1: 365 | clust_NN[s] = n 366 | break 367 | for s in clust_NN: 368 | del images_lists[clust[s]] 369 | clust[s] = clust[clust_NN[s]] 370 | images_lists[clust[s]].append(s) 371 | 372 | self.images_lists = [] 373 | for c in images_lists: 374 | self.images_lists.append(images_lists[c]) 375 | 376 | if verbose: 377 | print('pic time: {0:.0f} s'.format(time.time() - end)) 378 | return 0 379 | -------------------------------------------------------------------------------- /download_model.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | MODELROOT="${HOME}/deepcluster_models" 10 | 11 | mkdir -p ${MODELROOT} 12 | 13 | for MODEL in alexnet vgg16 14 | do 15 | mkdir -p "${MODELROOT}/${MODEL}" 16 | for FILE in checkpoint.pth.tar model.caffemodel model.prototxt 17 | do 18 | wget -c "https://dl.fbaipublicfiles.com/deepcluster/${MODEL}/${FILE}" \ 19 | -P "${MODELROOT}/${MODEL}" 20 | 21 | done 22 | done 23 | -------------------------------------------------------------------------------- /eval_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | import argparse 9 | import os 10 | import time 11 | 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | import torch.backends.cudnn as cudnn 16 | import torch.optim 17 | import torch.utils.data 18 | import torchvision.transforms as transforms 19 | import torchvision.datasets as datasets 20 | 21 | from util import AverageMeter, learning_rate_decay, load_model, Logger 22 | 23 | parser = argparse.ArgumentParser(description="""Train linear classifier on top 24 | of frozen convolutional layers of an AlexNet.""") 25 | 26 | parser.add_argument('--data', type=str, help='path to dataset') 27 | parser.add_argument('--model', type=str, help='path to model') 28 | parser.add_argument('--conv', type=int, choices=[1, 2, 3, 4, 5], 29 | help='on top of which convolutional layer train logistic regression') 30 | parser.add_argument('--tencrops', action='store_true', 31 | help='validation accuracy averaged over 10 crops') 32 | parser.add_argument('--exp', type=str, default='', help='exp folder') 33 | parser.add_argument('--workers', default=4, type=int, 34 | help='number of data loading workers (default: 4)') 35 | parser.add_argument('--epochs', type=int, default=90, help='number of total epochs to run (default: 90)') 36 | parser.add_argument('--batch_size', default=256, type=int, 37 | help='mini-batch size (default: 256)') 38 | parser.add_argument('--lr', default=0.01, type=float, help='learning rate') 39 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum (default: 0.9)') 40 | parser.add_argument('--weight_decay', '--wd', default=-4, type=float, 41 | help='weight decay pow (default: -4)') 42 | parser.add_argument('--seed', type=int, default=31, help='random seed') 43 | parser.add_argument('--verbose', action='store_true', help='chatty') 44 | 45 | 46 | def main(): 47 | global args 48 | args = parser.parse_args() 49 | 50 | #fix random seeds 51 | torch.manual_seed(args.seed) 52 | torch.cuda.manual_seed_all(args.seed) 53 | np.random.seed(args.seed) 54 | 55 | best_prec1 = 0 56 | 57 | # load model 58 | model = load_model(args.model) 59 | model.cuda() 60 | cudnn.benchmark = True 61 | 62 | # freeze the features layers 63 | for param in model.features.parameters(): 64 | param.requires_grad = False 65 | 66 | # define loss function (criterion) and optimizer 67 | criterion = nn.CrossEntropyLoss().cuda() 68 | 69 | # data loading code 70 | traindir = os.path.join(args.data, 'train') 71 | valdir = os.path.join(args.data, 'val') 72 | 73 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 74 | std=[0.229, 0.224, 0.225]) 75 | 76 | if args.tencrops: 77 | transformations_val = [ 78 | transforms.Resize(256), 79 | transforms.TenCrop(224), 80 | transforms.Lambda(lambda crops: torch.stack([normalize(transforms.ToTensor()(crop)) for crop in crops])), 81 | ] 82 | else: 83 | transformations_val = [transforms.Resize(256), 84 | transforms.CenterCrop(224), 85 | transforms.ToTensor(), 86 | normalize] 87 | 88 | transformations_train = [transforms.Resize(256), 89 | transforms.CenterCrop(256), 90 | transforms.RandomCrop(224), 91 | transforms.RandomHorizontalFlip(), 92 | transforms.ToTensor(), 93 | normalize] 94 | train_dataset = datasets.ImageFolder( 95 | traindir, 96 | transform=transforms.Compose(transformations_train) 97 | ) 98 | 99 | val_dataset = datasets.ImageFolder( 100 | valdir, 101 | transform=transforms.Compose(transformations_val) 102 | ) 103 | train_loader = torch.utils.data.DataLoader(train_dataset, 104 | batch_size=args.batch_size, 105 | shuffle=True, 106 | 
num_workers=args.workers, 107 | pin_memory=True) 108 | val_loader = torch.utils.data.DataLoader(val_dataset, 109 | batch_size=int(args.batch_size/2), 110 | shuffle=False, 111 | num_workers=args.workers) 112 | 113 | # logistic regression 114 | reglog = RegLog(args.conv, len(train_dataset.classes)).cuda() 115 | optimizer = torch.optim.SGD( 116 | filter(lambda x: x.requires_grad, reglog.parameters()), 117 | args.lr, 118 | momentum=args.momentum, 119 | weight_decay=10**args.weight_decay 120 | ) 121 | 122 | # create logs 123 | exp_log = os.path.join(args.exp, 'log') 124 | if not os.path.isdir(exp_log): 125 | os.makedirs(exp_log) 126 | 127 | loss_log = Logger(os.path.join(exp_log, 'loss_log')) 128 | prec1_log = Logger(os.path.join(exp_log, 'prec1')) 129 | prec5_log = Logger(os.path.join(exp_log, 'prec5')) 130 | 131 | for epoch in range(args.epochs): 132 | end = time.time() 133 | 134 | # train for one epoch 135 | train(train_loader, model, reglog, criterion, optimizer, epoch) 136 | 137 | # evaluate on validation set 138 | prec1, prec5, loss = validate(val_loader, model, reglog, criterion) 139 | 140 | loss_log.log(loss) 141 | prec1_log.log(prec1) 142 | prec5_log.log(prec5) 143 | 144 | # remember best prec@1 and save checkpoint 145 | is_best = prec1 > best_prec1 146 | best_prec1 = max(prec1, best_prec1) 147 | if is_best: 148 | filename = 'model_best.pth.tar' 149 | else: 150 | filename = 'checkpoint.pth.tar' 151 | torch.save({ 152 | 'epoch': epoch + 1, 153 | 'arch': 'alexnet', 154 | 'state_dict': model.state_dict(), 155 | 'prec5': prec5, 156 | 'best_prec1': best_prec1, 157 | 'optimizer' : optimizer.state_dict(), 158 | }, os.path.join(args.exp, filename)) 159 | 160 | 161 | class RegLog(nn.Module): 162 | """Creates logistic regression on top of frozen features""" 163 | def __init__(self, conv, num_labels): 164 | super(RegLog, self).__init__() 165 | self.conv = conv 166 | if conv==1: 167 | self.av_pool = nn.AvgPool2d(6, stride=6, padding=3) 168 | s = 9600 169 | elif conv==2: 170 | self.av_pool = nn.AvgPool2d(4, stride=4, padding=0) 171 | s = 9216 172 | elif conv==3: 173 | self.av_pool = nn.AvgPool2d(3, stride=3, padding=1) 174 | s = 9600 175 | elif conv==4: 176 | self.av_pool = nn.AvgPool2d(3, stride=3, padding=1) 177 | s = 9600 178 | elif conv==5: 179 | self.av_pool = nn.AvgPool2d(2, stride=2, padding=0) 180 | s = 9216 181 | self.linear = nn.Linear(s, num_labels) 182 | 183 | def forward(self, x): 184 | x = self.av_pool(x) 185 | x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3)) 186 | return self.linear(x) 187 | 188 | 189 | def forward(x, model, conv): 190 | if hasattr(model, 'sobel') and model.sobel is not None: 191 | x = model.sobel(x) 192 | count = 1 193 | for m in model.features.modules(): 194 | if not isinstance(m, nn.Sequential): 195 | x = m(x) 196 | if isinstance(m, nn.ReLU): 197 | if count == conv: 198 | return x 199 | count = count + 1 200 | return x 201 | 202 | def accuracy(output, target, topk=(1,)): 203 | """Computes the precision@k for the specified values of k""" 204 | maxk = max(topk) 205 | batch_size = target.size(0) 206 | 207 | _, pred = output.topk(maxk, 1, True, True) 208 | pred = pred.t() 209 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 210 | 211 | res = [] 212 | for k in topk: 213 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 214 | res.append(correct_k.mul_(100.0 / batch_size)) 215 | return res 216 | 217 | def train(train_loader, model, reglog, criterion, optimizer, epoch): 218 | batch_time = AverageMeter() 219 | data_time = AverageMeter() 220 | 
losses = AverageMeter() 221 | top1 = AverageMeter() 222 | top5 = AverageMeter() 223 | 224 | # freeze also batch norm layers 225 | model.eval() 226 | 227 | end = time.time() 228 | for i, (input, target) in enumerate(train_loader): 229 | 230 | # measure data loading time 231 | data_time.update(time.time() - end) 232 | 233 | #adjust learning rate 234 | learning_rate_decay(optimizer, len(train_loader) * epoch + i, args.lr) 235 | 236 | target = target.cuda(async=True) 237 | input_var = torch.autograd.Variable(input.cuda()) 238 | target_var = torch.autograd.Variable(target) 239 | # compute output 240 | 241 | output = forward(input_var, model, reglog.conv) 242 | output = reglog(output) 243 | loss = criterion(output, target_var) 244 | # measure accuracy and record loss 245 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 246 | losses.update(loss.data[0], input.size(0)) 247 | top1.update(prec1[0], input.size(0)) 248 | top5.update(prec5[0], input.size(0)) 249 | 250 | # compute gradient and do SGD step 251 | optimizer.zero_grad() 252 | loss.backward() 253 | optimizer.step() 254 | 255 | # measure elapsed time 256 | batch_time.update(time.time() - end) 257 | end = time.time() 258 | 259 | if args.verbose and i % 100 == 0: 260 | print('Epoch: [{0}][{1}/{2}]\t' 261 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 262 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 263 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 264 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 265 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})' 266 | .format(epoch, i, len(train_loader), batch_time=batch_time, 267 | data_time=data_time, loss=losses, top1=top1, top5=top5)) 268 | 269 | 270 | def validate(val_loader, model, reglog, criterion): 271 | batch_time = AverageMeter() 272 | losses = AverageMeter() 273 | top1 = AverageMeter() 274 | top5 = AverageMeter() 275 | 276 | # switch to evaluate mode 277 | model.eval() 278 | softmax = nn.Softmax(dim=1).cuda() 279 | end = time.time() 280 | for i, (input_tensor, target) in enumerate(val_loader): 281 | if args.tencrops: 282 | bs, ncrops, c, h, w = input_tensor.size() 283 | input_tensor = input_tensor.view(-1, c, h, w) 284 | target = target.cuda(async=True) 285 | input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True) 286 | target_var = torch.autograd.Variable(target, volatile=True) 287 | 288 | output = reglog(forward(input_var, model, reglog.conv)) 289 | 290 | if args.tencrops: 291 | output_central = output.view(bs, ncrops, -1)[: , ncrops / 2 - 1, :] 292 | output = softmax(output) 293 | output = torch.squeeze(output.view(bs, ncrops, -1).mean(1)) 294 | else: 295 | output_central = output 296 | 297 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 298 | top1.update(prec1[0], input_tensor.size(0)) 299 | top5.update(prec5[0], input_tensor.size(0)) 300 | loss = criterion(output_central, target_var) 301 | losses.update(loss.data[0], input_tensor.size(0)) 302 | 303 | # measure elapsed time 304 | batch_time.update(time.time() - end) 305 | end = time.time() 306 | 307 | if args.verbose and i % 100 == 0: 308 | print('Validation: [{0}/{1}]\t' 309 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 310 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 311 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 312 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})' 313 | .format(i, len(val_loader), batch_time=batch_time, 314 | loss=losses, top1=top1, top5=top5)) 315 | 316 | return top1.avg, top5.avg, losses.avg 317 | 318 | if __name__ == '__main__': 319 | main() 320 | 
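The hard-coded `s` values in RegLog (9600 or 9216 depending on the chosen conv layer) are simply the flattened sizes of the average-pooled feature maps. Assuming 224x224 crops and the CFG['2012'] AlexNet layout from models/alexnet.py, the arithmetic can be re-derived with the usual (n + 2p - k) // s + 1 output-size formula; the following is a minimal, purely illustrative sketch of that calculation:

def out_size(n, k, s, p):
    # spatial output size of a convolution/pooling layer with kernel k, stride s, padding p
    return (n + 2 * p - k) // s + 1

n = out_size(224, 11, 4, 2)              # conv1 -> 55x55
print(96 * out_size(n, 6, 6, 3) ** 2)    # conv1 classifier input: 96 * 10 * 10 = 9600
n = out_size(n, 3, 2, 0)                 # max pool -> 27x27
n = out_size(n, 5, 1, 2)                 # conv2 -> 27x27
print(256 * out_size(n, 4, 4, 0) ** 2)   # conv2 classifier input: 256 * 6 * 6 = 9216
n = out_size(n, 3, 2, 0)                 # max pool -> 13x13
n = out_size(n, 3, 1, 1)                 # conv3 and conv4 -> 13x13
print(384 * out_size(n, 3, 3, 1) ** 2)   # conv3/conv4 classifier input: 384 * 5 * 5 = 9600
print(256 * out_size(n, 2, 2, 0) ** 2)   # conv5 classifier input: 256 * 6 * 6 = 9216

With these pooling kernels every probed layer feeds the linear classifier a roughly comparable number of features (about 9-10k).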
-------------------------------------------------------------------------------- /eval_linear.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | DATA="/datasets01/imagenet_full_size/061417/" 10 | MODELROOT="${HOME}/deepcluster_models" 11 | MODEL="${MODELROOT}/alexnet/checkpoint.pth.tar" 12 | EXP="${HOME}/deepcluster_exp/linear_classif" 13 | 14 | PYTHON="${HOME}/test/conda/bin/python" 15 | 16 | mkdir -p ${EXP} 17 | 18 | ${PYTHON} eval_linear.py --model ${MODEL} --data ${DATA} --conv 3 --lr 0.01 \ 19 | --wd -7 --tencrops --verbose --exp ${EXP} --workers 12 20 | -------------------------------------------------------------------------------- /eval_retrieval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | 9 | import argparse 10 | from collections import OrderedDict 11 | import os 12 | import pickle 13 | import subprocess 14 | import sys 15 | 16 | import numpy as np 17 | from PIL import Image 18 | import torch 19 | import torchvision 20 | from torch.autograd import Variable 21 | 22 | from util import load_model 23 | 24 | 25 | class ImageHelper: 26 | def __init__(self, S, L, transforms): 27 | self.S = S 28 | self.L = L 29 | self.transforms = transforms 30 | 31 | def load_and_prepare_image(self, fname, roi=None): 32 | # Read image, get aspect ratio, and resize such as the largest side equals S 33 | im = Image.open(fname) 34 | im_size_hw = np.array((im.size[1], im.size[0])) 35 | if self.S == -1: 36 | ratio = 1.0 37 | elif self.S == -2: 38 | if np.max(im_size_hw) > 124: 39 | ratio = 1024.0/np.max(im_size_hw) 40 | else: 41 | ratio = -1 42 | else: 43 | ratio = float(self.S)/np.max(im_size_hw) 44 | new_size = tuple(np.round(im_size_hw * ratio).astype(np.int32)) 45 | im_resized = self.transforms(im.resize((new_size[1], new_size[0]), Image.BILINEAR)) 46 | # If there is a roi, adapt the roi to the new size and crop. Do not rescale 47 | # the image once again 48 | if roi is not None: 49 | # ROI format is (xmin,ymin,xmax,ymax) 50 | roi = np.round(roi * ratio).astype(np.int32) 51 | im_resized = im_resized[:, roi[1]:roi[3], roi[0]:roi[2]] 52 | return im_resized 53 | 54 | def get_rmac_region_coordinates(self, H, W, L): 55 | # Almost verbatim from Tolias et al Matlab implementation. 56 | # Could be heavily pythonized, but really not worth it... 57 | # Desired overlap of neighboring regions 58 | ovr = 0.4 59 | # Possible regions for the long dimension 60 | steps = np.array((2, 3, 4, 5, 6, 7), dtype=np.float32) 61 | w = np.minimum(H, W) 62 | 63 | b = (np.maximum(H, W) - w) / (steps - 1) 64 | # steps(idx) regions for long dimension. The +1 comes from Matlab 65 | # 1-indexing... 
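        # For each candidate region count in `steps`, b is the stride between the centres of
        # neighbouring w x w regions along the long image side, so (w*w - w*b) / (w*w) is the
        # fraction by which two neighbouring regions overlap; argmin selects the candidate
        # whose overlap is closest to the 40% target set by `ovr`.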
66 | idx = np.argmin(np.abs(((w**2 - w * b) / w**2) - ovr)) + 1 67 | 68 | # Region overplus per dimension 69 | Wd = 0 70 | Hd = 0 71 | if H < W: 72 | Wd = idx 73 | elif H > W: 74 | Hd = idx 75 | 76 | regions_xywh = [] 77 | for l in range(1, L+1): 78 | wl = np.floor(2 * w / (l + 1)) 79 | wl2 = np.floor(wl / 2 - 1) 80 | # Center coordinates 81 | if l + Wd - 1 > 0: 82 | b = (W - wl) / (l + Wd - 1) 83 | else: 84 | b = 0 85 | cenW = np.floor(wl2 + b * np.arange(l - 1 + Wd + 1)) - wl2 86 | # Center coordinates 87 | if l + Hd - 1 > 0: 88 | b = (H - wl) / (l + Hd - 1) 89 | else: 90 | b = 0 91 | cenH = np.floor(wl2 + b * np.arange(l - 1 + Hd + 1)) - wl2 92 | 93 | for i_ in cenH: 94 | for j_ in cenW: 95 | regions_xywh.append([j_, i_, wl, wl]) 96 | 97 | # Round the regions. Careful with the borders! 98 | for i in range(len(regions_xywh)): 99 | for j in range(4): 100 | regions_xywh[i][j] = int(round(regions_xywh[i][j])) 101 | if regions_xywh[i][0] + regions_xywh[i][2] > W: 102 | regions_xywh[i][0] -= ((regions_xywh[i][0] + regions_xywh[i][2]) - W) 103 | if regions_xywh[i][1] + regions_xywh[i][3] > H: 104 | regions_xywh[i][1] -= ((regions_xywh[i][1] + regions_xywh[i][3]) - H) 105 | return np.array(regions_xywh) 106 | 107 | 108 | class PCA(object): 109 | ''' 110 | Fits and applies PCA whitening 111 | ''' 112 | def __init__(self, n_components): 113 | self.n_components = n_components 114 | 115 | def fit(self, X): 116 | mean = X.mean(axis=0) 117 | X -= mean 118 | self.mean = Variable(torch.from_numpy(mean).view(1, -1)) 119 | Xcov = np.dot(X.T, X) 120 | d, V = np.linalg.eigh(Xcov) 121 | 122 | eps = d.max() * 1e-5 123 | n_0 = (d < eps).sum() 124 | if n_0 > 0: 125 | print("%d / %d singular values are 0" % (n_0, d.size)) 126 | d[d < eps] = eps 127 | totenergy = d.sum() 128 | idx = np.argsort(d)[::-1][:self.n_components] 129 | d = d[idx] 130 | V = V[:, idx] 131 | 132 | print("keeping %.2f %% of the energy" % (d.sum() / totenergy * 100.0)) 133 | 134 | D = np.diag(1. / np.sqrt(d)) 135 | self.DVt = Variable(torch.from_numpy(np.dot(D, V.T))) 136 | 137 | def to_cuda(self): 138 | self.mean = self.mean.cuda() 139 | self.DVt = self.DVt.cuda() 140 | 141 | def apply(self, X): 142 | X = X - self.mean 143 | num = torch.mm(self.DVt, X.transpose(0, 1)).transpose(0, 1) 144 | # L2 normalize on output 145 | return num 146 | 147 | 148 | class Dataset: 149 | def __init__(self, path, eval_binary_path): 150 | self.path = path 151 | self.eval_binary_path = eval_binary_path 152 | # Some images from the Paris dataset are corrupted. 
Standard practice is 153 | # to ignore them 154 | self.blacklisted = set(["paris_louvre_000136", 155 | "paris_louvre_000146", 156 | "paris_moulinrouge_000422", 157 | "paris_museedorsay_001059", 158 | "paris_notredame_000188", 159 | "paris_pantheon_000284", 160 | "paris_pantheon_000960", 161 | "paris_pantheon_000974", 162 | "paris_pompidou_000195", 163 | "paris_pompidou_000196", 164 | "paris_pompidou_000201", 165 | "paris_pompidou_000467", 166 | "paris_pompidou_000640", 167 | "paris_sacrecoeur_000299", 168 | "paris_sacrecoeur_000330", 169 | "paris_sacrecoeur_000353", 170 | "paris_triomphe_000662", 171 | "paris_triomphe_000833", 172 | "paris_triomphe_000863", 173 | "paris_triomphe_000867"]) 174 | self.load() 175 | 176 | def load(self): 177 | # Load the dataset GT 178 | self.lab_root = '{0}/lab/'.format(self.path) 179 | self.img_root = '{0}/jpg/'.format(self.path) 180 | lab_filenames = np.sort(os.listdir(self.lab_root)) 181 | # Get the filenames without the extension 182 | self.img_filenames = [e[:-4] for e in np.sort(os.listdir(self.img_root)) 183 | if e[:-4] not in self.blacklisted] 184 | 185 | # Parse the label files. Some challenges as filenames do not correspond 186 | # exactly to query names. Go through all the labels to: 187 | # i) map names to filenames and vice versa 188 | # ii) get the relevant regions of interest of the queries, 189 | # iii) get the indexes of the dataset images that are queries 190 | # iv) get the relevants / non-relevants list 191 | self.relevants = {} 192 | self.junk = {} 193 | self.non_relevants = {} 194 | 195 | self.filename_to_name = {} 196 | self.name_to_filename = OrderedDict() 197 | self.q_roi = {} 198 | for e in lab_filenames: 199 | if e.endswith('_query.txt'): 200 | q_name = e[:-len('_query.txt')] 201 | q_data = open("{0}/{1}".format(self.lab_root, e)).readline().split(" ") 202 | q_filename = q_data[0][5:] if q_data[0].startswith('oxc1_') else q_data[0] 203 | self.filename_to_name[q_filename] = q_name 204 | self.name_to_filename[q_name] = q_filename 205 | good = set([e.strip() for e in open("{0}/{1}_ok.txt".format(self.lab_root, q_name))]) 206 | good = good.union(set([e.strip() for e in open("{0}/{1}_good.txt".format(self.lab_root, q_name))])) 207 | junk = set([e.strip() for e in open("{0}/{1}_junk.txt".format(self.lab_root, q_name))]) 208 | good_plus_junk = good.union(junk) 209 | self.relevants[q_name] = [i for i in range(len(self.img_filenames)) 210 | if self.img_filenames[i] in good] 211 | self.junk[q_name] = [i for i in range(len(self.img_filenames)) 212 | if self.img_filenames[i] in junk] 213 | self.non_relevants[q_name] = [i for i in range(len(self.img_filenames)) 214 | if self.img_filenames[i] not in good_plus_junk] 215 | self.q_roi[q_name] = np.array([float(q) for q in q_data[1:]], dtype=np.float32) 216 | #np.array(map(float, q_data[1:]), dtype=np.float32) 217 | 218 | self.q_names = self.name_to_filename.keys() 219 | self.q_index = np.array([self.img_filenames.index(self.name_to_filename[qn]) 220 | for qn in self.q_names]) 221 | self.N_images = len(self.img_filenames) 222 | self.N_queries = len(self.q_index) 223 | 224 | def score(self, sim, temp_dir, eval_bin): 225 | if not os.path.exists(temp_dir): 226 | os.makedirs(temp_dir) 227 | idx = np.argsort(sim, axis=1)[:, ::-1] 228 | maps = [self.score_rnk_partial(i, idx[i], temp_dir, eval_bin) 229 | for i in range(len(self.q_names))] 230 | for i in range(len(self.q_names)): 231 | print("{0}: {1:.2f}".format(self.q_names[i], 100 * maps[i])) 232 | print(20 * "-") 233 | print("Mean: {0:.2f}".format(100 * 
np.mean(maps))) 234 | 235 | def score_rnk_partial(self, i, idx, temp_dir, eval_bin): 236 | rnk = np.array(self.img_filenames)[idx] 237 | with open("{0}/{1}.rnk".format(temp_dir, self.q_names[i]), 'w') as f: 238 | f.write("\n".join(rnk)+"\n") 239 | cmd = "{0} {1}{2} {3}/{4}.rnk".format(eval_bin, self.lab_root, self.q_names[i], temp_dir, self.q_names[i]) 240 | p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 241 | map_ = float(p.stdout.readlines()[0]) 242 | p.wait() 243 | return map_ 244 | 245 | def get_filename(self, i): 246 | return os.path.normpath("{0}/{1}.jpg".format(self.img_root, 247 | self.img_filenames[i])) 248 | 249 | def get_query_filename(self, i): 250 | return os.path.normpath("{0}/{1}.jpg".format(self.img_root, 251 | self.img_filenames[self.q_index[i]])) 252 | 253 | def get_query_roi(self, i): 254 | return self.q_roi[self.q_names[i]] 255 | 256 | 257 | def ensure_directory_exists(fname): 258 | dirname = fname[:fname.rfind('/')] 259 | if not os.path.exists(dirname): 260 | os.makedirs(dirname) 261 | 262 | 263 | def normalize_L2(a, dim): 264 | norms = torch.sqrt(torch.sum(a**2, dim=dim, keepdim=True)) 265 | return a / norms 266 | 267 | 268 | def rmac(features, rmac_levels, pca=None): 269 | nim, nc, xd, yd = features.size() 270 | 271 | rmac_regions = image_helper.get_rmac_region_coordinates(xd, yd, rmac_levels) 272 | rmac_regions = rmac_regions.astype(np.int) 273 | nr = len(rmac_regions) 274 | 275 | rmac_descriptors = [] 276 | for x0, y0, w, h in rmac_regions: 277 | desc = features[:, :, y0:y0 + h, x0:x0 + w] 278 | desc = torch.max(desc, 2, keepdim=True)[0] 279 | desc = torch.max(desc, 3, keepdim=True)[0] 280 | # insert an additional dimension for the cat to work 281 | rmac_descriptors.append(desc.view(-1, 1, nc)) 282 | 283 | rmac_descriptors = torch.cat(rmac_descriptors, 1) 284 | 285 | rmac_descriptors = normalize_L2(rmac_descriptors, 2) 286 | 287 | if pca is None: 288 | return rmac_descriptors 289 | 290 | # PCA + whitening 291 | npca = pca.n_components 292 | rmac_descriptors = pca.apply(rmac_descriptors.view(nr * nim, nc)) 293 | rmac_descriptors = normalize_L2(rmac_descriptors, 1) 294 | 295 | rmac_descriptors = rmac_descriptors.view(nim, nr, npca) 296 | 297 | # Sum aggregation and L2-normalization 298 | rmac_descriptors = torch.sum(rmac_descriptors, 1) 299 | rmac_descriptors = normalize_L2(rmac_descriptors, 1) 300 | return rmac_descriptors 301 | 302 | 303 | if __name__ == '__main__': 304 | parser = argparse.ArgumentParser(description='Evaluate Oxford / Paris') 305 | parser.add_argument('--S', type=int, default=1024, 306 | help='Resize larger side of image to S pixels (e.g. 800)') 307 | parser.add_argument('--L', type=int, default=3, 308 | help='Use L spatial levels (e.g. 
3)') 309 | parser.add_argument('--n_pca', type=int, default=512, 310 | help='output dimension of PCA') 311 | parser.add_argument('--model', type=str, default='pretrained', 312 | help='Model from which RMAC is computed') 313 | parser.add_argument('--dataset', type=str, required=True, 314 | help='path to dataset') 315 | parser.add_argument('--dataset_name', type=str, default='Oxford', 316 | choices=['Oxford', 'Paris'], help='Dataset name') 317 | parser.add_argument('--stage', type=str, default='extract_train', 318 | choices=['extract_train', 'train_pca', 'db_features', 319 | 'q_features', 'eval'], help='what action to perform ') 320 | parser.add_argument('--eval_binary', type=str, required=True, 321 | help='Path to the compute_ap binary to evaluate Oxford / Paris') 322 | parser.add_argument('--temp_dir', type=str, default='', 323 | help='Path to a temporary directory to store features and scores') 324 | parser.add_argument('--multires', dest='multires', action='store_true', 325 | help='Enable multiresolution features') 326 | parser.add_argument('--aqe', type=int, required=False, 327 | help='Average query expansion with k neighbors') 328 | parser.add_argument('--dbe', type=int, required=False, 329 | help='Database expansion with k neighbors') 330 | 331 | parser.set_defaults(multires=False) 332 | args = parser.parse_args() 333 | 334 | # Load the dataset and the image helper 335 | print "Prepare the dataset from ", args.dataset 336 | dataset = Dataset(args.dataset, args.eval_binary) 337 | 338 | ensure_directory_exists(args.temp_dir + '/') 339 | 340 | if args.stage in ('extract_train', 'db_features', 'q_features'): 341 | 342 | if args.model == 'pretrained': 343 | print("loading supervised pretrained VGG-16") 344 | net = torchvision.models.vgg16_bn(pretrained=True) 345 | else: 346 | net = load_model(args.model) 347 | 348 | transforms_comp = [] 349 | features_layers = list(net.features.children())[:-1] 350 | net.features = torch.nn.Sequential(*features_layers) 351 | transforms_comp.extend([ 352 | torchvision.transforms.ToTensor(), 353 | torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 354 | std=[0.229, 0.224, 0.225]) 355 | ]) 356 | 357 | transforms = torchvision.transforms.Compose(transforms_comp) 358 | 359 | print("moving to GPU") 360 | net.cuda() 361 | net.eval() 362 | print(" done") 363 | 364 | print("initialize image helper") 365 | image_helper = ImageHelper(args.S, args.L, transforms) 366 | 367 | 368 | if args.stage == 'extract_train': 369 | print("extract regions for training") 370 | # extract at a single scale 371 | S = args.S 372 | image_helper.S = S 373 | N_dataset = dataset.N_images 374 | def process_image(i): 375 | print(i), 376 | sys.stdout.flush() 377 | fname_out = "{0}/{1}_S{2}_L{3}_regions/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 378 | 379 | ensure_directory_exists(fname_out) 380 | I = image_helper.load_and_prepare_image(dataset.get_filename(i), roi=None) 381 | v = torch.autograd.Variable(I.unsqueeze(0)) 382 | vc = v.cuda() 383 | if hasattr(net, 'sobel') and net.sobel is not None: 384 | vc = net.sobel(vc) 385 | activation_map = net.features(vc).cpu() 386 | 387 | rmac_descriptors = rmac(activation_map, args.L) 388 | np.save(fname_out, rmac_descriptors.data.numpy()) 389 | 390 | map(process_image, range(dataset.N_images)) 391 | 392 | elif args.stage == 'train_pca': 393 | # load training vectors 394 | train_x = [] 395 | for i in range(10000): 396 | fname_in = "{0}/{1}_S{2}_L{3}_regions/{4}.npy".format(args.temp_dir, args.dataset_name, args.S, args.L, 
i) 397 | if not os.path.exists(fname_in): 398 | break 399 | x = np.load(fname_in) 400 | train_x.append(x) 401 | 402 | print("loaded %d train vectors" % len(train_x)) 403 | 404 | train_x = np.vstack([x.reshape(-1, x.shape[-1]) for x in train_x]) 405 | print(" size", train_x.shape) 406 | 407 | pca = PCA(args.n_pca) 408 | pca.fit(train_x) 409 | pcaname = '%s/%s_S%d_PCA.pickle' % (args.temp_dir, args.dataset_name, args.S) 410 | 411 | print("writing", pcaname) 412 | pickle.dump(pca, open(pcaname, 'w'), -1) 413 | 414 | elif args.stage == 'db_features' or args.stage == 'q_features': 415 | # for tests on Paris, use Oxford PCA, and vice-versa 416 | pcaname = '%s/%s_S%d_PCA.pickle' % ( 417 | args.temp_dir, 'Paris' if args.dataset_name == 'Oxford' else 'Oxford', args.S) 418 | print("loading PCA from", pcaname) 419 | pca = pickle.load(open(pcaname, 'r')) 420 | 421 | print("Compute features") 422 | # extract at a single scale 423 | S = args.S 424 | image_helper.S = S 425 | N_dataset = dataset.N_images 426 | 427 | def process_image(fname_in, roi, fname_out): 428 | softmax = torch.nn.Softmax().cuda() 429 | I = image_helper.load_and_prepare_image(fname_in, roi=roi) 430 | v = torch.autograd.Variable(I.unsqueeze(0)) 431 | vc = v.cuda() 432 | if hasattr(net, 'sobel') and net.sobel is not None: 433 | vc = net.sobel(vc) 434 | activation_map = net.features(vc).cpu() 435 | descriptors = rmac(activation_map, args.L, pca=pca) 436 | np.save(fname_out, descriptors.data.numpy()) 437 | 438 | if args.stage == 'db_features': 439 | for i in range(dataset.N_images): 440 | fname_in = dataset.get_filename(i) 441 | fname_out = "{0}/{1}_S{2}_L{3}_db/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 442 | ensure_directory_exists(fname_out) 443 | print(i), 444 | sys.stdout.flush() 445 | process_image(fname_in, None, fname_out) 446 | 447 | elif args.stage == 'q_features': 448 | for i in range(dataset.N_queries): 449 | fname_in = dataset.get_query_filename(i) 450 | roi = dataset.get_query_roi(i) 451 | fname_out = "{0}/{1}_S{2}_L{3}_q/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 452 | ensure_directory_exists(fname_out) 453 | print(i), 454 | sys.stdout.flush() 455 | process_image(fname_in, roi, fname_out) 456 | 457 | elif args.stage == 'eval': 458 | S = args.S 459 | 460 | print("load query features") 461 | features_queries = [] 462 | for i in range(dataset.N_queries): 463 | fname = "{0}/{1}_S{2}_L{3}_q/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 464 | features_queries.append(np.load(fname)) 465 | features_queries = np.vstack(features_queries) 466 | 467 | print(" size", features_queries.shape) 468 | 469 | print("load database features") 470 | features_dataset = [] 471 | for i in range(dataset.N_images): 472 | fname = "{0}/{1}_S{2}_L{3}_db/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 473 | features_dataset.append(np.load(fname)) 474 | features_dataset = np.vstack(features_dataset) 475 | print(" size", features_dataset.shape) 476 | 477 | # Compute similarity 478 | sim = features_queries.dot(features_dataset.T) 479 | 480 | # Score 481 | dataset.score(sim, args.temp_dir, args.eval_binary) 482 | -------------------------------------------------------------------------------- /eval_retrieval.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | 10 | # This source is adapted from the "deep_retrieval" package that comes with 11 | # Deep Image Retrieval: Learning global representations for image search. A. Gordo, J. Almazan, J. Revaud, and D. Larlus. In ECCV, 2016 12 | # The original source is not accessible anymore, but other people shared the code, see eg. https://github.com/figitaki/deep-retrieval 13 | # follow the instructions on that github repo to download the data, compile the evaluation package, and set the path to the resulting directory below: 14 | 15 | #DATASETS='./datasets' 16 | 17 | # load pytorch model from here 18 | MODEL='/private/home/mathilde/model-to-release/vgg16/checkpoint.pth.tar' 19 | 20 | # this is to obtain the supervised performance 21 | #MODEL='pretrained' 22 | 23 | TEMP='/private/home/mathilde/temp' 24 | 25 | # should be compiled as part of the dataset preparation 26 | EVALBINARY="$DATASETS/evaluation/compute_ap" 27 | EVAL='Paris' 28 | PCA='Oxford' 29 | DATASETEVAL="$DATASETS/$EVAL" 30 | DATASETPCA="$DATASETS/$PCA" 31 | 32 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETPCA} --dataset_name ${PCA} --stage extract_train 33 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETPCA} --dataset_name ${PCA} --stage train_pca 34 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETEVAL} --dataset_name ${EVAL} --stage q_features 35 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETEVAL} --dataset_name ${EVAL} --stage db_features 36 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETEVAL} --dataset_name ${EVAL} --stage eval 37 | -------------------------------------------------------------------------------- /eval_voc_classif.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | #!/usr/bin/env python 8 | # -*- coding: utf-8 -*- 9 | 10 | import argparse 11 | import os 12 | import math 13 | import time 14 | import glob 15 | from collections import defaultdict 16 | 17 | import numpy as np 18 | import torch 19 | import torch.nn as nn 20 | import torch.optim 21 | import torch.utils.data 22 | import torchvision 23 | import torchvision.transforms as transforms 24 | import torch.backends.cudnn as cudnn 25 | from sklearn import metrics 26 | from PIL import Image 27 | from PIL import ImageFile 28 | ImageFile.LOAD_TRUNCATED_IMAGES = True 29 | 30 | from util import AverageMeter, load_model 31 | from eval_linear import accuracy 32 | 33 | 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument('--vocdir', type=str, required=False, default='', help='pascal voc 2007 dataset') 36 | parser.add_argument('--split', type=str, required=False, default='train', choices=['train', 'trainval'], help='training split') 37 | parser.add_argument('--model', type=str, required=False, default='', 38 | help='evaluate this model') 39 | parser.add_argument('--nit', type=int, default=80000, help='Number of training iterations') 40 | parser.add_argument('--fc6_8', type=int, default=1, help='If true, train only the final classifier') 41 | parser.add_argument('--train_batchnorm', type=int, default=0, help='If true, train batch-norm layer parameters') 42 | parser.add_argument('--eval_random_crops', type=int, default=1, help='If true, eval on 10 random crops, otherwise eval on 10 fixed crops') 43 | parser.add_argument('--stepsize', type=int, default=5000, help='Decay step') 44 | parser.add_argument('--lr', type=float, required=False, default=0.003, help='learning rate') 45 | parser.add_argument('--wd', type=float, required=False, default=1e-6, help='weight decay') 46 | parser.add_argument('--min_scale', type=float, required=False, default=0.1, help='scale') 47 | parser.add_argument('--max_scale', type=float, required=False, default=0.5, help='scale') 48 | parser.add_argument('--seed', type=int, default=31, help='random seed') 49 | 50 | def main(): 51 | args = parser.parse_args() 52 | print(args) 53 | 54 | # fix random seeds 55 | torch.manual_seed(args.seed) 56 | torch.cuda.manual_seed_all(args.seed) 57 | np.random.seed(args.seed) 58 | 59 | # create model and move it to gpu 60 | model = load_model(args.model) 61 | model.top_layer = nn.Linear(model.top_layer.weight.size(1), 20) 62 | model.cuda() 63 | cudnn.benchmark = True 64 | 65 | # what partition of the data to use 66 | if args.split == 'train': 67 | args.test = 'val' 68 | elif args.split == 'trainval': 69 | args.test = 'test' 70 | # data loader 71 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 72 | std=[0.229, 0.224, 0.225]) 73 | dataset = VOC2007_dataset(args.vocdir, split=args.split, transform=transforms.Compose([ 74 | transforms.RandomHorizontalFlip(), 75 | transforms.RandomResizedCrop(224, scale=(args.min_scale, args.max_scale), ratio=(1, 1)), 76 | transforms.ToTensor(), 77 | normalize, 78 | ])) 79 | 80 | loader = torch.utils.data.DataLoader(dataset, 81 | batch_size=16, shuffle=False, 82 | num_workers=24, pin_memory=True) 83 | print('PASCAL VOC 2007 ' + args.split + ' dataset loaded') 84 | 85 | # re initialize classifier 86 | for y, m in enumerate(model.classifier.modules()): 87 | if isinstance(m, nn.Linear): 88 | m.weight.data.normal_(0, 0.01) 89 | m.bias.data.fill_(0.1) 90 | model.top_layer.bias.data.fill_(0.1) 91 | 92 | if args.fc6_8: 93 | # freeze some layers 94 | for param in model.features.parameters(): 95 | 
param.requires_grad = False 96 | # unfreeze batchnorm scaling 97 | if args.train_batchnorm: 98 | for layer in model.modules(): 99 | if isinstance(layer, torch.nn.BatchNorm2d): 100 | for param in layer.parameters(): 101 | param.requires_grad = True 102 | 103 | # set optimizer 104 | optimizer = torch.optim.SGD( 105 | filter(lambda x: x.requires_grad, model.parameters()), 106 | lr=args.lr, 107 | momentum=0.9, 108 | weight_decay=args.wd, 109 | ) 110 | 111 | criterion = nn.BCEWithLogitsLoss(reduction='none') 112 | 113 | print('Start training') 114 | it = 0 115 | losses = AverageMeter() 116 | while it < args.nit: 117 | it = train( 118 | loader, 119 | model, 120 | optimizer, 121 | criterion, 122 | args.fc6_8, 123 | losses, 124 | it=it, 125 | total_iterations=args.nit, 126 | stepsize=args.stepsize, 127 | ) 128 | 129 | print('Evaluation') 130 | if args.eval_random_crops: 131 | transform_eval = [ 132 | transforms.RandomHorizontalFlip(), 133 | transforms.RandomResizedCrop(224, scale=(args.min_scale, args.max_scale), ratio=(1, 1)), 134 | transforms.ToTensor(), 135 | normalize, 136 | ] 137 | else: 138 | transform_eval = [ 139 | transforms.Resize(256), 140 | transforms.TenCrop(224), 141 | transforms.Lambda(lambda crops: torch.stack([normalize(transforms.ToTensor()(crop)) for crop in crops])) 142 | ] 143 | 144 | print('Train set') 145 | train_dataset = VOC2007_dataset(args.vocdir, split=args.split, transform=transforms.Compose(transform_eval)) 146 | train_loader = torch.utils.data.DataLoader( 147 | train_dataset, 148 | batch_size=1, 149 | shuffle=False, 150 | num_workers=24, 151 | pin_memory=True, 152 | ) 153 | evaluate(train_loader, model, args.eval_random_crops) 154 | 155 | print('Test set') 156 | test_dataset = VOC2007_dataset(args.vocdir, split=args.test, transform=transforms.Compose(transform_eval)) 157 | test_loader = torch.utils.data.DataLoader( 158 | test_dataset, 159 | batch_size=1, 160 | shuffle=False, 161 | num_workers=24, 162 | pin_memory=True, 163 | ) 164 | evaluate(test_loader, model, args.eval_random_crops) 165 | 166 | 167 | def evaluate(loader, model, eval_random_crops): 168 | model.eval() 169 | gts = [] 170 | scr = [] 171 | for crop in range(9 * eval_random_crops + 1): 172 | for i, (input, target) in enumerate(loader): 173 | # move input to gpu and optionally reshape it 174 | if len(input.size()) == 5: 175 | bs, ncrops, c, h, w = input.size() 176 | input = input.view(-1, c, h, w) 177 | input = input.cuda(non_blocking=True) 178 | 179 | # forward pass without grad computation 180 | with torch.no_grad(): 181 | output = model(input) 182 | if crop < 1 : 183 | scr.append(torch.sum(output, 0, keepdim=True).cpu().numpy()) 184 | gts.append(target) 185 | else: 186 | scr[i] += output.cpu().numpy() 187 | gts = np.concatenate(gts, axis=0).T 188 | scr = np.concatenate(scr, axis=0).T 189 | aps = [] 190 | for i in range(20): 191 | # Subtract eps from score to make AP work for tied scores 192 | ap = metrics.average_precision_score(gts[i][gts[i]<=1], scr[i][gts[i]<=1]-1e-5*gts[i][gts[i]<=1]) 193 | aps.append( ap ) 194 | print(np.mean(aps), ' ', ' '.join(['%0.2f'%a for a in aps])) 195 | 196 | 197 | def train(loader, model, optimizer, criterion, fc6_8, losses, it=0, total_iterations=None, stepsize=None, verbose=True): 198 | # to log 199 | batch_time = AverageMeter() 200 | data_time = AverageMeter() 201 | top1 = AverageMeter() 202 | end = time.time() 203 | 204 | current_iteration = it 205 | 206 | # use dropout for the MLP 207 | model.train() 208 | # in the batch norms always use global statistics 209 | 
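    # model.train() above mainly re-enables dropout in the fc6-8 classifier; switching the
    # convolutional trunk back to eval() keeps its batch-norm layers on running statistics,
    # which is the intended behaviour since those layers are frozen when --fc6_8 is set
    # (only their affine parameters are updated if --train_batchnorm is also given).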
model.features.eval() 210 | 211 | for (input, target) in loader: 212 | # measure data loading time 213 | data_time.update(time.time() - end) 214 | 215 | # adjust learning rate 216 | if current_iteration != 0 and current_iteration % stepsize == 0: 217 | for param_group in optimizer.param_groups: 218 | param_group['lr'] = param_group['lr'] * 0.5 219 | print('iter {0} learning rate is {1}'.format(current_iteration, param_group['lr'])) 220 | 221 | # move input to gpu 222 | input = input.cuda(non_blocking=True) 223 | 224 | # forward pass with or without grad computation 225 | output = model(input) 226 | 227 | target = target.float().cuda() 228 | mask = (target == 255) 229 | loss = torch.sum(criterion(output, target).masked_fill_(mask, 0)) / target.size(0) 230 | 231 | # backward 232 | optimizer.zero_grad() 233 | loss.backward() 234 | # clip gradients 235 | torch.nn.utils.clip_grad_norm_(model.parameters(), 10) 236 | # and weights update 237 | optimizer.step() 238 | 239 | # measure accuracy and record loss 240 | losses.update(loss.item(), input.size(0)) 241 | 242 | # measure elapsed time 243 | batch_time.update(time.time() - end) 244 | end = time.time() 245 | if verbose is True and current_iteration % 25 == 0: 246 | print('Iteration[{0}]\t' 247 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 248 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 249 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format( 250 | current_iteration, batch_time=batch_time, 251 | data_time=data_time, loss=losses)) 252 | current_iteration = current_iteration + 1 253 | if total_iterations is not None and current_iteration == total_iterations: 254 | break 255 | return current_iteration 256 | 257 | 258 | class VOC2007_dataset(torch.utils.data.Dataset): 259 | def __init__(self, voc_dir, split='train', transform=None): 260 | # Find the image sets 261 | image_set_dir = os.path.join(voc_dir, 'ImageSets', 'Main') 262 | image_sets = glob.glob(os.path.join(image_set_dir, '*_' + split + '.txt')) 263 | assert len(image_sets) == 20 264 | # Read the labels 265 | self.n_labels = len(image_sets) 266 | images = defaultdict(lambda:-np.ones(self.n_labels, dtype=np.uint8)) 267 | for k, s in enumerate(sorted(image_sets)): 268 | for l in open(s, 'r'): 269 | name, lbl = l.strip().split() 270 | lbl = int(lbl) 271 | # Switch the ignore label and 0 label (in VOC -1: not present, 0: ignore) 272 | if lbl < 0: 273 | lbl = 0 274 | elif lbl == 0: 275 | lbl = 255 276 | images[os.path.join(voc_dir, 'JPEGImages', name + '.jpg')][k] = lbl 277 | self.images = [(k, images[k]) for k in images.keys()] 278 | np.random.shuffle(self.images) 279 | self.transform = transform 280 | 281 | def __len__(self): 282 | return len(self.images) 283 | 284 | def __getitem__(self, i): 285 | img = Image.open(self.images[i][0]) 286 | img = img.convert('RGB') 287 | if self.transform is not None: 288 | img = self.transform(img) 289 | return img, self.images[i][1] 290 | 291 | if __name__ == '__main__': 292 | main() 293 | 294 | -------------------------------------------------------------------------------- /eval_voc_classif.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | #!/bin/bash 8 | 9 | VOCDIR="" 10 | MODELROOT="${HOME}/deepcluster_models" 11 | MODEL="${MODELROOT}/alexnet/checkpoint.pth.tar" 12 | 13 | PYTHON="${HOME}/test/conda/bin/python" 14 | 15 | # with training the batch norm 16 | # 72.0 mAP 17 | $PYTHON eval_voc_classif.py --vocdir $VOCDIR --model $MODEL --split trainval --fc6_8 1 --train_batchnorm 1 18 | 19 | # without training the batch norm 20 | # 70.4 mAP 21 | $PYTHON eval_voc_classif.py --vocdir $VOCDIR --model $MODEL --split trainval --fc6_8 1 --train_batchnorm 0 22 | -------------------------------------------------------------------------------- /eval_voc_classif_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2017-present, Facebook, Inc. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | VOC="/private/home/bojanowski/data/VOCdevkit/VOC2007" 10 | CAFFE="/private/home/bojanowski/code/unsup-eval-pascal/voc-classification/caffe" 11 | 12 | # download code for pascal classification 13 | mkdir -p third-parties 14 | if [ ! -d third-parties/voc-classification ]; then 15 | git clone https://github.com/philkr/voc-classification.git third-parties/voc-classification 16 | fi 17 | 18 | # user config 19 | USERCONFIG=third-parties/voc-classification/src/user_config.py 20 | /bin/cat <<EOM >$USERCONFIG 21 | from os import path 22 | # Path to caffe 23 | CAFFE_DIR = "${CAFFE}" 24 | # Path to the VOC 2007 or 2012 directory 25 | VOC_DIR = "${VOC}" 26 | EOM 27 | 28 | # change stepsize in train_cls.py 29 | sed -i -e "s/stepsize=10000/stepsize=20000/g" third-parties/voc-classification/src/train_cls.py 30 | sed -i -e "s/stepsize=5000/stepsize=20000/g" third-parties/voc-classification/src/train_cls.py 31 | 32 | # run transfer 33 | MODELROOT="${HOME}/deepcluster_models" 34 | PROTO="${MODELROOT}/alexnet/model.prototxt" 35 | MODEL="${MODELROOT}/alexnet/model.caffemodel" 36 | EXP="${HOME}/deepcluster_exp/pascal_all" 37 | LR=0.001 38 | BSZ=16 39 | 40 | mkdir -p ${EXP} 41 | 42 | python third-parties/voc-classification/src/train_cls.py ${PROTO} ${MODEL} --output ${EXP}/ \ 43 | --clip ThresholdBackward28 --train-from ConvNdBackward5 \ 44 | --random-from DropoutBackward23 --gpu 0 --no-mean \ 45 | -lr ${LR} -bs ${BSZ} -nit 150000 2>&1 | tee ${EXP}/output.txt 46 | -------------------------------------------------------------------------------- /eval_voc_classif_fc6_8.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | VOC='/private/home/bojanowski/data/VOCdevkit/VOC2007' 10 | CAFFE='/private/home/bojanowski/code/unsup-eval-pascal/voc-classification/caffe' 11 | 12 | # download code for pascal classification 13 | mkdir -p third-parties 14 | if [ ! 
-d third-parties/voc-classification ]; then 15 | git clone https://github.com/philkr/voc-classification.git third-parties/voc-classification 16 | fi 17 | 18 | # user config 19 | USERCONFIG=third-parties/voc-classification/src/user_config.py 20 | /bin/cat <<EOM >$USERCONFIG 21 | from os import path 22 | # Path to caffe 23 | CAFFE_DIR = '${CAFFE}' 24 | # Path to the VOC 2007 or 2012 directory 25 | VOC_DIR = '${VOC}' 26 | EOM 27 | 28 | # change stepsize in train_cls.py 29 | sed -i -e 's/stepsize=10000/stepsize=5000/g' third-parties/voc-classification/src/train_cls.py 30 | sed -i -e 's/stepsize=20000/stepsize=5000/g' third-parties/voc-classification/src/train_cls.py 31 | 32 | # run transfer 33 | PROTO="/private/home/mathilde/model-to-release/alexnet/model.prototxt" 34 | MODEL="/private/home/mathilde/model-to-release/alexnet/model.caffemodel" 35 | LR=0.003 36 | BSZ=16 37 | EXP="" 38 | 39 | mkdir -p ${EXP} 40 | 41 | python third-parties/voc-classification/src/train_cls.py ${PROTO} ${MODEL} --output ${EXP}/ \ 42 | --clip ThresholdBackward28 --train-from DropoutBackward23 \ 43 | --random-from DropoutBackward23 --gpu 0 --no-mean \ 44 | -lr ${LR} -bs ${BSZ} -nit 150000 2>&1 | tee ${EXP}/output.txt 45 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import argparse 8 | import os 9 | import pickle 10 | import time 11 | 12 | import faiss 13 | import numpy as np 14 | from sklearn.metrics.cluster import normalized_mutual_info_score 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torchvision.transforms as transforms 22 | import torchvision.datasets as datasets 23 | 24 | import clustering 25 | import models 26 | from util import AverageMeter, Logger, UnifLabelSampler 27 | 28 | 29 | def parse_args(): 30 | parser = argparse.ArgumentParser(description='PyTorch Implementation of DeepCluster') 31 | 32 | parser.add_argument('data', metavar='DIR', help='path to dataset') 33 | parser.add_argument('--arch', '-a', type=str, metavar='ARCH', 34 | choices=['alexnet', 'vgg16'], default='alexnet', 35 | help='CNN architecture (default: alexnet)') 36 | parser.add_argument('--sobel', action='store_true', help='Sobel filtering') 37 | parser.add_argument('--clustering', type=str, choices=['Kmeans', 'PIC'], 38 | default='Kmeans', help='clustering algorithm (default: Kmeans)') 39 | parser.add_argument('--nmb_cluster', '--k', type=int, default=10000, 40 | help='number of cluster for k-means (default: 10000)') 41 | parser.add_argument('--lr', default=0.05, type=float, 42 | help='learning rate (default: 0.05)') 43 | parser.add_argument('--wd', default=-5, type=float, 44 | help='weight decay pow (default: -5)') 45 | parser.add_argument('--reassign', type=float, default=1., 46 | help="""how many epochs of training between two consecutive 47 | reassignments of clusters (default: 1)""") 48 | parser.add_argument('--workers', default=4, type=int, 49 | help='number of data loading workers (default: 4)') 50 | parser.add_argument('--epochs', type=int, default=200, 51 | help='number of total epochs to run (default: 200)') 52 | parser.add_argument('--start_epoch', default=0, 
type=int, 53 | help='manual epoch number (useful on restarts) (default: 0)') 54 | parser.add_argument('--batch', default=256, type=int, 55 | help='mini-batch size (default: 256)') 56 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum (default: 0.9)') 57 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 58 | help='path to checkpoint (default: None)') 59 | parser.add_argument('--checkpoints', type=int, default=25000, 60 | help='how many iterations between two checkpoints (default: 25000)') 61 | parser.add_argument('--seed', type=int, default=31, help='random seed (default: 31)') 62 | parser.add_argument('--exp', type=str, default='', help='path to exp folder') 63 | parser.add_argument('--verbose', action='store_true', help='chatty') 64 | return parser.parse_args() 65 | 66 | 67 | def main(args): 68 | # fix random seeds 69 | torch.manual_seed(args.seed) 70 | torch.cuda.manual_seed_all(args.seed) 71 | np.random.seed(args.seed) 72 | 73 | # CNN 74 | if args.verbose: 75 | print('Architecture: {}'.format(args.arch)) 76 | model = models.__dict__[args.arch](sobel=args.sobel) 77 | fd = int(model.top_layer.weight.size()[1]) 78 | model.top_layer = None 79 | model.features = torch.nn.DataParallel(model.features) 80 | model.cuda() 81 | cudnn.benchmark = True 82 | 83 | # create optimizer 84 | optimizer = torch.optim.SGD( 85 | filter(lambda x: x.requires_grad, model.parameters()), 86 | lr=args.lr, 87 | momentum=args.momentum, 88 | weight_decay=10**args.wd, 89 | ) 90 | 91 | # define loss function 92 | criterion = nn.CrossEntropyLoss().cuda() 93 | 94 | # optionally resume from a checkpoint 95 | if args.resume: 96 | if os.path.isfile(args.resume): 97 | print("=> loading checkpoint '{}'".format(args.resume)) 98 | checkpoint = torch.load(args.resume) 99 | args.start_epoch = checkpoint['epoch'] 100 | # remove top_layer parameters from checkpoint 101 | for key in checkpoint['state_dict']: 102 | if 'top_layer' in key: 103 | del checkpoint['state_dict'][key] 104 | model.load_state_dict(checkpoint['state_dict']) 105 | optimizer.load_state_dict(checkpoint['optimizer']) 106 | print("=> loaded checkpoint '{}' (epoch {})" 107 | .format(args.resume, checkpoint['epoch'])) 108 | else: 109 | print("=> no checkpoint found at '{}'".format(args.resume)) 110 | 111 | # creating checkpoint repo 112 | exp_check = os.path.join(args.exp, 'checkpoints') 113 | if not os.path.isdir(exp_check): 114 | os.makedirs(exp_check) 115 | 116 | # creating cluster assignments log 117 | cluster_log = Logger(os.path.join(args.exp, 'clusters')) 118 | 119 | # preprocessing of data 120 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 121 | std=[0.229, 0.224, 0.225]) 122 | tra = [transforms.Resize(256), 123 | transforms.CenterCrop(224), 124 | transforms.ToTensor(), 125 | normalize] 126 | 127 | # load the data 128 | end = time.time() 129 | dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) 130 | if args.verbose: 131 | print('Load dataset: {0:.2f} s'.format(time.time() - end)) 132 | 133 | dataloader = torch.utils.data.DataLoader(dataset, 134 | batch_size=args.batch, 135 | num_workers=args.workers, 136 | pin_memory=True) 137 | 138 | # clustering algorithm to use 139 | deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) 140 | 141 | # training convnet with DeepCluster 142 | for epoch in range(args.start_epoch, args.epochs): 143 | end = time.time() 144 | 145 | # remove head 146 | model.top_layer = None 147 | model.classifier = 
nn.Sequential(*list(model.classifier.children())[:-1]) 148 | 149 | # get the features for the whole dataset 150 | features = compute_features(dataloader, model, len(dataset)) 151 | 152 | # cluster the features 153 | if args.verbose: 154 | print('Cluster the features') 155 | clustering_loss = deepcluster.cluster(features, verbose=args.verbose) 156 | 157 | # assign pseudo-labels 158 | if args.verbose: 159 | print('Assign pseudo labels') 160 | train_dataset = clustering.cluster_assign(deepcluster.images_lists, 161 | dataset.imgs) 162 | 163 | # uniformly sample per target 164 | sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), 165 | deepcluster.images_lists) 166 | 167 | train_dataloader = torch.utils.data.DataLoader( 168 | train_dataset, 169 | batch_size=args.batch, 170 | num_workers=args.workers, 171 | sampler=sampler, 172 | pin_memory=True, 173 | ) 174 | 175 | # set last fully connected layer 176 | mlp = list(model.classifier.children()) 177 | mlp.append(nn.ReLU(inplace=True).cuda()) 178 | model.classifier = nn.Sequential(*mlp) 179 | model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) 180 | model.top_layer.weight.data.normal_(0, 0.01) 181 | model.top_layer.bias.data.zero_() 182 | model.top_layer.cuda() 183 | 184 | # train network with clusters as pseudo-labels 185 | end = time.time() 186 | loss = train(train_dataloader, model, criterion, optimizer, epoch) 187 | 188 | # print log 189 | if args.verbose: 190 | print('###### Epoch [{0}] ###### \n' 191 | 'Time: {1:.3f} s\n' 192 | 'Clustering loss: {2:.3f} \n' 193 | 'ConvNet loss: {3:.3f}' 194 | .format(epoch, time.time() - end, clustering_loss, loss)) 195 | try: 196 | nmi = normalized_mutual_info_score( 197 | clustering.arrange_clustering(deepcluster.images_lists), 198 | clustering.arrange_clustering(cluster_log.data[-1]) 199 | ) 200 | print('NMI against previous assignment: {0:.3f}'.format(nmi)) 201 | except IndexError: 202 | pass 203 | print('####################### \n') 204 | # save running checkpoint 205 | torch.save({'epoch': epoch + 1, 206 | 'arch': args.arch, 207 | 'state_dict': model.state_dict(), 208 | 'optimizer' : optimizer.state_dict()}, 209 | os.path.join(args.exp, 'checkpoint.pth.tar')) 210 | 211 | # save cluster assignments 212 | cluster_log.log(deepcluster.images_lists) 213 | 214 | 215 | def train(loader, model, crit, opt, epoch): 216 | """Training of the CNN. 
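        Performs one pass over the pseudo-labelled dataset. Note that the top layer is
        re-created in main() after `opt` has been built, so it is updated by a separate
        optimizer (optimizer_tl) defined inside this function.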
217 | Args: 218 | loader (torch.utils.data.DataLoader): Data loader 219 | model (nn.Module): CNN 220 | crit (torch.nn): loss 221 | opt (torch.optim.SGD): optimizer for every parameters with True 222 | requires_grad in model except top layer 223 | epoch (int) 224 | """ 225 | batch_time = AverageMeter() 226 | losses = AverageMeter() 227 | data_time = AverageMeter() 228 | forward_time = AverageMeter() 229 | backward_time = AverageMeter() 230 | 231 | # switch to train mode 232 | model.train() 233 | 234 | # create an optimizer for the last fc layer 235 | optimizer_tl = torch.optim.SGD( 236 | model.top_layer.parameters(), 237 | lr=args.lr, 238 | weight_decay=10**args.wd, 239 | ) 240 | 241 | end = time.time() 242 | for i, (input_tensor, target) in enumerate(loader): 243 | data_time.update(time.time() - end) 244 | 245 | # save checkpoint 246 | n = len(loader) * epoch + i 247 | if n % args.checkpoints == 0: 248 | path = os.path.join( 249 | args.exp, 250 | 'checkpoints', 251 | 'checkpoint_' + str(n / args.checkpoints) + '.pth.tar', 252 | ) 253 | if args.verbose: 254 | print('Save checkpoint at: {0}'.format(path)) 255 | torch.save({ 256 | 'epoch': epoch + 1, 257 | 'arch': args.arch, 258 | 'state_dict': model.state_dict(), 259 | 'optimizer' : opt.state_dict() 260 | }, path) 261 | 262 | target = target.cuda(async=True) 263 | input_var = torch.autograd.Variable(input_tensor.cuda()) 264 | target_var = torch.autograd.Variable(target) 265 | 266 | output = model(input_var) 267 | loss = crit(output, target_var) 268 | 269 | # record loss 270 | losses.update(loss.data[0], input_tensor.size(0)) 271 | 272 | # compute gradient and do SGD step 273 | opt.zero_grad() 274 | optimizer_tl.zero_grad() 275 | loss.backward() 276 | opt.step() 277 | optimizer_tl.step() 278 | 279 | # measure elapsed time 280 | batch_time.update(time.time() - end) 281 | end = time.time() 282 | 283 | if args.verbose and (i % 200) == 0: 284 | print('Epoch: [{0}][{1}/{2}]\t' 285 | 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 286 | 'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t' 287 | 'Loss: {loss.val:.4f} ({loss.avg:.4f})' 288 | .format(epoch, i, len(loader), batch_time=batch_time, 289 | data_time=data_time, loss=losses)) 290 | 291 | return losses.avg 292 | 293 | def compute_features(dataloader, model, N): 294 | if args.verbose: 295 | print('Compute features') 296 | batch_time = AverageMeter() 297 | end = time.time() 298 | model.eval() 299 | # discard the label information in the dataloader 300 | for i, (input_tensor, _) in enumerate(dataloader): 301 | input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True) 302 | aux = model(input_var).data.cpu().numpy() 303 | 304 | if i == 0: 305 | features = np.zeros((N, aux.shape[1]), dtype='float32') 306 | 307 | aux = aux.astype('float32') 308 | if i < len(dataloader) - 1: 309 | features[i * args.batch: (i + 1) * args.batch] = aux 310 | else: 311 | # special treatment for final batch 312 | features[i * args.batch:] = aux 313 | 314 | # measure elapsed time 315 | batch_time.update(time.time() - end) 316 | end = time.time() 317 | 318 | if args.verbose and (i % 200) == 0: 319 | print('{0} / {1}\t' 320 | 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})' 321 | .format(i, len(dataloader), batch_time=batch_time)) 322 | return features 323 | 324 | 325 | if __name__ == '__main__': 326 | args = parse_args() 327 | main(args) 328 | -------------------------------------------------------------------------------- /main.sh: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | DIR="/datasets01/imagenet_full_size/061417/train" 10 | ARCH="alexnet" 11 | LR=0.05 12 | WD=-5 13 | K=10000 14 | WORKERS=12 15 | EXP="/private/home/${USER}/test/exp" 16 | PYTHON="/private/home/${USER}/test/conda/bin/python" 17 | 18 | mkdir -p ${EXP} 19 | 20 | CUDA_VISIBLE_DEVICES=0 ${PYTHON} main.py ${DIR} --exp ${EXP} --arch ${ARCH} \ 21 | --lr ${LR} --wd ${WD} --k ${K} --sobel --verbose --workers ${WORKERS} 22 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | from .vgg16 import * 8 | from .alexnet import * 9 | -------------------------------------------------------------------------------- /models/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import math 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | __all__ = [ 'AlexNet', 'alexnet'] 14 | 15 | # (number of filters, kernel size, stride, pad) 16 | CFG = { 17 | '2012': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M'] 18 | } 19 | 20 | 21 | class AlexNet(nn.Module): 22 | def __init__(self, features, num_classes, sobel): 23 | super(AlexNet, self).__init__() 24 | self.features = features 25 | self.classifier = nn.Sequential(nn.Dropout(0.5), 26 | nn.Linear(256 * 6 * 6, 4096), 27 | nn.ReLU(inplace=True), 28 | nn.Dropout(0.5), 29 | nn.Linear(4096, 4096), 30 | nn.ReLU(inplace=True)) 31 | 32 | self.top_layer = nn.Linear(4096, num_classes) 33 | self._initialize_weights() 34 | 35 | if sobel: 36 | grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0) 37 | grayscale.weight.data.fill_(1.0 / 3.0) 38 | grayscale.bias.data.zero_() 39 | sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1) 40 | sobel_filter.weight.data[0, 0].copy_( 41 | torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]]) 42 | ) 43 | sobel_filter.weight.data[1, 0].copy_( 44 | torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) 45 | ) 46 | sobel_filter.bias.data.zero_() 47 | self.sobel = nn.Sequential(grayscale, sobel_filter) 48 | for p in self.sobel.parameters(): 49 | p.requires_grad = False 50 | else: 51 | self.sobel = None 52 | 53 | def forward(self, x): 54 | if self.sobel: 55 | x = self.sobel(x) 56 | x = self.features(x) 57 | x = x.view(x.size(0), 256 * 6 * 6) 58 | x = self.classifier(x) 59 | if self.top_layer: 60 | x = self.top_layer(x) 61 | return x 62 | 63 | def _initialize_weights(self): 64 | for y, m in enumerate(self.modules()): 65 | if isinstance(m, nn.Conv2d): 66 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 67 | for i in range(m.out_channels): 68 | m.weight.data[i].normal_(0, math.sqrt(2. 
/ n)) 69 | if m.bias is not None: 70 | m.bias.data.zero_() 71 | elif isinstance(m, nn.BatchNorm2d): 72 | m.weight.data.fill_(1) 73 | m.bias.data.zero_() 74 | elif isinstance(m, nn.Linear): 75 | m.weight.data.normal_(0, 0.01) 76 | m.bias.data.zero_() 77 | 78 | 79 | def make_layers_features(cfg, input_dim, bn): 80 | layers = [] 81 | in_channels = input_dim 82 | for v in cfg: 83 | if v == 'M': 84 | layers += [nn.MaxPool2d(kernel_size=3, stride=2)] 85 | else: 86 | conv2d = nn.Conv2d(in_channels, v[0], kernel_size=v[1], stride=v[2], padding=v[3]) 87 | if bn: 88 | layers += [conv2d, nn.BatchNorm2d(v[0]), nn.ReLU(inplace=True)] 89 | else: 90 | layers += [conv2d, nn.ReLU(inplace=True)] 91 | in_channels = v[0] 92 | return nn.Sequential(*layers) 93 | 94 | 95 | def alexnet(sobel=False, bn=True, out=1000): 96 | dim = 2 + int(not sobel) 97 | model = AlexNet(make_layers_features(CFG['2012'], dim, bn=bn), out, sobel) 98 | return model 99 | -------------------------------------------------------------------------------- /models/vgg16.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import torch 8 | import torch.nn as nn 9 | import math 10 | from random import random as rd 11 | 12 | __all__ = [ 'VGG', 'vgg16'] 13 | 14 | 15 | class VGG(nn.Module): 16 | 17 | def __init__(self, features, num_classes, sobel): 18 | super(VGG, self).__init__() 19 | self.features = features 20 | self.classifier = nn.Sequential( 21 | nn.Linear(512 * 7 * 7, 4096), 22 | nn.ReLU(True), 23 | nn.Dropout(0.5), 24 | nn.Linear(4096, 4096), 25 | nn.ReLU(True) 26 | ) 27 | self.top_layer = nn.Linear(4096, num_classes) 28 | self._initialize_weights() 29 | if sobel: 30 | grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0) 31 | grayscale.weight.data.fill_(1.0 / 3.0) 32 | grayscale.bias.data.zero_() 33 | sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1) 34 | sobel_filter.weight.data[0,0].copy_( 35 | torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]]) 36 | ) 37 | sobel_filter.weight.data[1,0].copy_( 38 | torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) 39 | ) 40 | sobel_filter.bias.data.zero_() 41 | self.sobel = nn.Sequential(grayscale, sobel_filter) 42 | for p in self.sobel.parameters(): 43 | p.requires_grad = False 44 | else: 45 | self.sobel = None 46 | 47 | def forward(self, x): 48 | if self.sobel: 49 | x = self.sobel(x) 50 | x = self.features(x) 51 | x = x.view(x.size(0), -1) 52 | x = self.classifier(x) 53 | if self.top_layer: 54 | x = self.top_layer(x) 55 | return x 56 | 57 | def _initialize_weights(self): 58 | for y,m in enumerate(self.modules()): 59 | if isinstance(m, nn.Conv2d): 60 | #print(y) 61 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 62 | for i in range(m.out_channels): 63 | m.weight.data[i].normal_(0, math.sqrt(2. 
/ n)) 64 | if m.bias is not None: 65 | m.bias.data.zero_() 66 | elif isinstance(m, nn.BatchNorm2d): 67 | m.weight.data.fill_(1) 68 | m.bias.data.zero_() 69 | elif isinstance(m, nn.Linear): 70 | m.weight.data.normal_(0, 0.01) 71 | m.bias.data.zero_() 72 | 73 | 74 | def make_layers(input_dim, batch_norm): 75 | layers = [] 76 | in_channels = input_dim 77 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] 78 | for v in cfg: 79 | if v == 'M': 80 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 81 | else: 82 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 83 | if batch_norm: 84 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 85 | else: 86 | layers += [conv2d, nn.ReLU(inplace=True)] 87 | in_channels = v 88 | return nn.Sequential(*layers) 89 | 90 | 91 | def vgg16(sobel=False, bn=True, out=1000): 92 | dim = 2 + int(not sobel) 93 | model = VGG(make_layers(dim, bn), out, sobel) 94 | return model 95 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import os 8 | import pickle 9 | 10 | import numpy as np 11 | import torch 12 | from torch.utils.data.sampler import Sampler 13 | 14 | import models 15 | 16 | 17 | def load_model(path): 18 | """Loads the model and returns it without the DataParallel table.""" 19 | if os.path.isfile(path): 20 | print("=> loading checkpoint '{}'".format(path)) 21 | checkpoint = torch.load(path) 22 | 23 | # size of the top layer 24 | N = checkpoint['state_dict']['top_layer.bias'].size() 25 | 26 | # build skeleton of the model 27 | sob = 'sobel.0.weight' in checkpoint['state_dict'].keys() 28 | model = models.__dict__[checkpoint['arch']](sobel=sob, out=int(N[0])) 29 | 30 | # deal with a dataparallel table 31 | def rename_key(key): 32 | if 'module' not in key: 33 | return key 34 | return ''.join(key.split('.module')) 35 | 36 | checkpoint['state_dict'] = {rename_key(key): val 37 | for key, val 38 | in checkpoint['state_dict'].items()} 39 | 40 | # load weights 41 | model.load_state_dict(checkpoint['state_dict']) 42 | print("Loaded") 43 | else: 44 | model = None 45 | print("=> no checkpoint found at '{}'".format(path)) 46 | return model 47 | 48 | 49 | class UnifLabelSampler(Sampler): 50 | """Samples elements uniformly across pseudolabels. 51 | Args: 52 | N (int): size of returned iterator.
53 | images_lists: dict whose keys are targets (pseudolabels) and whose values are lists of data indexes with that target 54 | """ 55 | 56 | def __init__(self, N, images_lists): 57 | self.N = N 58 | self.images_lists = images_lists 59 | self.indexes = self.generate_indexes_epoch() 60 | 61 | def generate_indexes_epoch(self): 62 | nmb_non_empty_clusters = 0 63 | for i in range(len(self.images_lists)): 64 | if len(self.images_lists[i]) != 0: 65 | nmb_non_empty_clusters += 1 66 | 67 | size_per_pseudolabel = int(self.N / nmb_non_empty_clusters) + 1 68 | res = np.array([]) 69 | 70 | for i in range(len(self.images_lists)): 71 | # skip empty clusters 72 | if len(self.images_lists[i]) == 0: 73 | continue 74 | indexes = np.random.choice( 75 | self.images_lists[i], 76 | size_per_pseudolabel, 77 | replace=(len(self.images_lists[i]) <= size_per_pseudolabel) 78 | ) 79 | res = np.concatenate((res, indexes)) 80 | 81 | np.random.shuffle(res) 82 | res = list(res.astype('int')) 83 | if len(res) >= self.N: 84 | return res[:self.N] 85 | res += res[: (self.N - len(res))] 86 | return res 87 | 88 | def __iter__(self): 89 | return iter(self.indexes) 90 | 91 | def __len__(self): 92 | return len(self.indexes) 93 | 94 | 95 | class AverageMeter(object): 96 | """Computes and stores the average and current value""" 97 | def __init__(self): 98 | self.reset() 99 | 100 | def reset(self): 101 | self.val = 0 102 | self.avg = 0 103 | self.sum = 0 104 | self.count = 0 105 | 106 | def update(self, val, n=1): 107 | self.val = val 108 | self.sum += val * n 109 | self.count += n 110 | self.avg = self.sum / self.count 111 | 112 | 113 | def learning_rate_decay(optimizer, t, lr_0): 114 | for param_group in optimizer.param_groups: 115 | lr = lr_0 / np.sqrt(1 + lr_0 * param_group['weight_decay'] * t) 116 | param_group['lr'] = lr 117 | 118 | 119 | class Logger(object): 120 | """ Class to update every epoch to keep track of the results 121 | Methods: 122 | - log(): log and save 123 | """ 124 | 125 | def __init__(self, path): 126 | self.path = path 127 | self.data = [] 128 | 129 | def log(self, train_point): 130 | self.data.append(train_point) 131 | with open(self.path, 'wb') as fp: 132 | pickle.dump(self.data, fp, -1) 133 | -------------------------------------------------------------------------------- /visu/activ-retrieval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | # 7 | import argparse 8 | import os 9 | from shutil import copyfile 10 | import sys 11 | 12 | import numpy as np 13 | from PIL import Image 14 | import torch 15 | import torch.nn as nn 16 | import torchvision.transforms as transforms 17 | import torchvision.datasets as datasets 18 | 19 | sys.path.insert(0, '..') 20 | from util import load_model 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(description='Retrieve images with maximal activations') 25 | parser.add_argument('--data', type=str, help='path to dataset') 26 | parser.add_argument('--model', type=str, help='Model') 27 | parser.add_argument('--conv', type=int, default=1, help='convolutional layer') 28 | parser.add_argument('--exp', type=str, default='', help='path to res') 29 | parser.add_argument('--count', type=int, default=9, help='save this many images') 30 | parser.add_argument('--workers', default=4, type=int, 31 | help='number of data loading workers (default: 4)') 32 | return parser.parse_args() 33 | 34 | 35 | def main(args): 36 | # create repo 37 | repo = os.path.join(args.exp, 'conv' + str(args.conv)) 38 | if not os.path.isdir(repo): 39 | os.makedirs(repo) 40 | 41 | # build model 42 | model = load_model(args.model) 43 | model.cuda() 44 | for params in model.parameters(): 45 | params.requires_grad = False 46 | model.eval() 47 | 48 | #load data 49 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 50 | std=[0.229, 0.224, 0.225]) 51 | tra = [transforms.Resize(256), 52 | transforms.CenterCrop(224), 53 | transforms.ToTensor(), 54 | normalize] 55 | 56 | # dataset 57 | dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) 58 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=256, 59 | num_workers=args.workers) 60 | 61 | # keys are filters and value are arrays with activation scores for the whole dataset 62 | layers_activations = {} 63 | for i, (input_tensor, _) in enumerate(dataloader): 64 | input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True) 65 | activations = forward(model, args.conv, input_var) 66 | 67 | if i == 0: 68 | layers_activations = {filt: np.zeros(len(dataset)) for filt in activations} 69 | if i < len(dataloader) - 1: 70 | e_idx = (i + 1) * 256 71 | else: 72 | e_idx = len(dataset) 73 | s_idx = i * 256 74 | for filt in activations: 75 | layers_activations[filt][s_idx: e_idx] = activations[filt].cpu().data.numpy() 76 | 77 | if i % 100 == 0: 78 | print('{0}/{1}'.format(i, len(dataloader))) 79 | 80 | # save top N images for each filter 81 | for filt in layers_activations: 82 | repofilter = os.path.join(repo, filt) 83 | if not os.path.isdir(repofilter): 84 | os.mkdir(repofilter) 85 | top = np.argsort(layers_activations[filt])[::-1] 86 | if args.count > 0: 87 | top = top[:args.count] 88 | 89 | for pos, img in enumerate(top): 90 | src, _ = dataset.imgs[img] 91 | copyfile(src, os.path.join(repofilter, "{}_{}".format(pos, src.split('/')[-1]))) 92 | 93 | 94 | def forward(model, my_layer, x): 95 | if model.sobel is not None: 96 | x = model.sobel(x) 97 | layer = 1 98 | res = {} 99 | for m in model.features.modules(): 100 | if not isinstance(m, nn.Sequential): 101 | x = m(x) 102 | if isinstance(m, nn.ReLU): 103 | if layer == my_layer: 104 | for channel in range(int(x.size()[1])): 105 | key = 'layer' + str(layer) + '-channel' + str(channel) 106 | res[key] = torch.squeeze(x.mean(3).mean(2))[:, channel] 107 | return res 108 | layer = layer + 1 109 | return res 110 | 111 | 112 | if __name__ == '__main__': 113 | args = parse_args() 114 | 
main(args) 115 | -------------------------------------------------------------------------------- /visu/activ-retrieval.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | MODEL="$HOME/deepcluster_models/vgg16/checkpoint.pth.tar" 10 | EXP="$HOME/temp/" 11 | CONV=5 12 | DATA='/datasets01/imagenet_full_size/061417/val' 13 | 14 | python activ-retrieval.py --model ${MODEL} --exp ${EXP} --conv ${CONV} --data ${DATA} 15 | -------------------------------------------------------------------------------- /visu/gradient_ascent.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import argparse 8 | import os 9 | from scipy.ndimage.filters import gaussian_filter 10 | import sys 11 | 12 | import numpy as np 13 | from PIL import Image 14 | import torch 15 | import torch.nn as nn 16 | import torchvision 17 | import torchvision.transforms as transforms 18 | 19 | sys.path.insert(0, '..') 20 | from util import load_model 21 | 22 | parser = argparse.ArgumentParser(description='Gradient ascent visualisation') 23 | parser.add_argument('--model', type=str, help='Model') 24 | parser.add_argument('--arch', type=str, default='alexnet', choices=['alexnet', 'vgg16'], help='arch') 25 | parser.add_argument('--conv', type=int, default=1, help='convolutional layer') 26 | parser.add_argument('--exp', type=str, default='', help='path to res') 27 | parser.add_argument('--lr', type=float, default=3, help='learning rate (default: 3)') 28 | parser.add_argument('--wd', type=float, default=0.00001, help='weight decay (default: 10^-5)') 29 | parser.add_argument('--sig', type=float, default=0.3, help='gaussian blur (default: 0.3)') 30 | parser.add_argument('--step', type=int, default=5, help='number of iter between gaussian blurs (default: 5)') 31 | parser.add_argument('--niter', type=int, default=1000, help='total number of iterations (default: 1000)') 32 | parser.add_argument('--idim', type=int, default=224, help='size of input image (default: 224)') 33 | 34 | CONV = {'alexnet': [96, 256, 384, 384, 256], 35 | 'vgg16': [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512]} 36 | 37 | 38 | def main(): 39 | args = parser.parse_args() 40 | 41 | # sanity check 42 | if args.arch == 'alexnet': 43 | assert args.conv < 6 44 | elif args.arch == 'vgg16': 45 | assert args.conv < 14 46 | 47 | # create repo 48 | repo = os.path.join(args.exp, 'conv' + str(args.conv)) 49 | if not os.path.isdir(repo): 50 | os.makedirs(repo) 51 | 52 | # build model 53 | model = load_model(args.model) 54 | model.cuda() 55 | for params in model.parameters(): 56 | params.requires_grad = False 57 | model.eval() 58 | 59 | def gradient_ascent(f): 60 | print f, 61 | sys.stdout.flush() 62 | fname_out = '{0}/layer{1}-channel{2}.jpeg'.format(repo, args.conv, f) 63 | 64 | img_noise = np.random.normal(size=(args.idim, args.idim, 3)) * 20 + 128 65 | img_noise = img_noise.astype('float32') 66 | inp = transforms.ToTensor()(img_noise) 67 | inp = torch.unsqueeze(inp, 0) 68 | 69 | for it in range(args.niter): 70 | x = torch.autograd.Variable(inp.cuda(), 
requires_grad=True) 71 | out = forward(model, args.conv-1, f, x) 72 | criterion = nn.CrossEntropyLoss() 73 | filt_var = torch.autograd.Variable(torch.ones(1).long()*f).cuda() 74 | output = out.mean(3).mean(2) 75 | loss = - criterion(output, filt_var) - args.wd*torch.norm(x)**2 76 | 77 | # compute gradient 78 | loss.backward() 79 | 80 | # normalize gradient 81 | grads = x.grad.data.cpu() 82 | grads = grads.div(torch.norm(grads)+1e-8) 83 | 84 | # apply gradient 85 | inp = inp.add(args.lr*grads) 86 | 87 | # gaussian blur 88 | if it%args.step == 0: 89 | inp = gaussian_filter(torch.squeeze(inp).numpy().transpose((2, 1, 0)), 90 | sigma=(args.sig, args.sig, 0)) 91 | inp = torch.unsqueeze(torch.from_numpy(inp).float().transpose(2, 0), 0) 92 | 93 | # save image at the last iteration 94 | if it == args.niter - 1: 95 | a = deprocess_image(inp.numpy()) 96 | Image.fromarray(a).save(fname_out) 97 | 98 | map(gradient_ascent, range(CONV[args.arch][args.conv-1])) 99 | 100 | 101 | def deprocess_image(x): 102 | x = x[0, :, :, :] 103 | # normalize tensor: center on 0., ensure std is 0.1 104 | x -= x.mean() 105 | x /= (x.std() + 1e-5) 106 | x *= 0.1 107 | 108 | # clip to [0, 1] 109 | x += 0.5 110 | x = np.clip(x, 0, 1) 111 | 112 | # convert to RGB array 113 | x *= 255 114 | x = x.transpose((1, 2, 0)) 115 | x = np.clip(x, 0, 255).astype('uint8') 116 | return x 117 | 118 | 119 | def forward(model, layer, channel, x): 120 | if model.sobel is not None: 121 | x = model.sobel(x) 122 | count = 0 123 | for y, m in enumerate(model.features.modules()): 124 | if not isinstance(m, nn.Sequential): 125 | x = m(x) 126 | if isinstance(m, nn.Conv2d): 127 | if count == layer: 128 | res = x 129 | if isinstance(m, nn.ReLU): 130 | if count == layer: 131 | # check if channel is not activated 132 | if x[:, channel, :, :].mean().data.cpu().numpy() == 0: 133 | return res 134 | return x 135 | count = count + 1 136 | 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /visu/gradient_ascent.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | MODEL='/private/home/mathilde/model-to-release/alexnet/checkpoint.pth.tar' 10 | ARCH='alexnet' 11 | EXP='/private/home/mathilde/temp' 12 | CONV=5 13 | 14 | python gradient_ascent.py --model ${MODEL} --exp ${EXP} --conv ${CONV} --arch ${ARCH} 15 | --------------------------------------------------------------------------------
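Usage note (not part of the repository): util.load_model together with the model definitions above is enough to run inference on a trained checkpoint. Below is a minimal, hypothetical sketch; the checkpoint path, the image path, and the assumption that it runs from the repository root (so that util and models are importable) are all illustrative, and the preprocessing simply mirrors visu/activ-retrieval.py.

# Hypothetical paths; run from the repository root.
import torch
import torchvision.transforms as transforms
from PIL import Image

from util import load_model

model = load_model('./checkpoint.pth.tar')  # rebuilds the arch stored in the checkpoint
model.cuda()
model.eval()
for p in model.parameters():
    p.requires_grad = False

# Same preprocessing as visu/activ-retrieval.py.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

img = preprocess(Image.open('./example.jpg').convert('RGB'))
x = torch.autograd.Variable(img.unsqueeze(0).cuda(), volatile=True)

# Drop the clustering head to expose the 4096-d penultimate features
# (forward() skips top_layer when it is None).
model.top_layer = None
features = model(x)
print(features.size())  # expected: (1, 4096)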
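A second hedged sketch, this time for util.UnifLabelSampler: the cluster lists below are toy values standing in for the pseudo-label assignments produced by clustering.py, and the DataLoader line is left as a comment because it needs a real dataset object.

# Toy pseudo-label assignment: cluster 2 is empty and is skipped by the sampler.
from util import UnifLabelSampler

images_lists = [[0, 3, 5], [1, 2], []]  # dataset indexes grouped by pseudolabel
N = 6                                   # number of indexes drawn for one epoch

sampler = UnifLabelSampler(N, images_lists)
print(len(sampler))         # 6
print(list(iter(sampler)))  # N dataset indexes, roughly balanced across clusters

# During training the sampler feeds the ConvNet's DataLoader, e.g.:
# loader = torch.utils.data.DataLoader(dataset, batch_size=256,
#                                      sampler=sampler, num_workers=4)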