├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Docker ├── Dockerfile ├── README.md ├── build.sh └── run.sh ├── LICENSE ├── README.md ├── __init__.py ├── clustering.py ├── download_model.sh ├── eval_linear.py ├── eval_linear.sh ├── eval_retrieval.py ├── eval_retrieval.sh ├── eval_voc_classif.py ├── eval_voc_classif.sh ├── eval_voc_classif_all.sh ├── eval_voc_classif_fc6_8.sh ├── main.py ├── main.sh ├── models ├── __init__.py ├── alexnet.py └── vgg16.py ├── util.py └── visu ├── activ-retrieval.py ├── activ-retrieval.sh ├── gradient_ascent.py └── gradient_ascent.sh /.gitignore: -------------------------------------------------------------------------------- 1 | **/.*.swp 2 | *.ipynb 3 | **/*.pyc 4 | *.pyc 5 | .ipynb_checkpoints 6 | third-parties/ 7 | third-parties/* 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | In the context of this project, we do not expect pull requests. 4 | If you find a bug, or would like to suggest an improvement, please open an issue. 5 | -------------------------------------------------------------------------------- /Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | #FROM ubuntu:14.04 2 | #FROM anibali/pytorch:cuda-8.0 3 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 4 | 5 | WORKDIR /usr/src/app 6 | 7 | RUN apt-get update && apt-get install -y \ 8 | python2.7 \ 9 | python-pip \ 10 | git \ 11 | vim \ 12 | wget \ 13 | curl \ 14 | cmake \ 15 | build-essential 16 | 17 | RUN curl -o miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh &&\ 18 | chmod +x miniconda.sh &&\ 19 | ./miniconda.sh -b -p /opt/conda &&\ 20 | rm miniconda.sh 21 | 22 | RUN export PATH=$PATH:/opt/conda/bin &&\ 23 | conda create -n pytorch27 python=2 24 | 25 | # Make RUN commands use the new environment: 26 | SHELL ["conda", "run", "-n", "pytorch27", "/bin/bash", "-c"] 27 | ENV PATH="/opt/conda/bin:${PATH}" 28 | 29 | RUN export CMAKE_PREFIX_PATH=/opt/conda/ &&\ 30 | conda install numpy mkl setuptools cmake cffi scikit-learn &&\ 31 | apt-get -y install gcc libblas-dev liblapack-dev &&\ 32 | conda install -c soumith magma-cuda80 &&\ 33 | pip install torchvision==0.1.8 34 | 35 | RUN wget https://github.com/pytorch/pytorch/archive/v0.1.8.tar.gz &&\ 36 | tar -xzf v0.1.8.tar.gz &&\ 37 | rm v0.1.8.tar.gz &&\ 38 | cd pytorch-0.1.8 &&\ 39 | pip install -r requirements.txt &&\ 40 | python setup.py install 41 | 42 | RUN wget https://github.com/facebookresearch/faiss/archive/v1.3.0.tar.gz &&\ 43 | tar -xzf v1.3.0.tar.gz &&\ 44 | rm v1.3.0.tar.gz &&\ 45 | cd faiss-1.3.0 &&\ 46 | ./configure &&\ 47 | make &&\ 48 | make install &&\ 49 | cd gpu &&\ 50 | make -j &&\ 51 | cd ../python &&\ 52 | make _swigfaiss_gpu.so &&\ 53 | cd ../ &&\ 54 | make py &&\ 55 | conda init bash 56 | 57 | RUN pip install bpython future 58 | RUN echo "conda activate pytorch27" >> /root/.bashrc 59 | 60 | ENV 
PYTHONPATH="/usr/src/app/faiss-1.3.0/python/:${PYTHONPATH}" 61 | COPY . . 62 | -------------------------------------------------------------------------------- /Docker/README.md: -------------------------------------------------------------------------------- 1 | # Installation Using Docker 2 | 3 | 1. Run `bash build.sh` 4 | 1. Run `bash run.sh` 5 | -------------------------------------------------------------------------------- /Docker/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build . -t torch 4 | -------------------------------------------------------------------------------- /Docker/run.sh: -------------------------------------------------------------------------------- 1 | docker run --gpus all -it --name deepcluster torch /bin/bash 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. 
Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | Section 1 -- Definitions. 71 | 72 | a. Adapted Material means material subject to Copyright and Similar 73 | Rights that is derived from or based upon the Licensed Material 74 | and in which the Licensed Material is translated, altered, 75 | arranged, transformed, or otherwise modified in a manner requiring 76 | permission under the Copyright and Similar Rights held by the 77 | Licensor. For purposes of this Public License, where the Licensed 78 | Material is a musical work, performance, or sound recording, 79 | Adapted Material is always produced where the Licensed Material is 80 | synched in timed relation with a moving image. 81 | 82 | b. Adapter's License means the license You apply to Your Copyright 83 | and Similar Rights in Your contributions to Adapted Material in 84 | accordance with the terms and conditions of this Public License. 85 | 86 | c. Copyright and Similar Rights means copyright and/or similar rights 87 | closely related to copyright including, without limitation, 88 | performance, broadcast, sound recording, and Sui Generis Database 89 | Rights, without regard to how the rights are labeled or 90 | categorized. For purposes of this Public License, the rights 91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 92 | Rights. 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. 
Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. NonCommercial means not primarily intended for or directed towards 116 | commercial advantage or monetary compensation. For purposes of 117 | this Public License, the exchange of the Licensed Material for 118 | other material subject to Copyright and Similar Rights by digital 119 | file-sharing or similar means is NonCommercial provided there is 120 | no payment of monetary compensation in connection with the 121 | exchange. 122 | 123 | j. Share means to provide material to the public by any means or 124 | process that requires permission under the Licensed Rights, such 125 | as reproduction, public display, public performance, distribution, 126 | dissemination, communication, or importation, and to make material 127 | available to the public including in ways that members of the 128 | public may access the material from a place and at a time 129 | individually chosen by them. 130 | 131 | k. Sui Generis Database Rights means rights other than copyright 132 | resulting from Directive 96/9/EC of the European Parliament and of 133 | the Council of 11 March 1996 on the legal protection of databases, 134 | as amended and/or succeeded, as well as other essentially 135 | equivalent rights anywhere in the world. 136 | 137 | l. You means the individual or entity exercising the Licensed Rights 138 | under this Public License. Your has a corresponding meaning. 139 | 140 | Section 2 -- Scope. 141 | 142 | a. License grant. 143 | 144 | 1. Subject to the terms and conditions of this Public License, 145 | the Licensor hereby grants You a worldwide, royalty-free, 146 | non-sublicensable, non-exclusive, irrevocable license to 147 | exercise the Licensed Rights in the Licensed Material to: 148 | 149 | a. reproduce and Share the Licensed Material, in whole or 150 | in part, for NonCommercial purposes only; and 151 | 152 | b. produce, reproduce, and Share Adapted Material for 153 | NonCommercial purposes only. 154 | 155 | 2. Exceptions and Limitations. For the avoidance of doubt, where 156 | Exceptions and Limitations apply to Your use, this Public 157 | License does not apply, and You do not need to comply with 158 | its terms and conditions. 159 | 160 | 3. Term. The term of this Public License is specified in Section 161 | 6(a). 162 | 163 | 4. Media and formats; technical modifications allowed. The 164 | Licensor authorizes You to exercise the Licensed Rights in 165 | all media and formats whether now known or hereafter created, 166 | and to make technical modifications necessary to do so. The 167 | Licensor waives and/or agrees not to assert any right or 168 | authority to forbid You from making technical modifications 169 | necessary to exercise the Licensed Rights, including 170 | technical modifications necessary to circumvent Effective 171 | Technological Measures. For purposes of this Public License, 172 | simply making modifications authorized by this Section 2(a) 173 | (4) never produces Adapted Material. 174 | 175 | 5. Downstream recipients. 176 | 177 | a. Offer from the Licensor -- Licensed Material. 
Every 178 | recipient of the Licensed Material automatically 179 | receives an offer from the Licensor to exercise the 180 | Licensed Rights under the terms and conditions of this 181 | Public License. 182 | 183 | b. No downstream restrictions. You may not offer or impose 184 | any additional or different terms or conditions on, or 185 | apply any Effective Technological Measures to, the 186 | Licensed Material if doing so restricts exercise of the 187 | Licensed Rights by any recipient of the Licensed 188 | Material. 189 | 190 | 6. No endorsement. Nothing in this Public License constitutes or 191 | may be construed as permission to assert or imply that You 192 | are, or that Your use of the Licensed Material is, connected 193 | with, or sponsored, endorsed, or granted official status by, 194 | the Licensor or others designated to receive attribution as 195 | provided in Section 3(a)(1)(A)(i). 196 | 197 | b. Other rights. 198 | 199 | 1. Moral rights, such as the right of integrity, are not 200 | licensed under this Public License, nor are publicity, 201 | privacy, and/or other similar personality rights; however, to 202 | the extent possible, the Licensor waives and/or agrees not to 203 | assert any such rights held by the Licensor to the limited 204 | extent necessary to allow You to exercise the Licensed 205 | Rights, but not otherwise. 206 | 207 | 2. Patent and trademark rights are not licensed under this 208 | Public License. 209 | 210 | 3. To the extent possible, the Licensor waives any right to 211 | collect royalties from You for the exercise of the Licensed 212 | Rights, whether directly or through a collecting society 213 | under any voluntary or waivable statutory or compulsory 214 | licensing scheme. In all other cases the Licensor expressly 215 | reserves any right to collect such royalties, including when 216 | the Licensed Material is used other than for NonCommercial 217 | purposes. 218 | 219 | Section 3 -- License Conditions. 220 | 221 | Your exercise of the Licensed Rights is expressly made subject to the 222 | following conditions. 223 | 224 | a. Attribution. 225 | 226 | 1. If You Share the Licensed Material (including in modified 227 | form), You must: 228 | 229 | a. retain the following if it is supplied by the Licensor 230 | with the Licensed Material: 231 | 232 | i. identification of the creator(s) of the Licensed 233 | Material and any others designated to receive 234 | attribution, in any reasonable manner requested by 235 | the Licensor (including by pseudonym if 236 | designated); 237 | 238 | ii. a copyright notice; 239 | 240 | iii. a notice that refers to this Public License; 241 | 242 | iv. a notice that refers to the disclaimer of 243 | warranties; 244 | 245 | v. a URI or hyperlink to the Licensed Material to the 246 | extent reasonably practicable; 247 | 248 | b. indicate if You modified the Licensed Material and 249 | retain an indication of any previous modifications; and 250 | 251 | c. indicate the Licensed Material is licensed under this 252 | Public License, and include the text of, or the URI or 253 | hyperlink to, this Public License. 254 | 255 | 2. You may satisfy the conditions in Section 3(a)(1) in any 256 | reasonable manner based on the medium, means, and context in 257 | which You Share the Licensed Material. For example, it may be 258 | reasonable to satisfy the conditions by providing a URI or 259 | hyperlink to a resource that includes the required 260 | information. 261 | 262 | 3. 
If requested by the Licensor, You must remove any of the 263 | information required by Section 3(a)(1)(A) to the extent 264 | reasonably practicable. 265 | 266 | 4. If You Share Adapted Material You produce, the Adapter's 267 | License You apply must not prevent recipients of the Adapted 268 | Material from complying with this Public License. 269 | 270 | Section 4 -- Sui Generis Database Rights. 271 | 272 | Where the Licensed Rights include Sui Generis Database Rights that 273 | apply to Your use of the Licensed Material: 274 | 275 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 276 | to extract, reuse, reproduce, and Share all or a substantial 277 | portion of the contents of the database for NonCommercial purposes 278 | only; 279 | 280 | b. if You include all or a substantial portion of the database 281 | contents in a database in which You have Sui Generis Database 282 | Rights, then the database in which You have Sui Generis Database 283 | Rights (but not its individual contents) is Adapted Material; and 284 | 285 | c. You must comply with the conditions in Section 3(a) if You Share 286 | all or a substantial portion of the contents of the database. 287 | 288 | For the avoidance of doubt, this Section 4 supplements and does not 289 | replace Your obligations under this Public License where the Licensed 290 | Rights include other Copyright and Similar Rights. 291 | 292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 293 | 294 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 304 | 305 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 314 | 315 | c. The disclaimer of warranties and limitation of liability provided 316 | above shall be interpreted in a manner that, to the extent 317 | possible, most closely approximates an absolute disclaimer and 318 | waiver of all liability. 319 | 320 | Section 6 -- Term and Termination. 321 | 322 | a. This Public License applies for the term of the Copyright and 323 | Similar Rights licensed here. However, if You fail to comply with 324 | this Public License, then Your rights under this Public License 325 | terminate automatically. 326 | 327 | b. Where Your right to use the Licensed Material has terminated under 328 | Section 6(a), it reinstates: 329 | 330 | 1. 
automatically as of the date the violation is cured, provided 331 | it is cured within 30 days of Your discovery of the 332 | violation; or 333 | 334 | 2. upon express reinstatement by the Licensor. 335 | 336 | For the avoidance of doubt, this Section 6(b) does not affect any 337 | right the Licensor may have to seek remedies for Your violations 338 | of this Public License. 339 | 340 | c. For the avoidance of doubt, the Licensor may also offer the 341 | Licensed Material under separate terms or conditions or stop 342 | distributing the Licensed Material at any time; however, doing so 343 | will not terminate this Public License. 344 | 345 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 346 | License. 347 | 348 | Section 7 -- Other Terms and Conditions. 349 | 350 | a. The Licensor shall not be bound by any additional or different 351 | terms or conditions communicated by You unless expressly agreed. 352 | 353 | b. Any arrangements, understandings, or agreements regarding the 354 | Licensed Material not stated herein are separate from and 355 | independent of the terms and conditions of this Public License. 356 | 357 | Section 8 -- Interpretation. 358 | 359 | a. For the avoidance of doubt, this Public License does not, and 360 | shall not be interpreted to, reduce, limit, restrict, or impose 361 | conditions on any use of the Licensed Material that could lawfully 362 | be made without permission under this Public License. 363 | 364 | b. To the extent possible, if any provision of this Public License is 365 | deemed unenforceable, it shall be automatically reformed to the 366 | minimum extent necessary to make it enforceable. If the provision 367 | cannot be reformed, it shall be severed from this Public License 368 | without affecting the enforceability of the remaining terms and 369 | conditions. 370 | 371 | c. No term or condition of this Public License will be waived and no 372 | failure to comply consented to unless expressly agreed to by the 373 | Licensor. 374 | 375 | d. Nothing in this Public License constitutes or may be interpreted 376 | as a limitation upon, or waiver of, any privileges and immunities 377 | that apply to the Licensor or You, including from the legal 378 | processes of any jurisdiction or authority. 379 | 380 | ======================================================================= 381 | 382 | Creative Commons is not a party to its public 383 | licenses. Notwithstanding, Creative Commons may elect to apply one of 384 | its public licenses to material it publishes and in those instances 385 | will be considered the “Licensor.” The text of the Creative Commons 386 | public licenses is dedicated to the public domain under the CC0 Public 387 | Domain Dedication. Except for the limited purpose of indicating that 388 | material is shared under a Creative Commons public license or as 389 | otherwise permitted by the Creative Commons policies published at 390 | creativecommons.org/policies, Creative Commons does not authorize the 391 | use of the trademark "Creative Commons" or any other trademark or logo 392 | of Creative Commons without its prior written consent including, 393 | without limitation, in connection with any unauthorized modifications 394 | to any of its public licenses or any other arrangements, 395 | understandings, or agreements concerning use of licensed material. For 396 | the avoidance of doubt, this paragraph does not form part of the 397 | public licenses. 398 | 399 | Creative Commons may be contacted at creativecommons.org. 
400 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Clustering for Unsupervised Learning of Visual Features 2 | 3 | ## News 4 | We release [paper](https://arxiv.org/abs/2006.09882) and [code](https://github.com/facebookresearch/swav) for SwAV, our new self-supervised method. 5 | SwAV pushes self-supervised learning to only 1.2% away from supervised learning on ImageNet with a ResNet-50! 6 | It combines online clustering with a multi-crop data augmentation. 7 | 8 | We also present DeepCluster-v2, which is an improved version of DeepCluster (ResNet-50, better data augmentation, cosine learning rate schedule, MLP projection head, use of centroids, ...). 9 | Check out [DeepCluster-v2 code](https://github.com/facebookresearch/swav/blob/master/main_deepclusterv2.py). 10 | 11 | ## DeepCluster 12 | This code implements the unsupervised training of convolutional neural networks, or convnets, as described in the paper [Deep Clustering for Unsupervised Learning of Visual Features](https://arxiv.org/abs/1807.05520). 13 | 14 | Moreover, we provide the evaluation protocol codes we used in the paper: 15 | * Pascal VOC classification 16 | * Linear classification on activations 17 | * Instance-level image retrieval 18 | 19 | Finally, this code also includes a visualisation module that allows you to visually assess the quality of the learned features. 20 | 21 | ## Requirements 22 | 23 | - a Python 2.7 installation 24 | - the SciPy and scikit-learn packages 25 | - a PyTorch install, version 0.1.8 ([pytorch.org](http://pytorch.org)) 26 | - CUDA 8.0 27 | - a Faiss install ([Faiss](https://github.com/facebookresearch/faiss)) 28 | - the ImageNet dataset (which can be automatically downloaded by recent versions of [torchvision](https://pytorch.org/docs/stable/torchvision/datasets.html#imagenet)) 29 | 30 | ## Pre-trained models 31 | We provide pre-trained models with AlexNet and VGG-16 architectures, available for download. 32 | * The models in Caffe format expect BGR inputs that range in [0, 255]. You do not need to subtract the per-color-channel mean image since the preprocessing of the data is already included in our released models. 33 | * The models in PyTorch format expect RGB inputs that range in [0, 1]. You should preprocess your data before passing it to the released models by normalizing it: ```mean_rgb = [0.485, 0.456, 0.406]```; ```std_rgb = [0.229, 0.224, 0.225]``` 34 | Note that in all our released models, Sobel filters are computed within the models as two convolutional layers (greyscale + Sobel filters). 35 | 36 | You can download all variants by running 37 | ``` 38 | $ ./download_model.sh 39 | ``` 40 | This will fetch the models into `${HOME}/deepcluster_models` by default. 41 | You can change that path by editing the `MODELROOT` variable in the script.
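For the PyTorch checkpoints, the expected preprocessing can be written as a standard torchvision pipeline. The snippet below is only an illustrative sketch reusing the normalization constants quoted above; Sobel filtering is deliberately absent since the released models compute it internally. (With the old torchvision pinned in the requirements, `transforms.Scale` plays the role of `transforms.Resize`.)

```python
import torchvision.transforms as transforms

# Illustrative preprocessing for the released PyTorch models:
# RGB input scaled to [0, 1], then normalized with the statistics above.
# Sobel filtering is NOT applied here -- the released checkpoints compute it
# internally as two extra convolutional layers (greyscale + Sobel).
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),  # converts a PIL image to an RGB tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```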
42 | Direct download links are provided here: 43 | * [AlexNet-PyTorch](https://dl.fbaipublicfiles.com/deepcluster/alexnet/checkpoint.pth.tar) 44 | * [AlexNet-prototxt](https://dl.fbaipublicfiles.com/deepcluster/alexnet/model.prototxt) + [AlexNet-caffemodel](https://dl.fbaipublicfiles.com/deepcluster/alexnet/model.caffemodel) 45 | * [VGG16-PyTorch](https://dl.fbaipublicfiles.com/deepcluster/vgg16/checkpoint.pth.tar) 46 | * [VGG16-prototxt](https://dl.fbaipublicfiles.com/deepcluster/vgg16/model.prototxt) + [VGG16-caffemodel](https://dl.fbaipublicfiles.com/deepcluster/vgg16/model.caffemodel) 47 | 48 | We also provide the last epoch cluster assignments for these models. After downloading, open the file with Python 2: 49 | ``` 50 | import pickle 51 | with open("./alexnet_cluster_assignment.pickle", "rb") as f: 52 | b = pickle.load(f) 53 | ``` 54 | If you're a Python 3 user, specify ```encoding='latin1'``` in the load function. 55 | Each file is a list of (image path, cluster_index) tuples. 56 | * [AlexNet-clusters](https://dl.fbaipublicfiles.com/deepcluster/alexnet/alexnet_cluster_assignment.pickle) 57 | * [VGG16-clusters](https://dl.fbaipublicfiles.com/deepcluster/vgg16/vgg16_cluster_assignment.pickle) 58 | 59 | Finally, we release the features extracted with the DeepCluster model on the ImageNet dataset. 60 | These features have dimension 4096 and correspond to a forward pass through the model up to the penultimate convolutional layer (just before the last ReLU). 61 | If you plan to cluster the features, don't forget to normalize and reduce/whiten them. 62 | * [AlexNet-imnetfeatures](https://dl.fbaipublicfiles.com/deepcluster/alexnet/alexnet_features.pkl) 63 | * [VGG16-imnetfeatures](https://dl.fbaipublicfiles.com/deepcluster/vgg16/vgg16_features.pkl) 64 | 65 | ## Running the unsupervised training 66 | 67 | Unsupervised training can be launched by running: 68 | ``` 69 | $ ./main.sh 70 | ``` 71 | Please provide the path to the data folder: 72 | ``` 73 | DIR=/datasets01/imagenet_full_size/061417/train 74 | ``` 75 | To train an AlexNet network, specify `ARCH=alexnet`, whereas to train a VGG-16 convnet use `ARCH=vgg16`. 76 | 77 | You can also specify where you want to save the clustering logs and checkpoints using: 78 | ``` 79 | EXP=exp 80 | ``` 81 | 82 | During training, models are saved every n iterations (set using the `--checkpoints` flag), and can be found, for instance, in `${EXP}/checkpoints/checkpoint_0.pth.tar`. 83 | A log of the cluster assignments at each epoch can be found in the pickle file `${EXP}/clusters`.
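For reference, `main.sh` launches the training script `main.py`; a typical direct invocation mirroring the defaults documented below might look like this (the `${DIR}` and `${EXP}` paths are placeholders to fill in):

```
python main.py --exp ${EXP} --arch alexnet --sobel --verbose \
    --clustering Kmeans --k 10000 --lr 0.05 --wd -5 --reassign 1 \
    --batch 256 --epochs 200 --workers 4 ${DIR}
```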
84 | 85 | 86 | Full documentation of the unsupervised training code `main.py`: 87 | ``` 88 | usage: main.py [-h] [--arch ARCH] [--sobel] [--clustering {Kmeans,PIC}] 89 | [--nmb_cluster NMB_CLUSTER] [--lr LR] [--wd WD] 90 | [--reassign REASSIGN] [--workers WORKERS] [--epochs EPOCHS] 91 | [--start_epoch START_EPOCH] [--batch BATCH] 92 | [--momentum MOMENTUM] [--resume PATH] 93 | [--checkpoints CHECKPOINTS] [--seed SEED] [--exp EXP] 94 | [--verbose] 95 | DIR 96 | 97 | PyTorch Implementation of DeepCluster 98 | 99 | positional arguments: 100 | DIR path to dataset 101 | 102 | optional arguments: 103 | -h, --help show this help message and exit 104 | --arch ARCH, -a ARCH CNN architecture (default: alexnet) 105 | --sobel Sobel filtering 106 | --clustering {Kmeans,PIC} 107 | clustering algorithm (default: Kmeans) 108 | --nmb_cluster NMB_CLUSTER, --k NMB_CLUSTER 109 | number of cluster for k-means (default: 10000) 110 | --lr LR learning rate (default: 0.05) 111 | --wd WD weight decay pow (default: -5) 112 | --reassign REASSIGN how many epochs of training between two consecutive 113 | reassignments of clusters (default: 1) 114 | --workers WORKERS number of data loading workers (default: 4) 115 | --epochs EPOCHS number of total epochs to run (default: 200) 116 | --start_epoch START_EPOCH 117 | manual epoch number (useful on restarts) (default: 0) 118 | --batch BATCH mini-batch size (default: 256) 119 | --momentum MOMENTUM momentum (default: 0.9) 120 | --resume PATH path to checkpoint (default: None) 121 | --checkpoints CHECKPOINTS 122 | how many iterations between two checkpoints (default: 123 | 25000) 124 | --seed SEED random seed (default: 31) 125 | --exp EXP path to exp folder 126 | --verbose chatty 127 | ``` 128 | 129 | 130 | ## Evaluation protocols 131 | 132 | ### Pascal VOC 133 | 134 | To run the classification task with fine-tuning, launch: 135 | ``` 136 | ./eval_voc_classif_all.sh 137 | ``` 138 | and without fine-tuning: 139 | ``` 140 | ./eval_voc_classif_fc6_8.sh 141 | ``` 142 | 143 | Both these scripts download [this code](https://github.com/philkr/voc-classification). 144 | You need to download the [VOC 2007 dataset](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/). Then, in both the `./eval_voc_classif_all.sh` and `./eval_voc_classif_fc6_8.sh` scripts, set the path `CAFFE` to point to the caffe branch, and `VOC` to point to the Pascal VOC directory. 145 | In `PROTO` and `MODEL`, indicate respectively the path to the prototxt file and the path to the weights of the model to evaluate. 146 | The flag `--train-from` lets you indicate the separation between the frozen and trainable layers. 147 | 148 | We implemented [voc classification](https://github.com/facebookresearch/deepcluster/blob/master/eval_voc_classif.py) with PyTorch. 149 | 150 | Erratum: When training the MLP only (fc6-8), the scaling parameters of the batch-norm layers in the whole network are trained. 151 | With these parameters frozen, we get 70.4 mAP.
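The erratum above concerns the affine (scale/shift) parameters of the batch-norm layers. As a rough PyTorch sketch of the intended setup, and not the exact code behind the 70.4 mAP figure, freezing those parameters before training fc6-8 could look like this:

```python
import torch.nn as nn

def freeze_batchnorm_params(model):
    """Illustrative sketch: freeze the affine parameters (and running
    statistics) of every batch-norm layer in the network."""
    for m in model.modules():
        if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
            m.eval()  # keep running mean/var fixed (re-apply after any model.train() call)
            if m.affine:
                m.weight.requires_grad = False
                m.bias.requires_grad = False
```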
152 | 153 | ### Linear classification on activations 154 | 155 | You can run these transfer tasks using: 156 | ``` 157 | $ ./eval_linear.sh 158 | ``` 159 | 160 | You need to specify the path to the supervised data (ImageNet or Places): 161 | ``` 162 | DATA=/datasets01/imagenet_full_size/061417/ 163 | ``` 164 | the path of your model: 165 | ``` 166 | MODEL=/private/home/mathilde/deepcluster/checkpoint.pth.tar 167 | ``` 168 | and the convolutional layer on top of which to train the classifier: 169 | ``` 170 | CONV=3 171 | ``` 172 | 173 | You can specify where you want to save the output of this experiment (checkpoints and best models) with 174 | ``` 175 | EXP=exp 176 | ``` 177 | 178 | Full documentation for this task: 179 | ``` 180 | usage: eval_linear.py [-h] [--data DATA] [--model MODEL] [--conv {1,2,3,4,5}] 181 | [--tencrops] [--exp EXP] [--workers WORKERS] 182 | [--epochs EPOCHS] [--batch_size BATCH_SIZE] [--lr LR] 183 | [--momentum MOMENTUM] [--weight_decay WEIGHT_DECAY] 184 | [--seed SEED] [--verbose] 185 | 186 | Train linear classifier on top of frozen convolutional layers of an AlexNet. 187 | 188 | optional arguments: 189 | -h, --help show this help message and exit 190 | --data DATA path to dataset 191 | --model MODEL path to model 192 | --conv {1,2,3,4,5} on top of which convolutional layer train logistic 193 | regression 194 | --tencrops validation accuracy averaged over 10 crops 195 | --exp EXP exp folder 196 | --workers WORKERS number of data loading workers (default: 4) 197 | --epochs EPOCHS number of total epochs to run (default: 90) 198 | --batch_size BATCH_SIZE 199 | mini-batch size (default: 256) 200 | --lr LR learning rate 201 | --momentum MOMENTUM momentum (default: 0.9) 202 | --weight_decay WEIGHT_DECAY, --wd WEIGHT_DECAY 203 | weight decay pow (default: -4) 204 | --seed SEED random seed 205 | --verbose chatty 206 | ``` 207 | 208 | ### Instance-level image retrieval 209 | 210 | You can run the instance-level image retrieval transfer task using: 211 | ``` 212 | ./eval_retrieval.sh 213 | ``` 214 | 215 | ## Visualisation 216 | 217 | We provide two standard visualisation methods presented in our paper. 218 | 219 | ### Filter visualisation with gradient ascent 220 | 221 | First, it is possible to learn an input image that maximizes the activation of a given filter. We follow the process 222 | described by [Yosinski et al.](https://arxiv.org/abs/1506.06579) with a cross-entropy function between the target 223 | filter and the other filters in the same layer. 224 | From the visu folder you can run 225 | ``` 226 | ./gradient_ascent.sh 227 | ``` 228 | You will need to specify the model path ```MODEL```, the architecture of your model ```ARCH```, the path of the folder in which you want to save the synthetic images ```EXP```, and the convolutional layer to consider ```CONV```.
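For example, these fields might be set as follows (the model path matches the default location used by `download_model.sh`; adjust to your setup):

```
MODEL="${HOME}/deepcluster_models/alexnet/checkpoint.pth.tar"
ARCH="alexnet"
EXP="${HOME}/deepcluster_exp/gradient_ascent"
CONV=5
```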
229 | 230 | Full documentation: 231 | ``` 232 | usage: gradient_ascent.py [-h] [--model MODEL] [--arch {alexnet,vgg16}] 233 | [--conv CONV] [--exp EXP] [--lr LR] [--wd WD] 234 | [--sig SIG] [--step STEP] [--niter NITER] 235 | [--idim IDIM] 236 | 237 | Gradient ascent visualisation 238 | 239 | optional arguments: 240 | -h, --help show this help message and exit 241 | --model MODEL Model 242 | --arch {alexnet,vgg16} 243 | arch 244 | --conv CONV convolutional layer 245 | --exp EXP path to res 246 | --lr LR learning rate (default: 3) 247 | --wd WD weight decay (default: 10^-5) 248 | --sig SIG gaussian blur (default: 0.3) 249 | --step STEP number of iter between gaussian blurs (default: 5) 250 | --niter NITER total number of iterations (default: 1000) 251 | --idim IDIM size of input image (default: 224) 252 | ``` 253 | I recommend you play with the hyper-parameters to find a regime where the visualisations are good. 254 | For example, with the pre-trained DeepCluster AlexNet, for conv1 using a learning rate of 3 and 30,000 iterations works well. 255 | For conv5, using a learning rate of 30 and 3,000 iterations gives nice images with the other parameters set to their default values. 256 | 257 | ### Top 9 maximally activated images in a dataset 258 | 259 | Finally, we provide code to retrieve images in a dataset that maximally activate a given filter in the convnet. 260 | From the visu folder, after having changed the fields ```MODEL```, ```EXP```, ```CONV``` and ```DATA```, run 261 | ``` 262 | ./activ-retrieval.sh 263 | ``` 264 | 265 | ## DeeperCluster 266 | 267 | We have proposed another unsupervised feature learning paper at ICCV 2019. 268 | We have shown that unsupervised learning can be used to pre-train convnets, leading to a boost in performance on ImageNet classification. 269 | We achieve that by scaling DeepCluster to 96M images and mixing it with RotNet self-supervision. 270 | Check out the [paper](https://arxiv.org/abs/1905.01278) and [code](https://github.com/facebookresearch/DeeperCluster). 271 | 272 | ## License 273 | 274 | You may find out more about the license [here](https://github.com/facebookresearch/deepcluster/blob/master/LICENSE). 275 | 276 | ## Reference 277 | 278 | If you use this code, please cite the following paper: 279 | 280 | Mathilde Caron, Piotr Bojanowski, Armand Joulin, and Matthijs Douze. "Deep Clustering for Unsupervised Learning of Visual Features." Proc. ECCV (2018). 281 | 282 | ``` 283 | @InProceedings{caron2018deep, 284 | title={Deep Clustering for Unsupervised Learning of Visual Features}, 285 | author={Caron, Mathilde and Bojanowski, Piotr and Joulin, Armand and Douze, Matthijs}, 286 | booktitle={European Conference on Computer Vision}, 287 | year={2018}, 288 | } 289 | ``` 290 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/deepcluster/2d1927e8e3dd272329e879e510fbbdf1b1d02d17/__init__.py -------------------------------------------------------------------------------- /clustering.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | # 7 | import time 8 | 9 | import faiss 10 | import numpy as np 11 | from PIL import Image 12 | from PIL import ImageFile 13 | from scipy.sparse import csr_matrix, find 14 | import torch 15 | import torch.utils.data as data 16 | import torchvision.transforms as transforms 17 | 18 | ImageFile.LOAD_TRUNCATED_IMAGES = True 19 | 20 | __all__ = ['PIC', 'Kmeans', 'cluster_assign', 'arrange_clustering'] 21 | 22 | 23 | def pil_loader(path): 24 | """Loads an image. 25 | Args: 26 | path (string): path to image file 27 | Returns: 28 | Image 29 | """ 30 | with open(path, 'rb') as f: 31 | img = Image.open(f) 32 | return img.convert('RGB') 33 | 34 | 35 | class ReassignedDataset(data.Dataset): 36 | """A dataset where the new images labels are given in argument. 37 | Args: 38 | image_indexes (list): list of data indexes 39 | pseudolabels (list): list of labels for each data 40 | dataset (list): list of tuples with paths to images 41 | transform (callable, optional): a function/transform that takes in 42 | an PIL image and returns a 43 | transformed version 44 | """ 45 | 46 | def __init__(self, image_indexes, pseudolabels, dataset, transform=None): 47 | self.imgs = self.make_dataset(image_indexes, pseudolabels, dataset) 48 | self.transform = transform 49 | 50 | def make_dataset(self, image_indexes, pseudolabels, dataset): 51 | label_to_idx = {label: idx for idx, label in enumerate(set(pseudolabels))} 52 | images = [] 53 | for j, idx in enumerate(image_indexes): 54 | path = dataset[idx][0] 55 | pseudolabel = label_to_idx[pseudolabels[j]] 56 | images.append((path, pseudolabel)) 57 | return images 58 | 59 | def __getitem__(self, index): 60 | """ 61 | Args: 62 | index (int): index of data 63 | Returns: 64 | tuple: (image, pseudolabel) where pseudolabel is the cluster of index datapoint 65 | """ 66 | path, pseudolabel = self.imgs[index] 67 | img = pil_loader(path) 68 | if self.transform is not None: 69 | img = self.transform(img) 70 | return img, pseudolabel 71 | 72 | def __len__(self): 73 | return len(self.imgs) 74 | 75 | 76 | def preprocess_features(npdata, pca=256): 77 | """Preprocess an array of features. 78 | Args: 79 | npdata (np.array N * ndim): features to preprocess 80 | pca (int): dim of output 81 | Returns: 82 | np.array of dim N * pca: data PCA-reduced, whitened and L2-normalized 83 | """ 84 | _, ndim = npdata.shape 85 | npdata = npdata.astype('float32') 86 | 87 | # Apply PCA-whitening with Faiss 88 | mat = faiss.PCAMatrix (ndim, pca, eigen_power=-0.5) 89 | mat.train(npdata) 90 | assert mat.is_trained 91 | npdata = mat.apply_py(npdata) 92 | 93 | # L2 normalization 94 | row_sums = np.linalg.norm(npdata, axis=1) 95 | npdata = npdata / row_sums[:, np.newaxis] 96 | 97 | return npdata 98 | 99 | 100 | def make_graph(xb, nnn): 101 | """Builds a graph of nearest neighbors. 
102 | Args: 103 | xb (np.array): data 104 | nnn (int): number of nearest neighbors 105 | Returns: 106 | list: for each data the list of ids to its nnn nearest neighbors 107 | list: for each data the list of distances to its nnn NN 108 | """ 109 | N, dim = xb.shape 110 | 111 | # we need only a StandardGpuResources per GPU 112 | res = faiss.StandardGpuResources() 113 | 114 | # L2 115 | flat_config = faiss.GpuIndexFlatConfig() 116 | flat_config.device = int(torch.cuda.device_count()) - 1 117 | index = faiss.GpuIndexFlatL2(res, dim, flat_config) 118 | index.add(xb) 119 | D, I = index.search(xb, nnn + 1) 120 | return I, D 121 | 122 | 123 | def cluster_assign(images_lists, dataset): 124 | """Creates a dataset from clustering, with clusters as labels. 125 | Args: 126 | images_lists (list of list): for each cluster, the list of image indexes 127 | belonging to this cluster 128 | dataset (list): initial dataset 129 | Returns: 130 | ReassignedDataset(torch.utils.data.Dataset): a dataset with clusters as 131 | labels 132 | """ 133 | assert images_lists is not None 134 | pseudolabels = [] 135 | image_indexes = [] 136 | for cluster, images in enumerate(images_lists): 137 | image_indexes.extend(images) 138 | pseudolabels.extend([cluster] * len(images)) 139 | 140 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 141 | std=[0.229, 0.224, 0.225]) 142 | t = transforms.Compose([transforms.RandomResizedCrop(224), 143 | transforms.RandomHorizontalFlip(), 144 | transforms.ToTensor(), 145 | normalize]) 146 | 147 | return ReassignedDataset(image_indexes, pseudolabels, dataset, t) 148 | 149 | 150 | def run_kmeans(x, nmb_clusters, verbose=False): 151 | """Runs kmeans on 1 GPU. 152 | Args: 153 | x: data 154 | nmb_clusters (int): number of clusters 155 | Returns: 156 | list: ids of data in each cluster 157 | """ 158 | n_data, d = x.shape 159 | 160 | # faiss implementation of k-means 161 | clus = faiss.Clustering(d, nmb_clusters) 162 | 163 | # Change faiss seed at each k-means so that the randomly picked 164 | # initialization centroids do not correspond to the same feature ids 165 | # from an epoch to another. 166 | clus.seed = np.random.randint(1234) 167 | 168 | clus.niter = 20 169 | clus.max_points_per_centroid = 10000000 170 | res = faiss.StandardGpuResources() 171 | flat_config = faiss.GpuIndexFlatConfig() 172 | flat_config.useFloat16 = False 173 | flat_config.device = 0 174 | index = faiss.GpuIndexFlatL2(res, d, flat_config) 175 | 176 | # perform the training 177 | clus.train(x, index) 178 | _, I = index.search(x, 1) 179 | losses = faiss.vector_to_array(clus.obj) 180 | if verbose: 181 | print('k-means loss evolution: {0}'.format(losses)) 182 | 183 | return [int(n[0]) for n in I], losses[-1] 184 | 185 | 186 | def arrange_clustering(images_lists): 187 | pseudolabels = [] 188 | image_indexes = [] 189 | for cluster, images in enumerate(images_lists): 190 | image_indexes.extend(images) 191 | pseudolabels.extend([cluster] * len(images)) 192 | indexes = np.argsort(image_indexes) 193 | return np.asarray(pseudolabels)[indexes] 194 | 195 | 196 | class Kmeans(object): 197 | def __init__(self, k): 198 | self.k = k 199 | 200 | def cluster(self, data, verbose=False): 201 | """Performs k-means clustering. 
202 | Args: 203 | x_data (np.array N * dim): data to cluster 204 | """ 205 | end = time.time() 206 | 207 | # PCA-reducing, whitening and L2-normalization 208 | xb = preprocess_features(data) 209 | 210 | # cluster the data 211 | I, loss = run_kmeans(xb, self.k, verbose) 212 | self.images_lists = [[] for i in range(self.k)] 213 | for i in range(len(data)): 214 | self.images_lists[I[i]].append(i) 215 | 216 | if verbose: 217 | print('k-means time: {0:.0f} s'.format(time.time() - end)) 218 | 219 | return loss 220 | 221 | 222 | def make_adjacencyW(I, D, sigma): 223 | """Create adjacency matrix with a Gaussian kernel. 224 | Args: 225 | I (numpy array): for each vertex the ids to its nnn linked vertices 226 | + first column of identity. 227 | D (numpy array): for each data the l2 distances to its nnn linked vertices 228 | + first column of zeros. 229 | sigma (float): Bandwidth of the Gaussian kernel. 230 | 231 | Returns: 232 | csr_matrix: affinity matrix of the graph. 233 | """ 234 | V, k = I.shape 235 | k = k - 1 236 | indices = np.reshape(np.delete(I, 0, 1), (1, -1)) 237 | indptr = np.multiply(k, np.arange(V + 1)) 238 | 239 | def exp_ker(d): 240 | return np.exp(-d / sigma**2) 241 | 242 | exp_ker = np.vectorize(exp_ker) 243 | res_D = exp_ker(D) 244 | data = np.reshape(np.delete(res_D, 0, 1), (1, -1)) 245 | adj_matrix = csr_matrix((data[0], indices[0], indptr), shape=(V, V)) 246 | return adj_matrix 247 | 248 | 249 | def run_pic(I, D, sigma, alpha): 250 | """Run PIC algorithm""" 251 | a = make_adjacencyW(I, D, sigma) 252 | graph = a + a.transpose() 253 | cgraph = graph 254 | nim = graph.shape[0] 255 | 256 | W = graph 257 | t0 = time.time() 258 | 259 | v0 = np.ones(nim) / nim 260 | 261 | # power iterations 262 | v = v0.astype('float32') 263 | 264 | t0 = time.time() 265 | dt = 0 266 | for i in range(200): 267 | vnext = np.zeros(nim, dtype='float32') 268 | 269 | vnext = vnext + W.transpose().dot(v) 270 | 271 | vnext = alpha * vnext + (1 - alpha) / nim 272 | # L1 normalize 273 | vnext /= vnext.sum() 274 | v = vnext 275 | 276 | if i == 200 - 1: 277 | clust = find_maxima_cluster(W, v) 278 | 279 | return [int(i) for i in clust] 280 | 281 | 282 | def find_maxima_cluster(W, v): 283 | n, m = W.shape 284 | assert (n == m) 285 | assign = np.zeros(n) 286 | # for each node 287 | pointers = list(range(n)) 288 | for i in range(n): 289 | best_vi = 0 290 | l0 = W.indptr[i] 291 | l1 = W.indptr[i + 1] 292 | for l in range(l0, l1): 293 | j = W.indices[l] 294 | vi = W.data[l] * (v[j] - v[i]) 295 | if vi > best_vi: 296 | best_vi = vi 297 | pointers[i] = j 298 | n_clus = 0 299 | cluster_ids = -1 * np.ones(n) 300 | for i in range(n): 301 | if pointers[i] == i: 302 | cluster_ids[i] = n_clus 303 | n_clus = n_clus + 1 304 | for i in range(n): 305 | # go from pointers to pointers starting from i until reached a local optim 306 | current_node = i 307 | while pointers[current_node] != current_node: 308 | current_node = pointers[current_node] 309 | 310 | assign[i] = cluster_ids[current_node] 311 | assert (assign[i] >= 0) 312 | return assign 313 | 314 | 315 | class PIC(object): 316 | """Class to perform Power Iteration Clustering on a graph of nearest neighbors. 
317 | Args: 318 | args: for consistency with k-means init 319 | sigma (float): bandwidth of the Gaussian kernel (default 0.2) 320 | nnn (int): number of nearest neighbors (default 5) 321 | alpha (float): parameter in PIC (default 0.001) 322 | distribute_singletons (bool): If True, reassign each singleton to 323 | the cluster of its closest non 324 | singleton nearest neighbors (up to nnn 325 | nearest neighbors). 326 | Attributes: 327 | images_lists (list of list): for each cluster, the list of image indexes 328 | belonging to this cluster 329 | """ 330 | 331 | def __init__(self, args=None, sigma=0.2, nnn=5, alpha=0.001, distribute_singletons=True): 332 | self.sigma = sigma 333 | self.alpha = alpha 334 | self.nnn = nnn 335 | self.distribute_singletons = distribute_singletons 336 | 337 | def cluster(self, data, verbose=False): 338 | end = time.time() 339 | 340 | # preprocess the data 341 | xb = preprocess_features(data) 342 | 343 | # construct nnn graph 344 | I, D = make_graph(xb, self.nnn) 345 | 346 | # run PIC 347 | clust = run_pic(I, D, self.sigma, self.alpha) 348 | images_lists = {} 349 | for h in set(clust): 350 | images_lists[h] = [] 351 | for data, c in enumerate(clust): 352 | images_lists[c].append(data) 353 | 354 | # allocate singletons to clusters of their closest NN not singleton 355 | if self.distribute_singletons: 356 | clust_NN = {} 357 | for i in images_lists: 358 | # if singleton 359 | if len(images_lists[i]) == 1: 360 | s = images_lists[i][0] 361 | # for NN 362 | for n in I[s, 1:]: 363 | # if NN is not a singleton 364 | if not len(images_lists[clust[n]]) == 1: 365 | clust_NN[s] = n 366 | break 367 | for s in clust_NN: 368 | del images_lists[clust[s]] 369 | clust[s] = clust[clust_NN[s]] 370 | images_lists[clust[s]].append(s) 371 | 372 | self.images_lists = [] 373 | for c in images_lists: 374 | self.images_lists.append(images_lists[c]) 375 | 376 | if verbose: 377 | print('pic time: {0:.0f} s'.format(time.time() - end)) 378 | return 0 379 | -------------------------------------------------------------------------------- /download_model.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | MODELROOT="${HOME}/deepcluster_models" 10 | 11 | mkdir -p ${MODELROOT} 12 | 13 | for MODEL in alexnet vgg16 14 | do 15 | mkdir -p "${MODELROOT}/${MODEL}" 16 | for FILE in checkpoint.pth.tar model.caffemodel model.prototxt 17 | do 18 | wget -c "https://dl.fbaipublicfiles.com/deepcluster/${MODEL}/${FILE}" \ 19 | -P "${MODELROOT}/${MODEL}" 20 | 21 | done 22 | done 23 | -------------------------------------------------------------------------------- /eval_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | import argparse 9 | import os 10 | import time 11 | 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | import torch.backends.cudnn as cudnn 16 | import torch.optim 17 | import torch.utils.data 18 | import torchvision.transforms as transforms 19 | import torchvision.datasets as datasets 20 | 21 | from util import AverageMeter, learning_rate_decay, load_model, Logger 22 | 23 | parser = argparse.ArgumentParser(description="""Train linear classifier on top 24 | of frozen convolutional layers of an AlexNet.""") 25 | 26 | parser.add_argument('--data', type=str, help='path to dataset') 27 | parser.add_argument('--model', type=str, help='path to model') 28 | parser.add_argument('--conv', type=int, choices=[1, 2, 3, 4, 5], 29 | help='on top of which convolutional layer train logistic regression') 30 | parser.add_argument('--tencrops', action='store_true', 31 | help='validation accuracy averaged over 10 crops') 32 | parser.add_argument('--exp', type=str, default='', help='exp folder') 33 | parser.add_argument('--workers', default=4, type=int, 34 | help='number of data loading workers (default: 4)') 35 | parser.add_argument('--epochs', type=int, default=90, help='number of total epochs to run (default: 90)') 36 | parser.add_argument('--batch_size', default=256, type=int, 37 | help='mini-batch size (default: 256)') 38 | parser.add_argument('--lr', default=0.01, type=float, help='learning rate') 39 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum (default: 0.9)') 40 | parser.add_argument('--weight_decay', '--wd', default=-4, type=float, 41 | help='weight decay pow (default: -4)') 42 | parser.add_argument('--seed', type=int, default=31, help='random seed') 43 | parser.add_argument('--verbose', action='store_true', help='chatty') 44 | 45 | 46 | def main(): 47 | global args 48 | args = parser.parse_args() 49 | 50 | #fix random seeds 51 | torch.manual_seed(args.seed) 52 | torch.cuda.manual_seed_all(args.seed) 53 | np.random.seed(args.seed) 54 | 55 | best_prec1 = 0 56 | 57 | # load model 58 | model = load_model(args.model) 59 | model.cuda() 60 | cudnn.benchmark = True 61 | 62 | # freeze the features layers 63 | for param in model.features.parameters(): 64 | param.requires_grad = False 65 | 66 | # define loss function (criterion) and optimizer 67 | criterion = nn.CrossEntropyLoss().cuda() 68 | 69 | # data loading code 70 | traindir = os.path.join(args.data, 'train') 71 | valdir = os.path.join(args.data, 'val') 72 | 73 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 74 | std=[0.229, 0.224, 0.225]) 75 | 76 | if args.tencrops: 77 | transformations_val = [ 78 | transforms.Resize(256), 79 | transforms.TenCrop(224), 80 | transforms.Lambda(lambda crops: torch.stack([normalize(transforms.ToTensor()(crop)) for crop in crops])), 81 | ] 82 | else: 83 | transformations_val = [transforms.Resize(256), 84 | transforms.CenterCrop(224), 85 | transforms.ToTensor(), 86 | normalize] 87 | 88 | transformations_train = [transforms.Resize(256), 89 | transforms.CenterCrop(256), 90 | transforms.RandomCrop(224), 91 | transforms.RandomHorizontalFlip(), 92 | transforms.ToTensor(), 93 | normalize] 94 | train_dataset = datasets.ImageFolder( 95 | traindir, 96 | transform=transforms.Compose(transformations_train) 97 | ) 98 | 99 | val_dataset = datasets.ImageFolder( 100 | valdir, 101 | transform=transforms.Compose(transformations_val) 102 | ) 103 | train_loader = torch.utils.data.DataLoader(train_dataset, 104 | batch_size=args.batch_size, 105 | shuffle=True, 106 | 
num_workers=args.workers, 107 | pin_memory=True) 108 | val_loader = torch.utils.data.DataLoader(val_dataset, 109 | batch_size=int(args.batch_size/2), 110 | shuffle=False, 111 | num_workers=args.workers) 112 | 113 | # logistic regression 114 | reglog = RegLog(args.conv, len(train_dataset.classes)).cuda() 115 | optimizer = torch.optim.SGD( 116 | filter(lambda x: x.requires_grad, reglog.parameters()), 117 | args.lr, 118 | momentum=args.momentum, 119 | weight_decay=10**args.weight_decay 120 | ) 121 | 122 | # create logs 123 | exp_log = os.path.join(args.exp, 'log') 124 | if not os.path.isdir(exp_log): 125 | os.makedirs(exp_log) 126 | 127 | loss_log = Logger(os.path.join(exp_log, 'loss_log')) 128 | prec1_log = Logger(os.path.join(exp_log, 'prec1')) 129 | prec5_log = Logger(os.path.join(exp_log, 'prec5')) 130 | 131 | for epoch in range(args.epochs): 132 | end = time.time() 133 | 134 | # train for one epoch 135 | train(train_loader, model, reglog, criterion, optimizer, epoch) 136 | 137 | # evaluate on validation set 138 | prec1, prec5, loss = validate(val_loader, model, reglog, criterion) 139 | 140 | loss_log.log(loss) 141 | prec1_log.log(prec1) 142 | prec5_log.log(prec5) 143 | 144 | # remember best prec@1 and save checkpoint 145 | is_best = prec1 > best_prec1 146 | best_prec1 = max(prec1, best_prec1) 147 | if is_best: 148 | filename = 'model_best.pth.tar' 149 | else: 150 | filename = 'checkpoint.pth.tar' 151 | torch.save({ 152 | 'epoch': epoch + 1, 153 | 'arch': 'alexnet', 154 | 'state_dict': model.state_dict(), 155 | 'prec5': prec5, 156 | 'best_prec1': best_prec1, 157 | 'optimizer' : optimizer.state_dict(), 158 | }, os.path.join(args.exp, filename)) 159 | 160 | 161 | class RegLog(nn.Module): 162 | """Creates logistic regression on top of frozen features""" 163 | def __init__(self, conv, num_labels): 164 | super(RegLog, self).__init__() 165 | self.conv = conv 166 | if conv==1: 167 | self.av_pool = nn.AvgPool2d(6, stride=6, padding=3) 168 | s = 9600 169 | elif conv==2: 170 | self.av_pool = nn.AvgPool2d(4, stride=4, padding=0) 171 | s = 9216 172 | elif conv==3: 173 | self.av_pool = nn.AvgPool2d(3, stride=3, padding=1) 174 | s = 9600 175 | elif conv==4: 176 | self.av_pool = nn.AvgPool2d(3, stride=3, padding=1) 177 | s = 9600 178 | elif conv==5: 179 | self.av_pool = nn.AvgPool2d(2, stride=2, padding=0) 180 | s = 9216 181 | self.linear = nn.Linear(s, num_labels) 182 | 183 | def forward(self, x): 184 | x = self.av_pool(x) 185 | x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3)) 186 | return self.linear(x) 187 | 188 | 189 | def forward(x, model, conv): 190 | if hasattr(model, 'sobel') and model.sobel is not None: 191 | x = model.sobel(x) 192 | count = 1 193 | for m in model.features.modules(): 194 | if not isinstance(m, nn.Sequential): 195 | x = m(x) 196 | if isinstance(m, nn.ReLU): 197 | if count == conv: 198 | return x 199 | count = count + 1 200 | return x 201 | 202 | def accuracy(output, target, topk=(1,)): 203 | """Computes the precision@k for the specified values of k""" 204 | maxk = max(topk) 205 | batch_size = target.size(0) 206 | 207 | _, pred = output.topk(maxk, 1, True, True) 208 | pred = pred.t() 209 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 210 | 211 | res = [] 212 | for k in topk: 213 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 214 | res.append(correct_k.mul_(100.0 / batch_size)) 215 | return res 216 | 217 | def train(train_loader, model, reglog, criterion, optimizer, epoch): 218 | batch_time = AverageMeter() 219 | data_time = AverageMeter() 220 | 
losses = AverageMeter() 221 | top1 = AverageMeter() 222 | top5 = AverageMeter() 223 | 224 | # freeze also batch norm layers 225 | model.eval() 226 | 227 | end = time.time() 228 | for i, (input, target) in enumerate(train_loader): 229 | 230 | # measure data loading time 231 | data_time.update(time.time() - end) 232 | 233 | #adjust learning rate 234 | learning_rate_decay(optimizer, len(train_loader) * epoch + i, args.lr) 235 | 236 | target = target.cuda(async=True) 237 | input_var = torch.autograd.Variable(input.cuda()) 238 | target_var = torch.autograd.Variable(target) 239 | # compute output 240 | 241 | output = forward(input_var, model, reglog.conv) 242 | output = reglog(output) 243 | loss = criterion(output, target_var) 244 | # measure accuracy and record loss 245 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 246 | losses.update(loss.data[0], input.size(0)) 247 | top1.update(prec1[0], input.size(0)) 248 | top5.update(prec5[0], input.size(0)) 249 | 250 | # compute gradient and do SGD step 251 | optimizer.zero_grad() 252 | loss.backward() 253 | optimizer.step() 254 | 255 | # measure elapsed time 256 | batch_time.update(time.time() - end) 257 | end = time.time() 258 | 259 | if args.verbose and i % 100 == 0: 260 | print('Epoch: [{0}][{1}/{2}]\t' 261 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 262 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 263 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 264 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 265 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})' 266 | .format(epoch, i, len(train_loader), batch_time=batch_time, 267 | data_time=data_time, loss=losses, top1=top1, top5=top5)) 268 | 269 | 270 | def validate(val_loader, model, reglog, criterion): 271 | batch_time = AverageMeter() 272 | losses = AverageMeter() 273 | top1 = AverageMeter() 274 | top5 = AverageMeter() 275 | 276 | # switch to evaluate mode 277 | model.eval() 278 | softmax = nn.Softmax(dim=1).cuda() 279 | end = time.time() 280 | for i, (input_tensor, target) in enumerate(val_loader): 281 | if args.tencrops: 282 | bs, ncrops, c, h, w = input_tensor.size() 283 | input_tensor = input_tensor.view(-1, c, h, w) 284 | target = target.cuda(async=True) 285 | input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True) 286 | target_var = torch.autograd.Variable(target, volatile=True) 287 | 288 | output = reglog(forward(input_var, model, reglog.conv)) 289 | 290 | if args.tencrops: 291 | output_central = output.view(bs, ncrops, -1)[: , ncrops / 2 - 1, :] 292 | output = softmax(output) 293 | output = torch.squeeze(output.view(bs, ncrops, -1).mean(1)) 294 | else: 295 | output_central = output 296 | 297 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 298 | top1.update(prec1[0], input_tensor.size(0)) 299 | top5.update(prec5[0], input_tensor.size(0)) 300 | loss = criterion(output_central, target_var) 301 | losses.update(loss.data[0], input_tensor.size(0)) 302 | 303 | # measure elapsed time 304 | batch_time.update(time.time() - end) 305 | end = time.time() 306 | 307 | if args.verbose and i % 100 == 0: 308 | print('Validation: [{0}/{1}]\t' 309 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 310 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 311 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 312 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})' 313 | .format(i, len(val_loader), batch_time=batch_time, 314 | loss=losses, top1=top1, top5=top5)) 315 | 316 | return top1.avg, top5.avg, losses.avg 317 | 318 | if __name__ == '__main__': 319 | main() 320 | 
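The hard-coded `s` values in RegLog (9600 or 9216 depending on the chosen conv layer) are simply the flattened sizes of the average-pooled feature maps. Assuming 224x224 crops and the CFG['2012'] AlexNet layout from models/alexnet.py, the arithmetic can be re-derived with the usual (n + 2p - k) // s + 1 output-size formula; the following is a minimal, purely illustrative sketch of that calculation:

def out_size(n, k, s, p):
    # spatial output size of a convolution/pooling layer with kernel k, stride s, padding p
    return (n + 2 * p - k) // s + 1

n = out_size(224, 11, 4, 2)              # conv1 -> 55x55
print(96 * out_size(n, 6, 6, 3) ** 2)    # conv1 classifier input: 96 * 10 * 10 = 9600
n = out_size(n, 3, 2, 0)                 # max pool -> 27x27
n = out_size(n, 5, 1, 2)                 # conv2 -> 27x27
print(256 * out_size(n, 4, 4, 0) ** 2)   # conv2 classifier input: 256 * 6 * 6 = 9216
n = out_size(n, 3, 2, 0)                 # max pool -> 13x13
n = out_size(n, 3, 1, 1)                 # conv3 and conv4 -> 13x13
print(384 * out_size(n, 3, 3, 1) ** 2)   # conv3/conv4 classifier input: 384 * 5 * 5 = 9600
print(256 * out_size(n, 2, 2, 0) ** 2)   # conv5 classifier input: 256 * 6 * 6 = 9216

With these pooling kernels every probed layer feeds the linear classifier a roughly comparable number of features (about 9-10k).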
-------------------------------------------------------------------------------- /eval_linear.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | DATA="/datasets01/imagenet_full_size/061417/" 10 | MODELROOT="${HOME}/deepcluster_models" 11 | MODEL="${MODELROOT}/alexnet/checkpoint.pth.tar" 12 | EXP="${HOME}/deepcluster_exp/linear_classif" 13 | 14 | PYTHON="${HOME}/test/conda/bin/python" 15 | 16 | mkdir -p ${EXP} 17 | 18 | ${PYTHON} eval_linear.py --model ${MODEL} --data ${DATA} --conv 3 --lr 0.01 \ 19 | --wd -7 --tencrops --verbose --exp ${EXP} --workers 12 20 | -------------------------------------------------------------------------------- /eval_retrieval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | 9 | import argparse 10 | from collections import OrderedDict 11 | import os 12 | import pickle 13 | import subprocess 14 | import sys 15 | 16 | import numpy as np 17 | from PIL import Image 18 | import torch 19 | import torchvision 20 | from torch.autograd import Variable 21 | 22 | from util import load_model 23 | 24 | 25 | class ImageHelper: 26 | def __init__(self, S, L, transforms): 27 | self.S = S 28 | self.L = L 29 | self.transforms = transforms 30 | 31 | def load_and_prepare_image(self, fname, roi=None): 32 | # Read image, get aspect ratio, and resize such as the largest side equals S 33 | im = Image.open(fname) 34 | im_size_hw = np.array((im.size[1], im.size[0])) 35 | if self.S == -1: 36 | ratio = 1.0 37 | elif self.S == -2: 38 | if np.max(im_size_hw) > 124: 39 | ratio = 1024.0/np.max(im_size_hw) 40 | else: 41 | ratio = -1 42 | else: 43 | ratio = float(self.S)/np.max(im_size_hw) 44 | new_size = tuple(np.round(im_size_hw * ratio).astype(np.int32)) 45 | im_resized = self.transforms(im.resize((new_size[1], new_size[0]), Image.BILINEAR)) 46 | # If there is a roi, adapt the roi to the new size and crop. Do not rescale 47 | # the image once again 48 | if roi is not None: 49 | # ROI format is (xmin,ymin,xmax,ymax) 50 | roi = np.round(roi * ratio).astype(np.int32) 51 | im_resized = im_resized[:, roi[1]:roi[3], roi[0]:roi[2]] 52 | return im_resized 53 | 54 | def get_rmac_region_coordinates(self, H, W, L): 55 | # Almost verbatim from Tolias et al Matlab implementation. 56 | # Could be heavily pythonized, but really not worth it... 57 | # Desired overlap of neighboring regions 58 | ovr = 0.4 59 | # Possible regions for the long dimension 60 | steps = np.array((2, 3, 4, 5, 6, 7), dtype=np.float32) 61 | w = np.minimum(H, W) 62 | 63 | b = (np.maximum(H, W) - w) / (steps - 1) 64 | # steps(idx) regions for long dimension. The +1 comes from Matlab 65 | # 1-indexing... 
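        # For each candidate region count in `steps`, b is the stride between the centres of
        # neighbouring w x w regions along the long image side, so (w*w - w*b) / (w*w) is the
        # fraction by which two neighbouring regions overlap; argmin selects the candidate
        # whose overlap is closest to the 40% target set by `ovr`.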
66 | idx = np.argmin(np.abs(((w**2 - w * b) / w**2) - ovr)) + 1 67 | 68 | # Region overplus per dimension 69 | Wd = 0 70 | Hd = 0 71 | if H < W: 72 | Wd = idx 73 | elif H > W: 74 | Hd = idx 75 | 76 | regions_xywh = [] 77 | for l in range(1, L+1): 78 | wl = np.floor(2 * w / (l + 1)) 79 | wl2 = np.floor(wl / 2 - 1) 80 | # Center coordinates 81 | if l + Wd - 1 > 0: 82 | b = (W - wl) / (l + Wd - 1) 83 | else: 84 | b = 0 85 | cenW = np.floor(wl2 + b * np.arange(l - 1 + Wd + 1)) - wl2 86 | # Center coordinates 87 | if l + Hd - 1 > 0: 88 | b = (H - wl) / (l + Hd - 1) 89 | else: 90 | b = 0 91 | cenH = np.floor(wl2 + b * np.arange(l - 1 + Hd + 1)) - wl2 92 | 93 | for i_ in cenH: 94 | for j_ in cenW: 95 | regions_xywh.append([j_, i_, wl, wl]) 96 | 97 | # Round the regions. Careful with the borders! 98 | for i in range(len(regions_xywh)): 99 | for j in range(4): 100 | regions_xywh[i][j] = int(round(regions_xywh[i][j])) 101 | if regions_xywh[i][0] + regions_xywh[i][2] > W: 102 | regions_xywh[i][0] -= ((regions_xywh[i][0] + regions_xywh[i][2]) - W) 103 | if regions_xywh[i][1] + regions_xywh[i][3] > H: 104 | regions_xywh[i][1] -= ((regions_xywh[i][1] + regions_xywh[i][3]) - H) 105 | return np.array(regions_xywh) 106 | 107 | 108 | class PCA(object): 109 | ''' 110 | Fits and applies PCA whitening 111 | ''' 112 | def __init__(self, n_components): 113 | self.n_components = n_components 114 | 115 | def fit(self, X): 116 | mean = X.mean(axis=0) 117 | X -= mean 118 | self.mean = Variable(torch.from_numpy(mean).view(1, -1)) 119 | Xcov = np.dot(X.T, X) 120 | d, V = np.linalg.eigh(Xcov) 121 | 122 | eps = d.max() * 1e-5 123 | n_0 = (d < eps).sum() 124 | if n_0 > 0: 125 | print("%d / %d singular values are 0" % (n_0, d.size)) 126 | d[d < eps] = eps 127 | totenergy = d.sum() 128 | idx = np.argsort(d)[::-1][:self.n_components] 129 | d = d[idx] 130 | V = V[:, idx] 131 | 132 | print("keeping %.2f %% of the energy" % (d.sum() / totenergy * 100.0)) 133 | 134 | D = np.diag(1. / np.sqrt(d)) 135 | self.DVt = Variable(torch.from_numpy(np.dot(D, V.T))) 136 | 137 | def to_cuda(self): 138 | self.mean = self.mean.cuda() 139 | self.DVt = self.DVt.cuda() 140 | 141 | def apply(self, X): 142 | X = X - self.mean 143 | num = torch.mm(self.DVt, X.transpose(0, 1)).transpose(0, 1) 144 | # L2 normalize on output 145 | return num 146 | 147 | 148 | class Dataset: 149 | def __init__(self, path, eval_binary_path): 150 | self.path = path 151 | self.eval_binary_path = eval_binary_path 152 | # Some images from the Paris dataset are corrupted. 
Standard practice is 153 | # to ignore them 154 | self.blacklisted = set(["paris_louvre_000136", 155 | "paris_louvre_000146", 156 | "paris_moulinrouge_000422", 157 | "paris_museedorsay_001059", 158 | "paris_notredame_000188", 159 | "paris_pantheon_000284", 160 | "paris_pantheon_000960", 161 | "paris_pantheon_000974", 162 | "paris_pompidou_000195", 163 | "paris_pompidou_000196", 164 | "paris_pompidou_000201", 165 | "paris_pompidou_000467", 166 | "paris_pompidou_000640", 167 | "paris_sacrecoeur_000299", 168 | "paris_sacrecoeur_000330", 169 | "paris_sacrecoeur_000353", 170 | "paris_triomphe_000662", 171 | "paris_triomphe_000833", 172 | "paris_triomphe_000863", 173 | "paris_triomphe_000867"]) 174 | self.load() 175 | 176 | def load(self): 177 | # Load the dataset GT 178 | self.lab_root = '{0}/lab/'.format(self.path) 179 | self.img_root = '{0}/jpg/'.format(self.path) 180 | lab_filenames = np.sort(os.listdir(self.lab_root)) 181 | # Get the filenames without the extension 182 | self.img_filenames = [e[:-4] for e in np.sort(os.listdir(self.img_root)) 183 | if e[:-4] not in self.blacklisted] 184 | 185 | # Parse the label files. Some challenges as filenames do not correspond 186 | # exactly to query names. Go through all the labels to: 187 | # i) map names to filenames and vice versa 188 | # ii) get the relevant regions of interest of the queries, 189 | # iii) get the indexes of the dataset images that are queries 190 | # iv) get the relevants / non-relevants list 191 | self.relevants = {} 192 | self.junk = {} 193 | self.non_relevants = {} 194 | 195 | self.filename_to_name = {} 196 | self.name_to_filename = OrderedDict() 197 | self.q_roi = {} 198 | for e in lab_filenames: 199 | if e.endswith('_query.txt'): 200 | q_name = e[:-len('_query.txt')] 201 | q_data = open("{0}/{1}".format(self.lab_root, e)).readline().split(" ") 202 | q_filename = q_data[0][5:] if q_data[0].startswith('oxc1_') else q_data[0] 203 | self.filename_to_name[q_filename] = q_name 204 | self.name_to_filename[q_name] = q_filename 205 | good = set([e.strip() for e in open("{0}/{1}_ok.txt".format(self.lab_root, q_name))]) 206 | good = good.union(set([e.strip() for e in open("{0}/{1}_good.txt".format(self.lab_root, q_name))])) 207 | junk = set([e.strip() for e in open("{0}/{1}_junk.txt".format(self.lab_root, q_name))]) 208 | good_plus_junk = good.union(junk) 209 | self.relevants[q_name] = [i for i in range(len(self.img_filenames)) 210 | if self.img_filenames[i] in good] 211 | self.junk[q_name] = [i for i in range(len(self.img_filenames)) 212 | if self.img_filenames[i] in junk] 213 | self.non_relevants[q_name] = [i for i in range(len(self.img_filenames)) 214 | if self.img_filenames[i] not in good_plus_junk] 215 | self.q_roi[q_name] = np.array([float(q) for q in q_data[1:]], dtype=np.float32) 216 | #np.array(map(float, q_data[1:]), dtype=np.float32) 217 | 218 | self.q_names = self.name_to_filename.keys() 219 | self.q_index = np.array([self.img_filenames.index(self.name_to_filename[qn]) 220 | for qn in self.q_names]) 221 | self.N_images = len(self.img_filenames) 222 | self.N_queries = len(self.q_index) 223 | 224 | def score(self, sim, temp_dir, eval_bin): 225 | if not os.path.exists(temp_dir): 226 | os.makedirs(temp_dir) 227 | idx = np.argsort(sim, axis=1)[:, ::-1] 228 | maps = [self.score_rnk_partial(i, idx[i], temp_dir, eval_bin) 229 | for i in range(len(self.q_names))] 230 | for i in range(len(self.q_names)): 231 | print("{0}: {1:.2f}".format(self.q_names[i], 100 * maps[i])) 232 | print(20 * "-") 233 | print("Mean: {0:.2f}".format(100 * 
np.mean(maps))) 234 | 235 | def score_rnk_partial(self, i, idx, temp_dir, eval_bin): 236 | rnk = np.array(self.img_filenames)[idx] 237 | with open("{0}/{1}.rnk".format(temp_dir, self.q_names[i]), 'w') as f: 238 | f.write("\n".join(rnk)+"\n") 239 | cmd = "{0} {1}{2} {3}/{4}.rnk".format(eval_bin, self.lab_root, self.q_names[i], temp_dir, self.q_names[i]) 240 | p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 241 | map_ = float(p.stdout.readlines()[0]) 242 | p.wait() 243 | return map_ 244 | 245 | def get_filename(self, i): 246 | return os.path.normpath("{0}/{1}.jpg".format(self.img_root, 247 | self.img_filenames[i])) 248 | 249 | def get_query_filename(self, i): 250 | return os.path.normpath("{0}/{1}.jpg".format(self.img_root, 251 | self.img_filenames[self.q_index[i]])) 252 | 253 | def get_query_roi(self, i): 254 | return self.q_roi[self.q_names[i]] 255 | 256 | 257 | def ensure_directory_exists(fname): 258 | dirname = fname[:fname.rfind('/')] 259 | if not os.path.exists(dirname): 260 | os.makedirs(dirname) 261 | 262 | 263 | def normalize_L2(a, dim): 264 | norms = torch.sqrt(torch.sum(a**2, dim=dim, keepdim=True)) 265 | return a / norms 266 | 267 | 268 | def rmac(features, rmac_levels, pca=None): 269 | nim, nc, xd, yd = features.size() 270 | 271 | rmac_regions = image_helper.get_rmac_region_coordinates(xd, yd, rmac_levels) 272 | rmac_regions = rmac_regions.astype(np.int) 273 | nr = len(rmac_regions) 274 | 275 | rmac_descriptors = [] 276 | for x0, y0, w, h in rmac_regions: 277 | desc = features[:, :, y0:y0 + h, x0:x0 + w] 278 | desc = torch.max(desc, 2, keepdim=True)[0] 279 | desc = torch.max(desc, 3, keepdim=True)[0] 280 | # insert an additional dimension for the cat to work 281 | rmac_descriptors.append(desc.view(-1, 1, nc)) 282 | 283 | rmac_descriptors = torch.cat(rmac_descriptors, 1) 284 | 285 | rmac_descriptors = normalize_L2(rmac_descriptors, 2) 286 | 287 | if pca is None: 288 | return rmac_descriptors 289 | 290 | # PCA + whitening 291 | npca = pca.n_components 292 | rmac_descriptors = pca.apply(rmac_descriptors.view(nr * nim, nc)) 293 | rmac_descriptors = normalize_L2(rmac_descriptors, 1) 294 | 295 | rmac_descriptors = rmac_descriptors.view(nim, nr, npca) 296 | 297 | # Sum aggregation and L2-normalization 298 | rmac_descriptors = torch.sum(rmac_descriptors, 1) 299 | rmac_descriptors = normalize_L2(rmac_descriptors, 1) 300 | return rmac_descriptors 301 | 302 | 303 | if __name__ == '__main__': 304 | parser = argparse.ArgumentParser(description='Evaluate Oxford / Paris') 305 | parser.add_argument('--S', type=int, default=1024, 306 | help='Resize larger side of image to S pixels (e.g. 800)') 307 | parser.add_argument('--L', type=int, default=3, 308 | help='Use L spatial levels (e.g. 
3)') 309 | parser.add_argument('--n_pca', type=int, default=512, 310 | help='output dimension of PCA') 311 | parser.add_argument('--model', type=str, default='pretrained', 312 | help='Model from which RMAC is computed') 313 | parser.add_argument('--dataset', type=str, required=True, 314 | help='path to dataset') 315 | parser.add_argument('--dataset_name', type=str, default='Oxford', 316 | choices=['Oxford', 'Paris'], help='Dataset name') 317 | parser.add_argument('--stage', type=str, default='extract_train', 318 | choices=['extract_train', 'train_pca', 'db_features', 319 | 'q_features', 'eval'], help='what action to perform ') 320 | parser.add_argument('--eval_binary', type=str, required=True, 321 | help='Path to the compute_ap binary to evaluate Oxford / Paris') 322 | parser.add_argument('--temp_dir', type=str, default='', 323 | help='Path to a temporary directory to store features and scores') 324 | parser.add_argument('--multires', dest='multires', action='store_true', 325 | help='Enable multiresolution features') 326 | parser.add_argument('--aqe', type=int, required=False, 327 | help='Average query expansion with k neighbors') 328 | parser.add_argument('--dbe', type=int, required=False, 329 | help='Database expansion with k neighbors') 330 | 331 | parser.set_defaults(multires=False) 332 | args = parser.parse_args() 333 | 334 | # Load the dataset and the image helper 335 | print "Prepare the dataset from ", args.dataset 336 | dataset = Dataset(args.dataset, args.eval_binary) 337 | 338 | ensure_directory_exists(args.temp_dir + '/') 339 | 340 | if args.stage in ('extract_train', 'db_features', 'q_features'): 341 | 342 | if args.model == 'pretrained': 343 | print("loading supervised pretrained VGG-16") 344 | net = torchvision.models.vgg16_bn(pretrained=True) 345 | else: 346 | net = load_model(args.model) 347 | 348 | transforms_comp = [] 349 | features_layers = list(net.features.children())[:-1] 350 | net.features = torch.nn.Sequential(*features_layers) 351 | transforms_comp.extend([ 352 | torchvision.transforms.ToTensor(), 353 | torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 354 | std=[0.229, 0.224, 0.225]) 355 | ]) 356 | 357 | transforms = torchvision.transforms.Compose(transforms_comp) 358 | 359 | print("moving to GPU") 360 | net.cuda() 361 | net.eval() 362 | print(" done") 363 | 364 | print("initialize image helper") 365 | image_helper = ImageHelper(args.S, args.L, transforms) 366 | 367 | 368 | if args.stage == 'extract_train': 369 | print("extract regions for training") 370 | # extract at a single scale 371 | S = args.S 372 | image_helper.S = S 373 | N_dataset = dataset.N_images 374 | def process_image(i): 375 | print(i), 376 | sys.stdout.flush() 377 | fname_out = "{0}/{1}_S{2}_L{3}_regions/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 378 | 379 | ensure_directory_exists(fname_out) 380 | I = image_helper.load_and_prepare_image(dataset.get_filename(i), roi=None) 381 | v = torch.autograd.Variable(I.unsqueeze(0)) 382 | vc = v.cuda() 383 | if hasattr(net, 'sobel') and net.sobel is not None: 384 | vc = net.sobel(vc) 385 | activation_map = net.features(vc).cpu() 386 | 387 | rmac_descriptors = rmac(activation_map, args.L) 388 | np.save(fname_out, rmac_descriptors.data.numpy()) 389 | 390 | map(process_image, range(dataset.N_images)) 391 | 392 | elif args.stage == 'train_pca': 393 | # load training vectors 394 | train_x = [] 395 | for i in range(10000): 396 | fname_in = "{0}/{1}_S{2}_L{3}_regions/{4}.npy".format(args.temp_dir, args.dataset_name, args.S, args.L, 
i) 397 | if not os.path.exists(fname_in): 398 | break 399 | x = np.load(fname_in) 400 | train_x.append(x) 401 | 402 | print("loaded %d train vectors" % len(train_x)) 403 | 404 | train_x = np.vstack([x.reshape(-1, x.shape[-1]) for x in train_x]) 405 | print(" size", train_x.shape) 406 | 407 | pca = PCA(args.n_pca) 408 | pca.fit(train_x) 409 | pcaname = '%s/%s_S%d_PCA.pickle' % (args.temp_dir, args.dataset_name, args.S) 410 | 411 | print("writing", pcaname) 412 | pickle.dump(pca, open(pcaname, 'w'), -1) 413 | 414 | elif args.stage == 'db_features' or args.stage == 'q_features': 415 | # for tests on Paris, use Oxford PCA, and vice-versa 416 | pcaname = '%s/%s_S%d_PCA.pickle' % ( 417 | args.temp_dir, 'Paris' if args.dataset_name == 'Oxford' else 'Oxford', args.S) 418 | print("loading PCA from", pcaname) 419 | pca = pickle.load(open(pcaname, 'r')) 420 | 421 | print("Compute features") 422 | # extract at a single scale 423 | S = args.S 424 | image_helper.S = S 425 | N_dataset = dataset.N_images 426 | 427 | def process_image(fname_in, roi, fname_out): 428 | softmax = torch.nn.Softmax().cuda() 429 | I = image_helper.load_and_prepare_image(fname_in, roi=roi) 430 | v = torch.autograd.Variable(I.unsqueeze(0)) 431 | vc = v.cuda() 432 | if hasattr(net, 'sobel') and net.sobel is not None: 433 | vc = net.sobel(vc) 434 | activation_map = net.features(vc).cpu() 435 | descriptors = rmac(activation_map, args.L, pca=pca) 436 | np.save(fname_out, descriptors.data.numpy()) 437 | 438 | if args.stage == 'db_features': 439 | for i in range(dataset.N_images): 440 | fname_in = dataset.get_filename(i) 441 | fname_out = "{0}/{1}_S{2}_L{3}_db/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 442 | ensure_directory_exists(fname_out) 443 | print(i), 444 | sys.stdout.flush() 445 | process_image(fname_in, None, fname_out) 446 | 447 | elif args.stage == 'q_features': 448 | for i in range(dataset.N_queries): 449 | fname_in = dataset.get_query_filename(i) 450 | roi = dataset.get_query_roi(i) 451 | fname_out = "{0}/{1}_S{2}_L{3}_q/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 452 | ensure_directory_exists(fname_out) 453 | print(i), 454 | sys.stdout.flush() 455 | process_image(fname_in, roi, fname_out) 456 | 457 | elif args.stage == 'eval': 458 | S = args.S 459 | 460 | print("load query features") 461 | features_queries = [] 462 | for i in range(dataset.N_queries): 463 | fname = "{0}/{1}_S{2}_L{3}_q/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 464 | features_queries.append(np.load(fname)) 465 | features_queries = np.vstack(features_queries) 466 | 467 | print(" size", features_queries.shape) 468 | 469 | print("load database features") 470 | features_dataset = [] 471 | for i in range(dataset.N_images): 472 | fname = "{0}/{1}_S{2}_L{3}_db/{4}.npy".format(args.temp_dir, args.dataset_name, S, args.L, i) 473 | features_dataset.append(np.load(fname)) 474 | features_dataset = np.vstack(features_dataset) 475 | print(" size", features_dataset.shape) 476 | 477 | # Compute similarity 478 | sim = features_queries.dot(features_dataset.T) 479 | 480 | # Score 481 | dataset.score(sim, args.temp_dir, args.eval_binary) 482 | -------------------------------------------------------------------------------- /eval_retrieval.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | 10 | # This source is adapted from the "deep_retrieval" package that comes with 11 | # Deep Image Retrieval: Learning global representations for image search. A. Gordo, J. Almazan, J. Revaud, and D. Larlus. In ECCV, 2016 12 | # The original source is not accessible anymore, but other people shared the code, see eg. https://github.com/figitaki/deep-retrieval 13 | # follow the instructions on that github repo to download the data, compile the evaluation package, and set the path to the resulting directory below: 14 | 15 | #DATASETS='./datasets' 16 | 17 | # load pytorch model from here 18 | MODEL='/private/home/mathilde/model-to-release/vgg16/checkpoint.pth.tar' 19 | 20 | # this is to obtain the supervised performance 21 | #MODEL='pretrained' 22 | 23 | TEMP='/private/home/mathilde/temp' 24 | 25 | # should be compiled as part of the dataset preparation 26 | EVALBINARY="$DATASETS/evaluation/compute_ap" 27 | EVAL='Paris' 28 | PCA='Oxford' 29 | DATASETEVAL="$DATASETS/$EVAL" 30 | DATASETPCA="$DATASETS/$PCA" 31 | 32 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETPCA} --dataset_name ${PCA} --stage extract_train 33 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETPCA} --dataset_name ${PCA} --stage train_pca 34 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETEVAL} --dataset_name ${EVAL} --stage q_features 35 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETEVAL} --dataset_name ${EVAL} --stage db_features 36 | python eval_retrieval.py --model ${MODEL} --eval_binary ${EVALBINARY} --temp_dir ${TEMP} --dataset ${DATASETEVAL} --dataset_name ${EVAL} --stage eval 37 | -------------------------------------------------------------------------------- /eval_voc_classif.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | #!/usr/bin/env python 8 | # -*- coding: utf-8 -*- 9 | 10 | import argparse 11 | import os 12 | import math 13 | import time 14 | import glob 15 | from collections import defaultdict 16 | 17 | import numpy as np 18 | import torch 19 | import torch.nn as nn 20 | import torch.optim 21 | import torch.utils.data 22 | import torchvision 23 | import torchvision.transforms as transforms 24 | import torch.backends.cudnn as cudnn 25 | from sklearn import metrics 26 | from PIL import Image 27 | from PIL import ImageFile 28 | ImageFile.LOAD_TRUNCATED_IMAGES = True 29 | 30 | from util import AverageMeter, load_model 31 | from eval_linear import accuracy 32 | 33 | 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument('--vocdir', type=str, required=False, default='', help='pascal voc 2007 dataset') 36 | parser.add_argument('--split', type=str, required=False, default='train', choices=['train', 'trainval'], help='training split') 37 | parser.add_argument('--model', type=str, required=False, default='', 38 | help='evaluate this model') 39 | parser.add_argument('--nit', type=int, default=80000, help='Number of training iterations') 40 | parser.add_argument('--fc6_8', type=int, default=1, help='If true, train only the final classifier') 41 | parser.add_argument('--train_batchnorm', type=int, default=0, help='If true, train batch-norm layer parameters') 42 | parser.add_argument('--eval_random_crops', type=int, default=1, help='If true, eval on 10 random crops, otherwise eval on 10 fixed crops') 43 | parser.add_argument('--stepsize', type=int, default=5000, help='Decay step') 44 | parser.add_argument('--lr', type=float, required=False, default=0.003, help='learning rate') 45 | parser.add_argument('--wd', type=float, required=False, default=1e-6, help='weight decay') 46 | parser.add_argument('--min_scale', type=float, required=False, default=0.1, help='scale') 47 | parser.add_argument('--max_scale', type=float, required=False, default=0.5, help='scale') 48 | parser.add_argument('--seed', type=int, default=31, help='random seed') 49 | 50 | def main(): 51 | args = parser.parse_args() 52 | print(args) 53 | 54 | # fix random seeds 55 | torch.manual_seed(args.seed) 56 | torch.cuda.manual_seed_all(args.seed) 57 | np.random.seed(args.seed) 58 | 59 | # create model and move it to gpu 60 | model = load_model(args.model) 61 | model.top_layer = nn.Linear(model.top_layer.weight.size(1), 20) 62 | model.cuda() 63 | cudnn.benchmark = True 64 | 65 | # what partition of the data to use 66 | if args.split == 'train': 67 | args.test = 'val' 68 | elif args.split == 'trainval': 69 | args.test = 'test' 70 | # data loader 71 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 72 | std=[0.229, 0.224, 0.225]) 73 | dataset = VOC2007_dataset(args.vocdir, split=args.split, transform=transforms.Compose([ 74 | transforms.RandomHorizontalFlip(), 75 | transforms.RandomResizedCrop(224, scale=(args.min_scale, args.max_scale), ratio=(1, 1)), 76 | transforms.ToTensor(), 77 | normalize, 78 | ])) 79 | 80 | loader = torch.utils.data.DataLoader(dataset, 81 | batch_size=16, shuffle=False, 82 | num_workers=24, pin_memory=True) 83 | print('PASCAL VOC 2007 ' + args.split + ' dataset loaded') 84 | 85 | # re initialize classifier 86 | for y, m in enumerate(model.classifier.modules()): 87 | if isinstance(m, nn.Linear): 88 | m.weight.data.normal_(0, 0.01) 89 | m.bias.data.fill_(0.1) 90 | model.top_layer.bias.data.fill_(0.1) 91 | 92 | if args.fc6_8: 93 | # freeze some layers 94 | for param in model.features.parameters(): 95 | 
param.requires_grad = False 96 | # unfreeze batchnorm scaling 97 | if args.train_batchnorm: 98 | for layer in model.modules(): 99 | if isinstance(layer, torch.nn.BatchNorm2d): 100 | for param in layer.parameters(): 101 | param.requires_grad = True 102 | 103 | # set optimizer 104 | optimizer = torch.optim.SGD( 105 | filter(lambda x: x.requires_grad, model.parameters()), 106 | lr=args.lr, 107 | momentum=0.9, 108 | weight_decay=args.wd, 109 | ) 110 | 111 | criterion = nn.BCEWithLogitsLoss(reduction='none') 112 | 113 | print('Start training') 114 | it = 0 115 | losses = AverageMeter() 116 | while it < args.nit: 117 | it = train( 118 | loader, 119 | model, 120 | optimizer, 121 | criterion, 122 | args.fc6_8, 123 | losses, 124 | it=it, 125 | total_iterations=args.nit, 126 | stepsize=args.stepsize, 127 | ) 128 | 129 | print('Evaluation') 130 | if args.eval_random_crops: 131 | transform_eval = [ 132 | transforms.RandomHorizontalFlip(), 133 | transforms.RandomResizedCrop(224, scale=(args.min_scale, args.max_scale), ratio=(1, 1)), 134 | transforms.ToTensor(), 135 | normalize, 136 | ] 137 | else: 138 | transform_eval = [ 139 | transforms.Resize(256), 140 | transforms.TenCrop(224), 141 | transforms.Lambda(lambda crops: torch.stack([normalize(transforms.ToTensor()(crop)) for crop in crops])) 142 | ] 143 | 144 | print('Train set') 145 | train_dataset = VOC2007_dataset(args.vocdir, split=args.split, transform=transforms.Compose(transform_eval)) 146 | train_loader = torch.utils.data.DataLoader( 147 | train_dataset, 148 | batch_size=1, 149 | shuffle=False, 150 | num_workers=24, 151 | pin_memory=True, 152 | ) 153 | evaluate(train_loader, model, args.eval_random_crops) 154 | 155 | print('Test set') 156 | test_dataset = VOC2007_dataset(args.vocdir, split=args.test, transform=transforms.Compose(transform_eval)) 157 | test_loader = torch.utils.data.DataLoader( 158 | test_dataset, 159 | batch_size=1, 160 | shuffle=False, 161 | num_workers=24, 162 | pin_memory=True, 163 | ) 164 | evaluate(test_loader, model, args.eval_random_crops) 165 | 166 | 167 | def evaluate(loader, model, eval_random_crops): 168 | model.eval() 169 | gts = [] 170 | scr = [] 171 | for crop in range(9 * eval_random_crops + 1): 172 | for i, (input, target) in enumerate(loader): 173 | # move input to gpu and optionally reshape it 174 | if len(input.size()) == 5: 175 | bs, ncrops, c, h, w = input.size() 176 | input = input.view(-1, c, h, w) 177 | input = input.cuda(non_blocking=True) 178 | 179 | # forward pass without grad computation 180 | with torch.no_grad(): 181 | output = model(input) 182 | if crop < 1 : 183 | scr.append(torch.sum(output, 0, keepdim=True).cpu().numpy()) 184 | gts.append(target) 185 | else: 186 | scr[i] += output.cpu().numpy() 187 | gts = np.concatenate(gts, axis=0).T 188 | scr = np.concatenate(scr, axis=0).T 189 | aps = [] 190 | for i in range(20): 191 | # Subtract eps from score to make AP work for tied scores 192 | ap = metrics.average_precision_score(gts[i][gts[i]<=1], scr[i][gts[i]<=1]-1e-5*gts[i][gts[i]<=1]) 193 | aps.append( ap ) 194 | print(np.mean(aps), ' ', ' '.join(['%0.2f'%a for a in aps])) 195 | 196 | 197 | def train(loader, model, optimizer, criterion, fc6_8, losses, it=0, total_iterations=None, stepsize=None, verbose=True): 198 | # to log 199 | batch_time = AverageMeter() 200 | data_time = AverageMeter() 201 | top1 = AverageMeter() 202 | end = time.time() 203 | 204 | current_iteration = it 205 | 206 | # use dropout for the MLP 207 | model.train() 208 | # in the batch norms always use global statistics 209 | 
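    # model.train() above mainly re-enables dropout in the fc6-8 classifier; switching the
    # convolutional trunk back to eval() keeps its batch-norm layers on running statistics,
    # which is the intended behaviour since those layers are frozen when --fc6_8 is set
    # (only their affine parameters are updated if --train_batchnorm is also given).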
model.features.eval() 210 | 211 | for (input, target) in loader: 212 | # measure data loading time 213 | data_time.update(time.time() - end) 214 | 215 | # adjust learning rate 216 | if current_iteration != 0 and current_iteration % stepsize == 0: 217 | for param_group in optimizer.param_groups: 218 | param_group['lr'] = param_group['lr'] * 0.5 219 | print('iter {0} learning rate is {1}'.format(current_iteration, param_group['lr'])) 220 | 221 | # move input to gpu 222 | input = input.cuda(non_blocking=True) 223 | 224 | # forward pass with or without grad computation 225 | output = model(input) 226 | 227 | target = target.float().cuda() 228 | mask = (target == 255) 229 | loss = torch.sum(criterion(output, target).masked_fill_(mask, 0)) / target.size(0) 230 | 231 | # backward 232 | optimizer.zero_grad() 233 | loss.backward() 234 | # clip gradients 235 | torch.nn.utils.clip_grad_norm_(model.parameters(), 10) 236 | # and weights update 237 | optimizer.step() 238 | 239 | # measure accuracy and record loss 240 | losses.update(loss.item(), input.size(0)) 241 | 242 | # measure elapsed time 243 | batch_time.update(time.time() - end) 244 | end = time.time() 245 | if verbose is True and current_iteration % 25 == 0: 246 | print('Iteration[{0}]\t' 247 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 248 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 249 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format( 250 | current_iteration, batch_time=batch_time, 251 | data_time=data_time, loss=losses)) 252 | current_iteration = current_iteration + 1 253 | if total_iterations is not None and current_iteration == total_iterations: 254 | break 255 | return current_iteration 256 | 257 | 258 | class VOC2007_dataset(torch.utils.data.Dataset): 259 | def __init__(self, voc_dir, split='train', transform=None): 260 | # Find the image sets 261 | image_set_dir = os.path.join(voc_dir, 'ImageSets', 'Main') 262 | image_sets = glob.glob(os.path.join(image_set_dir, '*_' + split + '.txt')) 263 | assert len(image_sets) == 20 264 | # Read the labels 265 | self.n_labels = len(image_sets) 266 | images = defaultdict(lambda:-np.ones(self.n_labels, dtype=np.uint8)) 267 | for k, s in enumerate(sorted(image_sets)): 268 | for l in open(s, 'r'): 269 | name, lbl = l.strip().split() 270 | lbl = int(lbl) 271 | # Switch the ignore label and 0 label (in VOC -1: not present, 0: ignore) 272 | if lbl < 0: 273 | lbl = 0 274 | elif lbl == 0: 275 | lbl = 255 276 | images[os.path.join(voc_dir, 'JPEGImages', name + '.jpg')][k] = lbl 277 | self.images = [(k, images[k]) for k in images.keys()] 278 | np.random.shuffle(self.images) 279 | self.transform = transform 280 | 281 | def __len__(self): 282 | return len(self.images) 283 | 284 | def __getitem__(self, i): 285 | img = Image.open(self.images[i][0]) 286 | img = img.convert('RGB') 287 | if self.transform is not None: 288 | img = self.transform(img) 289 | return img, self.images[i][1] 290 | 291 | if __name__ == '__main__': 292 | main() 293 | 294 | -------------------------------------------------------------------------------- /eval_voc_classif.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | #!/bin/bash 8 | 9 | VOCDIR="" 10 | MODELROOT="${HOME}/deepcluster_models" 11 | MODEL="${MODELROOT}/alexnet/checkpoint.pth.tar" 12 | 13 | PYTHON="${HOME}/test/conda/bin/python" 14 | 15 | # with training the batch norm 16 | # 72.0 mAP 17 | $PYTHON eval_voc_classif.py --vocdir $VOCDIR --model $MODEL --split trainval --fc6_8 1 --train_batchnorm 1 18 | 19 | # without training the batch norm 20 | # 70.4 mAP 21 | $PYTHON eval_voc_classif.py --vocdir $VOCDIR --model $MODEL --split trainval --fc6_8 1 --train_batchnorm 0 22 | -------------------------------------------------------------------------------- /eval_voc_classif_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2017-present, Facebook, Inc. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | VOC="/private/home/bojanowski/data/VOCdevkit/VOC2007" 10 | CAFFE="/private/home/bojanowski/code/unsup-eval-pascal/voc-classification/caffe" 11 | 12 | # download code for pascal classification 13 | mkdir -p third-parties 14 | if [ ! -d third-parties/voc-classification ]; then 15 | git clone https://github.com/philkr/voc-classification.git third-parties/voc-classification 16 | fi 17 | 18 | # user config 19 | USERCONFIG=third-parties/voc-classification/src/user_config.py 20 | /bin/cat <<EOM >$USERCONFIG 21 | from os import path 22 | # Path to caffe 23 | CAFFE_DIR = "${CAFFE}" 24 | # Path to the VOC 2007 or 2012 directory 25 | VOC_DIR = "${VOC}" 26 | EOM 27 | 28 | # change stepsize in train_cls.py 29 | sed -i -e "s/stepsize=10000/stepsize=20000/g" third-parties/voc-classification/src/train_cls.py 30 | sed -i -e "s/stepsize=5000/stepsize=20000/g" third-parties/voc-classification/src/train_cls.py 31 | 32 | # run transfer 33 | MODELROOT="${HOME}/deepcluster_models" 34 | PROTO="${MODELROOT}/alexnet/model.prototxt" 35 | MODEL="${MODELROOT}/alexnet/model.caffemodel" 36 | EXP="${HOME}/deepcluster_exp/pascal_all" 37 | LR=0.001 38 | BSZ=16 39 | 40 | mkdir -p ${EXP} 41 | 42 | python third-parties/voc-classification/src/train_cls.py ${PROTO} ${MODEL} --output ${EXP}/ \ 43 | --clip ThresholdBackward28 --train-from ConvNdBackward5 \ 44 | --random-from DropoutBackward23 --gpu 0 --no-mean \ 45 | -lr ${LR} -bs ${BSZ} -nit 150000 2>&1 | tee ${EXP}/output.txt 46 | -------------------------------------------------------------------------------- /eval_voc_classif_fc6_8.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | VOC='/private/home/bojanowski/data/VOCdevkit/VOC2007' 10 | CAFFE='/private/home/bojanowski/code/unsup-eval-pascal/voc-classification/caffe' 11 | 12 | # download code for pascal classification 13 | mkdir -p third-parties 14 | if [ ! 
-d third-parties/voc-classification ]; then 15 | git clone https://github.com/philkr/voc-classification.git third-parties/voc-classification 16 | fi 17 | 18 | # user config 19 | USERCONFIG=third-parties/voc-classification/src/user_config.py 20 | /bin/cat <<EOM >$USERCONFIG 21 | from os import path 22 | # Path to caffe 23 | CAFFE_DIR = '${CAFFE}' 24 | # Path to the VOC 2007 or 2012 directory 25 | VOC_DIR = '${VOC}' 26 | EOM 27 | 28 | # change stepsize in train_cls.py 29 | sed -i -e 's/stepsize=10000/stepsize=5000/g' third-parties/voc-classification/src/train_cls.py 30 | sed -i -e 's/stepsize=20000/stepsize=5000/g' third-parties/voc-classification/src/train_cls.py 31 | 32 | # run transfer 33 | PROTO="/private/home/mathilde/model-to-release/alexnet/model.prototxt" 34 | MODEL="/private/home/mathilde/model-to-release/alexnet/model.caffemodel" 35 | LR=0.003 36 | BSZ=16 37 | EXP="" 38 | 39 | mkdir -p ${EXP} 40 | 41 | python third-parties/voc-classification/src/train_cls.py ${PROTO} ${MODEL} --output ${EXP}/ \ 42 | --clip ThresholdBackward28 --train-from DropoutBackward23 \ 43 | --random-from DropoutBackward23 --gpu 0 --no-mean \ 44 | -lr ${LR} -bs ${BSZ} -nit 150000 2>&1 | tee ${EXP}/output.txt 45 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import argparse 8 | import os 9 | import pickle 10 | import time 11 | 12 | import faiss 13 | import numpy as np 14 | from sklearn.metrics.cluster import normalized_mutual_info_score 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torchvision.transforms as transforms 22 | import torchvision.datasets as datasets 23 | 24 | import clustering 25 | import models 26 | from util import AverageMeter, Logger, UnifLabelSampler 27 | 28 | 29 | def parse_args(): 30 | parser = argparse.ArgumentParser(description='PyTorch Implementation of DeepCluster') 31 | 32 | parser.add_argument('data', metavar='DIR', help='path to dataset') 33 | parser.add_argument('--arch', '-a', type=str, metavar='ARCH', 34 | choices=['alexnet', 'vgg16'], default='alexnet', 35 | help='CNN architecture (default: alexnet)') 36 | parser.add_argument('--sobel', action='store_true', help='Sobel filtering') 37 | parser.add_argument('--clustering', type=str, choices=['Kmeans', 'PIC'], 38 | default='Kmeans', help='clustering algorithm (default: Kmeans)') 39 | parser.add_argument('--nmb_cluster', '--k', type=int, default=10000, 40 | help='number of cluster for k-means (default: 10000)') 41 | parser.add_argument('--lr', default=0.05, type=float, 42 | help='learning rate (default: 0.05)') 43 | parser.add_argument('--wd', default=-5, type=float, 44 | help='weight decay pow (default: -5)') 45 | parser.add_argument('--reassign', type=float, default=1., 46 | help="""how many epochs of training between two consecutive 47 | reassignments of clusters (default: 1)""") 48 | parser.add_argument('--workers', default=4, type=int, 49 | help='number of data loading workers (default: 4)') 50 | parser.add_argument('--epochs', type=int, default=200, 51 | help='number of total epochs to run (default: 200)') 52 | parser.add_argument('--start_epoch', default=0, 
type=int, 53 | help='manual epoch number (useful on restarts) (default: 0)') 54 | parser.add_argument('--batch', default=256, type=int, 55 | help='mini-batch size (default: 256)') 56 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum (default: 0.9)') 57 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 58 | help='path to checkpoint (default: None)') 59 | parser.add_argument('--checkpoints', type=int, default=25000, 60 | help='how many iterations between two checkpoints (default: 25000)') 61 | parser.add_argument('--seed', type=int, default=31, help='random seed (default: 31)') 62 | parser.add_argument('--exp', type=str, default='', help='path to exp folder') 63 | parser.add_argument('--verbose', action='store_true', help='chatty') 64 | return parser.parse_args() 65 | 66 | 67 | def main(args): 68 | # fix random seeds 69 | torch.manual_seed(args.seed) 70 | torch.cuda.manual_seed_all(args.seed) 71 | np.random.seed(args.seed) 72 | 73 | # CNN 74 | if args.verbose: 75 | print('Architecture: {}'.format(args.arch)) 76 | model = models.__dict__[args.arch](sobel=args.sobel) 77 | fd = int(model.top_layer.weight.size()[1]) 78 | model.top_layer = None 79 | model.features = torch.nn.DataParallel(model.features) 80 | model.cuda() 81 | cudnn.benchmark = True 82 | 83 | # create optimizer 84 | optimizer = torch.optim.SGD( 85 | filter(lambda x: x.requires_grad, model.parameters()), 86 | lr=args.lr, 87 | momentum=args.momentum, 88 | weight_decay=10**args.wd, 89 | ) 90 | 91 | # define loss function 92 | criterion = nn.CrossEntropyLoss().cuda() 93 | 94 | # optionally resume from a checkpoint 95 | if args.resume: 96 | if os.path.isfile(args.resume): 97 | print("=> loading checkpoint '{}'".format(args.resume)) 98 | checkpoint = torch.load(args.resume) 99 | args.start_epoch = checkpoint['epoch'] 100 | # remove top_layer parameters from checkpoint 101 | for key in checkpoint['state_dict']: 102 | if 'top_layer' in key: 103 | del checkpoint['state_dict'][key] 104 | model.load_state_dict(checkpoint['state_dict']) 105 | optimizer.load_state_dict(checkpoint['optimizer']) 106 | print("=> loaded checkpoint '{}' (epoch {})" 107 | .format(args.resume, checkpoint['epoch'])) 108 | else: 109 | print("=> no checkpoint found at '{}'".format(args.resume)) 110 | 111 | # creating checkpoint repo 112 | exp_check = os.path.join(args.exp, 'checkpoints') 113 | if not os.path.isdir(exp_check): 114 | os.makedirs(exp_check) 115 | 116 | # creating cluster assignments log 117 | cluster_log = Logger(os.path.join(args.exp, 'clusters')) 118 | 119 | # preprocessing of data 120 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 121 | std=[0.229, 0.224, 0.225]) 122 | tra = [transforms.Resize(256), 123 | transforms.CenterCrop(224), 124 | transforms.ToTensor(), 125 | normalize] 126 | 127 | # load the data 128 | end = time.time() 129 | dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) 130 | if args.verbose: 131 | print('Load dataset: {0:.2f} s'.format(time.time() - end)) 132 | 133 | dataloader = torch.utils.data.DataLoader(dataset, 134 | batch_size=args.batch, 135 | num_workers=args.workers, 136 | pin_memory=True) 137 | 138 | # clustering algorithm to use 139 | deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster) 140 | 141 | # training convnet with DeepCluster 142 | for epoch in range(args.start_epoch, args.epochs): 143 | end = time.time() 144 | 145 | # remove head 146 | model.top_layer = None 147 | model.classifier = 
nn.Sequential(*list(model.classifier.children())[:-1]) 148 | 149 | # get the features for the whole dataset 150 | features = compute_features(dataloader, model, len(dataset)) 151 | 152 | # cluster the features 153 | if args.verbose: 154 | print('Cluster the features') 155 | clustering_loss = deepcluster.cluster(features, verbose=args.verbose) 156 | 157 | # assign pseudo-labels 158 | if args.verbose: 159 | print('Assign pseudo labels') 160 | train_dataset = clustering.cluster_assign(deepcluster.images_lists, 161 | dataset.imgs) 162 | 163 | # uniformly sample per target 164 | sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)), 165 | deepcluster.images_lists) 166 | 167 | train_dataloader = torch.utils.data.DataLoader( 168 | train_dataset, 169 | batch_size=args.batch, 170 | num_workers=args.workers, 171 | sampler=sampler, 172 | pin_memory=True, 173 | ) 174 | 175 | # set last fully connected layer 176 | mlp = list(model.classifier.children()) 177 | mlp.append(nn.ReLU(inplace=True).cuda()) 178 | model.classifier = nn.Sequential(*mlp) 179 | model.top_layer = nn.Linear(fd, len(deepcluster.images_lists)) 180 | model.top_layer.weight.data.normal_(0, 0.01) 181 | model.top_layer.bias.data.zero_() 182 | model.top_layer.cuda() 183 | 184 | # train network with clusters as pseudo-labels 185 | end = time.time() 186 | loss = train(train_dataloader, model, criterion, optimizer, epoch) 187 | 188 | # print log 189 | if args.verbose: 190 | print('###### Epoch [{0}] ###### \n' 191 | 'Time: {1:.3f} s\n' 192 | 'Clustering loss: {2:.3f} \n' 193 | 'ConvNet loss: {3:.3f}' 194 | .format(epoch, time.time() - end, clustering_loss, loss)) 195 | try: 196 | nmi = normalized_mutual_info_score( 197 | clustering.arrange_clustering(deepcluster.images_lists), 198 | clustering.arrange_clustering(cluster_log.data[-1]) 199 | ) 200 | print('NMI against previous assignment: {0:.3f}'.format(nmi)) 201 | except IndexError: 202 | pass 203 | print('####################### \n') 204 | # save running checkpoint 205 | torch.save({'epoch': epoch + 1, 206 | 'arch': args.arch, 207 | 'state_dict': model.state_dict(), 208 | 'optimizer' : optimizer.state_dict()}, 209 | os.path.join(args.exp, 'checkpoint.pth.tar')) 210 | 211 | # save cluster assignments 212 | cluster_log.log(deepcluster.images_lists) 213 | 214 | 215 | def train(loader, model, crit, opt, epoch): 216 | """Training of the CNN. 
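        Performs one pass over the pseudo-labelled dataset. Note that the top layer is
        re-created in main() after `opt` has been built, so it is updated by a separate
        optimizer (optimizer_tl) defined inside this function.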
217 | Args: 218 | loader (torch.utils.data.DataLoader): Data loader 219 | model (nn.Module): CNN 220 | crit (torch.nn): loss 221 | opt (torch.optim.SGD): optimizer for every parameters with True 222 | requires_grad in model except top layer 223 | epoch (int) 224 | """ 225 | batch_time = AverageMeter() 226 | losses = AverageMeter() 227 | data_time = AverageMeter() 228 | forward_time = AverageMeter() 229 | backward_time = AverageMeter() 230 | 231 | # switch to train mode 232 | model.train() 233 | 234 | # create an optimizer for the last fc layer 235 | optimizer_tl = torch.optim.SGD( 236 | model.top_layer.parameters(), 237 | lr=args.lr, 238 | weight_decay=10**args.wd, 239 | ) 240 | 241 | end = time.time() 242 | for i, (input_tensor, target) in enumerate(loader): 243 | data_time.update(time.time() - end) 244 | 245 | # save checkpoint 246 | n = len(loader) * epoch + i 247 | if n % args.checkpoints == 0: 248 | path = os.path.join( 249 | args.exp, 250 | 'checkpoints', 251 | 'checkpoint_' + str(n / args.checkpoints) + '.pth.tar', 252 | ) 253 | if args.verbose: 254 | print('Save checkpoint at: {0}'.format(path)) 255 | torch.save({ 256 | 'epoch': epoch + 1, 257 | 'arch': args.arch, 258 | 'state_dict': model.state_dict(), 259 | 'optimizer' : opt.state_dict() 260 | }, path) 261 | 262 | target = target.cuda(async=True) 263 | input_var = torch.autograd.Variable(input_tensor.cuda()) 264 | target_var = torch.autograd.Variable(target) 265 | 266 | output = model(input_var) 267 | loss = crit(output, target_var) 268 | 269 | # record loss 270 | losses.update(loss.data[0], input_tensor.size(0)) 271 | 272 | # compute gradient and do SGD step 273 | opt.zero_grad() 274 | optimizer_tl.zero_grad() 275 | loss.backward() 276 | opt.step() 277 | optimizer_tl.step() 278 | 279 | # measure elapsed time 280 | batch_time.update(time.time() - end) 281 | end = time.time() 282 | 283 | if args.verbose and (i % 200) == 0: 284 | print('Epoch: [{0}][{1}/{2}]\t' 285 | 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 286 | 'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t' 287 | 'Loss: {loss.val:.4f} ({loss.avg:.4f})' 288 | .format(epoch, i, len(loader), batch_time=batch_time, 289 | data_time=data_time, loss=losses)) 290 | 291 | return losses.avg 292 | 293 | def compute_features(dataloader, model, N): 294 | if args.verbose: 295 | print('Compute features') 296 | batch_time = AverageMeter() 297 | end = time.time() 298 | model.eval() 299 | # discard the label information in the dataloader 300 | for i, (input_tensor, _) in enumerate(dataloader): 301 | input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True) 302 | aux = model(input_var).data.cpu().numpy() 303 | 304 | if i == 0: 305 | features = np.zeros((N, aux.shape[1]), dtype='float32') 306 | 307 | aux = aux.astype('float32') 308 | if i < len(dataloader) - 1: 309 | features[i * args.batch: (i + 1) * args.batch] = aux 310 | else: 311 | # special treatment for final batch 312 | features[i * args.batch:] = aux 313 | 314 | # measure elapsed time 315 | batch_time.update(time.time() - end) 316 | end = time.time() 317 | 318 | if args.verbose and (i % 200) == 0: 319 | print('{0} / {1}\t' 320 | 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})' 321 | .format(i, len(dataloader), batch_time=batch_time)) 322 | return features 323 | 324 | 325 | if __name__ == '__main__': 326 | args = parse_args() 327 | main(args) 328 | -------------------------------------------------------------------------------- /main.sh: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | DIR="/datasets01/imagenet_full_size/061417/train" 10 | ARCH="alexnet" 11 | LR=0.05 12 | WD=-5 13 | K=10000 14 | WORKERS=12 15 | EXP="/private/home/${USER}/test/exp" 16 | PYTHON="/private/home/${USER}/test/conda/bin/python" 17 | 18 | mkdir -p ${EXP} 19 | 20 | CUDA_VISIBLE_DEVICES=0 ${PYTHON} main.py ${DIR} --exp ${EXP} --arch ${ARCH} \ 21 | --lr ${LR} --wd ${WD} --k ${K} --sobel --verbose --workers ${WORKERS} 22 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | from .vgg16 import * 8 | from .alexnet import * 9 | -------------------------------------------------------------------------------- /models/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import math 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | __all__ = [ 'AlexNet', 'alexnet'] 14 | 15 | # (number of filters, kernel size, stride, pad) 16 | CFG = { 17 | '2012': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M'] 18 | } 19 | 20 | 21 | class AlexNet(nn.Module): 22 | def __init__(self, features, num_classes, sobel): 23 | super(AlexNet, self).__init__() 24 | self.features = features 25 | self.classifier = nn.Sequential(nn.Dropout(0.5), 26 | nn.Linear(256 * 6 * 6, 4096), 27 | nn.ReLU(inplace=True), 28 | nn.Dropout(0.5), 29 | nn.Linear(4096, 4096), 30 | nn.ReLU(inplace=True)) 31 | 32 | self.top_layer = nn.Linear(4096, num_classes) 33 | self._initialize_weights() 34 | 35 | if sobel: 36 | grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0) 37 | grayscale.weight.data.fill_(1.0 / 3.0) 38 | grayscale.bias.data.zero_() 39 | sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1) 40 | sobel_filter.weight.data[0, 0].copy_( 41 | torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]]) 42 | ) 43 | sobel_filter.weight.data[1, 0].copy_( 44 | torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) 45 | ) 46 | sobel_filter.bias.data.zero_() 47 | self.sobel = nn.Sequential(grayscale, sobel_filter) 48 | for p in self.sobel.parameters(): 49 | p.requires_grad = False 50 | else: 51 | self.sobel = None 52 | 53 | def forward(self, x): 54 | if self.sobel: 55 | x = self.sobel(x) 56 | x = self.features(x) 57 | x = x.view(x.size(0), 256 * 6 * 6) 58 | x = self.classifier(x) 59 | if self.top_layer: 60 | x = self.top_layer(x) 61 | return x 62 | 63 | def _initialize_weights(self): 64 | for y, m in enumerate(self.modules()): 65 | if isinstance(m, nn.Conv2d): 66 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 67 | for i in range(m.out_channels): 68 | m.weight.data[i].normal_(0, math.sqrt(2. 
/ n)) 69 | if m.bias is not None: 70 | m.bias.data.zero_() 71 | elif isinstance(m, nn.BatchNorm2d): 72 | m.weight.data.fill_(1) 73 | m.bias.data.zero_() 74 | elif isinstance(m, nn.Linear): 75 | m.weight.data.normal_(0, 0.01) 76 | m.bias.data.zero_() 77 | 78 | 79 | def make_layers_features(cfg, input_dim, bn): 80 | layers = [] 81 | in_channels = input_dim 82 | for v in cfg: 83 | if v == 'M': 84 | layers += [nn.MaxPool2d(kernel_size=3, stride=2)] 85 | else: 86 | conv2d = nn.Conv2d(in_channels, v[0], kernel_size=v[1], stride=v[2], padding=v[3]) 87 | if bn: 88 | layers += [conv2d, nn.BatchNorm2d(v[0]), nn.ReLU(inplace=True)] 89 | else: 90 | layers += [conv2d, nn.ReLU(inplace=True)] 91 | in_channels = v[0] 92 | return nn.Sequential(*layers) 93 | 94 | 95 | def alexnet(sobel=False, bn=True, out=1000): 96 | dim = 2 + int(not sobel) 97 | model = AlexNet(make_layers_features(CFG['2012'], dim, bn=bn), out, sobel) 98 | return model 99 | -------------------------------------------------------------------------------- /models/vgg16.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import torch 8 | import torch.nn as nn 9 | import math 10 | from random import random as rd 11 | 12 | __all__ = [ 'VGG', 'vgg16'] 13 | 14 | 15 | class VGG(nn.Module): 16 | 17 | def __init__(self, features, num_classes, sobel): 18 | super(VGG, self).__init__() 19 | self.features = features 20 | self.classifier = nn.Sequential( 21 | nn.Linear(512 * 7 * 7, 4096), 22 | nn.ReLU(True), 23 | nn.Dropout(0.5), 24 | nn.Linear(4096, 4096), 25 | nn.ReLU(True) 26 | ) 27 | self.top_layer = nn.Linear(4096, num_classes) 28 | self._initialize_weights() 29 | if sobel: 30 | grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0) 31 | grayscale.weight.data.fill_(1.0 / 3.0) 32 | grayscale.bias.data.zero_() 33 | sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1) 34 | sobel_filter.weight.data[0,0].copy_( 35 | torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]]) 36 | ) 37 | sobel_filter.weight.data[1,0].copy_( 38 | torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) 39 | ) 40 | sobel_filter.bias.data.zero_() 41 | self.sobel = nn.Sequential(grayscale, sobel_filter) 42 | for p in self.sobel.parameters(): 43 | p.requires_grad = False 44 | else: 45 | self.sobel = None 46 | 47 | def forward(self, x): 48 | if self.sobel: 49 | x = self.sobel(x) 50 | x = self.features(x) 51 | x = x.view(x.size(0), -1) 52 | x = self.classifier(x) 53 | if self.top_layer: 54 | x = self.top_layer(x) 55 | return x 56 | 57 | def _initialize_weights(self): 58 | for y,m in enumerate(self.modules()): 59 | if isinstance(m, nn.Conv2d): 60 | #print(y) 61 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 62 | for i in range(m.out_channels): 63 | m.weight.data[i].normal_(0, math.sqrt(2. 
/ n)) 64 | if m.bias is not None: 65 | m.bias.data.zero_() 66 | elif isinstance(m, nn.BatchNorm2d): 67 | m.weight.data.fill_(1) 68 | m.bias.data.zero_() 69 | elif isinstance(m, nn.Linear): 70 | m.weight.data.normal_(0, 0.01) 71 | m.bias.data.zero_() 72 | 73 | 74 | def make_layers(input_dim, batch_norm): 75 | layers = [] 76 | in_channels = input_dim 77 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] 78 | for v in cfg: 79 | if v == 'M': 80 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 81 | else: 82 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 83 | if batch_norm: 84 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 85 | else: 86 | layers += [conv2d, nn.ReLU(inplace=True)] 87 | in_channels = v 88 | return nn.Sequential(*layers) 89 | 90 | 91 | def vgg16(sobel=False, bn=True, out=1000): 92 | dim = 2 + int(not sobel) 93 | model = VGG(make_layers(dim, bn), out, sobel) 94 | return model 95 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import os 8 | import pickle 9 | 10 | import numpy as np 11 | import torch 12 | from torch.utils.data.sampler import Sampler 13 | 14 | import models 15 | 16 | 17 | def load_model(path): 18 | """Loads the model and returns it without the DataParallel table.""" 19 | if os.path.isfile(path): 20 | print("=> loading checkpoint '{}'".format(path)) 21 | checkpoint = torch.load(path) 22 | 23 | # size of the top layer 24 | N = checkpoint['state_dict']['top_layer.bias'].size() 25 | 26 | # build skeleton of the model 27 | sob = 'sobel.0.weight' in checkpoint['state_dict'].keys() 28 | model = models.__dict__[checkpoint['arch']](sobel=sob, out=int(N[0])) 29 | 30 | # deal with a dataparallel table 31 | def rename_key(key): 32 | if 'module' not in key: 33 | return key 34 | return ''.join(key.split('.module')) 35 | 36 | checkpoint['state_dict'] = {rename_key(key): val 37 | for key, val 38 | in checkpoint['state_dict'].items()} 39 | 40 | # load weights 41 | model.load_state_dict(checkpoint['state_dict']) 42 | print("Loaded") 43 | else: 44 | model = None 45 | print("=> no checkpoint found at '{}'".format(path)) 46 | return model 47 | 48 | 49 | class UnifLabelSampler(Sampler): 50 | """Samples elements uniformly across pseudolabels. 51 | Args: 52 | N (int): size of returned iterator.
53 | images_lists: dict whose keys are targets (pseudolabels) and whose values are lists of data indexes with that target 54 | """ 55 | 56 | def __init__(self, N, images_lists): 57 | self.N = N 58 | self.images_lists = images_lists 59 | self.indexes = self.generate_indexes_epoch() 60 | 61 | def generate_indexes_epoch(self): 62 | nmb_non_empty_clusters = 0 63 | for i in range(len(self.images_lists)): 64 | if len(self.images_lists[i]) != 0: 65 | nmb_non_empty_clusters += 1 66 | 67 | size_per_pseudolabel = int(self.N / nmb_non_empty_clusters) + 1 68 | res = np.array([]) 69 | 70 | for i in range(len(self.images_lists)): 71 | # skip empty clusters 72 | if len(self.images_lists[i]) == 0: 73 | continue 74 | indexes = np.random.choice( 75 | self.images_lists[i], 76 | size_per_pseudolabel, 77 | replace=(len(self.images_lists[i]) <= size_per_pseudolabel) 78 | ) 79 | res = np.concatenate((res, indexes)) 80 | 81 | np.random.shuffle(res) 82 | res = list(res.astype('int')) 83 | if len(res) >= self.N: 84 | return res[:self.N] 85 | res += res[: (self.N - len(res))] 86 | return res 87 | 88 | def __iter__(self): 89 | return iter(self.indexes) 90 | 91 | def __len__(self): 92 | return len(self.indexes) 93 | 94 | 95 | class AverageMeter(object): 96 | """Computes and stores the average and current value""" 97 | def __init__(self): 98 | self.reset() 99 | 100 | def reset(self): 101 | self.val = 0 102 | self.avg = 0 103 | self.sum = 0 104 | self.count = 0 105 | 106 | def update(self, val, n=1): 107 | self.val = val 108 | self.sum += val * n 109 | self.count += n 110 | self.avg = self.sum / self.count 111 | 112 | 113 | def learning_rate_decay(optimizer, t, lr_0): 114 | for param_group in optimizer.param_groups: 115 | lr = lr_0 / np.sqrt(1 + lr_0 * param_group['weight_decay'] * t) 116 | param_group['lr'] = lr 117 | 118 | 119 | class Logger(object): 120 | """ Class to update every epoch to keep track of the results 121 | Methods: 122 | - log(): log and save 123 | """ 124 | 125 | def __init__(self, path): 126 | self.path = path 127 | self.data = [] 128 | 129 | def log(self, train_point): 130 | self.data.append(train_point) 131 | with open(self.path, 'wb') as fp: 132 | pickle.dump(self.data, fp, -1) 133 | -------------------------------------------------------------------------------- /visu/activ-retrieval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | # 7 | import argparse 8 | import os 9 | from shutil import copyfile 10 | import sys 11 | 12 | import numpy as np 13 | from PIL import Image 14 | import torch 15 | import torch.nn as nn 16 | import torchvision.transforms as transforms 17 | import torchvision.datasets as datasets 18 | 19 | sys.path.insert(0, '..') 20 | from util import load_model 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(description='Retrieve images with maximal activations') 25 | parser.add_argument('--data', type=str, help='path to dataset') 26 | parser.add_argument('--model', type=str, help='Model') 27 | parser.add_argument('--conv', type=int, default=1, help='convolutional layer') 28 | parser.add_argument('--exp', type=str, default='', help='path to res') 29 | parser.add_argument('--count', type=int, default=9, help='save this many images') 30 | parser.add_argument('--workers', default=4, type=int, 31 | help='number of data loading workers (default: 4)') 32 | return parser.parse_args() 33 | 34 | 35 | def main(args): 36 | # create repo 37 | repo = os.path.join(args.exp, 'conv' + str(args.conv)) 38 | if not os.path.isdir(repo): 39 | os.makedirs(repo) 40 | 41 | # build model 42 | model = load_model(args.model) 43 | model.cuda() 44 | for params in model.parameters(): 45 | params.requires_grad = False 46 | model.eval() 47 | 48 | #load data 49 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 50 | std=[0.229, 0.224, 0.225]) 51 | tra = [transforms.Resize(256), 52 | transforms.CenterCrop(224), 53 | transforms.ToTensor(), 54 | normalize] 55 | 56 | # dataset 57 | dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra)) 58 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=256, 59 | num_workers=args.workers) 60 | 61 | # keys are filters and value are arrays with activation scores for the whole dataset 62 | layers_activations = {} 63 | for i, (input_tensor, _) in enumerate(dataloader): 64 | input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True) 65 | activations = forward(model, args.conv, input_var) 66 | 67 | if i == 0: 68 | layers_activations = {filt: np.zeros(len(dataset)) for filt in activations} 69 | if i < len(dataloader) - 1: 70 | e_idx = (i + 1) * 256 71 | else: 72 | e_idx = len(dataset) 73 | s_idx = i * 256 74 | for filt in activations: 75 | layers_activations[filt][s_idx: e_idx] = activations[filt].cpu().data.numpy() 76 | 77 | if i % 100 == 0: 78 | print('{0}/{1}'.format(i, len(dataloader))) 79 | 80 | # save top N images for each filter 81 | for filt in layers_activations: 82 | repofilter = os.path.join(repo, filt) 83 | if not os.path.isdir(repofilter): 84 | os.mkdir(repofilter) 85 | top = np.argsort(layers_activations[filt])[::-1] 86 | if args.count > 0: 87 | top = top[:args.count] 88 | 89 | for pos, img in enumerate(top): 90 | src, _ = dataset.imgs[img] 91 | copyfile(src, os.path.join(repofilter, "{}_{}".format(pos, src.split('/')[-1]))) 92 | 93 | 94 | def forward(model, my_layer, x): 95 | if model.sobel is not None: 96 | x = model.sobel(x) 97 | layer = 1 98 | res = {} 99 | for m in model.features.modules(): 100 | if not isinstance(m, nn.Sequential): 101 | x = m(x) 102 | if isinstance(m, nn.ReLU): 103 | if layer == my_layer: 104 | for channel in range(int(x.size()[1])): 105 | key = 'layer' + str(layer) + '-channel' + str(channel) 106 | res[key] = torch.squeeze(x.mean(3).mean(2))[:, channel] 107 | return res 108 | layer = layer + 1 109 | return res 110 | 111 | 112 | if __name__ == '__main__': 113 | args = parse_args() 114 | 
main(args) 115 | -------------------------------------------------------------------------------- /visu/activ-retrieval.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | MODEL="$HOME/deepcluster_models/vgg16/checkpoint.pth.tar" 10 | EXP="$HOME/temp/" 11 | CONV=5 12 | DATA='/datasets01/imagenet_full_size/061417/val' 13 | 14 | python activ-retrieval.py --model ${MODEL} --exp ${EXP} --conv ${CONV} --data ${DATA} 15 | -------------------------------------------------------------------------------- /visu/gradient_ascent.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import argparse 8 | import os 9 | from scipy.ndimage.filters import gaussian_filter 10 | import sys 11 | 12 | import numpy as np 13 | from PIL import Image 14 | import torch 15 | import torch.nn as nn 16 | import torchvision 17 | import torchvision.transforms as transforms 18 | 19 | sys.path.insert(0, '..') 20 | from util import load_model 21 | 22 | parser = argparse.ArgumentParser(description='Gradient ascent visualisation') 23 | parser.add_argument('--model', type=str, help='Model') 24 | parser.add_argument('--arch', type=str, default='alexnet', choices=['alexnet', 'vgg16'], help='arch') 25 | parser.add_argument('--conv', type=int, default=1, help='convolutional layer') 26 | parser.add_argument('--exp', type=str, default='', help='path to res') 27 | parser.add_argument('--lr', type=float, default=3, help='learning rate (default: 3)') 28 | parser.add_argument('--wd', type=float, default=0.00001, help='weight decay (default: 10^-5)') 29 | parser.add_argument('--sig', type=float, default=0.3, help='gaussian blur (default: 0.3)') 30 | parser.add_argument('--step', type=int, default=5, help='number of iter between gaussian blurs (default: 5)') 31 | parser.add_argument('--niter', type=int, default=1000, help='total number of iterations (default: 1000)') 32 | parser.add_argument('--idim', type=int, default=224, help='size of input image (default: 224)') 33 | 34 | CONV = {'alexnet': [96, 256, 384, 384, 256], 35 | 'vgg16': [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512]} 36 | 37 | 38 | def main(): 39 | args = parser.parse_args() 40 | 41 | # sanity check 42 | if args.arch == 'alexnet': 43 | assert args.conv < 6 44 | elif args.arch == 'vgg16': 45 | assert args.conv < 14 46 | 47 | # create repo 48 | repo = os.path.join(args.exp, 'conv' + str(args.conv)) 49 | if not os.path.isdir(repo): 50 | os.makedirs(repo) 51 | 52 | # build model 53 | model = load_model(args.model) 54 | model.cuda() 55 | for params in model.parameters(): 56 | params.requires_grad = False 57 | model.eval() 58 | 59 | def gradient_ascent(f): 60 | print f, 61 | sys.stdout.flush() 62 | fname_out = '{0}/layer{1}-channel{2}.jpeg'.format(repo, args.conv, f) 63 | 64 | img_noise = np.random.normal(size=(args.idim, args.idim, 3)) * 20 + 128 65 | img_noise = img_noise.astype('float32') 66 | inp = transforms.ToTensor()(img_noise) 67 | inp = torch.unsqueeze(inp, 0) 68 | 69 | for it in range(args.niter): 70 | x = torch.autograd.Variable(inp.cuda(), 
requires_grad=True) 71 | out = forward(model, args.conv-1, f, x) 72 | criterion = nn.CrossEntropyLoss() 73 | filt_var = torch.autograd.Variable(torch.ones(1).long()*f).cuda() 74 | output = out.mean(3).mean(2) 75 | loss = - criterion(output, filt_var) - args.wd*torch.norm(x)**2 76 | 77 | # compute gradient 78 | loss.backward() 79 | 80 | # normalize gradient 81 | grads = x.grad.data.cpu() 82 | grads = grads.div(torch.norm(grads)+1e-8) 83 | 84 | # apply gradient 85 | inp = inp.add(args.lr*grads) 86 | 87 | # gaussian blur 88 | if it%args.step == 0: 89 | inp = gaussian_filter(torch.squeeze(inp).numpy().transpose((2, 1, 0)), 90 | sigma=(args.sig, args.sig, 0)) 91 | inp = torch.unsqueeze(torch.from_numpy(inp).float().transpose(2, 0), 0) 92 | 93 | # save image at the last iteration 94 | if it == args.niter - 1: 95 | a = deprocess_image(inp.numpy()) 96 | Image.fromarray(a).save(fname_out) 97 | 98 | map(gradient_ascent, range(CONV[args.arch][args.conv-1])) 99 | 100 | 101 | def deprocess_image(x): 102 | x = x[0, :, :, :] 103 | # normalize tensor: center on 0., ensure std is 0.1 104 | x -= x.mean() 105 | x /= (x.std() + 1e-5) 106 | x *= 0.1 107 | 108 | # clip to [0, 1] 109 | x += 0.5 110 | x = np.clip(x, 0, 1) 111 | 112 | # convert to RGB array 113 | x *= 255 114 | x = x.transpose((1, 2, 0)) 115 | x = np.clip(x, 0, 255).astype('uint8') 116 | return x 117 | 118 | 119 | def forward(model, layer, channel, x): 120 | if model.sobel is not None: 121 | x = model.sobel(x) 122 | count = 0 123 | for y, m in enumerate(model.features.modules()): 124 | if not isinstance(m, nn.Sequential): 125 | x = m(x) 126 | if isinstance(m, nn.Conv2d): 127 | if count == layer: 128 | res = x 129 | if isinstance(m, nn.ReLU): 130 | if count == layer: 131 | # check if channel is not activated 132 | if x[:, channel, :, :].mean().data.cpu().numpy() == 0: 133 | return res 134 | return x 135 | count = count + 1 136 | 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /visu/gradient_ascent.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | #!/bin/bash 8 | 9 | MODEL='/private/home/mathilde/model-to-release/alexnet/checkpoint.pth.tar' 10 | ARCH='alexnet' 11 | EXP='/private/home/mathilde/temp' 12 | CONV=5 13 | 14 | python gradient_ascent.py --model ${MODEL} --exp ${EXP} --conv ${CONV} --arch ${ARCH} 15 | --------------------------------------------------------------------------------
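Usage note (not part of the repository): util.load_model together with the model definitions above is enough to run inference on a trained checkpoint. Below is a minimal, hypothetical sketch; the checkpoint path, the image path, and the assumption that it runs from the repository root (so that util and models are importable) are all illustrative, and the preprocessing simply mirrors visu/activ-retrieval.py.

# Hypothetical paths; run from the repository root.
import torch
import torchvision.transforms as transforms
from PIL import Image

from util import load_model

model = load_model('./checkpoint.pth.tar')  # rebuilds the arch stored in the checkpoint
model.cuda()
model.eval()
for p in model.parameters():
    p.requires_grad = False

# Same preprocessing as visu/activ-retrieval.py.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

img = preprocess(Image.open('./example.jpg').convert('RGB'))
x = torch.autograd.Variable(img.unsqueeze(0).cuda(), volatile=True)

# Drop the clustering head to expose the 4096-d penultimate features
# (forward() skips top_layer when it is None).
model.top_layer = None
features = model(x)
print(features.size())  # expected: (1, 4096)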
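A second hedged sketch, this time for util.UnifLabelSampler: the cluster lists below are toy values standing in for the pseudo-label assignments produced by clustering.py, and the DataLoader line is left as a comment because it needs a real dataset object.

# Toy pseudo-label assignment: cluster 2 is empty and is skipped by the sampler.
from util import UnifLabelSampler

images_lists = [[0, 3, 5], [1, 2], []]  # dataset indexes grouped by pseudolabel
N = 6                                   # number of indexes drawn for one epoch

sampler = UnifLabelSampler(N, images_lists)
print(len(sampler))         # 6
print(list(iter(sampler)))  # N dataset indexes, roughly balanced across clusters

# During training the sampler feeds the ConvNet's DataLoader, e.g.:
# loader = torch.utils.data.DataLoader(dataset, batch_size=256,
#                                      sampler=sampler, num_workers=4)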