├── .gitignore
├── LICENSE
├── LICENSE-pytorch-cifar
├── README.md
├── batchboost.py
├── debug.py
├── figures
    ├── batches
    │   ├── img_1_new_10.png
    │   ├── img_1_new_11.png
    │   ├── img_1_new_6.png
    │   ├── img_1_new_7.png
    │   ├── img_1_new_8.png
    │   ├── img_1_new_9.png
    │   ├── img_1_old_0.png
    │   ├── img_1_old_1.png
    │   ├── img_1_old_2.png
    │   ├── img_1_old_3.png
    │   ├── img_1_old_4.png
    │   ├── img_1_old_5.png
    │   ├── img_2_new_10.png
    │   ├── img_2_new_11.png
    │   ├── img_2_new_6.png
    │   ├── img_2_new_7.png
    │   ├── img_2_new_8.png
    │   ├── img_2_new_9.png
    │   ├── img_2_old_0.png
    │   ├── img_2_old_1.png
    │   ├── img_2_old_2.png
    │   ├── img_2_old_3.png
    │   ├── img_2_old_4.png
    │   ├── img_2_old_5.png
    │   ├── img_3_new_10.png
    │   ├── img_3_new_11.png
    │   ├── img_3_new_6.png
    │   ├── img_3_new_7.png
    │   ├── img_3_new_8.png
    │   ├── img_3_new_9.png
    │   ├── img_3_old_0.png
    │   ├── img_3_old_1.png
    │   ├── img_3_old_2.png
    │   ├── img_3_old_3.png
    │   ├── img_3_old_4.png
    │   ├── img_3_old_5.png
    │   ├── img_4_new_10.png
    │   ├── img_4_new_11.png
    │   ├── img_4_new_6.png
    │   ├── img_4_new_7.png
    │   ├── img_4_new_8.png
    │   ├── img_4_new_9.png
    │   ├── img_4_old_0.png
    │   ├── img_4_old_1.png
    │   ├── img_4_old_2.png
    │   ├── img_4_old_3.png
    │   ├── img_4_old_4.png
    │   └── img_4_old_5.png
    ├── data_1.png
    ├── data_2.png
    ├── data_3.png
    ├── data_4.png
    ├── data_5.png
    ├── data_6.png
    ├── data_7.png
    ├── figure-1-loss-train-without-augment.pdf
    ├── figure-1-test-accuracy-without-augment.pdf
    ├── figure-2-test-accuracy-with-augment.pdf
    ├── figure-2-train-accuracy-with-augment.pdf
    ├── figure-abstract.pdf
    ├── figure-abstract.png
    ├── figure-abstract.svg
    ├── figure-feeding.pdf
    ├── figure-feeding.png
    ├── figure-feeding.svg
    ├── figure-multipass.png
    ├── for-repository-1.png
    ├── for-repository-2.png
    └── pp_logo.jpg
├── models
    ├── __init__.py
    ├── alldnet.py
    ├── densenet.py
    ├── densenet3.py
    ├── densenet_efficient_multi_gpu.py
    ├── googlenet.py
    ├── lenet.py
    ├── mobilenet.py
    ├── resnet.py
    ├── resnext.py
    └── vgg.py
├── paper
    ├── abstract.txt
    ├── arxiv-abstract-shadow.png
    ├── arxiv-abstract.png
    ├── arxiv.sty
    ├── batchboost.pdf
    ├── batchboost.tex
    ├── build.py
    ├── figure-1-loss-train-without-augment.pdf
    ├── figure-1-test-accuracy-without-augment.pdf
    ├── figure-2-test-accuracy-with-augment.pdf
    ├── figure-2-train-accuracy-with-augment.pdf
    ├── figure-abstract.pdf
    ├── figure-feeding.pdf
    ├── figure-multipass.png
    ├── notes_v2.md
    ├── references.bib
    └── texput.log
├── plot.py
├── results
    ├── decay=1e-4
    │   ├── log_EfficientNet_baseline_13.csv
    │   ├── log_EfficientNet_baseline_24.csv
    │   ├── log_EfficientNet_batchboost_1.csv
    │   ├── log_EfficientNet_batchboost_2.csv
    │   ├── log_EfficientNet_batchboost_3.csv
    │   ├── log_EfficientNet_batchboost_4.csv
    │   ├── log_EfficientNet_mixup_1.csv
    │   ├── log_EfficientNet_mixup_2.csv
    │   ├── log_EfficientNet_mixup_3.csv
    │   ├── log_EfficientNet_mixup_4.csv
    │   ├── loss-test-with-augment-.pdf
    │   ├── loss-test-without-augment-.pdf
    │   ├── test-accuracy-with-augment-.pdf
    │   ├── test-accuracy-without-augment-.pdf
    │   ├── train-accuracy-with-augment-.pdf
    │   └── train-accuracy-without-augment-.pdf
    └── decay=1e-5
    │   ├── log_EfficientNet_baseline_13.csv
    │   ├── log_EfficientNet_baseline_24.csv
    │   ├── log_EfficientNet_batchboost_1.csv
    │   ├── log_EfficientNet_batchboost_2.csv
    │   ├── log_EfficientNet_batchboost_3.csv
    │   ├── log_EfficientNet_batchboost_4.csv
    │   ├── log_EfficientNet_mixup_1.csv
    │   ├── log_EfficientNet_mixup_2.csv
    │   ├── log_EfficientNet_mixup_3.csv
    │   ├── log_EfficientNet_mixup_4.csv
    │   ├── loss-test-with-augment-.pdf
    │   ├── loss-test-without-augment-.pdf
    │   ├── test-accuracy-with-augment-.pdf
    │   ├── test-accuracy-without-augment-.pdf
    │   ├── train-accuracy-with-augment-.pdf
    │   └── train-accuracy-without-augment-.pdf
├── train.py
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # project
 2 | .DS_Store
 3 | __pycache__
 4 | *.pyc
 5 | checkpoint
 6 | data/
 7 | 
 8 | # paper
 9 | *.aux
10 | *.log
11 | *.out
12 | *.bbl
13 | *.blg
14 | 
15 | # results
16 | results/*
17 | !results/decay=1e-4
18 | !results/decay=1e-5
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution-NonCommercial 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |     wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More_considerations
 52 |      for the public:
 53 |     wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution-NonCommercial 4.0 International Public
 58 | License
 59 | 
 60 | By exercising the Licensed Rights (defined below), You accept and agree
 61 | to be bound by the terms and conditions of this Creative Commons
 62 | Attribution-NonCommercial 4.0 International Public License ("Public
 63 | License"). To the extent this Public License may be interpreted as a
 64 | contract, You are granted the Licensed Rights in consideration of Your
 65 | acceptance of these terms and conditions, and the Licensor grants You
 66 | such rights in consideration of benefits the Licensor receives from
 67 | making the Licensed Material available under these terms and
 68 | conditions.
 69 | 
 70 | Section 1 -- Definitions.
 71 | 
 72 |   a. Adapted Material means material subject to Copyright and Similar
 73 |      Rights that is derived from or based upon the Licensed Material
 74 |      and in which the Licensed Material is translated, altered,
 75 |      arranged, transformed, or otherwise modified in a manner requiring
 76 |      permission under the Copyright and Similar Rights held by the
 77 |      Licensor. For purposes of this Public License, where the Licensed
 78 |      Material is a musical work, performance, or sound recording,
 79 |      Adapted Material is always produced where the Licensed Material is
 80 |      synched in timed relation with a moving image.
 81 | 
 82 |   b. Adapter's License means the license You apply to Your Copyright
 83 |      and Similar Rights in Your contributions to Adapted Material in
 84 |      accordance with the terms and conditions of this Public License.
 85 | 
 86 |   c. Copyright and Similar Rights means copyright and/or similar rights
 87 |      closely related to copyright including, without limitation,
 88 |      performance, broadcast, sound recording, and Sui Generis Database
 89 |      Rights, without regard to how the rights are labeled or
 90 |      categorized. For purposes of this Public License, the rights
 91 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 92 |      Rights.
 93 |   d. Effective Technological Measures means those measures that, in the
 94 |      absence of proper authority, may not be circumvented under laws
 95 |      fulfilling obligations under Article 11 of the WIPO Copyright
 96 |      Treaty adopted on December 20, 1996, and/or similar international
 97 |      agreements.
 98 | 
 99 |   e. Exceptions and Limitations means fair use, fair dealing, and/or
100 |      any other exception or limitation to Copyright and Similar Rights
101 |      that applies to Your use of the Licensed Material.
102 | 
103 |   f. Licensed Material means the artistic or literary work, database,
104 |      or other material to which the Licensor applied this Public
105 |      License.
106 | 
107 |   g. Licensed Rights means the rights granted to You subject to the
108 |      terms and conditions of this Public License, which are limited to
109 |      all Copyright and Similar Rights that apply to Your use of the
110 |      Licensed Material and that the Licensor has authority to license.
111 | 
112 |   h. Licensor means the individual(s) or entity(ies) granting rights
113 |      under this Public License.
114 | 
115 |   i. NonCommercial means not primarily intended for or directed towards
116 |      commercial advantage or monetary compensation. For purposes of
117 |      this Public License, the exchange of the Licensed Material for
118 |      other material subject to Copyright and Similar Rights by digital
119 |      file-sharing or similar means is NonCommercial provided there is
120 |      no payment of monetary compensation in connection with the
121 |      exchange.
122 | 
123 |   j. Share means to provide material to the public by any means or
124 |      process that requires permission under the Licensed Rights, such
125 |      as reproduction, public display, public performance, distribution,
126 |      dissemination, communication, or importation, and to make material
127 |      available to the public including in ways that members of the
128 |      public may access the material from a place and at a time
129 |      individually chosen by them.
130 | 
131 |   k. Sui Generis Database Rights means rights other than copyright
132 |      resulting from Directive 96/9/EC of the European Parliament and of
133 |      the Council of 11 March 1996 on the legal protection of databases,
134 |      as amended and/or succeeded, as well as other essentially
135 |      equivalent rights anywhere in the world.
136 | 
137 |   l. You means the individual or entity exercising the Licensed Rights
138 |      under this Public License. Your has a corresponding meaning.
139 | 
140 | Section 2 -- Scope.
141 | 
142 |   a. License grant.
143 | 
144 |        1. Subject to the terms and conditions of this Public License,
145 |           the Licensor hereby grants You a worldwide, royalty-free,
146 |           non-sublicensable, non-exclusive, irrevocable license to
147 |           exercise the Licensed Rights in the Licensed Material to:
148 | 
149 |             a. reproduce and Share the Licensed Material, in whole or
150 |                in part, for NonCommercial purposes only; and
151 | 
152 |             b. produce, reproduce, and Share Adapted Material for
153 |                NonCommercial purposes only.
154 | 
155 |        2. Exceptions and Limitations. For the avoidance of doubt, where
156 |           Exceptions and Limitations apply to Your use, this Public
157 |           License does not apply, and You do not need to comply with
158 |           its terms and conditions.
159 | 
160 |        3. Term. The term of this Public License is specified in Section
161 |           6(a).
162 | 
163 |        4. Media and formats; technical modifications allowed. The
164 |           Licensor authorizes You to exercise the Licensed Rights in
165 |           all media and formats whether now known or hereafter created,
166 |           and to make technical modifications necessary to do so. The
167 |           Licensor waives and/or agrees not to assert any right or
168 |           authority to forbid You from making technical modifications
169 |           necessary to exercise the Licensed Rights, including
170 |           technical modifications necessary to circumvent Effective
171 |           Technological Measures. For purposes of this Public License,
172 |           simply making modifications authorized by this Section 2(a)
173 |           (4) never produces Adapted Material.
174 | 
175 |        5. Downstream recipients.
176 | 
177 |             a. Offer from the Licensor -- Licensed Material. Every
178 |                recipient of the Licensed Material automatically
179 |                receives an offer from the Licensor to exercise the
180 |                Licensed Rights under the terms and conditions of this
181 |                Public License.
182 | 
183 |             b. No downstream restrictions. You may not offer or impose
184 |                any additional or different terms or conditions on, or
185 |                apply any Effective Technological Measures to, the
186 |                Licensed Material if doing so restricts exercise of the
187 |                Licensed Rights by any recipient of the Licensed
188 |                Material.
189 | 
190 |        6. No endorsement. Nothing in this Public License constitutes or
191 |           may be construed as permission to assert or imply that You
192 |           are, or that Your use of the Licensed Material is, connected
193 |           with, or sponsored, endorsed, or granted official status by,
194 |           the Licensor or others designated to receive attribution as
195 |           provided in Section 3(a)(1)(A)(i).
196 | 
197 |   b. Other rights.
198 | 
199 |        1. Moral rights, such as the right of integrity, are not
200 |           licensed under this Public License, nor are publicity,
201 |           privacy, and/or other similar personality rights; however, to
202 |           the extent possible, the Licensor waives and/or agrees not to
203 |           assert any such rights held by the Licensor to the limited
204 |           extent necessary to allow You to exercise the Licensed
205 |           Rights, but not otherwise.
206 | 
207 |        2. Patent and trademark rights are not licensed under this
208 |           Public License.
209 | 
210 |        3. To the extent possible, the Licensor waives any right to
211 |           collect royalties from You for the exercise of the Licensed
212 |           Rights, whether directly or through a collecting society
213 |           under any voluntary or waivable statutory or compulsory
214 |           licensing scheme. In all other cases the Licensor expressly
215 |           reserves any right to collect such royalties, including when
216 |           the Licensed Material is used other than for NonCommercial
217 |           purposes.
218 | 
219 | Section 3 -- License Conditions.
220 | 
221 | Your exercise of the Licensed Rights is expressly made subject to the
222 | following conditions.
223 | 
224 |   a. Attribution.
225 | 
226 |        1. If You Share the Licensed Material (including in modified
227 |           form), You must:
228 | 
229 |             a. retain the following if it is supplied by the Licensor
230 |                with the Licensed Material:
231 | 
232 |                  i. identification of the creator(s) of the Licensed
233 |                     Material and any others designated to receive
234 |                     attribution, in any reasonable manner requested by
235 |                     the Licensor (including by pseudonym if
236 |                     designated);
237 | 
238 |                 ii. a copyright notice;
239 | 
240 |                iii. a notice that refers to this Public License;
241 | 
242 |                 iv. a notice that refers to the disclaimer of
243 |                     warranties;
244 | 
245 |                  v. a URI or hyperlink to the Licensed Material to the
246 |                     extent reasonably practicable;
247 | 
248 |             b. indicate if You modified the Licensed Material and
249 |                retain an indication of any previous modifications; and
250 | 
251 |             c. indicate the Licensed Material is licensed under this
252 |                Public License, and include the text of, or the URI or
253 |                hyperlink to, this Public License.
254 | 
255 |        2. You may satisfy the conditions in Section 3(a)(1) in any
256 |           reasonable manner based on the medium, means, and context in
257 |           which You Share the Licensed Material. For example, it may be
258 |           reasonable to satisfy the conditions by providing a URI or
259 |           hyperlink to a resource that includes the required
260 |           information.
261 | 
262 |        3. If requested by the Licensor, You must remove any of the
263 |           information required by Section 3(a)(1)(A) to the extent
264 |           reasonably practicable.
265 | 
266 |        4. If You Share Adapted Material You produce, the Adapter's
267 |           License You apply must not prevent recipients of the Adapted
268 |           Material from complying with this Public License.
269 | 
270 | Section 4 -- Sui Generis Database Rights.
271 | 
272 | Where the Licensed Rights include Sui Generis Database Rights that
273 | apply to Your use of the Licensed Material:
274 | 
275 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
276 |      to extract, reuse, reproduce, and Share all or a substantial
277 |      portion of the contents of the database for NonCommercial purposes
278 |      only;
279 | 
280 |   b. if You include all or a substantial portion of the database
281 |      contents in a database in which You have Sui Generis Database
282 |      Rights, then the database in which You have Sui Generis Database
283 |      Rights (but not its individual contents) is Adapted Material; and
284 | 
285 |   c. You must comply with the conditions in Section 3(a) if You Share
286 |      all or a substantial portion of the contents of the database.
287 | 
288 | For the avoidance of doubt, this Section 4 supplements and does not
289 | replace Your obligations under this Public License where the Licensed
290 | Rights include other Copyright and Similar Rights.
291 | 
292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
293 | 
294 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
295 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
296 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
297 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
298 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
299 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
300 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
301 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
302 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
303 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
304 | 
305 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
306 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
307 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
308 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
309 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
310 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
311 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
312 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
313 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
314 | 
315 |   c. The disclaimer of warranties and limitation of liability provided
316 |      above shall be interpreted in a manner that, to the extent
317 |      possible, most closely approximates an absolute disclaimer and
318 |      waiver of all liability.
319 | 
320 | Section 6 -- Term and Termination.
321 | 
322 |   a. This Public License applies for the term of the Copyright and
323 |      Similar Rights licensed here. However, if You fail to comply with
324 |      this Public License, then Your rights under this Public License
325 |      terminate automatically.
326 | 
327 |   b. Where Your right to use the Licensed Material has terminated under
328 |      Section 6(a), it reinstates:
329 | 
330 |        1. automatically as of the date the violation is cured, provided
331 |           it is cured within 30 days of Your discovery of the
332 |           violation; or
333 | 
334 |        2. upon express reinstatement by the Licensor.
335 | 
336 |      For the avoidance of doubt, this Section 6(b) does not affect any
337 |      right the Licensor may have to seek remedies for Your violations
338 |      of this Public License.
339 | 
340 |   c. For the avoidance of doubt, the Licensor may also offer the
341 |      Licensed Material under separate terms or conditions or stop
342 |      distributing the Licensed Material at any time; however, doing so
343 |      will not terminate this Public License.
344 | 
345 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
346 |      License.
347 | 
348 | Section 7 -- Other Terms and Conditions.
349 | 
350 |   a. The Licensor shall not be bound by any additional or different
351 |      terms or conditions communicated by You unless expressly agreed.
352 | 
353 |   b. Any arrangements, understandings, or agreements regarding the
354 |      Licensed Material not stated herein are separate from and
355 |      independent of the terms and conditions of this Public License.
356 | 
357 | Section 8 -- Interpretation.
358 | 
359 |   a. For the avoidance of doubt, this Public License does not, and
360 |      shall not be interpreted to, reduce, limit, restrict, or impose
361 |      conditions on any use of the Licensed Material that could lawfully
362 |      be made without permission under this Public License.
363 | 
364 |   b. To the extent possible, if any provision of this Public License is
365 |      deemed unenforceable, it shall be automatically reformed to the
366 |      minimum extent necessary to make it enforceable. If the provision
367 |      cannot be reformed, it shall be severed from this Public License
368 |      without affecting the enforceability of the remaining terms and
369 |      conditions.
370 | 
371 |   c. No term or condition of this Public License will be waived and no
372 |      failure to comply consented to unless expressly agreed to by the
373 |      Licensor.
374 | 
375 |   d. Nothing in this Public License constitutes or may be interpreted
376 |      as a limitation upon, or waiver of, any privileges and immunities
377 |      that apply to the Licensor or You, including from the legal
378 |      processes of any jurisdiction or authority.
379 | 
380 | =======================================================================
381 | 
382 | Creative Commons is not a party to its public
383 | licenses. Notwithstanding, Creative Commons may elect to apply one of
384 | its public licenses to material it publishes and in those instances
385 | will be considered the “Licensor.” The text of the Creative Commons
386 | public licenses is dedicated to the public domain under the CC0 Public
387 | Domain Dedication. Except for the limited purpose of indicating that
388 | material is shared under a Creative Commons public license or as
389 | otherwise permitted by the Creative Commons policies published at
390 | creativecommons.org/policies, Creative Commons does not authorize the
391 | use of the trademark "Creative Commons" or any other trademark or logo
392 | of Creative Commons without its prior written consent including,
393 | without limitation, in connection with any unauthorized modifications
394 | to any of its public licenses or any other arrangements,
395 | understandings, or agreements concerning use of licensed material. For
396 | the avoidance of doubt, this paragraph does not form part of the
397 | public licenses.
398 | 
399 | Creative Commons may be contacted at creativecommons.org.
400 | 


--------------------------------------------------------------------------------
/LICENSE-pytorch-cifar:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 liukuang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # batchboost (currently a draft)
  2 | By [Maciej A. Czyzewski](https://github.com/maciejczyzewski)
  3 | 
  4 | This repository contains the implementation used for the results in
  5 | our paper (https://arxiv.org/abs/2001.07627).
  6 | 
  7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/batchboost-regularization-for-stabilizing/image-classification-on-cifar-10)](https://paperswithcode.com/sota/image-classification-on-cifar-10?p=batchboost-regularization-for-stabilizing)
  8 | 
  9 | ---
 10 | 
 11 | _Batchboost_ is a simple technique to accelerate ML model training by adaptively feeding mini-batches with artificial samples, which are created by mixing two examples from the previous step, favoring pairings that produce a difficult example.
 12 | 
 13 | <div align="center">
 14 | 	<img src="figures/figure-abstract.png" width="50%">
 15 | 	<img src="figures/figure-feeding.png" width="49%">
 16 | </div>
 17 | 
 18 | ## Introduction
 19 | 
 20 | > **UPDATE 24/01/2020:** Thank you for your e-mails asking about _batchboost_. As promised, I will update the results soon and present comparisons with other solutions (paperswithcode.com). This is a draft, and the research needs to be continued before it is a complete work; if you are interested in helping, please contact me.
 21 | 
 22 | ### Overview
 23 | 
 24 | In this research, we state the hypothesis that mixing many images together can
 25 | be more effective than mixing just two. To make it efficient, we propose a new method of
 26 | creating mini-batches, where each sample from the dataset is propagated through
 27 | subsequent iterations with less and less importance until the end of the learning
 28 | process.
 29 | 
 30 | The batchboost pipeline has three stages:
 31 | (a) _pairing_: a method of selecting two samples from the previous step.
 32 | (b) _mixing_: a method of creating a new artificial example from the two selected samples.
 33 | (c) _feeding_: constructing the training mini-batch from the created examples and new samples from the dataset (concatenated with ratio γ).
 34 | Note that each sample from the dataset propagates through subsequent iterations with less and less importance until the end of training.
 35 | 
 36 | <div align="left">
 37 | 	<a href="https://arxiv.org/abs/2001.07627">
 38 | 		<img src="paper/arxiv-abstract-shadow.png" height="325">
 39 | 	</a>
 40 | </div>
 41 | 
 42 | ### Results
 43 | 
 44 | > **COMING:** comparison of _batchboost_ applied to different architectures, to different problems (small datasets), to training GANs, with/without augmentation, and with different parameters {window_normal, window_boost, factor} (hyperparameter tuning).
 45 | 
 46 | The results will be updated and saved to [`results/`](https://github.com/maciejczyzewski/batchboost/tree/master/results).
 47 | 
 48 | <b>Underfitting & Stabilizing Training</b>
 49 | <div>
 50 | 	<img src="figures/for-repository-1.png" height="325">
 51 | </div>
 52 | 
 53 | _Figure 1:_ Evaluation on _CIFAR-10_, for _EfficientNet-b0_ and
 54 | _SGD(weight-decay=1e-4, lr=0.1)_ (as recommended in the _mixup_ research), same
 55 | parameters for each model. As a result, the models behave differently, although
 56 | they differ only in the method of constructing the mini-batch.
 57 | 
 58 | <b>Overfitting (comparison to mixup)</b>
 59 | <div>
 60 | 	<img src="figures/for-repository-2.png" height="325">
 61 | </div>
 62 | 
 63 | _Figure 2:_ _batchboost_ is a new state-of-the-art because it is slightly better than _mixup_ (here _mixup_ has been tuned for best parameters, while _batchboost_ uses the configuration from _Figure 1_).
 64 | 
 65 | ## Requirements and Installation
 66 | 
 67 | * A computer running macOS or Linux
 68 | * For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl)
 69 | * Python version 3.6
 70 | * A [PyTorch installation](http://pytorch.org/)
 71 | 
 72 | ## Training
 73 | 
 74 | Use `python train.py` to train a new model.
 75 | Here is an example setting:
 76 | ```bash
 77 | # for batchboost
 78 | $ CUDA_VISIBLE_DEVICES=0 python3 train.py --decay=1e-4 --no-augment --seed=1 \
 79 | 	--name=batchboost --model=efficientnet-b0 --epoch=30
 80 | # for mixup
 81 | $ CUDA_VISIBLE_DEVICES=0 python3 train.py --decay=1e-4 --no-augment --seed=1 \
 82 | 	--name=mixup --model=efficientnet-b0 --epoch=30
 83 | ```
 84 | 
 85 | ## Using
 86 | 
 87 | File [`batchboost.py`](https://github.com/maciejczyzewski/batchboost/tree/master/batchboost.py) should be portable; just copy it into your path and write the following:
 88 | 
 89 | ```python3
 90 | from batchboost import BatchBoost
 91 | 
 92 | # how to calculate error per sample?
 93 | def fn_error(outputs, targets):
 94 |     logsoftmax = nn.LogSoftmax(dim=1)
 95 |     return torch.sum(-targets * logsoftmax(outputs), dim=1)
 96 | 
 97 | # how to represent target in linear form (label -> one-hot)
 98 | def fn_linearize(x, num_classes=10):
 99 |     _x = torch.zeros(x.size(0), num_classes)
100 |     _x[range(x.size(0)), x] = 1
101 |     return _x
102 | 
103 | # how to get back (one-hot -> label)
104 | def fn_unlinearize(x):
105 |     _, _x = torch.max(x, 1)
106 |     return _x
107 | 
108 | BatchBoost.fn_error = fn_error
109 | BatchBoost.fn_linearize = fn_linearize
110 | BatchBoost.fn_unlinearize = fn_unlinearize
111 | 
112 | #     if you don't want to train everything using `batchboost` method
113 | # epoch: [... -> window_normal -> window_boost -> window_normal -> ...]
114 | #               (  batches    )  (  batches   )  (  batches    )
115 | 
116 | BB = BatchBoost(
117 |     alpha=args.alpha, # alpha parameter for mixup
118 |     window_normal=0,  # consecutive batch fits: normal
119 |     window_boost=10,  #                       : batchboost
120 |     factor=1 / 2,     # ratio between new information and fed/mixed
121 |     use_cuda=True,
122 | )
123 | 
124 | ...
125 | ```
126 | 
127 | And slightly change your training loop:
128 | 
129 | ```python3
130 | ...
131 | 
132 | for batch_idx, (new_inputs, new_targets) in enumerate(trainloader):
133 | 	if use_cuda:
134 | 		new_inputs, new_targets = new_inputs.cuda(), new_targets.cuda()
135 | 
136 | 	# -----> (a) feed with new information
137 | 	if not BB.feed(new_inputs, new_targets):
138 | 		continue
139 | 	
140 | 	# -----> (b) apply concat: BB.inputs, BB.targets
141 | 	outputs = net(BB.inputs)
142 | 
143 | 	# -----> (c) calculate: loss (mixup like style \lambda)
144 | 	loss = BB.criterion(criterion, outputs)
145 | 
146 | 	train_loss += loss.data
147 | 	_, predicted = torch.max(outputs.data, 1)
148 | 	total += BB.inputs.size(0) # -----> remember to use concat
149 | 
150 | 	# -----> (d) calculate: accuracy
151 | 	correct += BB.correct(predicted)
152 | 
153 | 	# -----> (e) pairing & mixing
154 | 	BB.mixing(criterion, outputs)
155 | 
156 | 	...
157 | ```
158 | 
159 | ## Citation
160 | 
161 | If you find _batchboost_ useful in your research, please consider citing:
162 | 
163 | ```bibtex
164 | @misc{czyzewski2020batchboost,
165 |     title={batchboost: regularization for stabilizing training with resistance to underfitting & overfitting},
166 |     author={Maciej A. Czyzewski},
167 |     year={2020},
168 |     eprint={2001.07627},
169 |     archivePrefix={arXiv},
170 |     primaryClass={cs.LG}
171 | }
172 | ```
173 | 
174 | _An interesting topic for further research and discussion is the
175 | combination of batchboost with existing methods._
176 | 
177 | ## License
178 | 
179 | Implemented as a fork of ["mixup-cifar10 / facebook"](https://github.com/facebookresearch/mixup-cifar10).
180 | This project is CC-BY-NC-licensed.
181 | 
182 | <img src="figures/pp_logo.jpg" width="350px">
183 | 


--------------------------------------------------------------------------------
/batchboost.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import numpy as np
  4 | from torch.autograd import Variable
  5 | 
  6 | 
  7 | class BatchBoost:
  8 |     """
  9 |     batchboost: regularization for stabilizing training 
 10 |                 with resistance to underfitting & overfitting
 11 |     Maciej A. Czyzewski
 12 |     https://arxiv.org/abs/2001.07627
 13 |     """
 14 | 
 15 |     def __init__(
 16 |         self,
 17 |         alpha=1.0,
 18 |         window_normal=0,
 19 |         window_boost=10,
 20 |         factor=1 / 3,
 21 |         use_cuda=False,
 22 |         debug=False,
 23 |     ):
 24 |         self.alpha = alpha
 25 |         self.window_normal = window_normal
 26 |         self.window_boost = window_boost
 27 |         self.factor = factor
 28 |         self.use_cuda = use_cuda
 29 |         self.debug = debug
 30 |         self.clear()
 31 | 
 32 |         if self.debug:
 33 |             print(
 34 |                 f"[BatchBoost] alpha={alpha} ratio={factor} \
 35 | window_normal={window_normal} window_boost={window_boost}"
 36 |             )
 37 | 
 38 |     def clear(self):
 39 |         if self.debug:
 40 |             print(f"[BatchBoost] resetting")
 41 |         self.mixup_lambda = 1
 42 |         self.inputs = None
 43 |         self.y1 = self.y2 = None
 44 |         self.iter_normal = self.window_normal
 45 |         self.iter_boost = self.window_boost
 46 | 
 47 |     @staticmethod
 48 |     def mixup(x, y, index_left, index_right, mixup_lambda=1.0):
 49 |         """Returns mixed inputs, pairs of targets, and lambda
 50 |         https://arxiv.org/abs/1710.09412"""
 51 |         mixed_x = (
 52 |             mixup_lambda * x[index_left, :]
 53 |             + (1 - mixup_lambda) * x[index_right, :]
 54 |         )
 55 |         # mixed_y = (mixup_lambda * y[index_left, :] +
 56 |         #           (1 - mixup_lambda) * y[index_right, :])
 57 |         # return mixed_x, mixed_y, mixup_lambda
 58 |         y1, y2 = y[index_left], y[index_right]
 59 |         return mixed_x, y1, y2
 60 | 
 61 |     @staticmethod
 62 |     def fn_error(outputs, targets):
 63 |         logsoftmax = nn.LogSoftmax(dim=1)
 64 |         return torch.sum(-outputs * logsoftmax(targets), dim=1)
 65 | 
 66 |     @staticmethod
 67 |     def fn_linearize(x, num_classes=10):
 68 |         _x = torch.zeros(x.size(0), num_classes)
 69 |         _x[range(x.size(0)), x] = 1
 70 |         return _x
 71 | 
 72 |     @staticmethod
 73 |     def fn_unlinearize(x):
 74 |         _, _x = torch.max(x, 1)
 75 |         return _x
 76 | 
 77 |     def criterion(self, criterion, outputs):
 78 |         _y1 = BatchBoost.fn_unlinearize(self.y1)
 79 |         _y2 = BatchBoost.fn_unlinearize(self.y2)
 80 |         return self.mixup_lambda * criterion(outputs, _y1) + (
 81 |             1 - self.mixup_lambda
 82 |         ) * criterion(outputs, _y2)
 83 | 
 84 |     def correct(self, predicted):
 85 |         _y1 = BatchBoost.fn_unlinearize(self.y1)
 86 |         _y2 = BatchBoost.fn_unlinearize(self.y2)
 87 |         return (
 88 |             self.mixup_lambda * predicted.eq(_y1).cpu().sum().float()
 89 |             + (1 - self.mixup_lambda) * predicted.eq(_y2).cpu().sum().float()
 90 |         )
 91 | 
 92 |     def pairing(self, errvec):
 93 |         batch_size = errvec.size()[0]
 94 |         _, index = torch.sort(errvec, dim=0, descending=True)
 95 |         return (
 96 |             index[0 : int(batch_size * self.factor)],
 97 |             reversed(index[batch_size - int(batch_size * self.factor) :]),
 98 |         )
 99 | 
100 |     def mixing(self, criterion, outputs):
101 |         if self.iter_boost + self.iter_normal == 0:
102 |             self.iter_normal = self.window_normal
103 |             self.iter_boost = self.window_boost
104 |         if self.iter_boost > 0:
105 |             if self.debug:
106 |                 print("[BatchBoost]: half-batch + feed-batch")
107 |             errvec = BatchBoost.fn_error(outputs, self.targets)
108 |             index_left, index_right = self.pairing(errvec)
109 | 
110 |             if self.alpha > 0:
111 |                 self.mixup_lambda = np.random.beta(self.alpha, self.alpha)
112 |             else:
113 |                 self.mixup_lambda = 1
114 | 
115 |             self.inputs, self.y1, self.y2 = BatchBoost.mixup(
116 |                 self.inputs,
117 |                 y=self.targets,
118 |                 index_left=index_right,
119 |                 index_right=index_left,
120 |                 mixup_lambda=self.mixup_lambda,
121 |             )
122 |             self.iter_boost -= 1
123 |         elif self.iter_normal > 0:
124 |             if self.debug:
125 |                 print("[BatchBoost] normal batch")
126 |             batch_size = self.inputs.size(0)
127 |             self.inputs = self.inputs[int(batch_size * self.factor) :]
128 |             self.y1 = self.y1[int(batch_size * self.factor) :]
129 |             self.y2 = self.y2[int(batch_size * self.factor) :]
130 |             self.mixup_lambda = 1
131 |             self.iter_normal -= 1
132 | 
133 |     def feed(self, new_inputs, _new_targets):
134 |         new_targets = Variable(BatchBoost.fn_linearize(_new_targets))
135 |         if self.use_cuda:
136 |             new_targets = new_targets.cuda()
137 |         # no mixing (first iteration)
138 |         if self.inputs is None:
139 |             self.inputs = Variable(new_inputs)
140 |             self.y1 = new_targets
141 |             self.y2 = new_targets
142 |             return False
143 |         # concat
144 |         self.inputs = torch.cat([self.inputs, new_inputs], dim=0)
145 |         self.y1 = torch.cat([self.y1, new_targets], dim=0)
146 |         self.y2 = torch.cat([self.y2, new_targets], dim=0)
147 |         # virtual targets
148 |         self.targets = (
149 |             self.mixup_lambda * self.y1 + (1 - self.mixup_lambda) * self.y2
150 |         )
151 |         return True
152 | 


--------------------------------------------------------------------------------
/debug.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | import torchvision.transforms as transforms
  6 | import torchvision.datasets as datasets
  7 | 
  8 | # FIXME: move to models and split for CIFAR-10/Fashion-MNIST and others
  9 | 
 10 | 
 11 | class ResNet100k(nn.Module):
 12 |     def __init__(self, num_classes=10):
 13 |         super(ResNet100k, self).__init__()
 14 |         self.num_filter1 = 8
 15 |         self.num_filter2 = 16
 16 |         self.num_padding = 2
 17 |         # input is 28x28
 18 |         # padding=2 for same padding
 19 |         self.conv1 = nn.Conv2d(1, self.num_filter1, 5, padding=self.num_padding)
 20 |         nn.init.xavier_uniform_(self.conv1.weight)
 21 |         # feature map size is 14*14 by pooling
 22 |         # padding=2 for same padding
 23 |         self.conv2 = nn.Conv2d(
 24 |             self.num_filter1, self.num_filter2, 5, padding=self.num_padding
 25 |         )
 26 |         nn.init.xavier_uniform_(self.conv2.weight)
 27 |         # feature map size is 7*7 by pooling
 28 |         self.fc = nn.Linear(self.num_filter2 * 7 * 7, num_classes)
 29 | 
 30 |     def forward(self, x):
 31 |         x = F.max_pool2d(F.relu(self.conv1(x)), 2)
 32 |         x = F.max_pool2d(F.relu(self.conv2(x)), 2)
 33 |         x = x.view(-1, self.num_filter2 * 7 * 7)  # reshape Variable
 34 |         x = self.fc(x)
 35 |         return x
 36 |         # return F.log_softmax(x, dim=1)
 37 |         # return F.softmax(x, dim=1)
 38 | 
 39 | 
 40 | class ResNet100kv2(nn.Module):
 41 |     def __init__(self):
 42 |         super(ResNet100kv2, self).__init__()
 43 | 
 44 |         self.cnn1 = nn.Conv2d(
 45 |             in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2
 46 |         )
 47 |         self.relu1 = nn.ReLU()
 48 |         self.norm1 = nn.BatchNorm2d(16)
 49 |         nn.init.xavier_uniform(self.cnn1.weight)
 50 | 
 51 |         self.maxpool1 = nn.MaxPool2d(kernel_size=2)
 52 | 
 53 |         self.cnn2 = nn.Conv2d(
 54 |             in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=2
 55 |         )
 56 |         self.relu2 = nn.ReLU()
 57 |         self.norm2 = nn.BatchNorm2d(32)
 58 |         nn.init.xavier_uniform(self.cnn2.weight)
 59 | 
 60 |         self.maxpool2 = nn.MaxPool2d(kernel_size=2)
 61 | 
 62 |         self.fc1 = nn.Linear(2048, 128)
 63 |         self.fcrelu = nn.ReLU()
 64 | 
 65 |         self.fc2 = nn.Linear(128, 10)
 66 | 
 67 |     def forward(self, x):
 68 |         out = self.cnn1(x)
 69 |         out = self.relu1(out)
 70 |         out = self.norm1(out)
 71 | 
 72 |         out = self.maxpool1(out)
 73 | 
 74 |         out = self.cnn2(out)
 75 |         out = self.relu2(out)
 76 |         out = self.norm2(out)
 77 | 
 78 |         out = self.maxpool2(out)
 79 | 
 80 |         out = out.view(out.size(0), -1)
 81 | 
 82 |         out = self.fc1(out)
 83 |         out = self.fcrelu(out)
 84 | 
 85 |         out = self.fc2(out)
 86 |         return out
 87 | 
 88 | 
 89 | def FashionMNIST_loaders(args):
 90 |     transform = transforms.Compose(
 91 |         [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
 92 |     )
 93 | 
 94 |     if args.augment:
 95 |         transform_train = transforms.Compose(
 96 |             [
 97 |                 transforms.RandomCrop(28, padding=4),
 98 |                 transforms.RandomHorizontalFlip(),
 99 |                 transforms.ToTensor(),
100 |                 transforms.Normalize((0.1307,), (0.3081,)),
101 |             ]
102 |         )
103 |     else:
104 |         transform_train = transform
105 |     transform_test = transform
106 | 
107 |     trainset = datasets.FashionMNIST(
108 |         root="./data", train=True, download=True, transform=transform_train
109 |     )
110 |     trainloader = torch.utils.data.DataLoader(
111 |         trainset, batch_size=args.batch_size, shuffle=True, num_workers=8
112 |     )
113 | 
114 |     testset = datasets.FashionMNIST(
115 |         root="./data", train=False, download=True, transform=transform_test
116 |     )
117 |     testloader = torch.utils.data.DataLoader(
118 |         testset, batch_size=100, shuffle=False, num_workers=8
119 |     )
120 | 
121 |     return trainloader, testloader
122 | 


--------------------------------------------------------------------------------
/figures/batches/img_1_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_10.png


--------------------------------------------------------------------------------
/figures/batches/img_1_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_11.png


--------------------------------------------------------------------------------
/figures/batches/img_1_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_6.png


--------------------------------------------------------------------------------
/figures/batches/img_1_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_7.png


--------------------------------------------------------------------------------
/figures/batches/img_1_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_8.png


--------------------------------------------------------------------------------
/figures/batches/img_1_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_9.png


--------------------------------------------------------------------------------
/figures/batches/img_1_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_0.png


--------------------------------------------------------------------------------
/figures/batches/img_1_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_1.png


--------------------------------------------------------------------------------
/figures/batches/img_1_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_2.png


--------------------------------------------------------------------------------
/figures/batches/img_1_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_3.png


--------------------------------------------------------------------------------
/figures/batches/img_1_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_4.png


--------------------------------------------------------------------------------
/figures/batches/img_1_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_5.png


--------------------------------------------------------------------------------
/figures/batches/img_2_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_10.png


--------------------------------------------------------------------------------
/figures/batches/img_2_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_11.png


--------------------------------------------------------------------------------
/figures/batches/img_2_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_6.png


--------------------------------------------------------------------------------
/figures/batches/img_2_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_7.png


--------------------------------------------------------------------------------
/figures/batches/img_2_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_8.png


--------------------------------------------------------------------------------
/figures/batches/img_2_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_9.png


--------------------------------------------------------------------------------
/figures/batches/img_2_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_0.png


--------------------------------------------------------------------------------
/figures/batches/img_2_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_1.png


--------------------------------------------------------------------------------
/figures/batches/img_2_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_2.png


--------------------------------------------------------------------------------
/figures/batches/img_2_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_3.png


--------------------------------------------------------------------------------
/figures/batches/img_2_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_4.png


--------------------------------------------------------------------------------
/figures/batches/img_2_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_5.png


--------------------------------------------------------------------------------
/figures/batches/img_3_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_10.png


--------------------------------------------------------------------------------
/figures/batches/img_3_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_11.png


--------------------------------------------------------------------------------
/figures/batches/img_3_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_6.png


--------------------------------------------------------------------------------
/figures/batches/img_3_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_7.png


--------------------------------------------------------------------------------
/figures/batches/img_3_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_8.png


--------------------------------------------------------------------------------
/figures/batches/img_3_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_9.png


--------------------------------------------------------------------------------
/figures/batches/img_3_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_0.png


--------------------------------------------------------------------------------
/figures/batches/img_3_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_1.png


--------------------------------------------------------------------------------
/figures/batches/img_3_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_2.png


--------------------------------------------------------------------------------
/figures/batches/img_3_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_3.png


--------------------------------------------------------------------------------
/figures/batches/img_3_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_4.png


--------------------------------------------------------------------------------
/figures/batches/img_3_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_5.png


--------------------------------------------------------------------------------
/figures/batches/img_4_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_10.png


--------------------------------------------------------------------------------
/figures/batches/img_4_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_11.png


--------------------------------------------------------------------------------
/figures/batches/img_4_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_6.png


--------------------------------------------------------------------------------
/figures/batches/img_4_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_7.png


--------------------------------------------------------------------------------
/figures/batches/img_4_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_8.png


--------------------------------------------------------------------------------
/figures/batches/img_4_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_9.png


--------------------------------------------------------------------------------
/figures/batches/img_4_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_0.png


--------------------------------------------------------------------------------
/figures/batches/img_4_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_1.png


--------------------------------------------------------------------------------
/figures/batches/img_4_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_2.png


--------------------------------------------------------------------------------
/figures/batches/img_4_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_3.png


--------------------------------------------------------------------------------
/figures/batches/img_4_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_4.png


--------------------------------------------------------------------------------
/figures/batches/img_4_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_5.png


--------------------------------------------------------------------------------
/figures/data_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_1.png


--------------------------------------------------------------------------------
/figures/data_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_2.png


--------------------------------------------------------------------------------
/figures/data_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_3.png


--------------------------------------------------------------------------------
/figures/data_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_4.png


--------------------------------------------------------------------------------
/figures/data_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_5.png


--------------------------------------------------------------------------------
/figures/data_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_6.png


--------------------------------------------------------------------------------
/figures/data_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_7.png


--------------------------------------------------------------------------------
/figures/figure-1-loss-train-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-1-loss-train-without-augment.pdf


--------------------------------------------------------------------------------
/figures/figure-1-test-accuracy-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-1-test-accuracy-without-augment.pdf


--------------------------------------------------------------------------------
/figures/figure-2-test-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-2-test-accuracy-with-augment.pdf


--------------------------------------------------------------------------------
/figures/figure-2-train-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-2-train-accuracy-with-augment.pdf


--------------------------------------------------------------------------------
/figures/figure-abstract.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-abstract.pdf


--------------------------------------------------------------------------------
/figures/figure-abstract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-abstract.png


--------------------------------------------------------------------------------
/figures/figure-feeding.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-feeding.pdf


--------------------------------------------------------------------------------
/figures/figure-feeding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-feeding.png


--------------------------------------------------------------------------------
/figures/figure-multipass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-multipass.png


--------------------------------------------------------------------------------
/figures/for-repository-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/for-repository-1.png


--------------------------------------------------------------------------------
/figures/for-repository-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/for-repository-2.png


--------------------------------------------------------------------------------
/figures/pp_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/pp_logo.jpg


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .vgg import *
 2 | from .lenet import *
 3 | from .resnet import *
 4 | from .resnext import *
 5 | from .densenet import *
 6 | from .googlenet import *
 7 | from .mobilenet import *
 8 | from .densenet_efficient_multi_gpu import DenseNet190
 9 | from .densenet3 import DenseNet190
10 | 


--------------------------------------------------------------------------------
/models/alldnet.py:
--------------------------------------------------------------------------------
 1 | '''LeNet in PyTorch.'''
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from torch.autograd import Variable
 5 | 
 6 | class AllDNet(nn.Module):
 7 |     def __init__(self):
 8 |         super(AllDNet, self).__init__()
 9 |         self.conv1 = nn.Conv2d(3, 6, 5)
10 |         self.conv2 = nn.Conv2d(6, 16, 5)
11 |         # self.conv2 = nn.Linear(6*14*14, 16*10*10)
12 |         self.fc1   = nn.Linear(16*5*5, 120)
13 |         self.fc2   = nn.Linear(120, 84)
14 |         self.fc3   = nn.Linear(84, 10)
15 | 
16 |     def forward(self, x):
17 |         activations = []
18 |         out = F.relu(self.conv1(x))
19 |         out = F.max_pool2d(out, 2)
20 |         # out = out.view(out.size(0), -1)
21 |         # activations.append(out)
22 |         out = F.relu(self.conv2(out))
23 |         # out = out.view(out.size(0), 16, 10, -1)
24 |         out = F.max_pool2d(out, 2)
25 |         out = out.view(out.size(0), -1)
26 |         activations.append(out)
27 |         out = F.relu(self.fc1(out))
28 |         activations.append(out)
29 |         out = F.relu(self.fc2(out))
30 |         activations.append(out)
31 |         out = self.fc3(out)
32 |         return out, activations
33 | 
34 | 


--------------------------------------------------------------------------------
/models/densenet.py:
--------------------------------------------------------------------------------
  1 | '''DenseNet in PyTorch.'''
  2 | import math
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | from torch.autograd import Variable
  9 | 
 10 | 
 11 | class Bottleneck(nn.Module):
 12 |     def __init__(self, in_planes, growth_rate):
 13 |         super(Bottleneck, self).__init__()
 14 |         self.bn1 = nn.BatchNorm2d(in_planes)
 15 |         self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
 16 |         self.bn2 = nn.BatchNorm2d(4*growth_rate)
 17 |         self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
 18 | 
 19 |     def forward(self, x):
 20 |         out = self.conv1(F.relu(self.bn1(x)))
 21 |         out = self.conv2(F.relu(self.bn2(out)))
 22 |         out = torch.cat([out,x], 1)
 23 |         return out
 24 | 
 25 | 
 26 | class Transition(nn.Module):
 27 |     def __init__(self, in_planes, out_planes):
 28 |         super(Transition, self).__init__()
 29 |         self.bn = nn.BatchNorm2d(in_planes)
 30 |         self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
 31 | 
 32 |     def forward(self, x):
 33 |         out = self.conv(F.relu(self.bn(x)))
 34 |         out = F.avg_pool2d(out, 2)
 35 |         return out
 36 | 
 37 | 
 38 | class DenseNet(nn.Module):
 39 |     def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
 40 |         super(DenseNet, self).__init__()
 41 |         self.growth_rate = growth_rate
 42 | 
 43 |         num_planes = 2*growth_rate
 44 |         self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
 45 | 
 46 |         self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
 47 |         num_planes += nblocks[0]*growth_rate
 48 |         out_planes = int(math.floor(num_planes*reduction))
 49 |         self.trans1 = Transition(num_planes, out_planes)
 50 |         num_planes = out_planes
 51 | 
 52 |         self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
 53 |         num_planes += nblocks[1]*growth_rate
 54 |         out_planes = int(math.floor(num_planes*reduction))
 55 |         self.trans2 = Transition(num_planes, out_planes)
 56 |         num_planes = out_planes
 57 | 
 58 |         self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
 59 |         num_planes += nblocks[2]*growth_rate
 60 |         out_planes = int(math.floor(num_planes*reduction))
 61 |         self.trans3 = Transition(num_planes, out_planes)
 62 |         num_planes = out_planes
 63 | 
 64 |         self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
 65 |         num_planes += nblocks[3]*growth_rate
 66 | 
 67 |         self.bn = nn.BatchNorm2d(num_planes)
 68 |         self.linear = nn.Linear(num_planes, num_classes)
 69 | 
 70 |     def _make_dense_layers(self, block, in_planes, nblock):
 71 |         layers = []
 72 |         for i in range(nblock):
 73 |             layers.append(block(in_planes, self.growth_rate))
 74 |             in_planes += self.growth_rate
 75 |         return nn.Sequential(*layers)
 76 | 
 77 |     def forward(self, x):
 78 |         out = self.conv1(x)
 79 |         out = self.trans1(self.dense1(out))
 80 |         out = self.trans2(self.dense2(out))
 81 |         out = self.trans3(self.dense3(out))
 82 |         out = self.dense4(out)
 83 |         out = F.avg_pool2d(F.relu(self.bn(out)), 4)
 84 |         out = out.view(out.size(0), -1)
 85 |         out = self.linear(out)
 86 |         return out
 87 | 
 88 | def DenseNet121():
 89 |     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
 90 | 
 91 | def DenseNet169():
 92 |     return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
 93 | 
 94 | def DenseNet201():
 95 |     return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
 96 | 
 97 | def DenseNet161():
 98 |     return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
 99 | 
def densenet_cifar():
    """Build the small DenseNet variant: stages of 6/12/24/16 layers, growth rate 12."""
    stage_sizes = [6, 12, 24, 16]
    return DenseNet(Bottleneck, stage_sizes, growth_rate=12)
102 | 
def test_densenet():
    """Smoke test: push one random 3x32x32 sample through ``densenet_cifar`` and print the logits."""
    model = densenet_cifar()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(Variable(sample))
    print(logits)
108 | 
109 | # test_densenet()
110 | 


--------------------------------------------------------------------------------
/models/densenet3.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | 
  7 | class BasicBlock(nn.Module):
  8 |     def __init__(self, in_planes, out_planes, dropRate=0.0):
  9 |         super(BasicBlock, self).__init__()
 10 |         self.bn1 = nn.BatchNorm2d(in_planes)
 11 |         self.relu = nn.ReLU(inplace=True)
 12 |         self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1,
 13 |                                padding=1, bias=False)
 14 |         self.droprate = dropRate
 15 |     def forward(self, x):
 16 |         out = self.conv1(self.relu(self.bn1(x)))
 17 |         if self.droprate > 0:
 18 |             out = F.dropout(out, p=self.droprate, training=self.training)
 19 |         return torch.cat([x, out], 1)
 20 | 
 21 | class BottleneckBlock(nn.Module):
 22 |     def __init__(self, in_planes, out_planes, dropRate=0.0):
 23 |         super(BottleneckBlock, self).__init__()
 24 |         inter_planes = out_planes * 4
 25 |         self.bn1 = nn.BatchNorm2d(in_planes)
 26 |         self.relu = nn.ReLU(inplace=True)
 27 |         self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=1, stride=1,
 28 |                                padding=0, bias=False)
 29 |         self.bn2 = nn.BatchNorm2d(inter_planes)
 30 |         self.conv2 = nn.Conv2d(inter_planes, out_planes, kernel_size=3, stride=1,
 31 |                                padding=1, bias=False)
 32 |         self.droprate = dropRate
 33 |     def forward(self, x):
 34 |         out = self.conv1(self.relu(self.bn1(x)))
 35 |         if self.droprate > 0:
 36 |             out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
 37 |         out = self.conv2(self.relu(self.bn2(out)))
 38 |         if self.droprate > 0:
 39 |             out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
 40 |         return torch.cat([x, out], 1)
 41 | 
 42 | class TransitionBlock(nn.Module):
 43 |     def __init__(self, in_planes, out_planes, dropRate=0.0):
 44 |         super(TransitionBlock, self).__init__()
 45 |         self.bn1 = nn.BatchNorm2d(in_planes)
 46 |         self.relu = nn.ReLU(inplace=True)
 47 |         self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
 48 |                                padding=0, bias=False)
 49 |         self.droprate = dropRate
 50 |     def forward(self, x):
 51 |         out = self.conv1(self.relu(self.bn1(x)))
 52 |         if self.droprate > 0:
 53 |             out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
 54 |         return F.avg_pool2d(out, 2)
 55 | 
 56 | class DenseBlock(nn.Module):
 57 |     def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0):
 58 |         super(DenseBlock, self).__init__()
 59 |         self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers, dropRate)
 60 |     def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate):
 61 |         layers = []
 62 |         for i in range(nb_layers):
 63 |             layers.append(block(in_planes+i*growth_rate, growth_rate, dropRate))
 64 |         return nn.Sequential(*layers)
 65 |     def forward(self, x):
 66 |         return self.layer(x)
 67 | 
 68 | class DenseNet3(nn.Module):
 69 |     def __init__(self, depth, num_classes, growth_rate=12,
 70 |                  reduction=0.5, bottleneck=True, dropRate=0.0):
 71 |         super(DenseNet3, self).__init__()
 72 |         in_planes = 2 * growth_rate
 73 |         n = (depth - 4) // 3
 74 |         if bottleneck == True:
 75 |             n = n//2
 76 |             block = BottleneckBlock
 77 |         else:
 78 |             block = BasicBlock
 79 |         # 1st conv before any dense block
 80 |         self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1,
 81 |                                padding=1, bias=False)
 82 |         # 1st block
 83 |         self.block1 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
 84 |         in_planes = int(in_planes+n*growth_rate)
 85 |         self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate)
 86 |         in_planes = int(math.floor(in_planes*reduction))
 87 |         # 2nd block
 88 |         self.block2 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
 89 |         in_planes = int(in_planes+n*growth_rate)
 90 |         self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate)
 91 |         in_planes = int(math.floor(in_planes*reduction))
 92 |         # 3rd block
 93 |         self.block3 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
 94 |         in_planes = int(in_planes+n*growth_rate)
 95 |         # global average pooling and classifier
 96 |         self.bn1 = nn.BatchNorm2d(in_planes)
 97 |         self.relu = nn.ReLU(inplace=True)
 98 |         self.fc = nn.Linear(in_planes, num_classes)
 99 |         self.in_planes = in_planes
100 | 
101 |         for m in self.modules():
102 |             if isinstance(m, nn.Conv2d):
103 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
104 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
105 |             elif isinstance(m, nn.BatchNorm2d):
106 |                 m.weight.data.fill_(1)
107 |                 m.bias.data.zero_()
108 |             elif isinstance(m, nn.Linear):
109 |                 m.bias.data.zero_()
110 |     def forward(self, x):
111 |         out = self.conv1(x)
112 |         out = self.trans1(self.block1(out))
113 |         out = self.trans2(self.block2(out))
114 |         out = self.block3(out)
115 |         out = self.relu(self.bn1(out))
116 |         out = F.avg_pool2d(out, 8)
117 |         out = out.view(-1, self.in_planes)
118 |         return self.fc(out)
119 | 
def DenseNet190():
    """Build the depth-190, growth-rate-40 ``DenseNet3`` with 10 output classes."""
    depth, num_classes = 190, 10
    return DenseNet3(depth, num_classes, growth_rate=40)
122 | 


--------------------------------------------------------------------------------
/models/googlenet.py:
--------------------------------------------------------------------------------
  1 | '''GoogLeNet with PyTorch.'''
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from torch.autograd import Variable
  7 | 
  8 | 
  9 | class Inception(nn.Module):
 10 |     def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
 11 |         super(Inception, self).__init__()
 12 |         # 1x1 conv branch
 13 |         self.b1 = nn.Sequential(
 14 |             nn.Conv2d(in_planes, n1x1, kernel_size=1),
 15 |             nn.BatchNorm2d(n1x1),
 16 |             nn.ReLU(True),
 17 |         )
 18 | 
 19 |         # 1x1 conv -> 3x3 conv branch
 20 |         self.b2 = nn.Sequential(
 21 |             nn.Conv2d(in_planes, n3x3red, kernel_size=1),
 22 |             nn.BatchNorm2d(n3x3red),
 23 |             nn.ReLU(True),
 24 |             nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
 25 |             nn.BatchNorm2d(n3x3),
 26 |             nn.ReLU(True),
 27 |         )
 28 | 
 29 |         # 1x1 conv -> 5x5 conv branch
 30 |         self.b3 = nn.Sequential(
 31 |             nn.Conv2d(in_planes, n5x5red, kernel_size=1),
 32 |             nn.BatchNorm2d(n5x5red),
 33 |             nn.ReLU(True),
 34 |             nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
 35 |             nn.BatchNorm2d(n5x5),
 36 |             nn.ReLU(True),
 37 |             nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
 38 |             nn.BatchNorm2d(n5x5),
 39 |             nn.ReLU(True),
 40 |         )
 41 | 
 42 |         # 3x3 pool -> 1x1 conv branch
 43 |         self.b4 = nn.Sequential(
 44 |             nn.MaxPool2d(3, stride=1, padding=1),
 45 |             nn.Conv2d(in_planes, pool_planes, kernel_size=1),
 46 |             nn.BatchNorm2d(pool_planes),
 47 |             nn.ReLU(True),
 48 |         )
 49 | 
 50 |     def forward(self, x):
 51 |         y1 = self.b1(x)
 52 |         y2 = self.b2(x)
 53 |         y3 = self.b3(x)
 54 |         y4 = self.b4(x)
 55 |         return torch.cat([y1,y2,y3,y4], 1)
 56 | 
 57 | 
 58 | class GoogLeNet(nn.Module):
 59 |     def __init__(self):
 60 |         super(GoogLeNet, self).__init__()
 61 |         self.pre_layers = nn.Sequential(
 62 |             nn.Conv2d(3, 192, kernel_size=3, padding=1),
 63 |             nn.BatchNorm2d(192),
 64 |             nn.ReLU(True),
 65 |         )
 66 | 
 67 |         self.a3 = Inception(192,  64,  96, 128, 16, 32, 32)
 68 |         self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
 69 | 
 70 |         self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
 71 | 
 72 |         self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)
 73 |         self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)
 74 |         self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)
 75 |         self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)
 76 |         self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
 77 | 
 78 |         self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
 79 |         self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
 80 | 
 81 |         self.avgpool = nn.AvgPool2d(8, stride=1)
 82 |         self.linear = nn.Linear(1024, 10)
 83 | 
 84 |     def forward(self, x):
 85 |         out = self.pre_layers(x)
 86 |         out = self.a3(out)
 87 |         out = self.b3(out)
 88 |         out = self.maxpool(out)
 89 |         out = self.a4(out)
 90 |         out = self.b4(out)
 91 |         out = self.c4(out)
 92 |         out = self.d4(out)
 93 |         out = self.e4(out)
 94 |         out = self.maxpool(out)
 95 |         out = self.a5(out)
 96 |         out = self.b5(out)
 97 |         out = self.avgpool(out)
 98 |         out = out.view(out.size(0), -1)
 99 |         out = self.linear(out)
100 |         return out
101 | 
102 | # net = GoogLeNet()
103 | # x = torch.randn(1,3,32,32)
104 | # y = net(Variable(x))
105 | # print(y.size())
106 | 


--------------------------------------------------------------------------------
/models/lenet.py:
--------------------------------------------------------------------------------
 1 | '''LeNet in PyTorch.'''
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | class LeNet(nn.Module):
 6 |     def __init__(self):
 7 |         super(LeNet, self).__init__()
 8 |         self.conv1 = nn.Conv2d(3, 6, 5)
 9 |         self.conv2 = nn.Conv2d(6, 16, 5)
10 |         self.fc1   = nn.Linear(16*5*5, 120)
11 |         self.fc2   = nn.Linear(120, 84)
12 |         self.fc3   = nn.Linear(84, 10)
13 | 
14 |     def forward(self, x):
15 |         out = F.relu(self.conv1(x))
16 |         out = F.max_pool2d(out, 2)
17 |         out = F.relu(self.conv2(out))
18 |         out = F.max_pool2d(out, 2)
19 |         out = out.view(out.size(0), -1)
20 |         out = F.relu(self.fc1(out))
21 |         out = F.relu(self.fc2(out))
22 |         out = self.fc3(out)
23 |         return out
24 | 


--------------------------------------------------------------------------------
/models/mobilenet.py:
--------------------------------------------------------------------------------
 1 | '''MobileNet in PyTorch.
 2 | 
 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
 4 | for more details.
 5 | '''
 6 | import torch
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | 
10 | from torch.autograd import Variable
11 | 
12 | 
class Block(nn.Module):
    '''Depthwise 3x3 conv + pointwise 1x1 conv, each followed by batch-norm and ReLU.'''

    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        # Depthwise: one 3x3 filter per input channel (groups=in_planes).
        self.conv1 = nn.Conv2d(
            in_planes, in_planes, kernel_size=3, stride=stride,
            padding=1, groups=in_planes, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise: 1x1 conv mixing channels into out_planes.
        self.conv2 = nn.Conv2d(
            in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        depthwise = F.relu(self.bn1(self.conv1(x)))
        pointwise = F.relu(self.bn2(self.conv2(depthwise)))
        return pointwise
26 | 
27 | 
class MobileNet(nn.Module):
    """MobileNet classifier assembled from depthwise-separable ``Block`` layers."""

    # Each entry is either an int (output planes, stride 1) or a
    # (planes, stride) tuple, e.g. (128,2) means planes=128 with stride=2.
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        stem_planes = 32
        self.conv1 = nn.Conv2d(3, stem_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(stem_planes)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        """Translate ``cfg`` into a sequential stack of ``Block`` modules."""
        blocks = []
        for spec in self.cfg:
            if isinstance(spec, int):
                out_planes, stride = spec, 1
            else:
                out_planes, stride = spec[0], spec[1]
            blocks.append(Block(in_planes, out_planes, stride))
            # The next block consumes what this one produced.
            in_planes = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        feats = F.relu(self.bn1(self.conv1(x)))
        feats = self.layers(feats)
        pooled = F.avg_pool2d(feats, 2)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
55 | 
56 | 
def test():
    """Smoke test: run one random 3x32x32 sample through ``MobileNet`` and print the output size."""
    model = MobileNet()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(Variable(sample))
    print(logits.size())
62 | 
63 | # test()
64 | 


--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
  1 | '''ResNet in PyTorch.
  2 | 
  3 | BasicBlock and Bottleneck module is from the original ResNet paper:
  4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  5 |     Deep Residual Learning for Image Recognition. arXiv:1512.03385
  6 | 
  7 | PreActBlock and PreActBottleneck module is from the later paper:
  8 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  9 |     Identity Mappings in Deep Residual Networks. arXiv:1603.05027
 10 | '''
 11 | import torch
 12 | import torch.nn as nn
 13 | import torch.nn.functional as F
 14 | 
 15 | from torch.autograd import Variable
 16 | 
 17 | 
 18 | def conv3x3(in_planes, out_planes, stride=1):
 19 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
 20 | 
 21 | 
 22 | class BasicBlock(nn.Module):
 23 |     expansion = 1
 24 | 
 25 |     def __init__(self, in_planes, planes, stride=1):
 26 |         super(BasicBlock, self).__init__()
 27 |         self.conv1 = conv3x3(in_planes, planes, stride)
 28 |         self.bn1 = nn.BatchNorm2d(planes)
 29 |         self.conv2 = conv3x3(planes, planes)
 30 |         self.bn2 = nn.BatchNorm2d(planes)
 31 | 
 32 |         self.shortcut = nn.Sequential()
 33 |         if stride != 1 or in_planes != self.expansion*planes:
 34 |             self.shortcut = nn.Sequential(
 35 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 36 |                 nn.BatchNorm2d(self.expansion*planes)
 37 |             )
 38 | 
 39 |     def forward(self, x):
 40 |         out = F.relu(self.bn1(self.conv1(x)))
 41 |         out = self.bn2(self.conv2(out))
 42 |         out += self.shortcut(x)
 43 |         out = F.relu(out)
 44 |         return out
 45 | 
 46 | 
 47 | class PreActBlock(nn.Module):
 48 |     '''Pre-activation version of the BasicBlock.'''
 49 |     expansion = 1
 50 | 
 51 |     def __init__(self, in_planes, planes, stride=1):
 52 |         super(PreActBlock, self).__init__()
 53 |         self.bn1 = nn.BatchNorm2d(in_planes)
 54 |         self.conv1 = conv3x3(in_planes, planes, stride)
 55 |         self.bn2 = nn.BatchNorm2d(planes)
 56 |         self.conv2 = conv3x3(planes, planes)
 57 | 
 58 |         self.shortcut = nn.Sequential()
 59 |         if stride != 1 or in_planes != self.expansion*planes:
 60 |             self.shortcut = nn.Sequential(
 61 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
 62 |             )
 63 | 
 64 |     def forward(self, x):
 65 |         out = F.relu(self.bn1(x))
 66 |         shortcut = self.shortcut(out)
 67 |         out = self.conv1(out)
 68 |         out = self.conv2(F.relu(self.bn2(out)))
 69 |         out += shortcut
 70 |         return out
 71 | 
 72 | 
 73 | class Bottleneck(nn.Module):
 74 |     expansion = 4
 75 | 
 76 |     def __init__(self, in_planes, planes, stride=1):
 77 |         super(Bottleneck, self).__init__()
 78 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 79 |         self.bn1 = nn.BatchNorm2d(planes)
 80 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 81 |         self.bn2 = nn.BatchNorm2d(planes)
 82 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 83 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 84 | 
 85 |         self.shortcut = nn.Sequential()
 86 |         if stride != 1 or in_planes != self.expansion*planes:
 87 |             self.shortcut = nn.Sequential(
 88 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 89 |                 nn.BatchNorm2d(self.expansion*planes)
 90 |             )
 91 | 
 92 |     def forward(self, x):
 93 |         out = F.relu(self.bn1(self.conv1(x)))
 94 |         out = F.relu(self.bn2(self.conv2(out)))
 95 |         out = self.bn3(self.conv3(out))
 96 |         out += self.shortcut(x)
 97 |         out = F.relu(out)
 98 |         return out
 99 | 
100 | 
class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.

    Batch-norm and ReLU precede each of the 1x1 -> 3x3 -> 1x1 convolutions.
    The shortcut branch is fed the activated input ``relu(bn1(x))`` and is a
    bare strided 1x1 convolution when the shape changes.
    '''

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        """Return ``residual + shortcut`` (no activation after the sum)."""
        activated = F.relu(self.bn1(x))
        shortcut = self.shortcut(activated)
        residual = self.conv1(activated)
        residual = self.conv2(F.relu(self.bn2(residual)))
        residual = self.conv3(F.relu(self.bn3(residual)))
        residual += shortcut
        return residual
128 | 
129 | 
class ResNet(nn.Module):
    """ResNet backbone whose forward pass can be entered and left part-way.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            be constructible as ``block(in_planes, planes, stride)``.
        num_blocks: number of blocks in each of the four stages.
        num_classes: output size of the final linear classifier.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running input width, advanced by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x, lin=0, lout=5):
        """Run the sub-network selected by ``lin`` (entry) and ``lout`` (exit).

        Segments are numbered 0 (stem), 1-4 (``layer1``..``layer4``) and
        5 (pooling + classifier). Segment ``k`` in 0..4 runs when
        ``lin < k + 1`` and ``lout > k - 1``; the head runs when ``lout > 4``.
        The defaults execute the whole network.
        """
        out = x

        # Segment 0: stem conv / batch-norm / ReLU.
        if lin < 1 and lout > -1:
            out = F.relu(self.bn1(self.conv1(out)))

        # Segments 1-4: the residual stages.
        stages = (self.layer1, self.layer2, self.layer3, self.layer4)
        for index, stage in enumerate(stages, start=1):
            if lin < index + 1 and lout > index - 1:
                out = stage(out)

        # Segment 5: 4x4 average pool, flatten, classify.
        if lout > 4:
            out = F.avg_pool2d(out, 4)
            out = out.view(out.size(0), -1)
            out = self.linear(out)
        return out
170 | 
171 | 
def ResNet18(num_classes=10):
    """Build the ResNet-18 configuration: ``PreActBlock`` with [2, 2, 2, 2] blocks per stage.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNet``
            (default 10, the value used before this parameter existed).
    """
    return ResNet(PreActBlock, [2, 2, 2, 2], num_classes=num_classes)
174 | 
def ResNet34(num_classes=10):
    """Build the ResNet-34 configuration: ``BasicBlock`` with [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNet``
            (default 10, the value used before this parameter existed).
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
177 | 
def ResNet50(num_classes=10):
    """Build the ResNet-50 configuration: ``Bottleneck`` with [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNet``
            (default 10, the value used before this parameter existed).
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
180 | 
def ResNet101(num_classes=10):
    """Build the ResNet-101 configuration: ``Bottleneck`` with [3, 4, 23, 3] blocks per stage.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNet``
            (default 10, the value used before this parameter existed).
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
183 | 
def ResNet152(num_classes=10):
    """Build the ResNet-152 configuration: ``Bottleneck`` with [3, 8, 36, 3] blocks per stage.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNet``
            (default 10, the value used before this parameter existed).
    """
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)
186 | 
187 | 
def test():
    """Smoke test: push one random 1x3x32x32 input through ResNet18 and print the output size."""
    net = ResNet18()
    # Tensors can be passed to modules directly; the torch.autograd.Variable
    # wrapper is deprecated and no longer needed.
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())
192 | 
193 | # test()
194 | 


--------------------------------------------------------------------------------
/models/resnext.py:
--------------------------------------------------------------------------------
 1 | '''ResNeXt in PyTorch.
 2 | 
 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
 4 | '''
 5 | import torch
 6 | import torch.nn as nn
 7 | import torch.nn.functional as F
 8 | 
 9 | from torch.autograd import Variable
10 | 
11 | 
class Block(nn.Module):
    '''ResNeXt unit: 1x1 conv, grouped 3x3 conv, 1x1 conv, added to a shortcut.'''
    # Output channels = expansion * (cardinality * bottleneck_width).
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        width = cardinality * bottleneck_width
        out_planes = self.expansion * width

        # Main path: reduce, grouped 3x3 (carries the stride), expand.
        self.conv1 = nn.Conv2d(in_planes, width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut unless the shape changes; then project with 1x1 conv + BN.
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        '''Return ReLU(main_path(x) + shortcut(x)).'''
        y = self.conv1(x)
        y = F.relu(self.bn1(y))
        y = self.conv2(y)
        y = F.relu(self.bn2(y))
        y = self.bn3(self.conv3(y))
        return F.relu(y + self.shortcut(x))
40 | 
41 | 
class ResNeXt(nn.Module):
    '''Three-stage ResNeXt: 1x1 conv stem, three stages of ``Block`` units, linear head.

    Note that ``self.bottleneck_width`` is doubled by every ``_make_layer``
    call, so after construction it no longer equals the constructor argument.
    '''

    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        # Stem.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Stages (a fourth stage is intentionally left disabled).
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)

        # Uses the constructor argument, not the (already doubled) attribute.
        head_inputs = cardinality * bottleneck_width * 8
        self.linear = nn.Linear(head_inputs, num_classes)

    def _make_layer(self, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``.'''
        units = []
        for unit_stride in [stride, *([1] * (num_blocks - 1))]:
            units.append(Block(self.in_planes, self.cardinality,
                               self.bottleneck_width, unit_stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*units)

    def forward(self, x):
        '''Stem -> layer1..layer3 -> 8x8 average pool -> flatten -> linear.'''
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        # features = self.layer4(features)
        pooled = F.avg_pool2d(features, 8)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
77 | 
78 | 
def ResNeXt29_2x64d(num_classes=10):
    '''Build ResNeXt with three stages of 3 blocks, cardinality 2, bottleneck width 64.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNeXt``
            (default 10, the value used before this parameter existed).
    '''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=2, bottleneck_width=64,
                   num_classes=num_classes)
81 | 
def ResNeXt29_4x64d(num_classes=10):
    '''Build ResNeXt with three stages of 3 blocks, cardinality 4, bottleneck width 64.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNeXt``
            (default 10, the value used before this parameter existed).
    '''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=4, bottleneck_width=64,
                   num_classes=num_classes)
84 | 
def ResNeXt29_8x64d(num_classes=10):
    '''Build ResNeXt with three stages of 3 blocks, cardinality 8, bottleneck width 64.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNeXt``
            (default 10, the value used before this parameter existed).
    '''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=8, bottleneck_width=64,
                   num_classes=num_classes)
87 | 
def ResNeXt29_32x4d(num_classes=10):
    '''Build ResNeXt with three stages of 3 blocks, cardinality 32, bottleneck width 4.

    Args:
        num_classes: size of the classifier output, forwarded to ``ResNeXt``
            (default 10, the value used before this parameter existed).
    '''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=32, bottleneck_width=4,
                   num_classes=num_classes)
90 | 
def test_resnext():
    '''Smoke test: push one random 1x3x32x32 input through ResNeXt29_2x64d and print the output size.'''
    net = ResNeXt29_2x64d()
    x = torch.randn(1, 3, 32, 32)
    # Tensors can be passed to modules directly; the torch.autograd.Variable
    # wrapper is deprecated and no longer needed.
    y = net(x)
    print(y.size())
96 | 
97 | # test_resnext()
98 | 


--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
 1 | '''VGG11/13/16/19 in Pytorch.'''
 2 | import torch
 3 | import torch.nn as nn
 4 | from torch.autograd import Variable
 5 | 
 6 | 
# Layer specifications for each VGG variant, consumed by VGG._make_layers:
#   - an integer adds a 3x3 convolution (followed by BatchNorm and ReLU)
#     with that many output channels;
#   - 'M' adds a 2x2 max-pooling layer with stride 2.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
13 | 
14 | 
class VGG(nn.Module):
    """VGG-style classifier built from a named layer specification.

    Args:
        vgg_name: key into the module-level ``cfg`` table (e.g. ``'VGG16'``);
            an unknown name raises ``KeyError``.
        num_classes: number of outputs of the final linear layer. Defaults
            to 10, the value that used to be hard-coded.
    """

    def __init__(self, vgg_name, num_classes=10):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        # The classifier expects 512 flattened features.
        # NOTE(review): this presumably relies on the feature map being
        # 512 x 1 x 1 (e.g. 32x32 inputs after five poolings) -- confirm.
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Apply the feature stack, flatten per sample, and classify."""
        out = self.features(x)
        out = out.view(out.size(0), -1)
        return self.classifier(out)

    def _make_layers(self, cfg):
        """Translate a layer specification into an ``nn.Sequential``.

        Args:
            cfg: iterable of entries; ``'M'`` adds a 2x2/stride-2 max pool,
                any other entry is the output channel count of a 3x3
                convolution followed by BatchNorm and ReLU.

        Returns:
            The assembled ``nn.Sequential``; it always ends with a 1x1
            average pool (which leaves the tensor unchanged).
        """
        layers = []
        in_channels = 3
        for spec in cfg:
            if spec == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.extend([
                nn.Conv2d(in_channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ])
            in_channels = spec
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)
40 | 
41 | # net = VGG('VGG11')
42 | # x = torch.randn(2,3,32,32)
43 | # print(net(Variable(x)).size())
44 | 


--------------------------------------------------------------------------------
/paper/abstract.txt:
--------------------------------------------------------------------------------
 1 | Overfitting & underfitting and stable training are important challenges in
 2 | machine learning.
 3 | Current approaches for these issues are mixup, SamplePairing and BC learning.
 4 | In our work, we state the hypothesis that mixing many images together can be more
 5 | effective than just two.
 6 | Batchboost pipeline has three stages:
 7 | (a) pairing: method of selecting two samples.
 8 | (b) mixing: how to create a new one from two samples.
 9 | (c) feeding: combining mixed samples with new ones from dataset into batch (with ratio $\gamma$).
10 | Note that a sample that appears in our batch propagates through
11 | subsequent iterations with less and less importance until the end of training.
12 | The pairing stage calculates the error per sample, sorts the samples and pairs them
13 | with the strategy: hardest with easiest; then the mixing stage merges two samples
14 | using mixup, $\lambda x_1 + (1-\lambda)x_2$. Finally, the feeding stage combines
15 | new samples with mixed ones at a 1:1 ratio.
16 | Batchboost has 0.5-3% better accuracy than the current
17 | state-of-the-art mixup regularization on CIFAR-10 & Fashion-MNIST.
18 | Our method is slightly better than SamplePairing technique
19 | on small datasets (up to 5%).
20 | Batchboost provides stable training with untuned parameters (like weight
21 | decay), thus it is a good method to test the performance of different architectures.
22 | Source code is at: https://github.com/maciejczyzewski/batchboost
23 | 
24 | 


--------------------------------------------------------------------------------
/paper/arxiv-abstract-shadow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/arxiv-abstract-shadow.png


--------------------------------------------------------------------------------
/paper/arxiv-abstract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/arxiv-abstract.png


--------------------------------------------------------------------------------
/paper/arxiv.sty:
--------------------------------------------------------------------------------
  1 | \NeedsTeXFormat{LaTeX2e}
  2 | 
  3 | \ProcessOptions\relax
  4 | 
  5 | % fonts
  6 | \renewcommand{\rmdefault}{ptm}
  7 | \renewcommand{\sfdefault}{phv}
  8 | 
  9 | % set page geometry
 10 | \usepackage[verbose=true,letterpaper]{geometry}
 11 | \AtBeginDocument{
 12 |   \newgeometry{
 13 |     textheight=9in,
 14 |     textwidth=6.5in,
 15 |     top=1in,
 16 |     headheight=14pt,
 17 |     headsep=25pt,
 18 |     footskip=30pt
 19 |   }
 20 | }
 21 | 
 22 | \widowpenalty=10000
 23 | \clubpenalty=10000
 24 | \flushbottom
 25 | \sloppy
 26 | 
 27 | 
 28 | 
 29 | \newcommand{\headeright}{A Preprint}
 30 | \newcommand{\undertitle}{A Preprint}
 31 | 
 32 | \usepackage{fancyhdr}
 33 | \fancyhf{}
 34 | \pagestyle{fancy}
 35 | \renewcommand{\headrulewidth}{0.4pt}
 36 | \fancyheadoffset{0pt}
 37 | \rhead{\scshape \footnotesize \headeright}
 38 | \chead{\@title}
 39 | \cfoot{\thepage}
 40 | 
 41 | 
 42 | %Handling Keywords
 43 | \def\keywordname{{\bfseries \emph Keywords}}%
 44 | \def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm
 45 | \def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$
 46 | }\noindent\keywordname\enspace\ignorespaces#1\par}}
 47 | 
 48 | % font sizes with reduced leading
 49 | \renewcommand{\normalsize}{%
 50 |   \@setfontsize\normalsize\@xpt\@xipt
 51 |   \abovedisplayskip      7\p@ \@plus 2\p@ \@minus 5\p@
 52 |   \abovedisplayshortskip \z@ \@plus 3\p@
 53 |   \belowdisplayskip      \abovedisplayskip
 54 |   \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
 55 | }
 56 | \normalsize
 57 | \renewcommand{\small}{%
 58 |   \@setfontsize\small\@ixpt\@xpt
 59 |   \abovedisplayskip      6\p@ \@plus 1.5\p@ \@minus 4\p@
 60 |   \abovedisplayshortskip \z@  \@plus 2\p@
 61 |   \belowdisplayskip      \abovedisplayskip
 62 |   \belowdisplayshortskip 3\p@ \@plus 2\p@   \@minus 2\p@
 63 | }
 64 | \renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
 65 | \renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
 66 | \renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
 67 | \renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
 68 | \renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
 69 | \renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
 70 | \renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
 71 | \renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
 72 | 
 73 | % sections with less space
 74 | \providecommand{\section}{}
 75 | \renewcommand{\section}{%
 76 |   \@startsection{section}{1}{\z@}%
 77 |                 {-2.0ex \@plus -0.5ex \@minus -0.2ex}%
 78 |                 { 1.5ex \@plus  0.3ex \@minus  0.2ex}%
 79 |                 {\large\bf\raggedright}%
 80 | }
 81 | \providecommand{\subsection}{}
 82 | \renewcommand{\subsection}{%
 83 |   \@startsection{subsection}{2}{\z@}%
 84 |                 {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
 85 |                 { 0.8ex \@plus  0.2ex}%
 86 |                 {\normalsize\bf\raggedright}%
 87 | }
 88 | \providecommand{\subsubsection}{}
 89 | \renewcommand{\subsubsection}{%
 90 |   \@startsection{subsubsection}{3}{\z@}%
 91 |                 {-1.5ex \@plus -0.5ex \@minus -0.2ex}%
 92 |                 { 0.5ex \@plus  0.2ex}%
 93 |                 {\normalsize\bf\raggedright}%
 94 | }
 95 | \providecommand{\paragraph}{}
 96 | \renewcommand{\paragraph}{%
 97 |   \@startsection{paragraph}{4}{\z@}%
 98 |                 {1.5ex \@plus 0.5ex \@minus 0.2ex}%
 99 |                 {-1em}%
100 |                 {\normalsize\bf}%
101 | }
102 | \providecommand{\subparagraph}{}
103 | \renewcommand{\subparagraph}{%
104 |   \@startsection{subparagraph}{5}{\z@}%
105 |                 {1.5ex \@plus 0.5ex \@minus 0.2ex}%
106 |                 {-1em}%
107 |                 {\normalsize\bf}%
108 | }
109 | \providecommand{\subsubsubsection}{}
110 | \renewcommand{\subsubsubsection}{%
111 |   \vskip5pt{\noindent\normalsize\rm\raggedright}%
112 | }
113 | 
114 | % float placement
115 | \renewcommand{\topfraction      }{0.85}
116 | \renewcommand{\bottomfraction   }{0.4}
117 | \renewcommand{\textfraction     }{0.1}
118 | \renewcommand{\floatpagefraction}{0.7}
119 | 
120 | \newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@}
121 | \newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@}
122 | 
123 | \setlength{\abovecaptionskip}{\@abovecaptionskip}
124 | \setlength{\belowcaptionskip}{\@belowcaptionskip}
125 | 
126 | % swap above/belowcaptionskip lengths for tables
127 | \renewenvironment{table}
128 |   {\setlength{\abovecaptionskip}{\@belowcaptionskip}%
129 |    \setlength{\belowcaptionskip}{\@abovecaptionskip}%
130 |    \@float{table}}
131 |   {\end@float}
132 | 
133 | % footnote formatting
134 | \setlength{\footnotesep }{6.65\p@}
135 | \setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
136 | \renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
137 | \setcounter{footnote}{0}
138 | 
139 | % paragraph formatting
140 | \setlength{\parindent}{\z@}
141 | \setlength{\parskip  }{5.5\p@}
142 | 
143 | % list formatting
144 | \setlength{\topsep       }{4\p@ \@plus 1\p@   \@minus 2\p@}
145 | \setlength{\partopsep    }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
146 | \setlength{\itemsep      }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
147 | \setlength{\parsep       }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
148 | \setlength{\leftmargin   }{3pc}
149 | \setlength{\leftmargini  }{\leftmargin}
150 | \setlength{\leftmarginii }{2em}
151 | \setlength{\leftmarginiii}{1.5em}
152 | \setlength{\leftmarginiv }{1.0em}
153 | \setlength{\leftmarginv  }{0.5em}
154 | \def\@listi  {\leftmargin\leftmargini}
155 | \def\@listii {\leftmargin\leftmarginii
156 |               \labelwidth\leftmarginii
157 |               \advance\labelwidth-\labelsep
158 |               \topsep  2\p@ \@plus 1\p@    \@minus 0.5\p@
159 |               \parsep  1\p@ \@plus 0.5\p@ \@minus 0.5\p@
160 |               \itemsep \parsep}
161 | \def\@listiii{\leftmargin\leftmarginiii
162 |               \labelwidth\leftmarginiii
163 |               \advance\labelwidth-\labelsep
164 |               \topsep    1\p@ \@plus 0.5\p@ \@minus 0.5\p@
165 |               \parsep    \z@
166 |               \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
167 |               \itemsep \topsep}
168 | \def\@listiv {\leftmargin\leftmarginiv
169 |               \labelwidth\leftmarginiv
170 |               \advance\labelwidth-\labelsep}
171 | \def\@listv  {\leftmargin\leftmarginv
172 |               \labelwidth\leftmarginv
173 |               \advance\labelwidth-\labelsep}
174 | \def\@listvi {\leftmargin\leftmarginvi
175 |               \labelwidth\leftmarginvi
176 |               \advance\labelwidth-\labelsep}
177 | 
% create title
% \maketitle typesets the title block via \@maketitle on a page with the
% `empty' page style, followed by the footnotes collected in \@thanks.
% All redefinitions below are local to the group.
\providecommand{\maketitle}{}
\renewcommand{\maketitle}{%
  \par
  \begingroup
    % footnote marks in the title block use symbols instead of numbers
    \renewcommand{\thefootnote}{\fnsymbol{footnote}}
    % for perfect author name centering
    \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
    % The footnote-mark was overlapping the footnote-text,
    % added the following to fix this problem               (MK)
    \long\def\@makefntext##1{%
      \parindent 1em\noindent
      \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
    }
    \thispagestyle{empty}
    \@maketitle
    \@thanks
    % the conference notice is disabled
    %\@notice
  \endgroup
  % after the first use, \maketitle and \thanks expand to nothing
  \let\maketitle\relax
  \let\thanks\relax
}
200 | 
201 | % rules for title box at top of first page
202 | \newcommand{\@toptitlebar}{
203 |   \hrule height 2\p@
204 |   \vskip 0.25in
205 |   \vskip -\parskip%
206 | }
207 | \newcommand{\@bottomtitlebar}{
208 |   \vskip 0.29in
209 |   \vskip -\parskip
210 |   \hrule height 2\p@
211 |   \vskip 0.09in%
212 | }
213 | 
% create the title block: top rule, title, bottom rule, the \undertitle line,
% the author block(s) and the date
\providecommand{\@maketitle}{}
\renewcommand{\@maketitle}{%
  \vbox{%
    \hsize\textwidth
    \linewidth\hsize
    \vskip 0.1in
    \@toptitlebar
    \centering
    {\LARGE\sc \@title\par}
    \@bottomtitlebar
    \textsc{\undertitle}\\
    \vskip 0.1in
    % \And separates two author blocks: it closes the current tabular and
    % opens a new one, allowing (but not forcing) a line break in between
    \def\And{%
      \end{tabular}\hfil\linebreak[0]\hfil%
      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
    }
    % \AND does the same but forces the line break
    \def\AND{%
      \end{tabular}\hfil\linebreak[4]\hfil%
      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
    }
    \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
  \vskip 0.4in \@minus 0.1in \center{\@date}   \vskip 0.2in
  }
}
239 | 
% add conference notice to bottom of first page
% NOTE: \@noticestring is not defined anywhere in this file, and the only call
% to \@notice (in \maketitle) is commented out, so this macro is currently
% unused; define \@noticestring before enabling it.
\newcommand{\ftype@noticebox}{8}
\newcommand{\@notice}{%
  % give a bit of extra room back to authors on first page
  \enlargethispage{2\baselineskip}%
  \@float{noticebox}[b]%
    \footnotesize\@noticestring%
  \end@float%
}
249 | 
250 | % abstract styling
251 | \renewenvironment{abstract}
252 | {
253 |   \centerline
254 |   {\large \bfseries \scshape Abstract}
255 |   \begin{quote}
256 | }
257 | {
258 |   \end{quote}
259 | }
260 | 
261 | \endinput
262 | 


--------------------------------------------------------------------------------
/paper/batchboost.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/batchboost.pdf


--------------------------------------------------------------------------------
/paper/batchboost.tex:
--------------------------------------------------------------------------------
  1 | \documentclass{article}
  2 | 
  3 | 
  4 | 
  5 | \usepackage{arxiv}
  6 | 
  7 | \usepackage[utf8]{inputenc} % allow utf-8 input
  8 | \usepackage[T1]{fontenc}    % use 8-bit T1 fonts
  9 | \usepackage{hyperref}       % hyperlinks
 10 | \usepackage{url}            % simple URL typesetting
 11 | \usepackage{booktabs}       % professional-quality tables
 12 | \usepackage{amsfonts}       % blackboard math symbols
 13 | \usepackage{nicefrac}       % compact symbols for 1/2, etc.
 14 | \usepackage{microtype}      % microtypography
 15 | \usepackage{lipsum}		% Can be removed after putting your text content
 16 | 
 17 | \usepackage{graphicx}
 18 | \usepackage{caption}
 19 | \usepackage{float}
 20 | \usepackage{subcaption}
 21 | \usepackage{amsmath}
 22 | 
 23 | \renewcommand{\headeright}{}
 24 | \renewcommand{\undertitle}{Draft}
 25 | 
 26 | % Version 2:
 27 | % FIXME: gradient clipping (mixup more likely do explode, but why?)
 28 | % FIXME: what about adam, sgd and other optimizers with different weight decay?
 29 | % FIXME: what about different types of data (tabular)?
 30 | % FIXME: adversarial attacks?
 31 | 
 32 | \title{\emph{batchboost}: regularization for stabilizing training with
 33 | 	resistance to underfitting \& overfitting}
 34 | 
 35 | \author{
 36 |   Maciej A.~Czyzewski\\
 37 |   Institute of Computing Science\\
 38 |   Poznan University of Technology\\
 39 |   Piotrowo 2, 60-965 Poznan, Poland\\
 40 |   \texttt{maciejanthonyczyzewski@gmail.com} \\
 41 | }
 42 | 
 43 | \begin{document}
 44 | \maketitle
 45 | 
 46 | % BC learning: https://arxiv.org/pdf/1711.10284.pdf
 47 | % EfficientNet: https://arxiv.org/pdf/1905.11946.pdf
 48 | % Mixup: https://arxiv.org/pdf/1710.09412.pdf
 49 | % SamplePairing: https://arxiv.org/pdf/1801.02929.pdf
 50 | % ShakeDrop: https://arxiv.org/pdf/1802.02375.pdf
 51 | % ShakeShake: https://arxiv.org/pdf/1705.07485.pdf
 52 | 
 53 | \begin{abstract}
 54 | 	Overfitting \& underfitting and stable training are important challenges in
 55 | 	machine learning.
 56 | 	%
 57 | 	Current approaches for these issues are \emph{mixup}\cite{zhang2017mixup},
 58 | 	\emph{SamplePairing}\cite{inoue2018data}
 59 | 	and \emph{BC learning}\cite{tokozume2018between}.
 60 | 	In our work, we state the hypothesis that mixing many images together can be more
 61 | 	effective than just two.
 62 | 	\emph{batchboost} pipeline has three stages:
 63 | 	(a) pairing: method of selecting two samples.
 64 | 	(b) mixing: how to create a new one from two samples.
 65 | 	(c) feeding: combining mixed samples with new ones from dataset into batch (with ratio $\gamma$).
 66 | 	Note that a sample that appears in our batch propagates through
 67 | 	subsequent iterations with less and less importance until the end of training.
 68 | 	%
 69 | 	The pairing stage calculates the error per sample, sorts the samples and pairs them
 70 | 	with the strategy: hardest with easiest; then the mixing stage merges two samples
 71 | 	using \emph{mixup}, $\lambda x_1 + (1-\lambda)x_2$. Finally, the feeding stage combines
 72 | 	new samples with mixed ones at a 1:1 ratio.
 73 | 	%
 74 | 	\emph{batchboost} has 0.5-3\% better accuracy than the current
 75 | 	state-of-the-art \emph{mixup} regularization on
 76 | 	CIFAR-10\cite{krizhevsky2009learning} \&
 77 | 	Fashion-MNIST\cite{xiao2017}.
 78 | 	%
 79 | 	Our method is slightly better than SamplePairing technique
 80 | 	on small datasets (up to 5\%).
 81 | 	%
 82 | 	\emph{batchboost} provides stable training with untuned parameters (like weight
 83 | 	decay), thus it is a good method to test the performance of different architectures.
 84 | 	%
 85 | 	Source code is at: \url{https://github.com/maciejczyzewski/batchboost}
 86 | \end{abstract}
 87 | 
 88 | \keywords{regularization \and underfitting \and overfitting
 89 | 	\and generalization \and mixup}
 90 | 
 91 | \section{Introduction}
 92 | \label{sec:introduction}
 93 | 
 94 | In order to improve test errors, regularization methods which are processes to
 95 | introduce additional information to DNN have been proposed\cite{miyato2018virtual}. Widely
 96 | used regularization methods include \emph{data augmentation}, \emph{stochastic
 97 | 	gradient descent} (SGD) \cite{zhang2016understanding}, \emph{weight decay}
 98 | \cite{krogh1992simple}, \emph{batch normalization} (BN) \cite{ioffe2015batch},
 99 | \emph{label
100 | 	smoothing}\cite{szegedy2016rethinking} and \emph{mixup}\cite{zhang2017mixup}.
101 | %
102 | Our idea comes from the flaws of \emph{mixup}. In a nutshell, \emph{mixup} constructs
103 | a virtual training example from two samples. In terms of batch construction, it
104 | simply draws random samples from the dataset and mixes them together at random.
105 | %
106 | Overlapping many samples (more than two) in one example has not been considered
107 | in previous work, probably because the superimposition of 3 examples significantly affects the model, leading to underfitting.
108 | %
109 | It turned out that in many tasks, linear mixing (like \emph{BC learning} or
110 | \emph{mixup}) leads to underfitting (figure \ref{fig:under}). Therefore, these methods are not applicable as universal tools.
111 | 
112 | \textbf{Contribution} Our work shows that the imposition of many examples in
113 | subsequent iterations (which are slowly suppressed by new overlays) can improve efficiency, but most importantly it ensures stability of training and resistance to attacks.
114 | %
115 | However, it must be done wisely: that's why we implemented two basic mechanisms:
116 | \begin{itemize}
117 | \item (a) new information is provided gradually, thus \emph{half-batch} adds
118 | new examples without mixing
119 | \item (b) mixing is carried out according to some criterion, in our case it is the
120 | best-the-worst strategy to mediate the error
121 | \end{itemize}
122 | %
123 | The whole procedure is made in three steps to make it more understandable:
124 | \begin{itemize}
125 | \item (a) \emph{pairing}: a method for selecting two samples
126 | \item (b) \emph{mixing}: how to create a new one from two samples
127 | \item (c) \emph{feeding}: the batch of mixed samples is supplemented with new examples
128 | from the dataset
129 | \end{itemize}
130 | %
131 | Note that a sample that appears in our batch propagates through
132 | subsequent iterations with less and less importance until the end of training.
133 | %
134 | Source code with sample implementation and experiments to verify the results
135 | we present here:
136 | 
137 | \begin{center}
138 | \url{https://github.com/maciejczyzewski/batchboost}
139 | \end{center}
140 | 
141 | To understand the effects of \emph{batchboost}, we conduct a
142 | thorough set of experiments (Section \ref{sec:results}).
143 | 
144 | \section{Overview}
145 | \label{sec:overview}
146 | 
147 | \begin{figure}[H]
148 |   \centering
149 |   \includegraphics[width=\linewidth]{figure-abstract}
150 |   \caption{\emph{batchboost} presented in three phases: (a) pairing by sorting
151 | 	  error (b) mixing with \emph{mixup} (c) feeding: a mixed feed-batch and new
152 | 	  samples in half-batch by 1:1 ratio.}
153 |   \label{fig:abstract}
154 | \end{figure}
155 | 
156 | Batch as input for training is a combination of two different mini-batches:
157 | \begin{itemize}
158 | \item (a) \emph{half-batch}: new samples from dataset, classical augmentation is possible here
159 | \item (b) \emph{feed-batch} (mixup): samples mixed together (in-order presented in
160 | figure \ref{fig:abstract})
161 | \end{itemize}
162 | 
163 | Parameter $\gamma$ means the ratio of the number of samples in half-batch to
164 | feed-batch, in our work we have not considered other values than 1. However, we believe that this is an interesting topic for further research and discussion.
165 | 
166 | \subsection{Pairing Method}
167 | \label{sec:pairing}
168 | 
169 | Combining many overlapping samples may have a negative impact on our optimizer
170 | used in training.  In our implementation, it calculates the error for each
171 | sample in batch.  Then it sorts this vector, and pairs samples by connecting the
172 | easiest (smallest error) with the most difficult sample.  The goal of this
173 | procedure is to create new artificial samples that are between classes, as
174 | described in \emph{BC learning}.
175 | 
176 | However, in this case they are not random pairs, but those that 'require'
177 | additional work. In this way, the learning process is more stable because there
178 | are no cases where it mixes only difficult with difficult or easy with easy samples (which
179 | is likely at the beginning or end of the learning process).
180 | %
181 | In our case, the error was calculated using L2 metric between one-hot labels and
182 | the predictions (thus we analyzed \emph{batchboost} only on classification
183 | problems like CIFAR-10\cite{krizhevsky2009learning} or
184 | Fashion-MNIST\cite{xiao2017}). For other problems, there is probably
185 | a need to change the metric/method of error calculation.
186 | %
187 | We were also thinking about using RL to pair samples. However, it turns out to
188 | be a more complicated problem thus we leave it here for further discussion.
189 | 
190 | \subsection{Mixing Method}
191 | \label{sec:mixing}
192 | 
193 | Selected two samples should be combined into one.
194 | There are three methods for linearly mixing samples: \emph{SamplePairing},
195 | \emph{Mixup}, \emph{BC Learning}. Due to the simplicity of implementation and
196 | the highest scores, we used a mixup, which looks like this:
197 | %
198 | \begin{align*}
199 |   \tilde{x} &= \lambda x_i + (1 - \lambda) x_j,\qquad \text{where~} x_i, x_j \text{~are~raw~input~vectors}\\
200 |   \tilde{y} &= \lambda y_i + (1 - \lambda) y_j,\qquad \text{where~} y_i, y_j \text{~are~one-hot~label~encodings}
201 | \end{align*}
202 | $(x_i, y_i)$ and $(x_j, y_j)$ are two examples drawn at random from our
203 | training data, and $\lambda \in [0,1]$.
204 | The label for many mixed samples was averaged over the last 2 labels (due to small differences in results and a large memory trade-off).
205 | 
206 | Why does it work?
207 | A good explanation is provided in the BC learning research: images and sound
208 | can be represented as waves. Mixing is an interpolation that humans don't
209 | understand but a machine can interpret.
210 | However, also a good explanation of this process is: that by training on
211 | artificial samples, we supplement the training data by artificial examples between-classes
212 | (visually, it fills space between clusters in UMAP/t-SNE visualization).
213 | Thus, it generalizes problem more by aggressive cluster separation during
214 | training (the clusters are moving away from each other, because model learns
215 | artificial clusters made up by mixing).
216 | Mixing multiple classes allows for more accurate separation (higher dimensions), however model starts to depart from original problem (new distribution) losing accuracy on test dataset.
217 | 
218 | The question is whether linear interpolation is good for all problems.
219 | Probably the best solution would be to use a GAN for this purpose (two inputs +
220 | noise to control). We tried to use the technique described in
221 | SinGAN\cite{shaham2019singan} but it
222 | failed in \emph{batchboost}.  It was unsuccessful due to the high cost of
223 | maintaining such a structure.
224 | 
225 | \subsection{Continuous Feeding}
226 | \label{sec:feeding}
227 | 
228 | The final stage 'feeds' the new artificial samples to the model's input. In
229 | previous research, only cases of mixing two samples within a
230 | batch were considered. \emph{batchboost} does this by adding new samples with $\gamma$ ratio to
231 | mixed ones.
232 | %
233 | An interesting observation is that once we mix samples, they are in learning
234 | process till end (at each batch continuously).
235 | When applying a mixing it has only three options: (a) new sample with new sample
236 | (b) new sample with previously mixed sample (c) previously mixed sample with
237 | previously mixed sample. Pairing method cannot choose only one option for all samples
238 | because of non-zero $\gamma$ ratio.
239 | 
240 | To maintain compatibility with the mixup
241 | algorithm, it chooses new $\lambda$ when constructing the batch.
242 | That is why past samples have less and less significance in training process,
243 | until they disappear completely (figure \ref{fig:feeding}).
244 | 
245 | \begin{figure}[H]
246 |   \hspace{0.5cm}
247 |   \includegraphics[width=\linewidth]{figure-feeding}
248 |   \caption{Orange squares indicate how information is propagated between
249 | 	  batches in the \emph{batchboost} method.}
250 |   \label{fig:feeding}
251 | \end{figure}
252 | 
253 | We found that for problems by nature not linear, for which the mixup did poorly,
254 | it was caused by the fact that model learned at the time when very low/high
255 | $\lambda$ was assigned (i.e. model learned on a single example, without mixing).
256 | %
257 | In \emph{batchboost} it doesn't look much better. However, \emph{half-batch}
258 | contains new information, and \emph{feed-batch} has examples mixed not randomly but
259 | by the pairing method. With these clues, the optimizer can slightly improve the direction of
260 | optimization by better interpreting the loss landscape.
261 | 
262 | \section{Results}
263 | \label{sec:results}
264 | 
265 | We focused on the current state-of-the-art \emph{mixup}. The architecture we
266 | used was \emph{EfficientNet-b0}\cite{tan2019efficientnet} and
267 | \emph{ResNet100k}\cite{DBLP:journals/corr/HeZRS15} (having only 100k
268 | parameters from DAWNBench\cite{coleman2017dawnbench}). The problems we evaluated on are CIFAR-10 and
269 | Fashion-MNIST.
270 | %
271 | We intend to update this work with more detailed comparisons and experiments,
272 | test on different architectures and parameters. The most interesting
273 | issue which requires additional research is artificial attacks.
274 | 
275 | \subsection{Underfitting \& Stabilizing Training}
276 | \label{sec:under}
277 | 
278 | We described this problem in section \ref{sec:feeding}. The main factors
279 | that stabilize training are: (a) the appropriate pairing of samples for mixing,
280 | i.e. by error per sample (b) propagation of new information in \emph{half-batch}.
281 | 
282 | \begin{figure}[H]
283 |   \centering
284 | \begin{minipage}{.3\textwidth}
285 |   \hspace{-0.65cm}
286 |   \includegraphics[totalheight=5.6cm]{figure-1-test-accuracy-without-augment}
287 | \end{minipage}
288 | \begin{minipage}{.65\textwidth}\vspace{-0.00cm}\hspace{0.865cm}
289 |   \includegraphics[totalheight=5.6cm]{figure-1-loss-train-without-augment}
290 | \end{minipage}%
291 | \caption{Evaluation on \emph{CIFAR-10}, for \emph{EfficientNet-b0} and
292 | 	\emph{SGD(weight-decay=1e-4, lr=0.1)} (as
293 | 	recommended in the \emph{mixup} research), same parameters for each model.
294 | 	As a result, the models behave differently, although they differ only in the
295 | 	method of constructing the batch.}
296 | \label{fig:under}
297 | \end{figure}
298 | 
299 | Another problem that \emph{mixup} often encounters is very unstable loss
300 | landscape. Therefore, without a well-chosen weight decay, it cannot stabilize in
301 | minimums. To solve this problem, we tune the optimizer parameters
302 | for \emph{mixup}, after that it could achieve a similar result to
303 | \emph{batchboost} (figure \ref{fig:over}).
304 | 
305 | \subsection{Overfitting (comparison to \emph{mixup})}
306 | \label{sec:over}
307 | 
308 | The most important observation of this section is that \emph{batchboost} retains
309 | the properties of the \emph{mixup} (similarly to \emph{SamplePairing} or
310 | \emph{BC learning}). It protects against overfitting, having slightly better results.
311 | 
312 | \begin{figure}[H]
313 |   \centering
314 | \begin{minipage}{.3\textwidth}
315 |   \hspace{-0.65cm}
316 |   \includegraphics[totalheight=5.6cm]{figure-2-train-accuracy-with-augment}
317 | \end{minipage}
318 | \begin{minipage}{.65\textwidth}\vspace{-0.00cm}\hspace{0.865cm}
319 | 	\includegraphics[totalheight=5.6cm]{figure-2-test-accuracy-with-augment}
320 | \end{minipage}%
321 | \caption{\emph{batchboost} is a new state-of-the-art because it is slightly
322 | 	better than \emph{mixup} (here \emph{mixup} has been tuned for best
323 | 	parameters, \emph{batchboost} uses configuration from figure \ref{fig:under}).}
324 | \label{fig:over}
325 | \end{figure}
326 | 
327 | The only difference is that the $\alpha$ coefficient from the original
328 | \emph{mixup} is weakened.
329 | 
330 | \subsection{Accelerating Training \& Adversarial Attacks}
331 | \label{sec:attacks}
332 | 
333 | In the early stages, it learns faster than a classic \emph{mixup}.
334 | The difference becomes significant when working on very small datasets, e.g.
335 | medical challenges on Kaggle. In this work, we have limited \emph{Fashion-MNIST}
336 | to 64 examples and compared to the classic model and \emph{SamplePairing}. The results were better by 5\%.
337 | When the model performs well on small datasets, it means that training
338 | generalizes the problem. In figure \ref{fig:multipass} we present samples
339 | generated during this process.
340 | 
341 | \begin{figure}[H]
342 |   \centering
343 |   \includegraphics[width=10.5cm]{figure-multipass}
344 |   \caption{More than two samples have been mixed.}
345 |   \label{fig:multipass}
346 | \end{figure}
347 | 
348 | We tried to modify \emph{batchboost} to generate samples similar to those of
349 | adversarial attacks (by uniformly mixing all samples backward with some Gaussian
350 | noise) without any reasonable results.
351 | 
352 | \section{Conclusion}
353 | \label{sec:conclusion}
354 | 
355 | Our method is easy to implement and can be used for any
356 | model as an additional BlackBox at input.
357 | It provides stability and slightly better results.
358 | Using \emph{batchboost} is certainly more important in problems with small data sets.
359 | Thanks to the property of avoiding underfitting for misconfigured parameters,
360 | this is a good regularization method for people who want to compare two
361 | architectures without parameter tuning.
362 | It retains all properties of \emph{mixup}, \emph{SamplePairing} and \emph{BC learning}.
363 | 
364 | \bibliographystyle{unsrt}
365 | \bibliography{references}
366 | 
367 | \end{document}
368 | 


--------------------------------------------------------------------------------
/paper/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import time
 4 | 
 5 | from watchdog.observers import Observer
 6 | from watchdog.events import FileSystemEventHandler
 7 | 
 8 | 
 9 | class LatexEventHandler(FileSystemEventHandler):
10 |     LATEX_FLAGS = "-interaction nonstopmode -halt-on-error -file-line-error"
11 |     FILETYPE_INPUT = [".tex"]
12 | 
13 |     def on_any_event(self, event):
14 |         for ext in self.FILETYPE_INPUT:
15 |             if event.src_path.endswith(ext):
16 |                 self.compile(event)
17 | 
18 |     def compile(self, event):
19 |         print("=== LATEX ===")
20 |         os.system(f"pdflatex {self.LATEX_FLAGS} {event.src_path}")
21 |         os.system(f"md5 {event.src_path}")
22 | 
23 | 
if __name__ == "__main__":
    # Directory to watch: first command-line argument, else the current one.
    watch_dir = "."
    if len(sys.argv) > 1:
        watch_dir = sys.argv[1]

    watcher = Observer()
    watcher.schedule(LatexEventHandler(), watch_dir, recursive=True)
    watcher.start()

    # Keep the main thread alive until the user interrupts with Ctrl-C.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
    watcher.join()
37 | 


--------------------------------------------------------------------------------
/paper/figure-1-loss-train-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-1-loss-train-without-augment.pdf


--------------------------------------------------------------------------------
/paper/figure-1-test-accuracy-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-1-test-accuracy-without-augment.pdf


--------------------------------------------------------------------------------
/paper/figure-2-test-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-2-test-accuracy-with-augment.pdf


--------------------------------------------------------------------------------
/paper/figure-2-train-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-2-train-accuracy-with-augment.pdf


--------------------------------------------------------------------------------
/paper/figure-abstract.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-abstract.pdf


--------------------------------------------------------------------------------
/paper/figure-feeding.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-feeding.pdf


--------------------------------------------------------------------------------
/paper/figure-multipass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-multipass.png


--------------------------------------------------------------------------------
/paper/notes_v2.md:
--------------------------------------------------------------------------------
 1 | # Research Brief (Brief intro of the research (50 + words))
 2 | 
 3 | Batchboost is a simple technique to accelerate ML model training by adaptively feeding mini-batches with artificial samples which are created by mixing two examples from previous step - in favor of pairing those that produce the difficult one.
 4 | 
 5 | # What’s New (What’s new in this research?)
 6 | 
 7 | In this research, we state the hypothesis that mixing many images together can
 8 | be more effective than just two.  To make it efficient, we propose a new method of
 9 | creating mini-batches, where each sample from dataset is propagated with
10 | subsequent iterations with less and less importance until the end of learning
11 | process.
12 | 
13 | # How It Works (How this research works?)
14 | 
15 | Batchboost pipeline has three stages:
16 | (a) pairing: method of selecting two samples from previous step.
17 | (b) mixing: method of creating a new artificial example from two selected samples.
18 | (c) feeding: constructing training mini-batch with created examples and new samples from dataset (concat with ratio γ).
19 | Note that sample from dataset propagates with subsequent iterations with less and less importance until the end of training. 
20 | 
21 | Our baseline implements pairing stage as sorting by sample error, where hardest examples are paired with easiest ones. Mixing stage
22 | merges two samples using mixup, λx1+(1−λ)x2. Feeding stage combines new samples with ratio 1:1 using concat.
23 | 
24 | # Key Insights (What are the main takeaways from this research?)
25 | 
26 | The results are promising. Batchboost has 0.5-3% better accuracy than the current state-of-the-art mixup regularization on CIFAR-10 (#10 place in https://paperswithcode.com/) & Fashion-MNIST.
27 | (we hope to see our method in action, for example, on Kaggle as trick to improve a bit test accuracy)
28 | 
29 | # Behind The Scenes (Any interesting ideas or research tips you - would like to share with our AI Community?)
30 | 
31 | There is a lot to improve in data augmentation and regularization methods.
32 | 
33 | # Anything else? (Bottlenecks and future trend?)
34 | 
35 | An interesting topic for further research and discussion is the
36 | combination of batchboost and existing methods.
37 | 


--------------------------------------------------------------------------------
/paper/references.bib:
--------------------------------------------------------------------------------
  1 | @article{miyato2018virtual,
  2 |   title={Virtual adversarial training: a regularization method for supervised and semi-supervised learning},
  3 |   author={Miyato, Takeru and Maeda, Shin-ichi and Koyama, Masanori and Ishii, Shin},
  4 |   journal={IEEE transactions on pattern analysis and machine intelligence},
  5 |   volume={41},
  6 |   number={8},
  7 |   pages={1979--1993},
  8 |   year={2018},
  9 |   publisher={IEEE}
 10 | }
 11 | 
 12 | @article{zhang2016understanding,
 13 |   title={Understanding deep learning requires rethinking generalization},
 14 |   author={Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
 15 |   journal={arXiv preprint arXiv:1611.03530},
 16 |   year={2016}
 17 | }
 18 | 
 19 | @inproceedings{krogh1992simple,
 20 |   title={A simple weight decay can improve generalization},
 21 |   author={Krogh, Anders and Hertz, John A},
 22 |   booktitle={Advances in neural information processing systems},
 23 |   pages={950--957},
 24 |   year={1992}
 25 | }
 26 | 
 27 | @article{zhang2017mixup,
 28 |   title={mixup: Beyond empirical risk minimization},
 29 |   author={Zhang, Hongyi and Cisse, Moustapha and Dauphin, Yann N and Lopez-Paz, David},
 30 |   journal={arXiv preprint arXiv:1710.09412},
 31 |   year={2017}
 32 | }
 33 | 
 34 | @article{ioffe2015batch,
 35 |   title={Batch normalization: Accelerating deep network training by reducing internal covariate shift},
 36 |   author={Ioffe, Sergey and Szegedy, Christian},
 37 |   journal={arXiv preprint arXiv:1502.03167},
 38 |   year={2015}
 39 | }
 40 | 
 41 | @inproceedings{szegedy2016rethinking,
 42 |   title={Rethinking the inception architecture for computer vision},
 43 |   author={Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jon and Wojna, Zbigniew},
 44 |   booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
 45 |   pages={2818--2826},
 46 |   year={2016}
 47 | }
 48 | 
 49 | @article{coleman2017dawnbench,
 50 |   title={Dawnbench: An end-to-end deep learning benchmark and competition},
 51 |   author={Coleman, Cody and Narayanan, Deepak and Kang, Daniel and Zhao, Tian and Zhang, Jian and Nardi, Luigi and Bailis, Peter and Olukotun, Kunle and R{\'e}, Chris and Zaharia, Matei},
 52 |   journal={Training},
 53 |   volume={100},
 54 |   number={101},
 55 |   pages={102},
 56 |   year={2017}
 57 | }
 58 | 
 59 | @article{inoue2018data,
 60 |   title={Data augmentation by pairing samples for images classification},
 61 |   author={Inoue, Hiroshi},
 62 |   journal={arXiv preprint arXiv:1801.02929},
 63 |   year={2018}
 64 | }
 65 | 
 66 | @inproceedings{tokozume2018between,
 67 |   title={Between-class learning for image classification},
 68 |   author={Tokozume, Yuji and Ushiku, Yoshitaka and Harada, Tatsuya},
 69 |   booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
 70 |   pages={5486--5494},
 71 |   year={2018}
 72 | }
 73 | 
 74 | @article{xiao2017,
 75 |   author       = {Han Xiao and Kashif Rasul and Roland Vollgraf},
 76 |   title        = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
 77 |   date         = {2017-08-28},
 78 |   year         = {2017},
 79 |   eprintclass  = {cs.LG},
 80 |   eprinttype   = {arXiv},
 81 |   eprint       = {cs.LG/1708.07747},
 82 | }
 83 | 
 84 | @article{krizhevsky2009learning,
 85 |   title={Learning multiple layers of features from tiny images},
 86 |   author={Krizhevsky, Alex and Hinton, Geoffrey and others},
 87 |   year={2009},
 88 |   publisher={Citeseer}
 89 | }
 90 | 
 91 | @inproceedings{shaham2019singan,
 92 |   title={Singan: Learning a generative model from a single natural image},
 93 |   author={Shaham, Tamar Rott and Dekel, Tali and Michaeli, Tomer},
 94 |   booktitle={Proceedings of the IEEE International Conference on Computer Vision},
 95 |   pages={4570--4580},
 96 |   year={2019}
 97 | }
 98 | 
 99 | @article{tan2019efficientnet,
100 |   title={Efficientnet: Rethinking model scaling for convolutional neural networks},
101 |   author={Tan, Mingxing and Le, Quoc V},
102 |   journal={arXiv preprint arXiv:1905.11946},
103 |   year={2019}
104 | }
105 | 
106 | @article{DBLP:journals/corr/HeZRS15,
107 |   author    = {Kaiming He and
108 |                Xiangyu Zhang and
109 |                Shaoqing Ren and
110 |                Jian Sun},
111 |   title     = {Deep Residual Learning for Image Recognition},
112 |   journal   = {CoRR},
113 |   volume    = {abs/1512.03385},
114 |   year      = {2015},
115 |   url       = {http://arxiv.org/abs/1512.03385},
116 |   archivePrefix = {arXiv},
117 |   eprint    = {1512.03385},
118 |   timestamp = {Wed, 17 Apr 2019 17:23:45 +0200},
119 |   biburl    = {https://dblp.org/rec/bib/journals/corr/HeZRS15},
120 |   bibsource = {dblp computer science bibliography, https://dblp.org}
121 | }
122 | 


--------------------------------------------------------------------------------
/paper/texput.log:
--------------------------------------------------------------------------------
 1 | This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019) (preloaded format=pdflatex 2019.10.12)  21 JAN 2020 02:38
 2 | entering extended mode
 3 |  restricted \write18 enabled.
 4 |  file:line:error style messages enabled.
 5 |  %&-line parsing enabled.
 6 | **./batchboost_polish.tex
 7 | 
 8 | ! Emergency stop.
 9 | <*> ./batchboost_polish.tex
10 |                             
11 | Here is how much of TeX's memory you used:
12 |  3 strings out of 492616
13 |  112 string characters out of 6129480
14 |  57117 words of memory out of 5000000
15 |  4025 multiletter control sequences out of 15000+600000
16 |  3640 words of font info for 14 fonts, out of 8000000 for 9000
17 |  1141 hyphenation exceptions out of 8191
18 |  0i,0n,0p,1b,6s stack positions out of 5000i,500n,10000p,200000b,80000s
19 | 
20 | !  ==> Fatal error occurred, no output PDF file produced!
21 | 


--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
  1 | # 1 - without augment alpha=1.0
  2 | # 2 - with    augment alpha=1.0
  3 | # 3 - without augment alpha=0.4
  4 | # 4 - with    augment alpha=0.4
  5 | 
  6 | import matplotlib.pyplot as plt
  7 | import numpy as np
  8 | import unidecode
  9 | import csv
 10 | import re
 11 | 
 12 | 
 13 | def slugify(text):
 14 |     text = unidecode.unidecode(text).lower()
 15 |     text = re.sub(r"[\W_]+", "-", text)
 16 |     if text[-1] == "-":
 17 |         return text[0:-1]
 18 |     return text
 19 | 
 20 | 
 21 | class figure:
 22 |     def __init__(self, name=None, prefix=None):
 23 |         self.name = name
 24 |         self.prefix = prefix
 25 | 
 26 |     def __enter__(self):
 27 |         print("--- FIGURE ---")
 28 |         print(f"`{self.name}`")
 29 |         plt.cla()
 30 |         plt.title(self.name)
 31 | 
 32 |     def __exit__(self, x, y, z):
 33 |         print("--- SAVE ---")
 34 |         figure_prefix = "figure-"
 35 |         if self.prefix is not None:
 36 |             figure_prefix += f"{str(self.prefix)}-"
 37 |         fig.savefig(f"figures/{figure_prefix}{slugify(self.name)}.pdf")
 38 | 
 39 | 
# (1) better style
# NOTE(review): "science" and "ieee" are presumably supplied by an extra
# matplotlib style package rather than matplotlib itself -- confirm it is
# installed before running this script.
plt.style.use(["science", "ieee"])

# One module-level figure/axes pair shared by the whole script:
# ``figure.__exit__`` saves ``fig`` and ``fill_between`` draws on ``ax``.
fig, ax = plt.subplots()
ax.autoscale(tight=True)
 45 | 
 46 | 
 47 | def read_file(path="log_EfficientNet_batchboost_1", col=5):
 48 |     X, Y = [], []
 49 |     with open(f"results/{path}.csv", "r") as csvfile:
 50 |         plots = csv.reader(csvfile, delimiter=",")
 51 |         next(plots, None)
 52 |         for row in plots:
 53 |             X.append(int(row[0]))
 54 |             Y.append(
 55 |                 float(row[col].replace(", device='cuda:0'",
 56 |                                        "").replace("tensor(",
 57 |                                                    "").replace(")", "")))
 58 |     return X, Y
 59 | 
 60 | 
 61 | def fill_between(X, Y, color="blue", alpha=0.05, factor=1):
 62 |     sigma = factor * np.array(Y).std(axis=0)  # ls = '--'
 63 |     ax.fill_between(X, Y + sigma, Y - sigma, facecolor=color, alpha=alpha)
 64 | 
 65 | 
 66 | ### FIGURE (1): underfitting ###
 67 | 
 68 | with figure("test accuracy (without augment)", prefix=1):
 69 |     x1, y1 = read_file("decay=1e-4/log_EfficientNet_batchboost_1")
 70 |     plt.plot(x1, y1, label="boostbatch (alpha=1.0)", color="darkred")
 71 | 
 72 |     x1, y1 = read_file("decay=1e-4/log_EfficientNet_batchboost_3")
 73 |     plt.plot(x1, y1, label="boostbatch (alpha=0.4)", color="red")
 74 | 
 75 |     x2, y2 = read_file("decay=1e-4/log_EfficientNet_mixup_1")
 76 |     plt.plot(x2, y2, label="mixup (alpha=1.0)", color="darkblue")
 77 | 
 78 |     x2, y2 = read_file("decay=1e-4/log_EfficientNet_mixup_3")
 79 |     plt.plot(x2, y2, label="mixup (alpha=0.4)", color="blue")
 80 | 
 81 |     x3, y3 = read_file("decay=1e-4/log_EfficientNet_baseline_13")
 82 |     plt.plot(x3, y3, label="baseline", color="black")
 83 | 
 84 |     plt.ylabel("accuracy")
 85 |     plt.xlabel("epoch")
 86 |     # plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
 87 | 
 88 | with figure("loss train (without augment)", prefix=1):
 89 |     x1a, y1a = read_file("decay=1e-4/log_EfficientNet_batchboost_1", col=1)
 90 |     plt.plot(x1a, y1a, label="boostbatch (alpha=1.0)", color="darkred")
 91 | 
 92 |     x1b, y1b = read_file("decay=1e-4/log_EfficientNet_batchboost_3", col=1)
 93 |     plt.plot(x1b, y1b, label="boostbatch (alpha=0.4)", color="red")
 94 | 
 95 |     fill_between(x1a,
 96 |                  np.mean([y1a, y1b], axis=0),
 97 |                  color="red",
 98 |                  factor=1,
 99 |                  alpha=0.1)
100 | 
101 |     x2a, y2a = read_file("decay=1e-4/log_EfficientNet_mixup_1", col=1)
102 |     plt.plot(x2a, y2a, label="mixup (alpha=1.0)", color="darkblue")
103 | 
104 |     x2b, y2b = read_file("decay=1e-4/log_EfficientNet_mixup_3", col=1)
105 |     plt.plot(x2b, y2b, label="mixup (alpha=0.4)", color="blue")
106 | 
107 |     fill_between(x2a,
108 |                  np.mean([y2a, y2b], axis=0),
109 |                  color="blue",
110 |                  factor=1,
111 |                  alpha=0.1)
112 | 
113 |     x3, y3 = read_file("decay=1e-4/log_EfficientNet_baseline_13", col=1)
114 |     plt.plot(x3, y3, label="baseline", color="black")
115 | 
116 |     plt.ylabel("loss")
117 |     plt.xlabel("epoch")
118 |     plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
119 | 
### FIGURE (2): overfitting (comparison to mixup) ###

# Test accuracy per epoch (default col=5, "test acc") for the decay=1e-5 runs,
# with a half-standard-deviation band around each method's averaged curve.
with figure("test accuracy (with augment)", prefix=2):
    x1a, y1a = read_file("decay=1e-5/log_EfficientNet_batchboost_2")
    plt.plot(x1a, y1a, label="batchboost (alpha=1.0)", color="darkred")

    x1b, y1b = read_file("decay=1e-5/log_EfficientNet_batchboost_4")
    plt.plot(x1b, y1b, label="batchboost (alpha=0.4)", color="red")

    # np.mean over the pair needs both runs to have the same number of epochs.
    fill_between(x1a,
                 np.mean([y1a, y1b], axis=0),
                 color="red",
                 factor=0.5,
                 alpha=0.1)

    x2a, y2a = read_file("decay=1e-5/log_EfficientNet_mixup_2")
    plt.plot(x2a, y2a, label="mixup (alpha=1.0)", color="darkblue")

    x2b, y2b = read_file("decay=1e-5/log_EfficientNet_mixup_4")
    plt.plot(x2b, y2b, label="mixup (alpha=0.4)", color="blue")

    fill_between(x2a,
                 np.mean([y2a, y2b], axis=0),
                 color="blue",
                 factor=0.5,
                 alpha=0.1)

    # x3, y3 = read_file("decay=1e-5/log_EfficientNet_baseline_24")
    # plt.plot(x3, y3, label="baseline", color="black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
153 | 
# Training accuracy per epoch (col=3, the "train acc" column) for the
# decay=1e-5 runs.
with figure("train accuracy (with augment)", prefix=2):
    x1, y1 = read_file("decay=1e-5/log_EfficientNet_batchboost_2", col=3)
    plt.plot(x1, y1, label="batchboost (alpha=1.0)", color="darkred")

    x1, y1 = read_file("decay=1e-5/log_EfficientNet_batchboost_4", col=3)
    plt.plot(x1, y1, label="batchboost (alpha=0.4)", color="red")

    x2, y2 = read_file("decay=1e-5/log_EfficientNet_mixup_2", col=3)
    plt.plot(x2, y2, label="mixup (alpha=1.0)", color="darkblue")

    x2, y2 = read_file("decay=1e-5/log_EfficientNet_mixup_4", col=3)
    plt.plot(x2, y2, label="mixup (alpha=0.4)", color="blue")

    # x3, y3 = read_file("decay=1e-5/log_EfficientNet_baseline_24", col=3)
    # plt.plot(x3, y3, label="baseline", color="black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    # plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
173 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_baseline_13.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(2.6055, device='cuda:0')",0.0,tensor(19.8040),"tensor(2.1569, device='cuda:0')",tensor(18.9400)
 3 | 1,"tensor(1.8451, device='cuda:0')",0.0,tensor(31.3720),"tensor(2.0787, device='cuda:0')",tensor(25.8400)
 4 | 2,"tensor(1.7183, device='cuda:0')",0.0,tensor(36.4540),"tensor(1.8010, device='cuda:0')",tensor(35.2200)
 5 | 3,"tensor(1.5847, device='cuda:0')",0.0,tensor(41.5820),"tensor(1.7851, device='cuda:0')",tensor(35.3800)
 6 | 4,"tensor(1.5116, device='cuda:0')",0.0,tensor(44.7700),"tensor(1.9935, device='cuda:0')",tensor(31.7000)
 7 | 5,"tensor(1.4671, device='cuda:0')",0.0,tensor(46.4440),"tensor(1.7445, device='cuda:0')",tensor(36.8900)
 8 | 6,"tensor(1.4378, device='cuda:0')",0.0,tensor(47.5760),"tensor(1.5938, device='cuda:0')",tensor(43.9800)
 9 | 7,"tensor(1.3857, device='cuda:0')",0.0,tensor(49.7040),"tensor(1.4793, device='cuda:0')",tensor(46.5200)
10 | 8,"tensor(1.3346, device='cuda:0')",0.0,tensor(51.8020),"tensor(1.3809, device='cuda:0')",tensor(51.0100)
11 | 9,"tensor(1.2833, device='cuda:0')",0.0,tensor(53.8740),"tensor(1.5019, device='cuda:0')",tensor(45.8600)
12 | 10,"tensor(1.2222, device='cuda:0')",0.0,tensor(56.4380),"tensor(1.4066, device='cuda:0')",tensor(49.5100)
13 | 11,"tensor(1.2230, device='cuda:0')",0.0,tensor(56.4160),"tensor(1.2856, device='cuda:0')",tensor(54.3400)
14 | 12,"tensor(1.2277, device='cuda:0')",0.0,tensor(56.4400),"tensor(1.6038, device='cuda:0')",tensor(46.1800)
15 | 13,"tensor(1.1926, device='cuda:0')",0.0,tensor(57.5240),"tensor(1.2848, device='cuda:0')",tensor(54.5300)
16 | 14,"tensor(1.1133, device='cuda:0')",0.0,tensor(60.5220),"tensor(1.4080, device='cuda:0')",tensor(51.5900)
17 | 15,"tensor(1.1373, device='cuda:0')",0.0,tensor(59.6840),"tensor(1.4613, device='cuda:0')",tensor(51.1200)
18 | 16,"tensor(1.0855, device='cuda:0')",0.0,tensor(61.9180),"tensor(1.3164, device='cuda:0')",tensor(55.0600)
19 | 17,"tensor(0.9908, device='cuda:0')",0.0,tensor(65.1260),"tensor(1.1663, device='cuda:0')",tensor(59.6400)
20 | 18,"tensor(0.9379, device='cuda:0')",0.0,tensor(67.0500),"tensor(1.1016, device='cuda:0')",tensor(61.4500)
21 | 19,"tensor(0.8914, device='cuda:0')",0.0,tensor(68.8620),"tensor(1.3252, device='cuda:0')",tensor(55.9400)
22 | 20,"tensor(0.8710, device='cuda:0')",0.0,tensor(69.7060),"tensor(0.9524, device='cuda:0')",tensor(67.3000)
23 | 21,"tensor(0.8302, device='cuda:0')",0.0,tensor(71.0920),"tensor(0.9558, device='cuda:0')",tensor(66.6600)
24 | 22,"tensor(0.7480, device='cuda:0')",0.0,tensor(73.9720),"tensor(0.8940, device='cuda:0')",tensor(68.3900)
25 | 23,"tensor(0.7158, device='cuda:0')",0.0,tensor(75.1660),"tensor(0.8884, device='cuda:0')",tensor(69.3300)
26 | 24,"tensor(0.6930, device='cuda:0')",0.0,tensor(75.9360),"tensor(0.8584, device='cuda:0')",tensor(70.1800)
27 | 25,"tensor(0.6635, device='cuda:0')",0.0,tensor(77.0360),"tensor(0.9007, device='cuda:0')",tensor(69.3000)
28 | 26,"tensor(0.6355, device='cuda:0')",0.0,tensor(77.8820),"tensor(0.8464, device='cuda:0')",tensor(71.1100)
29 | 27,"tensor(0.6146, device='cuda:0')",0.0,tensor(78.8440),"tensor(0.8866, device='cuda:0')",tensor(70.1600)
30 | 28,"tensor(0.6022, device='cuda:0')",0.0,tensor(79.0920),"tensor(0.8468, device='cuda:0')",tensor(70.8300)
31 | 29,"tensor(0.5818, device='cuda:0')",0.0,tensor(79.8040),"tensor(0.8185, device='cuda:0')",tensor(71.9300)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_baseline_24.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(2.3113, device='cuda:0')",0.0,tensor(21.0420),"tensor(2.1255, device='cuda:0')",tensor(23.4300)
 3 | 1,"tensor(1.9387, device='cuda:0')",0.0,tensor(28.4900),"tensor(1.9492, device='cuda:0')",tensor(28.3900)
 4 | 2,"tensor(1.8972, device='cuda:0')",0.0,tensor(29.2980),"tensor(2.0775, device='cuda:0')",tensor(24.6300)
 5 | 3,"tensor(1.8820, device='cuda:0')",0.0,tensor(30.2560),"tensor(1.8191, device='cuda:0')",tensor(33.)
 6 | 4,"tensor(1.8923, device='cuda:0')",0.0,tensor(30.0400),"tensor(2.2398, device='cuda:0')",tensor(21.7100)
 7 | 5,"tensor(1.8451, device='cuda:0')",0.0,tensor(31.4360),"tensor(1.9829, device='cuda:0')",tensor(27.5600)
 8 | 6,"tensor(1.9566, device='cuda:0')",0.0,tensor(27.0860),"tensor(2.0375, device='cuda:0')",tensor(24.5400)
 9 | 7,"tensor(1.9607, device='cuda:0')",0.0,tensor(26.0460),"tensor(1.9464, device='cuda:0')",tensor(27.0500)
10 | 8,"tensor(1.9515, device='cuda:0')",0.0,tensor(26.9960),"tensor(2.2721, device='cuda:0')",tensor(16.3300)
11 | 9,"tensor(1.9365, device='cuda:0')",0.0,tensor(26.5980),"tensor(1.8653, device='cuda:0')",tensor(29.7700)
12 | 10,"tensor(1.8948, device='cuda:0')",0.0,tensor(28.5580),"tensor(2.0207, device='cuda:0')",tensor(24.9000)
13 | 11,"tensor(1.9236, device='cuda:0')",0.0,tensor(27.8180),"tensor(2.0142, device='cuda:0')",tensor(27.8200)
14 | 12,"tensor(1.8476, device='cuda:0')",0.0,tensor(31.1720),"tensor(1.8535, device='cuda:0')",tensor(29.2100)
15 | 13,"tensor(1.9081, device='cuda:0')",0.0,tensor(27.9320),"tensor(1.9645, device='cuda:0')",tensor(28.5600)
16 | 14,"tensor(1.7877, device='cuda:0')",0.0,tensor(33.2980),"tensor(2.0359, device='cuda:0')",tensor(25.7000)
17 | 15,"tensor(1.9540, device='cuda:0')",0.0,tensor(26.3880),"tensor(2.0166, device='cuda:0')",tensor(23.4300)
18 | 16,"tensor(2.0129, device='cuda:0')",0.0,tensor(22.7800),"tensor(4.3970, device='cuda:0')",tensor(11.3500)
19 | 17,"tensor(1.8895, device='cuda:0')",0.0,tensor(27.6220),"tensor(1.9178, device='cuda:0')",tensor(29.3400)
20 | 18,"tensor(1.7812, device='cuda:0')",0.0,tensor(32.4420),"tensor(1.8813, device='cuda:0')",tensor(29.7900)
21 | 19,"tensor(1.7666, device='cuda:0')",0.0,tensor(33.9020),"tensor(1.9696, device='cuda:0')",tensor(27.2200)
22 | 20,"tensor(1.7717, device='cuda:0')",0.0,tensor(33.8160),"tensor(1.9529, device='cuda:0')",tensor(28.1800)
23 | 21,"tensor(1.7940, device='cuda:0')",0.0,tensor(33.0480),"tensor(2.6551, device='cuda:0')",tensor(18.9900)
24 | 22,"tensor(1.8595, device='cuda:0')",0.0,tensor(30.2860),"tensor(1.8495, device='cuda:0')",tensor(32.4800)
25 | 23,"tensor(1.7393, device='cuda:0')",0.0,tensor(35.3400),"tensor(1.9325, device='cuda:0')",tensor(30.7600)
26 | 24,"tensor(1.8225, device='cuda:0')",0.0,tensor(32.5100),"tensor(2.7612, device='cuda:0')",tensor(16.9500)
27 | 25,"tensor(1.7819, device='cuda:0')",0.0,tensor(33.8340),"tensor(2.0891, device='cuda:0')",tensor(28.2000)
28 | 26,"tensor(1.7530, device='cuda:0')",0.0,tensor(35.2620),"tensor(3.0886, device='cuda:0')",tensor(15.7500)
29 | 27,"tensor(1.7344, device='cuda:0')",0.0,tensor(35.9620),"tensor(3.1807, device='cuda:0')",tensor(17.5200)
30 | 28,"tensor(1.7010, device='cuda:0')",0.0,tensor(36.9760),"tensor(1.8512, device='cuda:0')",tensor(32.6100)
31 | 29,"tensor(1.5543, device='cuda:0')",0.0,tensor(42.4600),"tensor(1.6730, device='cuda:0')",tensor(38.7000)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_1.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.8619, device='cuda:0')",0.0,tensor(33.8036),"tensor(1.3804, device='cuda:0')",tensor(51.5800)
 3 | 1,"tensor(1.4982, device='cuda:0')",0.0,tensor(47.6485),"tensor(1.2721, device='cuda:0')",tensor(56.7600)
 4 | 2,"tensor(1.3898, device='cuda:0')",0.0,tensor(52.0369),"tensor(1.1117, device='cuda:0')",tensor(64.9300)
 5 | 3,"tensor(1.3058, device='cuda:0')",0.0,tensor(55.1306),"tensor(0.9486, device='cuda:0')",tensor(71.2200)
 6 | 4,"tensor(1.2607, device='cuda:0')",0.0,tensor(57.0095),"tensor(1.0737, device='cuda:0')",tensor(65.9600)
 7 | 5,"tensor(1.2342, device='cuda:0')",0.0,tensor(57.9863),"tensor(1.0367, device='cuda:0')",tensor(67.0500)
 8 | 6,"tensor(1.2004, device='cuda:0')",0.0,tensor(59.0677),"tensor(1.0099, device='cuda:0')",tensor(68.6700)
 9 | 7,"tensor(1.1755, device='cuda:0')",0.0,tensor(59.9862),"tensor(0.8343, device='cuda:0')",tensor(75.2200)
10 | 8,"tensor(1.1527, device='cuda:0')",0.0,tensor(61.4485),"tensor(0.9027, device='cuda:0')",tensor(72.7400)
11 | 9,"tensor(1.1438, device='cuda:0')",0.0,tensor(61.4357),"tensor(0.8255, device='cuda:0')",tensor(75.1500)
12 | 10,"tensor(1.1069, device='cuda:0')",0.0,tensor(63.1542),"tensor(0.8173, device='cuda:0')",tensor(74.4400)
13 | 11,"tensor(1.1128, device='cuda:0')",0.0,tensor(62.5874),"tensor(0.8894, device='cuda:0')",tensor(74.8200)
14 | 12,"tensor(1.1052, device='cuda:0')",0.0,tensor(63.1480),"tensor(0.7840, device='cuda:0')",tensor(76.5000)
15 | 13,"tensor(1.0902, device='cuda:0')",0.0,tensor(63.5684),"tensor(0.9525, device='cuda:0')",tensor(74.1600)
16 | 14,"tensor(1.0729, device='cuda:0')",0.0,tensor(64.0899),"tensor(0.7787, device='cuda:0')",tensor(75.4700)
17 | 15,"tensor(1.0845, device='cuda:0')",0.0,tensor(63.7330),"tensor(0.7887, device='cuda:0')",tensor(75.8800)
18 | 16,"tensor(1.0568, device='cuda:0')",0.0,tensor(64.9602),"tensor(0.8254, device='cuda:0')",tensor(77.1600)
19 | 17,"tensor(1.0605, device='cuda:0')",0.0,tensor(64.6255),"tensor(0.7816, device='cuda:0')",tensor(75.3900)
20 | 18,"tensor(1.0434, device='cuda:0')",0.0,tensor(65.2123),"tensor(0.8675, device='cuda:0')",tensor(74.2000)
21 | 19,"tensor(1.0560, device='cuda:0')",0.0,tensor(64.7336),"tensor(0.7365, device='cuda:0')",tensor(77.9200)
22 | 20,"tensor(1.0462, device='cuda:0')",0.0,tensor(65.3066),"tensor(0.7319, device='cuda:0')",tensor(77.6900)
23 | 21,"tensor(1.0569, device='cuda:0')",0.0,tensor(64.3693),"tensor(0.8474, device='cuda:0')",tensor(73.5000)
24 | 22,"tensor(1.0264, device='cuda:0')",0.0,tensor(65.7328),"tensor(0.7160, device='cuda:0')",tensor(78.1200)
25 | 23,"tensor(1.0208, device='cuda:0')",0.0,tensor(66.2139),"tensor(0.7352, device='cuda:0')",tensor(76.9700)
26 | 24,"tensor(1.0331, device='cuda:0')",0.0,tensor(65.7051),"tensor(0.7542, device='cuda:0')",tensor(76.3700)
27 | 25,"tensor(1.0031, device='cuda:0')",0.0,tensor(66.7205),"tensor(0.7212, device='cuda:0')",tensor(77.5600)
28 | 26,"tensor(1.0169, device='cuda:0')",0.0,tensor(66.5317),"tensor(0.7610, device='cuda:0')",tensor(77.4400)
29 | 27,"tensor(1.0109, device='cuda:0')",0.0,tensor(66.3433),"tensor(0.8155, device='cuda:0')",tensor(74.9300)
30 | 28,"tensor(1.0128, device='cuda:0')",0.0,tensor(66.4252),"tensor(0.7375, device='cuda:0')",tensor(78.9300)
31 | 29,"tensor(1.0011, device='cuda:0')",0.0,tensor(67.0343),"tensor(0.7180, device='cuda:0')",tensor(78.8500)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_2.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.7333, device='cuda:0')",0.0,tensor(39.3799),"tensor(1.1499, device='cuda:0')",tensor(61.7400)
 3 | 1,"tensor(1.3994, device='cuda:0')",0.0,tensor(51.3792),"tensor(0.9294, device='cuda:0')",tensor(69.9400)
 4 | 2,"tensor(1.3037, device='cuda:0')",0.0,tensor(55.3760),"tensor(0.8431, device='cuda:0')",tensor(72.7200)
 5 | 3,"tensor(1.2557, device='cuda:0')",0.0,tensor(56.8880),"tensor(0.9415, device='cuda:0')",tensor(70.2800)
 6 | 4,"tensor(1.2112, device='cuda:0')",0.0,tensor(58.3391),"tensor(0.8276, device='cuda:0')",tensor(74.7100)
 7 | 5,"tensor(1.1966, device='cuda:0')",0.0,tensor(58.7091),"tensor(0.9025, device='cuda:0')",tensor(76.0500)
 8 | 6,"tensor(1.1778, device='cuda:0')",0.0,tensor(59.5382),"tensor(0.8087, device='cuda:0')",tensor(76.6200)
 9 | 7,"tensor(1.1527, device='cuda:0')",0.0,tensor(60.4451),"tensor(0.8012, device='cuda:0')",tensor(76.3000)
10 | 8,"tensor(1.1488, device='cuda:0')",0.0,tensor(60.5774),"tensor(0.7704, device='cuda:0')",tensor(77.2800)
11 | 9,"tensor(1.1375, device='cuda:0')",0.0,tensor(61.0278),"tensor(0.8168, device='cuda:0')",tensor(75.4900)
12 | 10,"tensor(1.1269, device='cuda:0')",0.0,tensor(61.5662),"tensor(0.6920, device='cuda:0')",tensor(79.1700)
13 | 11,"tensor(1.1198, device='cuda:0')",0.0,tensor(61.5416),"tensor(0.8677, device='cuda:0')",tensor(75.0100)
14 | 12,"tensor(1.1440, device='cuda:0')",0.0,tensor(60.5554),"tensor(0.8204, device='cuda:0')",tensor(77.2300)
15 | 13,"tensor(1.1188, device='cuda:0')",0.0,tensor(61.5085),"tensor(0.8043, device='cuda:0')",tensor(79.0300)
16 | 14,"tensor(1.1159, device='cuda:0')",0.0,tensor(61.5262),"tensor(0.7955, device='cuda:0')",tensor(78.3400)
17 | 15,"tensor(1.1178, device='cuda:0')",0.0,tensor(61.5627),"tensor(0.8294, device='cuda:0')",tensor(78.3200)
18 | 16,"tensor(1.1095, device='cuda:0')",0.0,tensor(61.8756),"tensor(0.7635, device='cuda:0')",tensor(80.1700)
19 | 17,"tensor(1.0935, device='cuda:0')",0.0,tensor(62.4646),"tensor(0.9610, device='cuda:0')",tensor(77.0600)
20 | 18,"tensor(1.0986, device='cuda:0')",0.0,tensor(62.2386),"tensor(0.7500, device='cuda:0')",tensor(78.7500)
21 | 19,"tensor(1.1026, device='cuda:0')",0.0,tensor(62.0449),"tensor(0.9191, device='cuda:0')",tensor(74.9000)
22 | 20,"tensor(1.1038, device='cuda:0')",0.0,tensor(61.9949),"tensor(0.7838, device='cuda:0')",tensor(80.7900)
23 | 21,"tensor(1.0884, device='cuda:0')",0.0,tensor(62.6618),"tensor(0.7176, device='cuda:0')",tensor(79.9900)
24 | 22,"tensor(1.0818, device='cuda:0')",0.0,tensor(63.2573),"tensor(0.7445, device='cuda:0')",tensor(80.0500)
25 | 23,"tensor(1.0898, device='cuda:0')",0.0,tensor(62.4544),"tensor(0.7628, device='cuda:0')",tensor(77.1100)
26 | 24,"tensor(1.0846, device='cuda:0')",0.0,tensor(62.7730),"tensor(0.6846, device='cuda:0')",tensor(79.1600)
27 | 25,"tensor(1.0942, device='cuda:0')",0.0,tensor(62.2764),"tensor(0.7224, device='cuda:0')",tensor(82.3300)
28 | 26,"tensor(1.0587, device='cuda:0')",0.0,tensor(63.9383),"tensor(0.8021, device='cuda:0')",tensor(79.6800)
29 | 27,"tensor(1.0782, device='cuda:0')",0.0,tensor(62.7933),"tensor(0.6434, device='cuda:0')",tensor(81.9000)
30 | 28,"tensor(1.0600, device='cuda:0')",0.0,tensor(63.8398),"tensor(0.7467, device='cuda:0')",tensor(81.3600)
31 | 29,"tensor(1.0518, device='cuda:0')",0.0,tensor(64.3532),"tensor(0.6855, device='cuda:0')",tensor(81.1100)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_3.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.8809, device='cuda:0')",0.0,tensor(34.8699),"tensor(1.3911, device='cuda:0')",tensor(52.6000)
 3 | 1,"tensor(1.5065, device='cuda:0')",0.0,tensor(48.1545),"tensor(1.3242, device='cuda:0')",tensor(57.3100)
 4 | 2,"tensor(1.3962, device='cuda:0')",0.0,tensor(52.7664),"tensor(1.1488, device='cuda:0')",tensor(62.4500)
 5 | 3,"tensor(1.3444, device='cuda:0')",0.0,tensor(55.5993),"tensor(1.0894, device='cuda:0')",tensor(66.3500)
 6 | 4,"tensor(1.2820, device='cuda:0')",0.0,tensor(57.0650),"tensor(1.2615, device='cuda:0')",tensor(57.5000)
 7 | 5,"tensor(1.2676, device='cuda:0')",0.0,tensor(57.9336),"tensor(0.9373, device='cuda:0')",tensor(71.1500)
 8 | 6,"tensor(1.2373, device='cuda:0')",0.0,tensor(58.6707),"tensor(1.1060, device='cuda:0')",tensor(68.0700)
 9 | 7,"tensor(1.1939, device='cuda:0')",0.0,tensor(60.6067),"tensor(1.0972, device='cuda:0')",tensor(67.8900)
10 | 8,"tensor(1.1939, device='cuda:0')",0.0,tensor(60.5827),"tensor(0.9282, device='cuda:0')",tensor(73.8600)
11 | 9,"tensor(1.1518, device='cuda:0')",0.0,tensor(62.1907),"tensor(0.9273, device='cuda:0')",tensor(72.4700)
12 | 10,"tensor(1.1649, device='cuda:0')",0.0,tensor(61.7164),"tensor(0.9758, device='cuda:0')",tensor(69.7300)
13 | 11,"tensor(1.1431, device='cuda:0')",0.0,tensor(62.9341),"tensor(0.9097, device='cuda:0')",tensor(74.4200)
14 | 12,"tensor(1.1476, device='cuda:0')",0.0,tensor(62.7877),"tensor(0.9629, device='cuda:0')",tensor(73.8700)
15 | 13,"tensor(1.1407, device='cuda:0')",0.0,tensor(63.4019),"tensor(0.8294, device='cuda:0')",tensor(75.0800)
16 | 14,"tensor(1.1065, device='cuda:0')",0.0,tensor(64.3079),"tensor(1.1083, device='cuda:0')",tensor(65.9900)
17 | 15,"tensor(1.1262, device='cuda:0')",0.0,tensor(63.1819),"tensor(0.9628, device='cuda:0')",tensor(69.4500)
18 | 16,"tensor(1.1226, device='cuda:0')",0.0,tensor(64.0045),"tensor(0.8510, device='cuda:0')",tensor(77.3200)
19 | 17,"tensor(1.0906, device='cuda:0')",0.0,tensor(64.7494),"tensor(0.9986, device='cuda:0')",tensor(74.8900)
20 | 18,"tensor(1.1165, device='cuda:0')",0.0,tensor(63.7127),"tensor(0.8170, device='cuda:0')",tensor(76.5200)
21 | 19,"tensor(1.0888, device='cuda:0')",0.0,tensor(65.2943),"tensor(0.7733, device='cuda:0')",tensor(78.0600)
22 | 20,"tensor(1.0663, device='cuda:0')",0.0,tensor(65.6215),"tensor(0.9420, device='cuda:0')",tensor(75.6900)
23 | 21,"tensor(1.0590, device='cuda:0')",0.0,tensor(66.2072),"tensor(0.9275, device='cuda:0')",tensor(74.3500)
24 | 22,"tensor(1.0551, device='cuda:0')",0.0,tensor(66.5175),"tensor(0.7751, device='cuda:0')",tensor(76.9400)
25 | 23,"tensor(1.0443, device='cuda:0')",0.0,tensor(66.5285),"tensor(0.8900, device='cuda:0')",tensor(74.5400)
26 | 24,"tensor(1.0459, device='cuda:0')",0.0,tensor(67.2152),"tensor(0.8153, device='cuda:0')",tensor(76.8400)
27 | 25,"tensor(1.0383, device='cuda:0')",0.0,tensor(66.9208),"tensor(0.8578, device='cuda:0')",tensor(78.5000)
28 | 26,"tensor(1.0275, device='cuda:0')",0.0,tensor(66.9116),"tensor(0.8216, device='cuda:0')",tensor(76.6500)
29 | 27,"tensor(1.0292, device='cuda:0')",0.0,tensor(67.0629),"tensor(0.8585, device='cuda:0')",tensor(75.7100)
30 | 28,"tensor(1.0197, device='cuda:0')",0.0,tensor(67.4291),"tensor(0.8267, device='cuda:0')",tensor(77.0400)
31 | 29,"tensor(1.0209, device='cuda:0')",0.0,tensor(67.4399),"tensor(0.8536, device='cuda:0')",tensor(77.8800)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_4.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.8195, device='cuda:0')",0.0,tensor(37.0720),"tensor(1.2780, device='cuda:0')",tensor(57.6400)
 3 | 1,"tensor(1.4345, device='cuda:0')",0.0,tensor(50.7976),"tensor(1.5002, device='cuda:0')",tensor(53.0400)
 4 | 2,"tensor(1.3146, device='cuda:0')",0.0,tensor(55.4534),"tensor(1.0494, device='cuda:0')",tensor(69.7100)
 5 | 3,"tensor(1.2690, device='cuda:0')",0.0,tensor(56.8110),"tensor(0.9850, device='cuda:0')",tensor(72.5800)
 6 | 4,"tensor(1.2196, device='cuda:0')",0.0,tensor(58.6434),"tensor(0.9985, device='cuda:0')",tensor(75.2000)
 7 | 5,"tensor(1.2061, device='cuda:0')",0.0,tensor(58.8863),"tensor(1.4348, device='cuda:0')",tensor(57.9500)
 8 | 6,"tensor(1.1976, device='cuda:0')",0.0,tensor(59.3146),"tensor(1.0232, device='cuda:0')",tensor(72.1400)
 9 | 7,"tensor(1.1794, device='cuda:0')",0.0,tensor(59.9166),"tensor(0.9893, device='cuda:0')",tensor(73.5000)
10 | 8,"tensor(1.1581, device='cuda:0')",0.0,tensor(60.9214),"tensor(1.0609, device='cuda:0')",tensor(73.5500)
11 | 9,"tensor(1.1470, device='cuda:0')",0.0,tensor(61.0844),"tensor(0.8182, device='cuda:0')",tensor(77.2100)
12 | 10,"tensor(1.1350, device='cuda:0')",0.0,tensor(61.7524),"tensor(0.9231, device='cuda:0')",tensor(76.4500)
13 | 11,"tensor(1.1393, device='cuda:0')",0.0,tensor(61.7179),"tensor(0.8716, device='cuda:0')",tensor(75.6600)
14 | 12,"tensor(1.1489, device='cuda:0')",0.0,tensor(60.7491),"tensor(0.9923, device='cuda:0')",tensor(73.5100)
15 | 13,"tensor(1.1502, device='cuda:0')",0.0,tensor(61.0766),"tensor(1.0965, device='cuda:0')",tensor(66.0500)
16 | 14,"tensor(1.1292, device='cuda:0')",0.0,tensor(61.5851),"tensor(0.9852, device='cuda:0')",tensor(76.9900)
17 | 15,"tensor(1.1075, device='cuda:0')",0.0,tensor(62.6161),"tensor(0.7420, device='cuda:0')",tensor(79.5800)
18 | 16,"tensor(1.1258, device='cuda:0')",0.0,tensor(62.1972),"tensor(1.0127, device='cuda:0')",tensor(74.4300)
19 | 17,"tensor(1.1076, device='cuda:0')",0.0,tensor(62.2725),"tensor(0.7605, device='cuda:0')",tensor(81.1600)
20 | 18,"tensor(1.0996, device='cuda:0')",0.0,tensor(62.6641),"tensor(0.7235, device='cuda:0')",tensor(81.5700)
21 | 19,"tensor(1.1162, device='cuda:0')",0.0,tensor(62.1934),"tensor(0.7670, device='cuda:0')",tensor(78.8400)
22 | 20,"tensor(1.0989, device='cuda:0')",0.0,tensor(62.4432),"tensor(0.9550, device='cuda:0')",tensor(76.0800)
23 | 21,"tensor(1.1023, device='cuda:0')",0.0,tensor(62.6439),"tensor(0.6734, device='cuda:0')",tensor(81.0800)
24 | 22,"tensor(1.1070, device='cuda:0')",0.0,tensor(61.7068),"tensor(0.7493, device='cuda:0')",tensor(80.0400)
25 | 23,"tensor(1.0937, device='cuda:0')",0.0,tensor(62.6318),"tensor(0.7509, device='cuda:0')",tensor(78.7200)
26 | 24,"tensor(1.0963, device='cuda:0')",0.0,tensor(63.1412),"tensor(0.7149, device='cuda:0')",tensor(78.7300)
27 | 25,"tensor(1.0731, device='cuda:0')",0.0,tensor(63.3816),"tensor(0.7929, device='cuda:0')",tensor(78.1600)
28 | 26,"tensor(1.0687, device='cuda:0')",0.0,tensor(63.5898),"tensor(0.7151, device='cuda:0')",tensor(80.9300)
29 | 27,"tensor(1.0815, device='cuda:0')",0.0,tensor(63.2100),"tensor(0.6483, device='cuda:0')",tensor(81.8100)
30 | 28,"tensor(1.0759, device='cuda:0')",0.0,tensor(63.6988),"tensor(0.7087, device='cuda:0')",tensor(81.4700)
31 | 29,"tensor(1.0640, device='cuda:0')",0.0,tensor(64.1290),"tensor(0.7718, device='cuda:0')",tensor(80.0900)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_1.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(2.2877, device='cuda:0')",0.0,tensor(21.4552),"tensor(1.8986, device='cuda:0')",tensor(30.5100)
 3 | 1,"tensor(1.9524, device='cuda:0')",0.0,tensor(30.9192),"tensor(1.7501, device='cuda:0')",tensor(38.8000)
 4 | 2,"tensor(1.8855, device='cuda:0')",0.0,tensor(34.1426),"tensor(1.8834, device='cuda:0')",tensor(33.3300)
 5 | 3,"tensor(1.8253, device='cuda:0')",0.0,tensor(37.0241),"tensor(1.6134, device='cuda:0')",tensor(45.4500)
 6 | 4,"tensor(1.9348, device='cuda:0')",0.0,tensor(31.7827),"tensor(1.9646, device='cuda:0')",tensor(27.4900)
 7 | 5,"tensor(1.8398, device='cuda:0')",0.0,tensor(36.3115),"tensor(1.5600, device='cuda:0')",tensor(49.8100)
 8 | 6,"tensor(1.7886, device='cuda:0')",0.0,tensor(38.7104),"tensor(1.6014, device='cuda:0')",tensor(44.6800)
 9 | 7,"tensor(1.7828, device='cuda:0')",0.0,tensor(39.3375),"tensor(1.5151, device='cuda:0')",tensor(49.4600)
10 | 8,"tensor(1.7540, device='cuda:0')",0.0,tensor(40.5212),"tensor(1.6683, device='cuda:0')",tensor(44.1300)
11 | 9,"tensor(1.7623, device='cuda:0')",0.0,tensor(39.9576),"tensor(1.6402, device='cuda:0')",tensor(43.8600)
12 | 10,"tensor(1.7885, device='cuda:0')",0.0,tensor(38.9384),"tensor(2.4766, device='cuda:0')",tensor(15.3800)
13 | 11,"tensor(1.8959, device='cuda:0')",0.0,tensor(33.9427),"tensor(1.6782, device='cuda:0')",tensor(41.1700)
14 | 12,"tensor(1.8126, device='cuda:0')",0.0,tensor(37.8754),"tensor(1.5495, device='cuda:0')",tensor(46.3900)
15 | 13,"tensor(1.7411, device='cuda:0')",0.0,tensor(41.1904),"tensor(1.5361, device='cuda:0')",tensor(48.6700)
16 | 14,"tensor(1.7836, device='cuda:0')",0.0,tensor(39.3387),"tensor(1.5355, device='cuda:0')",tensor(50.3900)
17 | 15,"tensor(1.7489, device='cuda:0')",0.0,tensor(40.8408),"tensor(1.5699, device='cuda:0')",tensor(45.6300)
18 | 16,"tensor(1.7543, device='cuda:0')",0.0,tensor(40.5420),"tensor(2.2198, device='cuda:0')",tensor(43.3000)
19 | 17,"tensor(1.7222, device='cuda:0')",0.0,tensor(42.4087),"tensor(1.4898, device='cuda:0')",tensor(51.3100)
20 | 18,"tensor(1.8517, device='cuda:0')",0.0,tensor(36.3109),"tensor(1.5565, device='cuda:0')",tensor(46.4700)
21 | 19,"tensor(1.8607, device='cuda:0')",0.0,tensor(35.7716),"tensor(1.7921, device='cuda:0')",tensor(37.4400)
22 | 20,"tensor(1.7999, device='cuda:0')",0.0,tensor(38.6506),"tensor(1.4950, device='cuda:0')",tensor(49.7900)
23 | 21,"tensor(1.7446, device='cuda:0')",0.0,tensor(41.0905),"tensor(1.4899, device='cuda:0')",tensor(49.6300)
24 | 22,"tensor(1.6839, device='cuda:0')",0.0,tensor(43.4721),"tensor(1.5370, device='cuda:0')",tensor(48.2100)
25 | 23,"tensor(1.7687, device='cuda:0')",0.0,tensor(40.1902),"tensor(1.4819, device='cuda:0')",tensor(50.0900)
26 | 24,"tensor(1.7312, device='cuda:0')",0.0,tensor(41.9068),"tensor(1.4197, device='cuda:0')",tensor(53.6700)
27 | 25,"tensor(1.7332, device='cuda:0')",0.0,tensor(41.6716),"tensor(1.2940, device='cuda:0')",tensor(58.0100)
28 | 26,"tensor(1.6708, device='cuda:0')",0.0,tensor(44.7846),"tensor(1.3072, device='cuda:0')",tensor(57.9900)
29 | 27,"tensor(1.6245, device='cuda:0')",0.0,tensor(46.8689),"tensor(1.2321, device='cuda:0')",tensor(61.3700)
30 | 28,"tensor(1.6105, device='cuda:0')",0.0,tensor(47.2492),"tensor(1.6507, device='cuda:0')",tensor(42.4700)
31 | 29,"tensor(1.6017, device='cuda:0')",0.0,tensor(47.6939),"tensor(1.2439, device='cuda:0')",tensor(61.4700)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_2.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(2.5988, device='cuda:0')",0.0,tensor(16.4150),"tensor(2.1065, device='cuda:0')",tensor(24.0700)
 3 | 1,"tensor(2.1234, device='cuda:0')",0.0,tensor(22.1954),"tensor(1.9791, device='cuda:0')",tensor(26.8100)
 4 | 2,"tensor(2.0674, device='cuda:0')",0.0,tensor(24.8577),"tensor(1.9732, device='cuda:0')",tensor(28.4500)
 5 | 3,"tensor(2.0405, device='cuda:0')",0.0,tensor(26.0568),"tensor(2.0376, device='cuda:0')",tensor(22.8200)
 6 | 4,"tensor(1.9996, device='cuda:0')",0.0,tensor(28.6330),"tensor(1.8742, device='cuda:0')",tensor(32.5600)
 7 | 5,"tensor(1.9977, device='cuda:0')",0.0,tensor(28.8430),"tensor(2.1198, device='cuda:0')",tensor(20.6700)
 8 | 6,"tensor(2.0493, device='cuda:0')",0.0,tensor(25.7799),"tensor(2.2385, device='cuda:0')",tensor(19.0900)
 9 | 7,"tensor(2.0589, device='cuda:0')",0.0,tensor(25.2904),"tensor(3.0567, device='cuda:0')",tensor(13.4100)
10 | 8,"tensor(2.0175, device='cuda:0')",0.0,tensor(27.3789),"tensor(1.8305, device='cuda:0')",tensor(35.8500)
11 | 9,"tensor(2.0298, device='cuda:0')",0.0,tensor(26.2188),"tensor(1.9093, device='cuda:0')",tensor(28.9200)
12 | 10,"tensor(2.0976, device='cuda:0')",0.0,tensor(22.3591),"tensor(1.9503, device='cuda:0')",tensor(28.7000)
13 | 11,"tensor(2.0748, device='cuda:0')",0.0,tensor(24.0372),"tensor(2.0584, device='cuda:0')",tensor(23.9000)
14 | 12,"tensor(2.0370, device='cuda:0')",0.0,tensor(25.6861),"tensor(1.7837, device='cuda:0')",tensor(35.7100)
15 | 13,"tensor(2.0515, device='cuda:0')",0.0,tensor(24.8760),"tensor(1.9619, device='cuda:0')",tensor(31.7300)
16 | 14,"tensor(2.0787, device='cuda:0')",0.0,tensor(23.7376),"tensor(2.0201, device='cuda:0')",tensor(26.3000)
17 | 15,"tensor(2.0419, device='cuda:0')",0.0,tensor(25.5730),"tensor(1.8605, device='cuda:0')",tensor(28.4800)
18 | 16,"tensor(2.0032, device='cuda:0')",0.0,tensor(27.5435),"tensor(2.5678, device='cuda:0')",tensor(20.5900)
19 | 17,"tensor(2.0786, device='cuda:0')",0.0,tensor(23.8160),"tensor(1.9983, device='cuda:0')",tensor(25.7300)
20 | 18,"tensor(2.0086, device='cuda:0')",0.0,tensor(27.0779),"tensor(1.8201, device='cuda:0')",tensor(34.4400)
21 | 19,"tensor(1.9817, device='cuda:0')",0.0,tensor(28.8634),"tensor(1.7823, device='cuda:0')",tensor(36.3100)
22 | 20,"tensor(1.9784, device='cuda:0')",0.0,tensor(28.6708),"tensor(2.1509, device='cuda:0')",tensor(21.9000)
23 | 21,"tensor(1.9974, device='cuda:0')",0.0,tensor(28.1345),"tensor(1.8063, device='cuda:0')",tensor(35.5100)
24 | 22,"tensor(1.9424, device='cuda:0')",0.0,tensor(30.4217),"tensor(1.7292, device='cuda:0')",tensor(39.7000)
25 | 23,"tensor(1.9676, device='cuda:0')",0.0,tensor(29.5585),"tensor(1.7449, device='cuda:0')",tensor(36.6700)
26 | 24,"tensor(1.9825, device='cuda:0')",0.0,tensor(28.9549),"tensor(2.0882, device='cuda:0')",tensor(24.8400)
27 | 25,"tensor(2.0198, device='cuda:0')",0.0,tensor(27.3775),"tensor(1.8630, device='cuda:0')",tensor(33.4500)
28 | 26,"tensor(1.9847, device='cuda:0')",0.0,tensor(28.6669),"tensor(1.8658, device='cuda:0')",tensor(35.1400)
29 | 27,"tensor(1.9420, device='cuda:0')",0.0,tensor(30.4228),"tensor(1.6885, device='cuda:0')",tensor(40.3000)
30 | 28,"tensor(1.9979, device='cuda:0')",0.0,tensor(27.8036),"tensor(1.9690, device='cuda:0')",tensor(26.8700)
31 | 29,"tensor(2.0084, device='cuda:0')",0.0,tensor(26.5612),"tensor(1.9115, device='cuda:0')",tensor(30.1100)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_3.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(2.2490, device='cuda:0')",0.0,tensor(21.8189),"tensor(2.0380, device='cuda:0')",tensor(27.1200)
 3 | 1,"tensor(1.9387, device='cuda:0')",0.0,tensor(29.3909),"tensor(1.9303, device='cuda:0')",tensor(31.1300)
 4 | 2,"tensor(1.8109, device='cuda:0')",0.0,tensor(36.0116),"tensor(1.6713, device='cuda:0')",tensor(39.9300)
 5 | 3,"tensor(1.7671, device='cuda:0')",0.0,tensor(38.3374),"tensor(1.6934, device='cuda:0')",tensor(40.4100)
 6 | 4,"tensor(1.7966, device='cuda:0')",0.0,tensor(37.2334),"tensor(1.8982, device='cuda:0')",tensor(31.0800)
 7 | 5,"tensor(1.7525, device='cuda:0')",0.0,tensor(38.9498),"tensor(1.7087, device='cuda:0')",tensor(39.7500)
 8 | 6,"tensor(1.7477, device='cuda:0')",0.0,tensor(39.3013),"tensor(1.9661, device='cuda:0')",tensor(27.9700)
 9 | 7,"tensor(1.9798, device='cuda:0')",0.0,tensor(27.6844),"tensor(2.6448, device='cuda:0')",tensor(15.8900)
10 | 8,"tensor(1.9137, device='cuda:0')",0.0,tensor(30.8651),"tensor(1.8522, device='cuda:0')",tensor(32.0600)
11 | 9,"tensor(1.8364, device='cuda:0')",0.0,tensor(34.7881),"tensor(1.7626, device='cuda:0')",tensor(35.0900)
12 | 10,"tensor(1.7537, device='cuda:0')",0.0,tensor(38.3701),"tensor(1.8617, device='cuda:0')",tensor(32.2400)
13 | 11,"tensor(1.7696, device='cuda:0')",0.0,tensor(38.2061),"tensor(2.3542, device='cuda:0')",tensor(21.3600)
14 | 12,"tensor(1.7851, device='cuda:0')",0.0,tensor(37.1738),"tensor(1.8924, device='cuda:0')",tensor(31.7200)
15 | 13,"tensor(1.8270, device='cuda:0')",0.0,tensor(35.4236),"tensor(2.1773, device='cuda:0')",tensor(21.2600)
16 | 14,"tensor(1.8555, device='cuda:0')",0.0,tensor(33.7255),"tensor(2.0021, device='cuda:0')",tensor(28.4100)
17 | 15,"tensor(1.7799, device='cuda:0')",0.0,tensor(37.4785),"tensor(2.4709, device='cuda:0')",tensor(19.6500)
18 | 16,"tensor(1.8668, device='cuda:0')",0.0,tensor(33.5824),"tensor(1.7088, device='cuda:0')",tensor(39.9200)
19 | 17,"tensor(1.8082, device='cuda:0')",0.0,tensor(36.2666),"tensor(1.9701, device='cuda:0')",tensor(34.8700)
20 | 18,"tensor(1.7453, device='cuda:0')",0.0,tensor(39.2025),"tensor(1.6074, device='cuda:0')",tensor(41.0800)
21 | 19,"tensor(1.7906, device='cuda:0')",0.0,tensor(37.2583),"tensor(1.8329, device='cuda:0')",tensor(33.3000)
22 | 20,"tensor(1.8071, device='cuda:0')",0.0,tensor(36.7620),"tensor(1.7982, device='cuda:0')",tensor(36.2900)
23 | 21,"tensor(1.8230, device='cuda:0')",0.0,tensor(35.6204),"tensor(1.9685, device='cuda:0')",tensor(28.9000)
24 | 22,"tensor(1.7635, device='cuda:0')",0.0,tensor(38.3622),"tensor(1.7303, device='cuda:0')",tensor(38.0600)
25 | 23,"tensor(1.7182, device='cuda:0')",0.0,tensor(40.4985),"tensor(1.8462, device='cuda:0')",tensor(34.1400)
26 | 24,"tensor(1.6942, device='cuda:0')",0.0,tensor(41.4110),"tensor(1.5842, device='cuda:0')",tensor(45.4500)
27 | 25,"tensor(1.6960, device='cuda:0')",0.0,tensor(41.3809),"tensor(1.6349, device='cuda:0')",tensor(42.6300)
28 | 26,"tensor(1.7126, device='cuda:0')",0.0,tensor(40.4428),"tensor(1.9265, device='cuda:0')",tensor(29.4000)
29 | 27,"tensor(1.6109, device='cuda:0')",0.0,tensor(45.3281),"tensor(1.5128, device='cuda:0')",tensor(49.6700)
30 | 28,"tensor(1.5759, device='cuda:0')",0.0,tensor(46.9403),"tensor(1.3375, device='cuda:0')",tensor(53.4300)
31 | 29,"tensor(1.5298, device='cuda:0')",0.0,tensor(48.8387),"tensor(2.6476, device='cuda:0')",tensor(15.5000)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_4.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(2.5290, device='cuda:0')",0.0,tensor(13.9961),"tensor(2.4831, device='cuda:0')",tensor(12.3400)
 3 | 1,"tensor(2.1623, device='cuda:0')",0.0,tensor(19.7158),"tensor(2.0269, device='cuda:0')",tensor(25.4800)
 4 | 2,"tensor(2.0835, device='cuda:0')",0.0,tensor(23.1430),"tensor(2.1210, device='cuda:0')",tensor(22.5500)
 5 | 3,"tensor(2.0396, device='cuda:0')",0.0,tensor(25.3663),"tensor(1.9627, device='cuda:0')",tensor(26.9200)
 6 | 4,"tensor(1.9988, device='cuda:0')",0.0,tensor(26.9953),"tensor(1.9151, device='cuda:0')",tensor(31.6100)
 7 | 5,"tensor(1.9487, device='cuda:0')",0.0,tensor(29.6776),"tensor(1.8142, device='cuda:0')",tensor(33.3800)
 8 | 6,"tensor(1.9233, device='cuda:0')",0.0,tensor(30.8187),"tensor(1.7458, device='cuda:0')",tensor(36.6100)
 9 | 7,"tensor(1.8914, device='cuda:0')",0.0,tensor(32.2555),"tensor(2.1836, device='cuda:0')",tensor(23.9400)
10 | 8,"tensor(1.9096, device='cuda:0')",0.0,tensor(31.4838),"tensor(1.9482, device='cuda:0')",tensor(29.3700)
11 | 9,"tensor(1.8889, device='cuda:0')",0.0,tensor(32.2597),"tensor(2.0828, device='cuda:0')",tensor(24.2100)
12 | 10,"tensor(1.8749, device='cuda:0')",0.0,tensor(32.6502),"tensor(1.7281, device='cuda:0')",tensor(39.9700)
13 | 11,"tensor(1.8640, device='cuda:0')",0.0,tensor(33.4449),"tensor(1.7451, device='cuda:0')",tensor(36.1600)
14 | 12,"tensor(1.8373, device='cuda:0')",0.0,tensor(34.6387),"tensor(1.6598, device='cuda:0')",tensor(41.2200)
15 | 13,"tensor(1.8216, device='cuda:0')",0.0,tensor(35.6718),"tensor(1.8085, device='cuda:0')",tensor(35.6300)
16 | 14,"tensor(1.8988, device='cuda:0')",0.0,tensor(32.2875),"tensor(2.0386, device='cuda:0')",tensor(32.9800)
17 | 15,"tensor(1.9598, device='cuda:0')",0.0,tensor(28.9572),"tensor(1.8898, device='cuda:0')",tensor(27.9000)
18 | 16,"tensor(1.9162, device='cuda:0')",0.0,tensor(31.3641),"tensor(2.2362, device='cuda:0')",tensor(21.0600)
19 | 17,"tensor(1.9672, device='cuda:0')",0.0,tensor(28.2926),"tensor(2.4693, device='cuda:0')",tensor(19.2000)
20 | 18,"tensor(1.9095, device='cuda:0')",0.0,tensor(30.7386),"tensor(1.8700, device='cuda:0')",tensor(31.7700)
21 | 19,"tensor(2.0299, device='cuda:0')",0.0,tensor(26.0223),"tensor(2.3860, device='cuda:0')",tensor(17.8100)
22 | 20,"tensor(2.1327, device='cuda:0')",0.0,tensor(21.0535),"tensor(2.1328, device='cuda:0')",tensor(23.5400)
23 | 21,"tensor(2.0320, device='cuda:0')",0.0,tensor(25.6313),"tensor(1.8574, device='cuda:0')",tensor(31.4700)
24 | 22,"tensor(1.9575, device='cuda:0')",0.0,tensor(28.9095),"tensor(1.9248, device='cuda:0')",tensor(30.7500)
25 | 23,"tensor(1.9607, device='cuda:0')",0.0,tensor(29.1441),"tensor(1.8371, device='cuda:0')",tensor(33.9000)
26 | 24,"tensor(1.9145, device='cuda:0')",0.0,tensor(30.7228),"tensor(3.1385, device='cuda:0')",tensor(13.3300)
27 | 25,"tensor(1.9261, device='cuda:0')",0.0,tensor(30.4129),"tensor(1.8249, device='cuda:0')",tensor(30.9200)
28 | 26,"tensor(1.9393, device='cuda:0')",0.0,tensor(30.0298),"tensor(1.9120, device='cuda:0')",tensor(31.6800)
29 | 27,"tensor(1.9288, device='cuda:0')",0.0,tensor(30.3117),"tensor(2.2512, device='cuda:0')",tensor(17.3600)
30 | 28,"tensor(1.9740, device='cuda:0')",0.0,tensor(26.8164),"tensor(1.7792, device='cuda:0')",tensor(33.3700)
31 | 29,"tensor(1.9213, device='cuda:0')",0.0,tensor(30.0489),"tensor(1.7236, device='cuda:0')",tensor(37.6600)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-4/loss-test-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/loss-test-with-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-4/loss-test-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/loss-test-without-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-4/test-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/test-accuracy-with-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-4/test-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/test-accuracy-without-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-4/train-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/train-accuracy-with-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-4/train-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/train-accuracy-without-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_baseline_13.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.1174, device='cuda:0')",0.0,tensor(62.1440),"tensor(1.0998, device='cuda:0')",tensor(65.9500)
 3 | 1,"tensor(0.6976, device='cuda:0')",0.0,tensor(76.6220),"tensor(0.7366, device='cuda:0')",tensor(76.8200)
 4 | 2,"tensor(0.5567, device='cuda:0')",0.0,tensor(81.5380),"tensor(0.6900, device='cuda:0')",tensor(77.7300)
 5 | 3,"tensor(0.4774, device='cuda:0')",0.0,tensor(84.0140),"tensor(0.6509, device='cuda:0')",tensor(79.5800)
 6 | 4,"tensor(0.4144, device='cuda:0')",0.0,tensor(86.1160),"tensor(0.7071, device='cuda:0')",tensor(79.2800)
 7 | 5,"tensor(0.3718, device='cuda:0')",0.0,tensor(87.4440),"tensor(0.8875, device='cuda:0')",tensor(75.5800)
 8 | 6,"tensor(0.3258, device='cuda:0')",0.0,tensor(88.8200),"tensor(0.7190, device='cuda:0')",tensor(79.7200)
 9 | 7,"tensor(0.2987, device='cuda:0')",0.0,tensor(89.7940),"tensor(0.6827, device='cuda:0')",tensor(80.2900)
10 | 8,"tensor(0.2675, device='cuda:0')",0.0,tensor(90.8120),"tensor(0.6776, device='cuda:0')",tensor(80.4700)
11 | 9,"tensor(0.2377, device='cuda:0')",0.0,tensor(91.9300),"tensor(0.6298, device='cuda:0')",tensor(82.1400)
12 | 10,"tensor(0.2226, device='cuda:0')",0.0,tensor(92.3820),"tensor(0.6568, device='cuda:0')",tensor(81.3000)
13 | 11,"tensor(0.1994, device='cuda:0')",0.0,tensor(93.2340),"tensor(0.6691, device='cuda:0')",tensor(81.8800)
14 | 12,"tensor(0.1838, device='cuda:0')",0.0,tensor(93.7300),"tensor(0.7464, device='cuda:0')",tensor(80.8300)
15 | 13,"tensor(0.1758, device='cuda:0')",0.0,tensor(94.0700),"tensor(0.7183, device='cuda:0')",tensor(81.5600)
16 | 14,"tensor(0.1605, device='cuda:0')",0.0,tensor(94.5300),"tensor(0.7699, device='cuda:0')",tensor(81.1300)
17 | 15,"tensor(0.1528, device='cuda:0')",0.0,tensor(94.8600),"tensor(0.6828, device='cuda:0')",tensor(82.4600)
18 | 16,"tensor(0.1449, device='cuda:0')",0.0,tensor(95.0100),"tensor(0.7295, device='cuda:0')",tensor(82.0800)
19 | 17,"tensor(0.1357, device='cuda:0')",0.0,tensor(95.3840),"tensor(0.6751, device='cuda:0')",tensor(82.7300)
20 | 18,"tensor(0.1281, device='cuda:0')",0.0,tensor(95.6720),"tensor(0.7107, device='cuda:0')",tensor(82.9000)
21 | 19,"tensor(0.1224, device='cuda:0')",0.0,tensor(95.7900),"tensor(0.7698, device='cuda:0')",tensor(81.5900)
22 | 20,"tensor(0.1124, device='cuda:0')",0.0,tensor(96.1800),"tensor(0.8016, device='cuda:0')",tensor(80.9400)
23 | 21,"tensor(0.1125, device='cuda:0')",0.0,tensor(96.2200),"tensor(0.7759, device='cuda:0')",tensor(81.8900)
24 | 22,"tensor(0.1068, device='cuda:0')",0.0,tensor(96.3560),"tensor(0.7618, device='cuda:0')",tensor(82.1800)
25 | 23,"tensor(0.1021, device='cuda:0')",0.0,tensor(96.5300),"tensor(0.8303, device='cuda:0')",tensor(81.1600)
26 | 24,"tensor(0.0936, device='cuda:0')",0.0,tensor(96.8120),"tensor(0.8282, device='cuda:0')",tensor(81.7000)
27 | 25,"tensor(0.0946, device='cuda:0')",0.0,tensor(96.7540),"tensor(0.8808, device='cuda:0')",tensor(81.1200)
28 | 26,"tensor(0.0903, device='cuda:0')",0.0,tensor(96.9500),"tensor(0.8434, device='cuda:0')",tensor(82.0400)
29 | 27,"tensor(0.0870, device='cuda:0')",0.0,tensor(97.0700),"tensor(0.8061, device='cuda:0')",tensor(82.3100)
30 | 28,"tensor(0.0831, device='cuda:0')",0.0,tensor(97.1400),"tensor(0.7824, device='cuda:0')",tensor(82.5900)
31 | 29,"tensor(0.0821, device='cuda:0')",0.0,tensor(97.2800),"tensor(0.7807, device='cuda:0')",tensor(82.9600)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_baseline_24.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.2443, device='cuda:0')",0.0,tensor(57.3480),"tensor(1.0896, device='cuda:0')",tensor(69.4200)
 3 | 1,"tensor(0.8371, device='cuda:0')",0.0,tensor(72.3440),"tensor(0.8776, device='cuda:0')",tensor(72.6300)
 4 | 2,"tensor(0.7215, device='cuda:0')",0.0,tensor(75.8100),"tensor(0.7997, device='cuda:0')",tensor(74.9500)
 5 | 3,"tensor(0.6623, device='cuda:0')",0.0,tensor(77.9160),"tensor(0.7626, device='cuda:0')",tensor(74.9000)
 6 | 4,"tensor(0.6108, device='cuda:0')",0.0,tensor(79.4600),"tensor(0.6899, device='cuda:0')",tensor(78.3100)
 7 | 5,"tensor(0.5772, device='cuda:0')",0.0,tensor(80.5560),"tensor(0.6956, device='cuda:0')",tensor(77.9600)
 8 | 6,"tensor(0.5470, device='cuda:0')",0.0,tensor(81.6640),"tensor(0.5464, device='cuda:0')",tensor(82.1200)
 9 | 7,"tensor(0.5215, device='cuda:0')",0.0,tensor(82.3800),"tensor(0.6220, device='cuda:0')",tensor(79.6600)
10 | 8,"tensor(0.4999, device='cuda:0')",0.0,tensor(83.1800),"tensor(0.5817, device='cuda:0')",tensor(80.9500)
11 | 9,"tensor(0.4810, device='cuda:0')",0.0,tensor(83.7660),"tensor(0.5612, device='cuda:0')",tensor(81.9400)
12 | 10,"tensor(0.4606, device='cuda:0')",0.0,tensor(84.4180),"tensor(0.5870, device='cuda:0')",tensor(80.0800)
13 | 11,"tensor(0.4426, device='cuda:0')",0.0,tensor(85.0140),"tensor(0.6265, device='cuda:0')",tensor(80.0200)
14 | 12,"tensor(0.4267, device='cuda:0')",0.0,tensor(85.5280),"tensor(0.5357, device='cuda:0')",tensor(82.2200)
15 | 13,"tensor(0.4118, device='cuda:0')",0.0,tensor(85.8580),"tensor(0.5398, device='cuda:0')",tensor(82.6000)
16 | 14,"tensor(0.4024, device='cuda:0')",0.0,tensor(86.0660),"tensor(0.5989, device='cuda:0')",tensor(80.7400)
17 | 15,"tensor(0.3909, device='cuda:0')",0.0,tensor(86.6220),"tensor(0.5086, device='cuda:0')",tensor(83.2600)
18 | 16,"tensor(0.3736, device='cuda:0')",0.0,tensor(87.2380),"tensor(0.5118, device='cuda:0')",tensor(83.7200)
19 | 17,"tensor(0.3650, device='cuda:0')",0.0,tensor(87.5220),"tensor(0.5101, device='cuda:0')",tensor(83.6400)
20 | 18,"tensor(0.3528, device='cuda:0')",0.0,tensor(87.8760),"tensor(0.4698, device='cuda:0')",tensor(85.1200)
21 | 19,"tensor(0.3448, device='cuda:0')",0.0,tensor(88.1500),"tensor(0.5130, device='cuda:0')",tensor(83.4900)
22 | 20,"tensor(0.3308, device='cuda:0')",0.0,tensor(88.6460),"tensor(0.5559, device='cuda:0')",tensor(83.1100)
23 | 21,"tensor(0.3232, device='cuda:0')",0.0,tensor(88.9540),"tensor(0.5446, device='cuda:0')",tensor(82.6800)
24 | 22,"tensor(0.3126, device='cuda:0')",0.0,tensor(89.3020),"tensor(0.4592, device='cuda:0')",tensor(85.2100)
25 | 23,"tensor(0.3053, device='cuda:0')",0.0,tensor(89.3980),"tensor(0.5744, device='cuda:0')",tensor(82.6400)
26 | 24,"tensor(0.2983, device='cuda:0')",0.0,tensor(89.7100),"tensor(0.4908, device='cuda:0')",tensor(84.1000)
27 | 25,"tensor(0.2849, device='cuda:0')",0.0,tensor(90.1560),"tensor(0.5206, device='cuda:0')",tensor(83.9400)
28 | 26,"tensor(0.2834, device='cuda:0')",0.0,tensor(90.0800),"tensor(0.4873, device='cuda:0')",tensor(85.2400)
29 | 27,"tensor(0.2689, device='cuda:0')",0.0,tensor(90.5860),"tensor(0.4795, device='cuda:0')",tensor(85.2500)
30 | 28,"tensor(0.2666, device='cuda:0')",0.0,tensor(90.7340),"tensor(0.5007, device='cuda:0')",tensor(85.1100)
31 | 29,"tensor(0.2569, device='cuda:0')",0.0,tensor(91.1420),"tensor(0.5082, device='cuda:0')",tensor(84.2000)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_1.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.5489, device='cuda:0')",0.0,tensor(46.8700),"tensor(1.0325, device='cuda:0')",tensor(70.1200)
 3 | 1,"tensor(1.2682, device='cuda:0')",0.0,tensor(57.2556),"tensor(0.7428, device='cuda:0')",tensor(75.8100)
 4 | 2,"tensor(1.1674, device='cuda:0')",0.0,tensor(60.8273),"tensor(0.6898, device='cuda:0')",tensor(78.9600)
 5 | 3,"tensor(1.1057, device='cuda:0')",0.0,tensor(63.5734),"tensor(0.6742, device='cuda:0')",tensor(78.3000)
 6 | 4,"tensor(1.0625, device='cuda:0')",0.0,tensor(65.0731),"tensor(0.6306, device='cuda:0')",tensor(80.7000)
 7 | 5,"tensor(1.0479, device='cuda:0')",0.0,tensor(65.2568),"tensor(0.6129, device='cuda:0')",tensor(81.7700)
 8 | 6,"tensor(1.0239, device='cuda:0')",0.0,tensor(65.9444),"tensor(0.6028, device='cuda:0')",tensor(81.2800)
 9 | 7,"tensor(0.9993, device='cuda:0')",0.0,tensor(66.7977),"tensor(0.6886, device='cuda:0')",tensor(79.3700)
10 | 8,"tensor(0.9763, device='cuda:0')",0.0,tensor(67.9500),"tensor(0.6598, device='cuda:0')",tensor(81.3100)
11 | 9,"tensor(0.9645, device='cuda:0')",0.0,tensor(68.0379),"tensor(0.7042, device='cuda:0')",tensor(79.8300)
12 | 10,"tensor(0.9515, device='cuda:0')",0.0,tensor(68.5777),"tensor(0.6745, device='cuda:0')",tensor(81.1900)
13 | 11,"tensor(0.9238, device='cuda:0')",0.0,tensor(69.6884),"tensor(0.8189, device='cuda:0')",tensor(78.4200)
14 | 12,"tensor(0.9414, device='cuda:0')",0.0,tensor(68.5351),"tensor(0.6701, device='cuda:0')",tensor(82.0300)
15 | 13,"tensor(0.9389, device='cuda:0')",0.0,tensor(68.8745),"tensor(0.5832, device='cuda:0')",tensor(83.6000)
16 | 14,"tensor(0.9155, device='cuda:0')",0.0,tensor(69.9621),"tensor(0.6897, device='cuda:0')",tensor(81.4700)
17 | 15,"tensor(0.9061, device='cuda:0')",0.0,tensor(70.3822),"tensor(0.6672, device='cuda:0')",tensor(82.3000)
18 | 16,"tensor(0.9020, device='cuda:0')",0.0,tensor(70.4629),"tensor(0.6488, device='cuda:0')",tensor(82.9000)
19 | 17,"tensor(0.8953, device='cuda:0')",0.0,tensor(70.4594),"tensor(0.6244, device='cuda:0')",tensor(82.6800)
20 | 18,"tensor(0.8896, device='cuda:0')",0.0,tensor(70.6783),"tensor(0.6720, device='cuda:0')",tensor(82.1500)
21 | 19,"tensor(0.8901, device='cuda:0')",0.0,tensor(70.6816),"tensor(0.6343, device='cuda:0')",tensor(83.1400)
22 | 20,"tensor(0.8715, device='cuda:0')",0.0,tensor(71.9698),"tensor(0.6502, device='cuda:0')",tensor(83.8300)
23 | 21,"tensor(0.8699, device='cuda:0')",0.0,tensor(71.6310),"tensor(0.6583, device='cuda:0')",tensor(82.7900)
24 | 22,"tensor(0.8638, device='cuda:0')",0.0,tensor(71.6807),"tensor(0.6244, device='cuda:0')",tensor(83.9400)
25 | 23,"tensor(0.8714, device='cuda:0')",0.0,tensor(71.4544),"tensor(0.7419, device='cuda:0')",tensor(82.3300)
26 | 24,"tensor(0.8673, device='cuda:0')",0.0,tensor(71.8646),"tensor(0.7027, device='cuda:0')",tensor(82.9300)
27 | 25,"tensor(0.8617, device='cuda:0')",0.0,tensor(71.8118),"tensor(0.6836, device='cuda:0')",tensor(82.6300)
28 | 26,"tensor(0.8752, device='cuda:0')",0.0,tensor(71.3329),"tensor(0.6698, device='cuda:0')",tensor(83.4400)
29 | 27,"tensor(0.8760, device='cuda:0')",0.0,tensor(71.2223),"tensor(0.6813, device='cuda:0')",tensor(83.0900)
30 | 28,"tensor(0.8627, device='cuda:0')",0.0,tensor(72.0735),"tensor(0.6109, device='cuda:0')",tensor(83.7200)
31 | 29,"tensor(0.8564, device='cuda:0')",0.0,tensor(71.9139),"tensor(0.6498, device='cuda:0')",tensor(83.1300)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_2.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.6238, device='cuda:0')",0.0,tensor(43.7457),"tensor(1.0339, device='cuda:0')",tensor(68.1200)
 3 | 1,"tensor(1.3345, device='cuda:0')",0.0,tensor(54.3368),"tensor(0.9366, device='cuda:0')",tensor(70.6600)
 4 | 2,"tensor(1.2593, device='cuda:0')",0.0,tensor(56.6156),"tensor(0.6572, device='cuda:0')",tensor(78.8400)
 5 | 3,"tensor(1.2066, device='cuda:0')",0.0,tensor(58.5272),"tensor(0.6226, device='cuda:0')",tensor(79.5300)
 6 | 4,"tensor(1.1670, device='cuda:0')",0.0,tensor(60.1677),"tensor(0.6359, device='cuda:0')",tensor(79.3300)
 7 | 5,"tensor(1.1355, device='cuda:0')",0.0,tensor(61.4253),"tensor(0.6235, device='cuda:0')",tensor(81.0400)
 8 | 6,"tensor(1.1357, device='cuda:0')",0.0,tensor(60.6668),"tensor(0.5750, device='cuda:0')",tensor(82.2400)
 9 | 7,"tensor(1.1101, device='cuda:0')",0.0,tensor(61.9495),"tensor(0.5551, device='cuda:0')",tensor(82.6200)
10 | 8,"tensor(1.0871, device='cuda:0')",0.0,tensor(62.7897),"tensor(0.5791, device='cuda:0')",tensor(81.8900)
11 | 9,"tensor(1.0692, device='cuda:0')",0.0,tensor(63.2579),"tensor(0.6077, device='cuda:0')",tensor(81.1100)
12 | 10,"tensor(1.0782, device='cuda:0')",0.0,tensor(62.9700),"tensor(0.5328, device='cuda:0')",tensor(83.2500)
13 | 11,"tensor(1.0514, device='cuda:0')",0.0,tensor(64.1702),"tensor(0.5064, device='cuda:0')",tensor(83.9100)
14 | 12,"tensor(1.0488, device='cuda:0')",0.0,tensor(63.8546),"tensor(0.5531, device='cuda:0')",tensor(83.2200)
15 | 13,"tensor(1.0729, device='cuda:0')",0.0,tensor(62.7652),"tensor(0.5439, device='cuda:0')",tensor(82.8400)
16 | 14,"tensor(1.0357, device='cuda:0')",0.0,tensor(64.4369),"tensor(0.5264, device='cuda:0')",tensor(83.7300)
17 | 15,"tensor(1.0342, device='cuda:0')",0.0,tensor(64.5673),"tensor(0.5263, device='cuda:0')",tensor(83.7500)
18 | 16,"tensor(1.0292, device='cuda:0')",0.0,tensor(64.6994),"tensor(0.5558, device='cuda:0')",tensor(83.1900)
19 | 17,"tensor(1.0299, device='cuda:0')",0.0,tensor(64.6908),"tensor(0.5957, device='cuda:0')",tensor(82.6200)
20 | 18,"tensor(1.0225, device='cuda:0')",0.0,tensor(64.9278),"tensor(0.6015, device='cuda:0')",tensor(82.0700)
21 | 19,"tensor(1.0114, device='cuda:0')",0.0,tensor(65.4504),"tensor(0.5689, device='cuda:0')",tensor(82.4400)
22 | 20,"tensor(1.0239, device='cuda:0')",0.0,tensor(64.8615),"tensor(0.5855, device='cuda:0')",tensor(82.6400)
23 | 21,"tensor(1.0217, device='cuda:0')",0.0,tensor(64.8218),"tensor(0.5125, device='cuda:0')",tensor(84.2600)
24 | 22,"tensor(0.9914, device='cuda:0')",0.0,tensor(66.0869),"tensor(0.4711, device='cuda:0')",tensor(85.5200)
25 | 23,"tensor(1.0080, device='cuda:0')",0.0,tensor(65.1572),"tensor(0.5901, device='cuda:0')",tensor(82.2200)
26 | 24,"tensor(0.9865, device='cuda:0')",0.0,tensor(66.1673),"tensor(0.6213, device='cuda:0')",tensor(84.8400)
27 | 25,"tensor(0.9787, device='cuda:0')",0.0,tensor(66.4097),"tensor(0.5190, device='cuda:0')",tensor(84.2700)
28 | 26,"tensor(0.9911, device='cuda:0')",0.0,tensor(66.1076),"tensor(0.5294, device='cuda:0')",tensor(83.2000)
29 | 27,"tensor(0.9689, device='cuda:0')",0.0,tensor(67.0958),"tensor(0.5068, device='cuda:0')",tensor(84.8300)
30 | 28,"tensor(0.9989, device='cuda:0')",0.0,tensor(65.1615),"tensor(0.5430, device='cuda:0')",tensor(83.9000)
31 | 29,"tensor(0.9745, device='cuda:0')",0.0,tensor(66.6269),"tensor(0.5172, device='cuda:0')",tensor(84.7300)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_3.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.5592, device='cuda:0')",0.0,tensor(47.6954),"tensor(1.0072, device='cuda:0')",tensor(69.8600)
 3 | 1,"tensor(1.2895, device='cuda:0')",0.0,tensor(57.9795),"tensor(1.0657, device='cuda:0')",tensor(66.3500)
 4 | 2,"tensor(1.1896, device='cuda:0')",0.0,tensor(61.5622),"tensor(0.7285, device='cuda:0')",tensor(76.8400)
 5 | 3,"tensor(1.1315, device='cuda:0')",0.0,tensor(63.4467),"tensor(0.7342, device='cuda:0')",tensor(77.6600)
 6 | 4,"tensor(1.0813, device='cuda:0')",0.0,tensor(65.5488),"tensor(0.6676, device='cuda:0')",tensor(80.4200)
 7 | 5,"tensor(1.0439, device='cuda:0')",0.0,tensor(67.3616),"tensor(0.6663, device='cuda:0')",tensor(80.1300)
 8 | 6,"tensor(1.0427, device='cuda:0')",0.0,tensor(67.1914),"tensor(0.6639, device='cuda:0')",tensor(80.0600)
 9 | 7,"tensor(1.0141, device='cuda:0')",0.0,tensor(67.3071),"tensor(0.6258, device='cuda:0')",tensor(82.3600)
10 | 8,"tensor(0.9851, device='cuda:0')",0.0,tensor(68.5185),"tensor(0.6693, device='cuda:0')",tensor(80.8600)
11 | 9,"tensor(0.9848, device='cuda:0')",0.0,tensor(69.1618),"tensor(0.6606, device='cuda:0')",tensor(81.1300)
12 | 10,"tensor(0.9466, device='cuda:0')",0.0,tensor(69.9205),"tensor(0.7316, device='cuda:0')",tensor(80.4600)
13 | 11,"tensor(0.9591, device='cuda:0')",0.0,tensor(69.1466),"tensor(0.6989, device='cuda:0')",tensor(81.5800)
14 | 12,"tensor(0.9271, device='cuda:0')",0.0,tensor(71.0775),"tensor(0.6770, device='cuda:0')",tensor(81.7100)
15 | 13,"tensor(0.9348, device='cuda:0')",0.0,tensor(71.0280),"tensor(0.7440, device='cuda:0')",tensor(79.7700)
16 | 14,"tensor(0.9291, device='cuda:0')",0.0,tensor(70.5768),"tensor(0.6676, device='cuda:0')",tensor(82.1900)
17 | 15,"tensor(0.9004, device='cuda:0')",0.0,tensor(71.5198),"tensor(0.6743, device='cuda:0')",tensor(82.0400)
18 | 16,"tensor(0.9146, device='cuda:0')",0.0,tensor(71.4545),"tensor(0.6879, device='cuda:0')",tensor(81.6000)
19 | 17,"tensor(0.9019, device='cuda:0')",0.0,tensor(71.4323),"tensor(0.7090, device='cuda:0')",tensor(81.7600)
20 | 18,"tensor(0.8996, device='cuda:0')",0.0,tensor(71.2211),"tensor(0.6541, device='cuda:0')",tensor(82.9500)
21 | 19,"tensor(0.8976, device='cuda:0')",0.0,tensor(71.3187),"tensor(0.7200, device='cuda:0')",tensor(82.4200)
22 | 20,"tensor(0.8834, device='cuda:0')",0.0,tensor(72.5813),"tensor(0.6829, device='cuda:0')",tensor(82.7700)
23 | 21,"tensor(0.8742, device='cuda:0')",0.0,tensor(72.3386),"tensor(0.6557, device='cuda:0')",tensor(84.1600)
24 | 22,"tensor(0.8692, device='cuda:0')",0.0,tensor(73.2286),"tensor(0.6941, device='cuda:0')",tensor(82.3000)
25 | 23,"tensor(0.8712, device='cuda:0')",0.0,tensor(72.0759),"tensor(0.7901, device='cuda:0')",tensor(80.2200)
26 | 24,"tensor(0.8599, device='cuda:0')",0.0,tensor(73.2842),"tensor(0.7298, device='cuda:0')",tensor(81.8300)
27 | 25,"tensor(0.8663, device='cuda:0')",0.0,tensor(72.8395),"tensor(0.7081, device='cuda:0')",tensor(81.4800)
28 | 26,"tensor(0.8650, device='cuda:0')",0.0,tensor(72.3468),"tensor(0.7650, device='cuda:0')",tensor(80.4900)
29 | 27,"tensor(0.8632, device='cuda:0')",0.0,tensor(72.6353),"tensor(0.6907, device='cuda:0')",tensor(82.5700)
30 | 28,"tensor(0.8460, device='cuda:0')",0.0,tensor(73.5868),"tensor(0.6731, device='cuda:0')",tensor(83.1100)
31 | 29,"tensor(0.8814, device='cuda:0')",0.0,tensor(72.5378),"tensor(0.7574, device='cuda:0')",tensor(82.3700)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_4.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.6433, device='cuda:0')",0.0,tensor(43.3735),"tensor(1.2930, device='cuda:0')",tensor(64.3100)
 3 | 1,"tensor(1.3486, device='cuda:0')",0.0,tensor(54.5961),"tensor(0.8134, device='cuda:0')",tensor(74.3800)
 4 | 2,"tensor(1.2502, device='cuda:0')",0.0,tensor(57.8238),"tensor(0.8502, device='cuda:0')",tensor(73.4500)
 5 | 3,"tensor(1.2160, device='cuda:0')",0.0,tensor(59.2377),"tensor(0.7695, device='cuda:0')",tensor(75.6300)
 6 | 4,"tensor(1.1928, device='cuda:0')",0.0,tensor(59.9123),"tensor(0.6782, device='cuda:0')",tensor(78.4400)
 7 | 5,"tensor(1.1497, device='cuda:0')",0.0,tensor(61.3737),"tensor(0.7590, device='cuda:0')",tensor(76.6500)
 8 | 6,"tensor(1.1221, device='cuda:0')",0.0,tensor(62.2608),"tensor(0.5987, device='cuda:0')",tensor(80.9100)
 9 | 7,"tensor(1.0959, device='cuda:0')",0.0,tensor(63.0191),"tensor(0.5961, device='cuda:0')",tensor(82.1600)
10 | 8,"tensor(1.0877, device='cuda:0')",0.0,tensor(62.8546),"tensor(0.6956, device='cuda:0')",tensor(81.2600)
11 | 9,"tensor(1.0709, device='cuda:0')",0.0,tensor(63.7188),"tensor(0.6949, device='cuda:0')",tensor(79.6000)
12 | 10,"tensor(1.0627, device='cuda:0')",0.0,tensor(64.1292),"tensor(0.6470, device='cuda:0')",tensor(81.0400)
13 | 11,"tensor(1.0550, device='cuda:0')",0.0,tensor(64.2661),"tensor(0.5805, device='cuda:0')",tensor(82.6400)
14 | 12,"tensor(1.0621, device='cuda:0')",0.0,tensor(63.8284),"tensor(0.6214, device='cuda:0')",tensor(82.5600)
15 | 13,"tensor(1.0382, device='cuda:0')",0.0,tensor(64.9477),"tensor(0.6153, device='cuda:0')",tensor(82.1300)
16 | 14,"tensor(1.0352, device='cuda:0')",0.0,tensor(64.9522),"tensor(0.7908, device='cuda:0')",tensor(81.0800)
17 | 15,"tensor(1.0338, device='cuda:0')",0.0,tensor(64.9852),"tensor(0.5995, device='cuda:0')",tensor(83.2200)
18 | 16,"tensor(1.0070, device='cuda:0')",0.0,tensor(65.8552),"tensor(0.7508, device='cuda:0')",tensor(83.)
19 | 17,"tensor(1.0091, device='cuda:0')",0.0,tensor(65.8721),"tensor(0.5977, device='cuda:0')",tensor(83.7400)
20 | 18,"tensor(0.9970, device='cuda:0')",0.0,tensor(66.2594),"tensor(0.6023, device='cuda:0')",tensor(84.2600)
21 | 19,"tensor(1.0157, device='cuda:0')",0.0,tensor(65.6327),"tensor(0.6868, device='cuda:0')",tensor(83.2400)
22 | 20,"tensor(1.0010, device='cuda:0')",0.0,tensor(65.8058),"tensor(0.5992, device='cuda:0')",tensor(84.3700)
23 | 21,"tensor(0.9925, device='cuda:0')",0.0,tensor(66.2249),"tensor(0.5742, device='cuda:0')",tensor(84.0400)
24 | 22,"tensor(0.9776, device='cuda:0')",0.0,tensor(66.9663),"tensor(0.5625, device='cuda:0')",tensor(85.3400)
25 | 23,"tensor(0.9731, device='cuda:0')",0.0,tensor(66.9961),"tensor(0.6398, device='cuda:0')",tensor(82.9700)
26 | 24,"tensor(0.9685, device='cuda:0')",0.0,tensor(67.0790),"tensor(0.6912, device='cuda:0')",tensor(83.5300)
27 | 25,"tensor(0.9792, device='cuda:0')",0.0,tensor(66.3659),"tensor(0.5675, device='cuda:0')",tensor(84.5900)
28 | 26,"tensor(0.9639, device='cuda:0')",0.0,tensor(66.8584),"tensor(0.6594, device='cuda:0')",tensor(82.6600)
29 | 27,"tensor(0.9792, device='cuda:0')",0.0,tensor(66.7141),"tensor(0.6313, device='cuda:0')",tensor(84.2700)
30 | 28,"tensor(0.9402, device='cuda:0')",0.0,tensor(68.3393),"tensor(0.7023, device='cuda:0')",tensor(84.2200)
31 | 29,"tensor(0.9429, device='cuda:0')",0.0,tensor(68.2936),"tensor(0.5501, device='cuda:0')",tensor(83.8200)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_1.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.8401, device='cuda:0')",0.0,tensor(38.9253),"tensor(1.2284, device='cuda:0')",tensor(59.7100)
 3 | 1,"tensor(1.5538, device='cuda:0')",0.0,tensor(51.7325),"tensor(0.9910, device='cuda:0')",tensor(67.6500)
 4 | 2,"tensor(1.4499, device='cuda:0')",0.0,tensor(55.7960),"tensor(0.9132, device='cuda:0')",tensor(71.8100)
 5 | 3,"tensor(1.3914, device='cuda:0')",0.0,tensor(57.5044),"tensor(0.9560, device='cuda:0')",tensor(69.8500)
 6 | 4,"tensor(1.3419, device='cuda:0')",0.0,tensor(59.3992),"tensor(0.8385, device='cuda:0')",tensor(75.3200)
 7 | 5,"tensor(1.3383, device='cuda:0')",0.0,tensor(59.3667),"tensor(0.7956, device='cuda:0')",tensor(77.3500)
 8 | 6,"tensor(1.2864, device='cuda:0')",0.0,tensor(61.4852),"tensor(0.7863, device='cuda:0')",tensor(77.5400)
 9 | 7,"tensor(1.2726, device='cuda:0')",0.0,tensor(61.9529),"tensor(0.6963, device='cuda:0')",tensor(80.4200)
10 | 8,"tensor(1.2563, device='cuda:0')",0.0,tensor(62.3388),"tensor(0.7173, device='cuda:0')",tensor(80.2200)
11 | 9,"tensor(1.2053, device='cuda:0')",0.0,tensor(63.9532),"tensor(0.6878, device='cuda:0')",tensor(80.1900)
12 | 10,"tensor(1.1979, device='cuda:0')",0.0,tensor(64.5873),"tensor(0.7827, device='cuda:0')",tensor(78.1600)
13 | 11,"tensor(1.1863, device='cuda:0')",0.0,tensor(64.5691),"tensor(0.6630, device='cuda:0')",tensor(81.8200)
14 | 12,"tensor(1.1745, device='cuda:0')",0.0,tensor(65.2032),"tensor(0.7188, device='cuda:0')",tensor(80.1400)
15 | 13,"tensor(1.1348, device='cuda:0')",0.0,tensor(66.7967),"tensor(0.6287, device='cuda:0')",tensor(82.4100)
16 | 14,"tensor(1.1840, device='cuda:0')",0.0,tensor(64.6255),"tensor(0.6893, device='cuda:0')",tensor(81.7900)
17 | 15,"tensor(1.1713, device='cuda:0')",0.0,tensor(65.3257),"tensor(0.6450, device='cuda:0')",tensor(82.2600)
18 | 16,"tensor(1.1160, device='cuda:0')",0.0,tensor(67.3888),"tensor(0.6603, device='cuda:0')",tensor(81.3100)
19 | 17,"tensor(1.1688, device='cuda:0')",0.0,tensor(65.2723),"tensor(0.6629, device='cuda:0')",tensor(82.1300)
20 | 18,"tensor(1.1515, device='cuda:0')",0.0,tensor(65.9117),"tensor(0.6897, device='cuda:0')",tensor(81.3000)
21 | 19,"tensor(1.1372, device='cuda:0')",0.0,tensor(66.8916),"tensor(0.7103, device='cuda:0')",tensor(81.3300)
22 | 20,"tensor(1.1010, device='cuda:0')",0.0,tensor(67.7973),"tensor(0.7031, device='cuda:0')",tensor(80.9700)
23 | 21,"tensor(1.1538, device='cuda:0')",0.0,tensor(65.6317),"tensor(0.7909, device='cuda:0')",tensor(76.7500)
24 | 22,"tensor(1.1011, device='cuda:0')",0.0,tensor(67.7370),"tensor(0.6757, device='cuda:0')",tensor(81.4900)
25 | 23,"tensor(1.1160, device='cuda:0')",0.0,tensor(67.2205),"tensor(0.6688, device='cuda:0')",tensor(82.1600)
26 | 24,"tensor(1.1190, device='cuda:0')",0.0,tensor(67.3427),"tensor(0.6924, device='cuda:0')",tensor(81.7000)
27 | 25,"tensor(1.1283, device='cuda:0')",0.0,tensor(66.5068),"tensor(0.6460, device='cuda:0')",tensor(82.2600)
28 | 26,"tensor(1.1064, device='cuda:0')",0.0,tensor(66.7692),"tensor(0.6949, device='cuda:0')",tensor(80.3300)
29 | 27,"tensor(1.0782, device='cuda:0')",0.0,tensor(68.4394),"tensor(0.6394, device='cuda:0')",tensor(83.2100)
30 | 28,"tensor(1.0723, device='cuda:0')",0.0,tensor(69.0931),"tensor(0.6562, device='cuda:0')",tensor(82.2100)
31 | 29,"tensor(1.0710, device='cuda:0')",0.0,tensor(68.6733),"tensor(0.6450, device='cuda:0')",tensor(82.4100)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_2.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.8520, device='cuda:0')",0.0,tensor(38.1330),"tensor(1.1646, device='cuda:0')",tensor(62.2000)
 3 | 1,"tensor(1.5934, device='cuda:0')",0.0,tensor(50.0632),"tensor(1.2364, device='cuda:0')",tensor(61.6900)
 4 | 2,"tensor(1.5097, device='cuda:0')",0.0,tensor(53.3947),"tensor(0.8179, device='cuda:0')",tensor(75.3000)
 5 | 3,"tensor(1.4606, device='cuda:0')",0.0,tensor(55.3491),"tensor(0.8479, device='cuda:0')",tensor(74.7500)
 6 | 4,"tensor(1.4019, device='cuda:0')",0.0,tensor(56.9812),"tensor(0.7395, device='cuda:0')",tensor(77.9400)
 7 | 5,"tensor(1.3898, device='cuda:0')",0.0,tensor(57.3724),"tensor(0.8446, device='cuda:0')",tensor(74.3800)
 8 | 6,"tensor(1.3492, device='cuda:0')",0.0,tensor(58.9751),"tensor(0.8912, device='cuda:0')",tensor(74.4000)
 9 | 7,"tensor(1.3546, device='cuda:0')",0.0,tensor(58.6492),"tensor(0.7657, device='cuda:0')",tensor(77.7300)
10 | 8,"tensor(1.3673, device='cuda:0')",0.0,tensor(58.3711),"tensor(0.7878, device='cuda:0')",tensor(76.7400)
11 | 9,"tensor(1.3872, device='cuda:0')",0.0,tensor(57.2477),"tensor(0.8376, device='cuda:0')",tensor(74.4900)
12 | 10,"tensor(1.3261, device='cuda:0')",0.0,tensor(59.6321),"tensor(0.6846, device='cuda:0')",tensor(79.8800)
13 | 11,"tensor(1.3214, device='cuda:0')",0.0,tensor(59.6932),"tensor(0.6900, device='cuda:0')",tensor(79.9700)
14 | 12,"tensor(1.3452, device='cuda:0')",0.0,tensor(58.7390),"tensor(0.7950, device='cuda:0')",tensor(76.3100)
15 | 13,"tensor(1.2936, device='cuda:0')",0.0,tensor(60.6500),"tensor(0.7583, device='cuda:0')",tensor(78.3400)
16 | 14,"tensor(1.3206, device='cuda:0')",0.0,tensor(59.7003),"tensor(0.7124, device='cuda:0')",tensor(80.5700)
17 | 15,"tensor(1.3420, device='cuda:0')",0.0,tensor(58.6945),"tensor(0.7584, device='cuda:0')",tensor(77.7100)
18 | 16,"tensor(1.3114, device='cuda:0')",0.0,tensor(59.9868),"tensor(0.8013, device='cuda:0')",tensor(75.6700)
19 | 17,"tensor(1.2843, device='cuda:0')",0.0,tensor(60.8332),"tensor(0.6785, device='cuda:0')",tensor(81.8400)
20 | 18,"tensor(1.3101, device='cuda:0')",0.0,tensor(59.7496),"tensor(0.7049, device='cuda:0')",tensor(81.2700)
21 | 19,"tensor(1.3010, device='cuda:0')",0.0,tensor(60.2414),"tensor(0.7181, device='cuda:0')",tensor(80.5100)
22 | 20,"tensor(1.2733, device='cuda:0')",0.0,tensor(61.4875),"tensor(0.6511, device='cuda:0')",tensor(82.2900)
23 | 21,"tensor(1.2929, device='cuda:0')",0.0,tensor(60.6969),"tensor(0.7355, device='cuda:0')",tensor(78.9600)
24 | 22,"tensor(1.2743, device='cuda:0')",0.0,tensor(61.4696),"tensor(0.7093, device='cuda:0')",tensor(81.5200)
25 | 23,"tensor(1.2770, device='cuda:0')",0.0,tensor(61.1682),"tensor(0.7249, device='cuda:0')",tensor(79.7100)
26 | 24,"tensor(1.2967, device='cuda:0')",0.0,tensor(60.5049),"tensor(0.7013, device='cuda:0')",tensor(81.3100)
27 | 25,"tensor(1.2891, device='cuda:0')",0.0,tensor(60.6730),"tensor(0.7856, device='cuda:0')",tensor(78.5100)
28 | 26,"tensor(1.2615, device='cuda:0')",0.0,tensor(61.9244),"tensor(0.7153, device='cuda:0')",tensor(80.0600)
29 | 27,"tensor(1.2511, device='cuda:0')",0.0,tensor(61.9265),"tensor(0.6894, device='cuda:0')",tensor(81.4500)
30 | 28,"tensor(1.2783, device='cuda:0')",0.0,tensor(61.2908),"tensor(0.7402, device='cuda:0')",tensor(79.2100)
31 | 29,"tensor(1.2632, device='cuda:0')",0.0,tensor(61.6038),"tensor(0.7070, device='cuda:0')",tensor(80.5200)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_3.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.5852, device='cuda:0')",0.0,tensor(49.3322),"tensor(0.9136, device='cuda:0')",tensor(72.1700)
 3 | 1,"tensor(1.2562, device='cuda:0')",0.0,tensor(63.1246),"tensor(0.7735, device='cuda:0')",tensor(75.3300)
 4 | 2,"tensor(1.1676, device='cuda:0')",0.0,tensor(66.3281),"tensor(0.7485, device='cuda:0')",tensor(76.2400)
 5 | 3,"tensor(1.1023, device='cuda:0')",0.0,tensor(68.3585),"tensor(0.8598, device='cuda:0')",tensor(73.6100)
 6 | 4,"tensor(1.0596, device='cuda:0')",0.0,tensor(69.9488),"tensor(0.6650, device='cuda:0')",tensor(79.8500)
 7 | 5,"tensor(1.0365, device='cuda:0')",0.0,tensor(70.5446),"tensor(0.7256, device='cuda:0')",tensor(78.5300)
 8 | 6,"tensor(0.9986, device='cuda:0')",0.0,tensor(71.8226),"tensor(0.6412, device='cuda:0')",tensor(80.4700)
 9 | 7,"tensor(0.9505, device='cuda:0')",0.0,tensor(73.5068),"tensor(0.6535, device='cuda:0')",tensor(80.2000)
10 | 8,"tensor(0.9408, device='cuda:0')",0.0,tensor(73.3418),"tensor(0.6131, device='cuda:0')",tensor(81.1600)
11 | 9,"tensor(0.9354, device='cuda:0')",0.0,tensor(73.9009),"tensor(0.5872, device='cuda:0')",tensor(82.3400)
12 | 10,"tensor(0.9187, device='cuda:0')",0.0,tensor(74.3704),"tensor(0.6324, device='cuda:0')",tensor(80.2100)
13 | 11,"tensor(0.9036, device='cuda:0')",0.0,tensor(74.8706),"tensor(0.6270, device='cuda:0')",tensor(80.6800)
14 | 12,"tensor(0.9332, device='cuda:0')",0.0,tensor(74.1035),"tensor(0.6375, device='cuda:0')",tensor(82.4800)
15 | 13,"tensor(0.8700, device='cuda:0')",0.0,tensor(75.8445),"tensor(0.6200, device='cuda:0')",tensor(81.8900)
16 | 14,"tensor(0.8629, device='cuda:0')",0.0,tensor(76.1854),"tensor(0.6110, device='cuda:0')",tensor(82.0400)
17 | 15,"tensor(0.8222, device='cuda:0')",0.0,tensor(77.5554),"tensor(0.5758, device='cuda:0')",tensor(82.9600)
18 | 16,"tensor(0.8364, device='cuda:0')",0.0,tensor(77.2764),"tensor(0.6242, device='cuda:0')",tensor(81.4800)
19 | 17,"tensor(0.8431, device='cuda:0')",0.0,tensor(77.0262),"tensor(0.6181, device='cuda:0')",tensor(81.6900)
20 | 18,"tensor(0.8176, device='cuda:0')",0.0,tensor(77.7661),"tensor(0.6617, device='cuda:0')",tensor(79.7000)
21 | 19,"tensor(0.7869, device='cuda:0')",0.0,tensor(78.5704),"tensor(0.5973, device='cuda:0')",tensor(83.1200)
22 | 20,"tensor(0.8909, device='cuda:0')",0.0,tensor(75.0715),"tensor(0.6385, device='cuda:0')",tensor(81.9800)
23 | 21,"tensor(0.8334, device='cuda:0')",0.0,tensor(77.2383),"tensor(0.5729, device='cuda:0')",tensor(83.1800)
24 | 22,"tensor(0.8060, device='cuda:0')",0.0,tensor(78.0292),"tensor(0.5911, device='cuda:0')",tensor(83.3900)
25 | 23,"tensor(0.8087, device='cuda:0')",0.0,tensor(78.1002),"tensor(0.6059, device='cuda:0')",tensor(82.7800)
26 | 24,"tensor(0.7846, device='cuda:0')",0.0,tensor(78.7637),"tensor(0.5727, device='cuda:0')",tensor(83.8000)
27 | 25,"tensor(0.7515, device='cuda:0')",0.0,tensor(79.7625),"tensor(0.5971, device='cuda:0')",tensor(83.2400)
28 | 26,"tensor(0.7517, device='cuda:0')",0.0,tensor(79.7170),"tensor(0.5974, device='cuda:0')",tensor(82.0700)
29 | 27,"tensor(0.7605, device='cuda:0')",0.0,tensor(79.5556),"tensor(0.5745, device='cuda:0')",tensor(83.7200)
30 | 28,"tensor(0.7874, device='cuda:0')",0.0,tensor(78.7155),"tensor(0.6164, device='cuda:0')",tensor(83.2200)
31 | 29,"tensor(0.7087, device='cuda:0')",0.0,tensor(81.2600),"tensor(0.5618, device='cuda:0')",tensor(83.8500)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_4.csv:
--------------------------------------------------------------------------------
 1 | epoch,train loss,reg loss,train acc,test loss,test acc
 2 | 0,"tensor(1.7396, device='cuda:0')",0.0,tensor(42.0760),"tensor(1.3003, device='cuda:0')",tensor(59.5700)
 3 | 1,"tensor(1.4385, device='cuda:0')",0.0,tensor(56.0643),"tensor(1.0127, device='cuda:0')",tensor(67.5900)
 4 | 2,"tensor(1.3099, device='cuda:0')",0.0,tensor(60.8499),"tensor(0.8299, device='cuda:0')",tensor(72.1900)
 5 | 3,"tensor(1.2170, device='cuda:0')",0.0,tensor(63.8931),"tensor(0.7399, device='cuda:0')",tensor(77.3000)
 6 | 4,"tensor(1.1810, device='cuda:0')",0.0,tensor(64.8712),"tensor(0.7139, device='cuda:0')",tensor(78.7800)
 7 | 5,"tensor(1.1764, device='cuda:0')",0.0,tensor(65.1266),"tensor(0.6911, device='cuda:0')",tensor(79.9600)
 8 | 6,"tensor(1.1081, device='cuda:0')",0.0,tensor(67.2872),"tensor(0.7118, device='cuda:0')",tensor(77.5100)
 9 | 7,"tensor(1.1507, device='cuda:0')",0.0,tensor(65.9127),"tensor(0.7807, device='cuda:0')",tensor(75.6400)
10 | 8,"tensor(1.1161, device='cuda:0')",0.0,tensor(66.8880),"tensor(0.6945, device='cuda:0')",tensor(78.5200)
11 | 9,"tensor(1.0964, device='cuda:0')",0.0,tensor(67.6024),"tensor(0.6623, device='cuda:0')",tensor(80.1500)
12 | 10,"tensor(1.1009, device='cuda:0')",0.0,tensor(67.1994),"tensor(0.6494, device='cuda:0')",tensor(80.6700)
13 | 11,"tensor(1.1054, device='cuda:0')",0.0,tensor(67.0964),"tensor(0.7119, device='cuda:0')",tensor(78.2400)
14 | 12,"tensor(1.0592, device='cuda:0')",0.0,tensor(68.9080),"tensor(0.6337, device='cuda:0')",tensor(80.5600)
15 | 13,"tensor(1.0479, device='cuda:0')",0.0,tensor(69.0810),"tensor(0.6223, device='cuda:0')",tensor(81.3000)
16 | 14,"tensor(1.0443, device='cuda:0')",0.0,tensor(68.9712),"tensor(0.6930, device='cuda:0')",tensor(78.9500)
17 | 15,"tensor(1.0767, device='cuda:0')",0.0,tensor(68.1365),"tensor(0.6540, device='cuda:0')",tensor(80.5200)
18 | 16,"tensor(1.0659, device='cuda:0')",0.0,tensor(68.1313),"tensor(0.6185, device='cuda:0')",tensor(81.2800)
19 | 17,"tensor(1.0341, device='cuda:0')",0.0,tensor(69.6075),"tensor(0.6211, device='cuda:0')",tensor(80.8700)
20 | 18,"tensor(1.0334, device='cuda:0')",0.0,tensor(69.8162),"tensor(0.5778, device='cuda:0')",tensor(82.3300)
21 | 19,"tensor(1.0137, device='cuda:0')",0.0,tensor(69.9117),"tensor(0.6499, device='cuda:0')",tensor(81.0500)
22 | 20,"tensor(1.0188, device='cuda:0')",0.0,tensor(69.6947),"tensor(0.5940, device='cuda:0')",tensor(82.2600)
23 | 21,"tensor(0.9738, device='cuda:0')",0.0,tensor(71.7237),"tensor(0.6454, device='cuda:0')",tensor(81.3700)
24 | 22,"tensor(1.0088, device='cuda:0')",0.0,tensor(70.4746),"tensor(0.6181, device='cuda:0')",tensor(81.5900)
25 | 23,"tensor(1.0384, device='cuda:0')",0.0,tensor(69.2434),"tensor(0.6176, device='cuda:0')",tensor(82.4300)
26 | 24,"tensor(0.9576, device='cuda:0')",0.0,tensor(71.7219),"tensor(0.5998, device='cuda:0')",tensor(82.6100)
27 | 25,"tensor(0.9787, device='cuda:0')",0.0,tensor(71.2283),"tensor(0.5762, device='cuda:0')",tensor(82.7300)
28 | 26,"tensor(0.9771, device='cuda:0')",0.0,tensor(71.1901),"tensor(0.5692, device='cuda:0')",tensor(83.8800)
29 | 27,"tensor(0.9588, device='cuda:0')",0.0,tensor(71.6358),"tensor(0.6037, device='cuda:0')",tensor(82.)
30 | 28,"tensor(0.9769, device='cuda:0')",0.0,tensor(71.1616),"tensor(0.6231, device='cuda:0')",tensor(81.2100)
31 | 29,"tensor(0.9710, device='cuda:0')",0.0,tensor(71.7734),"tensor(0.5814, device='cuda:0')",tensor(83.0300)
32 | 


--------------------------------------------------------------------------------
/results/decay=1e-5/loss-test-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/loss-test-with-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-5/loss-test-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/loss-test-without-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-5/test-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/test-accuracy-with-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-5/test-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/test-accuracy-without-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-5/train-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/train-accuracy-with-augment-.pdf


--------------------------------------------------------------------------------
/results/decay=1e-5/train-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/train-accuracy-without-augment-.pdf


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3 -u
  2 | # Copyright (c) 2017-present, Facebook, Inc. (mixup)
  3 | # Copyright (c) 2020-present, Maciej A. Czyzewski (batchboost)
  4 | # All rights reserved.
  5 | #
  6 | # This source code is licensed under the license found in the LICENSE file in
  7 | # the root directory of this source tree.
  8 | from __future__ import print_function
  9 | 
 10 | import argparse
 11 | import csv
 12 | import os
 13 | 
 14 | import numpy as np
 15 | import torch
 16 | from torch.autograd import Variable
 17 | import torch.backends.cudnn as cudnn
 18 | import torch.nn as nn
 19 | import torch.optim as optim
 20 | import torchvision.transforms as transforms
 21 | import torchvision.datasets as datasets
 22 | 
 23 | """
 24 | !pip install efficientnet_pytorch
 25 | from google.colab import drive
 26 | drive.mount('/content/gdrive', force_remount=True)
 27 | !cp gdrive/My\ Drive/<path>/utils.py .
 28 | !cp gdrive/My\ Drive/<path>/train.py .
 29 | !nvcc --version
 30 | !pip3 install --upgrade --force-reinstall torch torchvision
 31 | import torch
 32 | print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
 33 | print('Device:', torch.device('cuda:0'), torch.cuda.is_available())
 34 | # --- START ---
 35 | !python3 train.py --decay=1e-5 --no-augment --seed=1 \
 36 | 	--name=batchboost --model=efficientnet-b0 --epoch=30
 37 | """
 38 | 
 39 | # FIXME: rewrite it clean
 40 | import debug
 41 | from utils import progress_bar
 42 | 
 43 | try:
 44 |     import models
 45 | 
 46 |     COLAB = False
 47 | except:
 48 |     # FIXME: detect environment?
 49 |     print("=== GOOGLE COLAB ENVIRONMENT ===")
 50 |     COLAB = True
 51 | 
 52 | parser = argparse.ArgumentParser(description="PyTorch CIFAR10 Training")
 53 | parser.add_argument("--lr", default=0.1, type=float, help="learning rate")
 54 | parser.add_argument(
 55 |     "--resume", "-r", action="store_true", help="resume from checkpoint"
 56 | )
 57 | parser.add_argument(
 58 |     "--model",
 59 |     default="ResNet18",
 60 |     type=str,
 61 |     help="model type (default: ResNet18)",
 62 | )
 63 | parser.add_argument("--name", default="0", type=str, help="name of run")
 64 | parser.add_argument("--seed", default=0, type=int, help="random seed")
 65 | parser.add_argument("--batch-size", default=128, type=int, help="batch size")
 66 | parser.add_argument(
 67 |     "--epoch", default=200, type=int, help="total epochs to run"
 68 | )
 69 | parser.add_argument(
 70 |     "--no-augment",
 71 |     dest="augment",
 72 |     action="store_false",
 73 |     help="use standard augmentation (default: True)",
 74 | )
 75 | parser.add_argument(
 76 |     "--optimizer",
 77 |     type=str,
 78 |     default="lamb",
 79 |     choices=["lamb", "adam"],
 80 |     help="which optimizer to use",
 81 | )
 82 | parser.add_argument("--decay", default=1e-5, type=float, help="weight decay")
 83 | parser.add_argument(
 84 |     "--alpha",
 85 |     default=1.0,
 86 |     type=float,
 87 |     help="mixup interpolation coefficient (default: 1)",
 88 | )
 89 | parser.add_argument(
 90 |     "--debug",
 91 |     "-d",
 92 |     action="store_true",
 93 |     help="debug on FashionMNIST and ResNet100k network",
 94 | )
 95 | args = parser.parse_args()
 96 | 
 97 | use_cuda = torch.cuda.is_available()
 98 | 
 99 | best_acc = 0  # best test accuracy
100 | start_epoch = 0  # start from epoch 0 or last checkpoint epoch
101 | 
102 | if args.seed != 0:
103 |     torch.manual_seed(args.seed)
104 | 
# Data
print("==> Preparing data..")
num_classes = 10

if args.debug:
    # Debug mode delegates loader construction to the debug module.
    trainloader, testloader = debug.FashionMNIST_loaders(args)
else:
    # Normalisation constants shared by the train and test pipelines
    # (presumably the CIFAR-10 per-channel mean / std -- TODO confirm).
    _MEAN = (0.4914, 0.4822, 0.4465)
    _STD = (0.2023, 0.1994, 0.2010)

    # Optional augmentation steps, placed in front of ToTensor/Normalize.
    _augmentation = []
    if args.augment:
        _augmentation = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ]

    transform_train = transforms.Compose(
        _augmentation
        + [transforms.ToTensor(), transforms.Normalize(_MEAN, _STD)]
    )
    transform_test = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(_MEAN, _STD)]
    )

    # Training split: shuffled, batch size taken from the command line.
    trainset = datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform_train
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=True, num_workers=8
    )

    # Test split: fixed order, fixed batch size of 100.
    testset = datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform_test
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=8
    )
155 | 
# Model
if args.resume:
    # Load checkpoint.
    print("==> Resuming from checkpoint..")
    assert os.path.isdir("checkpoint"), "Error: no checkpoint directory found!"
    # NOTE(review): the checkpoint contains a pickled module, so loading it
    # executes pickle code -- only resume from checkpoints you wrote yourself.
    checkpoint = torch.load(
        "./checkpoint/ckpt.t7" + args.name + "_" + str(args.seed)
    )
    net = checkpoint["net"]
    # checkpoint() saves `net` as it is at save time, i.e. already wrapped in
    # DataParallel when training ran on CUDA. Unwrap it here so the CUDA setup
    # below does not wrap it a second time and the log file name is built
    # from the real model class instead of "DataParallel".
    if isinstance(net, torch.nn.DataParallel):
        net = net.module
    best_acc = checkpoint["acc"]
    start_epoch = checkpoint["epoch"] + 1
    rng_state = checkpoint["rng_state"]
    torch.set_rng_state(rng_state)
else:
    print("==> Building model..")
    if args.model.startswith("efficientnet"):
        # Imported lazily so the dependency is only needed for these models.
        from efficientnet_pytorch import EfficientNet

        net = EfficientNet.from_pretrained(args.model, num_classes=num_classes)
    elif args.debug:
        net = debug.ResNet100k()
    else:
        # Look the constructor up by name in the local `models` package.
        net = models.__dict__[args.model]()
179 | 
# Make sure the output directory for the CSV logs exists.
if not os.path.isdir("results"):
    os.mkdir("results")

# Log file name: results/log_<model class>_<run name>_<seed>.csv
logname = "results/log_{}_{}_{}.csv".format(
    net.__class__.__name__, args.name, args.seed
)

if use_cuda:
    # Move the model to the GPU and wrap it for multi-GPU data parallelism.
    net = torch.nn.DataParallel(net.cuda())
    print("device_count =", torch.cuda.device_count())
    cudnn.benchmark = True
    print("Using CUDA...")

criterion = nn.CrossEntropyLoss()

# Plain SGD with momentum; learning rate and weight decay come from the CLI.
optimizer = optim.SGD(
    net.parameters(), weight_decay=args.decay, momentum=0.9, lr=args.lr
)
204 | 
205 | ### MIXUP ######################################################################
206 | 
207 | 
def mixup_data(x, y, index_left, index_right, alpha=1.0, use_cuda=True):
    """Blend two selections of a batch and return what the mixup loss needs.

    Args:
        x: batch of inputs, indexed along its first dimension.
        y: targets belonging to `x`.
        index_left: indices of the samples weighted by `lam`.
        index_right: indices of the samples weighted by `1 - lam`.
        alpha: Beta(alpha, alpha) parameter; `lam` is fixed to 1 when
            `alpha` is not positive.
        use_cuda: accepted for call compatibility, not used here.

    Returns:
        Tuple `(mixed_x, y_a, y_b, lam)`.
    """
    # Mixing coefficient: random for alpha > 0, otherwise "left side only".
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    left_part = x[index_left, :]
    right_part = x[index_right, :]
    blended = lam * left_part + (1 - lam) * right_part

    return blended, y[index_left], y[index_right], lam
218 | 
219 | 
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Return the `lam`-weighted combination of the loss against both targets.

    `criterion(pred, y_a)` is weighted by `lam` and `criterion(pred, y_b)`
    by `1 - lam`.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b
222 | 
223 | 
def train_mixup(epoch):
    """Train the global `net` for one epoch with mixup.

    Each batch is mixed with a random permutation of itself before the
    forward pass; the loss and the reported accuracy are both weighted by
    the mixing coefficient `lam`.

    Args:
        epoch: epoch number, only used for the console output.

    Returns:
        Tuple `(mean train loss, mean reg loss, train accuracy in percent)`.
        `reg loss` is never accumulated here, so it stays 0.
    """
    print("MIXUP")
    print("\nEpoch: %d" % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        num_batches = batch_idx + 1
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # Random pairing partner for every sample of the batch.
        batch_size = inputs.shape[0]
        if use_cuda:
            index = torch.randperm(batch_size).cuda()
        else:
            index = torch.randperm(batch_size)

        inputs, targets_a, targets_b, lam = mixup_data(
            inputs, targets, range(batch_size), index, args.alpha, use_cuda
        )
        outputs = net(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += inputs.size(0)
        # A prediction counts `lam` towards the first target and `1 - lam`
        # towards the second one.
        correct += (
            lam * predicted.eq(targets_a.data).cpu().sum().float()
            + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float()
        )

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
        optimizer.step()

        progress_bar(
            batch_idx,
            len(trainloader),
            "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)"
            % (
                train_loss / (batch_idx + 1),
                reg_loss / (batch_idx + 1),
                100.0 * correct / total,
                correct,
                total,
            ),
        )
    # Average over the number of batches. `batch_idx` is the last *index*, so
    # dividing by it was off by one (and a zero division for a single batch).
    denom = max(num_batches, 1)
    return (
        train_loss / denom,
        reg_loss / denom,
        100.0 * correct / max(total, 1),
    )
280 | 
281 | 
282 | ### BATCHBOOST #################################################################
283 | 
284 | from batchboost import BatchBoost
285 | 
286 | 
def fn_error(outputs, targets):
    """Return, per row, the sum over dim 1 of `-outputs * log_softmax(targets)`.

    NOTE(review): the log-softmax is applied to `targets`, not `outputs`;
    whether the argument names match how BatchBoost calls this hook cannot be
    told from this file -- confirm against batchboost.py.
    """
    log_probs = nn.LogSoftmax(dim=1)(targets)
    weighted = -outputs * log_probs
    return torch.sum(weighted, dim=1)
290 | 
291 | 
def fn_linearize(x, num_classes=10):
    """Turn a 1-D tensor of class indices into a one-hot float matrix.

    Returns a new `(x.size(0), num_classes)` tensor of zeros with a 1 in
    column `x[i]` of row `i`. The result is created with `torch.zeros`
    defaults, independent of the device of `x`.
    """
    count = x.size(0)
    one_hot = torch.zeros(count, num_classes)
    rows = range(count)
    one_hot[rows, x] = 1
    return one_hot
296 | 
297 | 
def fn_unlinearize(x):
    """Return, for every row of `x`, the index of its largest entry."""
    # torch.max over dim 1 yields (values, indices); only indices are needed.
    return torch.max(x, 1)[1]
301 | 
302 | 
# Install the three helper functions defined above as class attributes of
# BatchBoost (assumes BatchBoost looks these hooks up by name -- confirm
# against batchboost.py).
BatchBoost.fn_error = fn_error
BatchBoost.fn_linearize = fn_linearize
BatchBoost.fn_unlinearize = fn_unlinearize

# Single BatchBoost instance shared by train_batchboost(). Only `alpha` and
# `use_cuda` come from the command line / environment; the window sizes and
# the factor are hard-coded here.
# FIXME: add arguments to command-line
BB = BatchBoost(
    alpha=args.alpha,
    window_normal=0,
    window_boost=10,
    factor=1 / 2,
    use_cuda=use_cuda,
)
315 | 
316 | 
def train_batchboost(epoch):
    """Train the global `net` for one epoch with BatchBoost.

    Every batch from `trainloader` is handed to the global `BB` object. When
    `BB.feed` returns a falsy value the batch is skipped without a training
    step; otherwise the network is trained on `BB.inputs`.

    Args:
        epoch: epoch number, only used for the console output.

    Returns:
        Tuple `(mean train loss, mean reg loss, train accuracy in percent)`.
        `reg loss` is never accumulated here, so it stays 0.
    """
    print("BATCHBOOST")
    print("\nEpoch: %d" % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0
    num_batches = 0

    BB.clear()
    for batch_idx, (new_inputs, new_targets) in enumerate(trainloader):
        num_batches = batch_idx + 1
        if use_cuda:
            new_inputs, new_targets = new_inputs.cuda(), new_targets.cuda()

        # -----> (a) feed with new information
        if not BB.feed(new_inputs, new_targets):
            continue

        # -----> (b) apply concat: BB.inputs, BB.targets
        outputs = net(BB.inputs)

        # -----> (c) calculate: loss (mixup like style \lambda)
        loss = BB.criterion(criterion, outputs)

        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += BB.inputs.size(0)  # -----> remember to use concat

        # -----> (d) calculate: accuracy
        correct += BB.correct(predicted)

        # -----> (e) pairing & mixing
        BB.mixing(criterion, outputs)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
        optimizer.step()

        progress_bar(
            batch_idx,
            len(trainloader),
            "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)"
            % (
                train_loss / (batch_idx + 1),
                reg_loss / (batch_idx + 1),
                100.0 * correct / total,
                correct,
                total,
            ),
        )
    # The former `total = len(batch_size)` fallback referenced an undefined
    # name and raised NameError whenever no batch was trained; the epsilon in
    # the accuracy denominator below already covers total == 0.
    # Means use the number of loader batches (same denominator as the progress
    # bar) instead of the last batch index, which was off by one.
    denom = max(num_batches, 1)
    return (
        train_loss / denom,
        reg_loss / denom,
        100.0 * correct / (total + 0.000001),
    )
376 | 
377 | 
378 | ### BASELINE ###################################################################
379 | 
380 | 
def train_baseline(epoch):
    """Train the global `net` for one epoch without any batch mixing.

    Args:
        epoch: epoch number, only used for the console output.

    Returns:
        Tuple `(mean train loss, mean reg loss, train accuracy in percent)`.
        `reg loss` is never accumulated here, so it stays 0.
    """
    print("BASELINE")
    print("\nEpoch: %d" % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        num_batches = batch_idx + 1
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        outputs = net(inputs)
        loss = criterion(outputs, targets)
        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += inputs.size(0)
        correct += predicted.eq(targets.data).cpu().sum().float()

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
        optimizer.step()

        progress_bar(
            batch_idx,
            len(trainloader),
            "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)"
            % (
                train_loss / (batch_idx + 1),
                reg_loss / (batch_idx + 1),
                100.0 * correct / total,
                correct,
                total,
            ),
        )
    # Average over the number of batches. `batch_idx` is the last *index*, so
    # dividing by it was off by one (and a zero division for a single batch).
    denom = max(num_batches, 1)
    return (
        train_loss / denom,
        reg_loss / denom,
        100.0 * correct / max(total, 1),
    )
422 | 
423 | 
def test(epoch):
    """Evaluate the global `net` on `testloader` and checkpoint when due.

    A checkpoint is written when the accuracy beats `best_acc` and always
    after the last epoch of the run; `best_acc` is updated accordingly.

    Args:
        epoch: number of the epoch that was just trained.

    Returns:
        Tuple `(mean test loss, test accuracy in percent)`.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            num_batches = batch_idx + 1
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.data
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            progress_bar(
                batch_idx,
                len(testloader),
                "Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (
                    test_loss / (batch_idx + 1),
                    100.0 * correct / total,
                    correct,
                    total,
                ),
            )
    acc = 100.0 * correct / max(total, 1)
    # The training loop runs `range(start_epoch, args.epoch)`, so the final
    # epoch is `args.epoch - 1`. The former `start_epoch + args.epoch - 1`
    # was never reached after resuming, so the final checkpoint was skipped.
    if epoch == args.epoch - 1 or acc > best_acc:
        checkpoint(acc, epoch)
    if acc > best_acc:
        best_acc = acc
    # Mean over the number of batches, not the last batch index (off by one).
    return (test_loss / max(num_batches, 1), acc)
460 | 
461 | 
def checkpoint(acc, epoch):
    """Write the current training state to the `checkpoint` directory.

    The saved dict holds the global `net`, the given accuracy and epoch, and
    the current torch RNG state. The file name is built from the run name and
    seed, so later saves of the same run overwrite earlier ones.
    """
    # Save checkpoint.
    print("Saving..")
    snapshot = {
        "net": net,
        "acc": acc,
        "epoch": epoch,
        "rng_state": torch.get_rng_state(),
    }
    target = "./checkpoint/ckpt.t7" + args.name + "_" + str(args.seed)
    if not os.path.isdir("checkpoint"):
        os.mkdir("checkpoint")
    torch.save(snapshot, target)
474 | 
475 | 
def adjust_learning_rate(optimizer, epoch):
    """Set the step-decayed learning rate on every parameter group.

    Starts from `args.lr` and divides it by 10 once `epoch` has reached 100
    and by 10 again once it has reached 150.
    """
    lr = args.lr
    for milestone in (100, 150):
        if epoch >= milestone:
            lr /= 10
    for group in optimizer.param_groups:
        group["lr"] = lr
485 | 
486 | 
# Create the CSV log with its header row on first use; an existing log (e.g.
# when resuming) is kept and appended to.
# Files handed to csv.writer are opened with newline="" as the csv module
# requires; otherwise extra blank rows appear on platforms that translate
# line endings.
if not os.path.exists(logname):
    with open(logname, "w", newline="") as logfile:
        logwriter = csv.writer(logfile, delimiter=",")
        logwriter.writerow(
            [
                "epoch",
                "train loss",
                "reg loss",
                "train acc",
                "test loss",
                "test acc",
            ]
        )

# The run name doubles as the selector of the training routine.
if args.name == "batchboost":
    train_func = train_batchboost
elif args.name == "mixup":
    train_func = train_mixup
else:
    train_func = train_baseline

# One training pass, one evaluation and one log row per epoch. The learning
# rate is adjusted after the epoch has been trained and evaluated.
for epoch in range(start_epoch, args.epoch):
    train_loss, reg_loss, train_acc = train_func(epoch)
    test_loss, test_acc = test(epoch)
    adjust_learning_rate(optimizer, epoch)
    with open(logname, "a", newline="") as logfile:
        logwriter = csv.writer(logfile, delimiter=",")
        logwriter.writerow(
            [epoch, train_loss, reg_loss, train_acc, test_loss, test_acc]
        )
517 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | """Some helper functions for PyTorch, including:
 2 |     - progress_bar: progress bar mimic xlua.progress.
 3 | """
 4 | import os
 5 | import sys
 6 | import time
 7 | import math
 8 | 
 9 | import torch
10 | import torch.nn as nn
11 | 
# Terminal width in columns, taken from `stty size` ("rows cols").
# When stty prints nothing usable (e.g. stdout is not a terminal) the unpack
# or the int() conversion raises ValueError and the width falls back to 80.
# The fallback used to assign a misspelled name (`term_with`), which left
# `term_width` undefined and made importing this module fail with NameError.
try:
    with os.popen("stty size", "r") as _stty:
        _, term_width = _stty.read().split()
    term_width = int(term_width)
except (ValueError, OSError):
    term_width = 80

TOTAL_BAR_LENGTH = 86.0
# Timestamps used by progress_bar() for the per-step and total durations.
last_time = time.time()
begin_time = last_time
21 | 
22 | 
def progress_bar(current, total, msg=None):
    """Draw one frame of a textual progress bar on stdout.

    Args:
        current: zero-based index of the step just finished; 0 restarts the
            total-time clock.
        total: number of steps of the whole run.
        msg: optional text appended after the timing information.

    The line ends with a carriage return so the next frame overwrites it,
    except for the last step, which ends with a newline.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    filled = int(TOTAL_BAR_LENGTH * current / total)
    unfilled = int(TOTAL_BAR_LENGTH - filled) - 1

    # Bar body: "=" for finished steps, ">" as the head, "." for the rest.
    sys.stdout.write(" [" + "=" * filled + ">" + "." * unfilled + "]")

    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    pieces = [
        "  Step: %s" % format_time(step_time),
        " | Tot: %s" % format_time(tot_time),
    ]
    if msg:
        pieces.append(" | " + msg)
    text = "".join(pieces)
    sys.stdout.write(text)

    # Pad to the terminal width (a negative count yields no padding).
    sys.stdout.write(" " * (term_width - int(TOTAL_BAR_LENGTH) - len(text) - 3))

    # Go back to the center of the bar.
    sys.stdout.write("\b" * (term_width - int(TOTAL_BAR_LENGTH / 2)))
    sys.stdout.write(" %d/%d " % (current + 1, total))

    sys.stdout.write("\r" if current < total - 1 else "\n")
    sys.stdout.flush()
65 | 
66 | 
def format_time(seconds):
    """Format a duration in seconds using its two largest non-zero units.

    Units are days (D), hours (h), minutes (m), seconds (s) and milliseconds
    (ms), e.g. 61 -> "1m1s". Returns "0ms" when every unit is zero.
    """
    remaining = seconds
    days = int(remaining / 3600 / 24)
    remaining = remaining - days * 3600 * 24
    hours = int(remaining / 3600)
    remaining = remaining - hours * 3600
    minutes = int(remaining / 60)
    remaining = remaining - minutes * 60
    whole_seconds = int(remaining)
    remaining = remaining - whole_seconds
    millis = int(remaining * 1000)

    units = (
        (days, "D"),
        (hours, "h"),
        (minutes, "m"),
        (whole_seconds, "s"),
        (millis, "ms"),
    )
    # Keep at most two units, starting from the largest non-zero one.
    shown = []
    for amount, suffix in units:
        if amount > 0 and len(shown) < 2:
            shown.append(str(amount) + suffix)
    return "".join(shown) or "0ms"
98 | 


--------------------------------------------------------------------------------