├── .gitignore
├── LICENSE
├── LICENSE-pytorch-cifar
├── README.md
├── batchboost.py
├── debug.py
├── figures
├── batches
│ ├── img_1_new_10.png
│ ├── img_1_new_11.png
│ ├── img_1_new_6.png
│ ├── img_1_new_7.png
│ ├── img_1_new_8.png
│ ├── img_1_new_9.png
│ ├── img_1_old_0.png
│ ├── img_1_old_1.png
│ ├── img_1_old_2.png
│ ├── img_1_old_3.png
│ ├── img_1_old_4.png
│ ├── img_1_old_5.png
│ ├── img_2_new_10.png
│ ├── img_2_new_11.png
│ ├── img_2_new_6.png
│ ├── img_2_new_7.png
│ ├── img_2_new_8.png
│ ├── img_2_new_9.png
│ ├── img_2_old_0.png
│ ├── img_2_old_1.png
│ ├── img_2_old_2.png
│ ├── img_2_old_3.png
│ ├── img_2_old_4.png
│ ├── img_2_old_5.png
│ ├── img_3_new_10.png
│ ├── img_3_new_11.png
│ ├── img_3_new_6.png
│ ├── img_3_new_7.png
│ ├── img_3_new_8.png
│ ├── img_3_new_9.png
│ ├── img_3_old_0.png
│ ├── img_3_old_1.png
│ ├── img_3_old_2.png
│ ├── img_3_old_3.png
│ ├── img_3_old_4.png
│ ├── img_3_old_5.png
│ ├── img_4_new_10.png
│ ├── img_4_new_11.png
│ ├── img_4_new_6.png
│ ├── img_4_new_7.png
│ ├── img_4_new_8.png
│ ├── img_4_new_9.png
│ ├── img_4_old_0.png
│ ├── img_4_old_1.png
│ ├── img_4_old_2.png
│ ├── img_4_old_3.png
│ ├── img_4_old_4.png
│ └── img_4_old_5.png
├── data_1.png
├── data_2.png
├── data_3.png
├── data_4.png
├── data_5.png
├── data_6.png
├── data_7.png
├── figure-1-loss-train-without-augment.pdf
├── figure-1-test-accuracy-without-augment.pdf
├── figure-2-test-accuracy-with-augment.pdf
├── figure-2-train-accuracy-with-augment.pdf
├── figure-abstract.pdf
├── figure-abstract.png
├── figure-abstract.svg
├── figure-feeding.pdf
├── figure-feeding.png
├── figure-feeding.svg
├── figure-multipass.png
├── for-repository-1.png
├── for-repository-2.png
└── pp_logo.jpg
├── models
├── __init__.py
├── alldnet.py
├── densenet.py
├── densenet3.py
├── densenet_efficient_multi_gpu.py
├── googlenet.py
├── lenet.py
├── mobilenet.py
├── resnet.py
├── resnext.py
└── vgg.py
├── paper
├── abstract.txt
├── arxiv-abstract-shadow.png
├── arxiv-abstract.png
├── arxiv.sty
├── batchboost.pdf
├── batchboost.tex
├── build.py
├── figure-1-loss-train-without-augment.pdf
├── figure-1-test-accuracy-without-augment.pdf
├── figure-2-test-accuracy-with-augment.pdf
├── figure-2-train-accuracy-with-augment.pdf
├── figure-abstract.pdf
├── figure-feeding.pdf
├── figure-multipass.png
├── notes_v2.md
├── references.bib
└── texput.log
├── plot.py
├── results
├── decay=1e-4
│ ├── log_EfficientNet_baseline_13.csv
│ ├── log_EfficientNet_baseline_24.csv
│ ├── log_EfficientNet_batchboost_1.csv
│ ├── log_EfficientNet_batchboost_2.csv
│ ├── log_EfficientNet_batchboost_3.csv
│ ├── log_EfficientNet_batchboost_4.csv
│ ├── log_EfficientNet_mixup_1.csv
│ ├── log_EfficientNet_mixup_2.csv
│ ├── log_EfficientNet_mixup_3.csv
│ ├── log_EfficientNet_mixup_4.csv
│ ├── loss-test-with-augment-.pdf
│ ├── loss-test-without-augment-.pdf
│ ├── test-accuracy-with-augment-.pdf
│ ├── test-accuracy-without-augment-.pdf
│ ├── train-accuracy-with-augment-.pdf
│ └── train-accuracy-without-augment-.pdf
└── decay=1e-5
│ ├── log_EfficientNet_baseline_13.csv
│ ├── log_EfficientNet_baseline_24.csv
│ ├── log_EfficientNet_batchboost_1.csv
│ ├── log_EfficientNet_batchboost_2.csv
│ ├── log_EfficientNet_batchboost_3.csv
│ ├── log_EfficientNet_batchboost_4.csv
│ ├── log_EfficientNet_mixup_1.csv
│ ├── log_EfficientNet_mixup_2.csv
│ ├── log_EfficientNet_mixup_3.csv
│ ├── log_EfficientNet_mixup_4.csv
│ ├── loss-test-with-augment-.pdf
│ ├── loss-test-without-augment-.pdf
│ ├── test-accuracy-with-augment-.pdf
│ ├── test-accuracy-without-augment-.pdf
│ ├── train-accuracy-with-augment-.pdf
│ └── train-accuracy-without-augment-.pdf
├── train.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # project
2 | .DS_Store
3 | __pycache__
4 | *.pyc
5 | checkpoint
6 | data/
7 |
8 | # paper
9 | *.aux
10 | *.log
11 | *.out
12 | *.bbl
13 | *.blg
14 |
15 | # results
16 | results/*
17 | !results/decay=1e-4
18 | !results/decay=1e-5
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Attribution-NonCommercial 4.0 International
2 |
3 | =======================================================================
4 |
5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
6 | does not provide legal services or legal advice. Distribution of
7 | Creative Commons public licenses does not create a lawyer-client or
8 | other relationship. Creative Commons makes its licenses and related
9 | information available on an "as-is" basis. Creative Commons gives no
10 | warranties regarding its licenses, any material licensed under their
11 | terms and conditions, or any related information. Creative Commons
12 | disclaims all liability for damages resulting from their use to the
13 | fullest extent possible.
14 |
15 | Using Creative Commons Public Licenses
16 |
17 | Creative Commons public licenses provide a standard set of terms and
18 | conditions that creators and other rights holders may use to share
19 | original works of authorship and other material subject to copyright
20 | and certain other rights specified in the public license below. The
21 | following considerations are for informational purposes only, are not
22 | exhaustive, and do not form part of our licenses.
23 |
24 | Considerations for licensors: Our public licenses are
25 | intended for use by those authorized to give the public
26 | permission to use material in ways otherwise restricted by
27 | copyright and certain other rights. Our licenses are
28 | irrevocable. Licensors should read and understand the terms
29 | and conditions of the license they choose before applying it.
30 | Licensors should also secure all rights necessary before
31 | applying our licenses so that the public can reuse the
32 | material as expected. Licensors should clearly mark any
33 | material not subject to the license. This includes other CC-
34 | licensed material, or material used under an exception or
35 | limitation to copyright. More considerations for licensors:
36 | wiki.creativecommons.org/Considerations_for_licensors
37 |
38 | Considerations for the public: By using one of our public
39 | licenses, a licensor grants the public permission to use the
40 | licensed material under specified terms and conditions. If
41 | the licensor's permission is not necessary for any reason--for
42 | example, because of any applicable exception or limitation to
43 | copyright--then that use is not regulated by the license. Our
44 | licenses grant only permissions under copyright and certain
45 | other rights that a licensor has authority to grant. Use of
46 | the licensed material may still be restricted for other
47 | reasons, including because others have copyright or other
48 | rights in the material. A licensor may make special requests,
49 | such as asking that all changes be marked or described.
50 | Although not required by our licenses, you are encouraged to
51 | respect those requests where reasonable. More_considerations
52 | for the public:
53 | wiki.creativecommons.org/Considerations_for_licensees
54 |
55 | =======================================================================
56 |
57 | Creative Commons Attribution-NonCommercial 4.0 International Public
58 | License
59 |
60 | By exercising the Licensed Rights (defined below), You accept and agree
61 | to be bound by the terms and conditions of this Creative Commons
62 | Attribution-NonCommercial 4.0 International Public License ("Public
63 | License"). To the extent this Public License may be interpreted as a
64 | contract, You are granted the Licensed Rights in consideration of Your
65 | acceptance of these terms and conditions, and the Licensor grants You
66 | such rights in consideration of benefits the Licensor receives from
67 | making the Licensed Material available under these terms and
68 | conditions.
69 |
70 | Section 1 -- Definitions.
71 |
72 | a. Adapted Material means material subject to Copyright and Similar
73 | Rights that is derived from or based upon the Licensed Material
74 | and in which the Licensed Material is translated, altered,
75 | arranged, transformed, or otherwise modified in a manner requiring
76 | permission under the Copyright and Similar Rights held by the
77 | Licensor. For purposes of this Public License, where the Licensed
78 | Material is a musical work, performance, or sound recording,
79 | Adapted Material is always produced where the Licensed Material is
80 | synched in timed relation with a moving image.
81 |
82 | b. Adapter's License means the license You apply to Your Copyright
83 | and Similar Rights in Your contributions to Adapted Material in
84 | accordance with the terms and conditions of this Public License.
85 |
86 | c. Copyright and Similar Rights means copyright and/or similar rights
87 | closely related to copyright including, without limitation,
88 | performance, broadcast, sound recording, and Sui Generis Database
89 | Rights, without regard to how the rights are labeled or
90 | categorized. For purposes of this Public License, the rights
91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
92 | Rights.
93 | d. Effective Technological Measures means those measures that, in the
94 | absence of proper authority, may not be circumvented under laws
95 | fulfilling obligations under Article 11 of the WIPO Copyright
96 | Treaty adopted on December 20, 1996, and/or similar international
97 | agreements.
98 |
99 | e. Exceptions and Limitations means fair use, fair dealing, and/or
100 | any other exception or limitation to Copyright and Similar Rights
101 | that applies to Your use of the Licensed Material.
102 |
103 | f. Licensed Material means the artistic or literary work, database,
104 | or other material to which the Licensor applied this Public
105 | License.
106 |
107 | g. Licensed Rights means the rights granted to You subject to the
108 | terms and conditions of this Public License, which are limited to
109 | all Copyright and Similar Rights that apply to Your use of the
110 | Licensed Material and that the Licensor has authority to license.
111 |
112 | h. Licensor means the individual(s) or entity(ies) granting rights
113 | under this Public License.
114 |
115 | i. NonCommercial means not primarily intended for or directed towards
116 | commercial advantage or monetary compensation. For purposes of
117 | this Public License, the exchange of the Licensed Material for
118 | other material subject to Copyright and Similar Rights by digital
119 | file-sharing or similar means is NonCommercial provided there is
120 | no payment of monetary compensation in connection with the
121 | exchange.
122 |
123 | j. Share means to provide material to the public by any means or
124 | process that requires permission under the Licensed Rights, such
125 | as reproduction, public display, public performance, distribution,
126 | dissemination, communication, or importation, and to make material
127 | available to the public including in ways that members of the
128 | public may access the material from a place and at a time
129 | individually chosen by them.
130 |
131 | k. Sui Generis Database Rights means rights other than copyright
132 | resulting from Directive 96/9/EC of the European Parliament and of
133 | the Council of 11 March 1996 on the legal protection of databases,
134 | as amended and/or succeeded, as well as other essentially
135 | equivalent rights anywhere in the world.
136 |
137 | l. You means the individual or entity exercising the Licensed Rights
138 | under this Public License. Your has a corresponding meaning.
139 |
140 | Section 2 -- Scope.
141 |
142 | a. License grant.
143 |
144 | 1. Subject to the terms and conditions of this Public License,
145 | the Licensor hereby grants You a worldwide, royalty-free,
146 | non-sublicensable, non-exclusive, irrevocable license to
147 | exercise the Licensed Rights in the Licensed Material to:
148 |
149 | a. reproduce and Share the Licensed Material, in whole or
150 | in part, for NonCommercial purposes only; and
151 |
152 | b. produce, reproduce, and Share Adapted Material for
153 | NonCommercial purposes only.
154 |
155 | 2. Exceptions and Limitations. For the avoidance of doubt, where
156 | Exceptions and Limitations apply to Your use, this Public
157 | License does not apply, and You do not need to comply with
158 | its terms and conditions.
159 |
160 | 3. Term. The term of this Public License is specified in Section
161 | 6(a).
162 |
163 | 4. Media and formats; technical modifications allowed. The
164 | Licensor authorizes You to exercise the Licensed Rights in
165 | all media and formats whether now known or hereafter created,
166 | and to make technical modifications necessary to do so. The
167 | Licensor waives and/or agrees not to assert any right or
168 | authority to forbid You from making technical modifications
169 | necessary to exercise the Licensed Rights, including
170 | technical modifications necessary to circumvent Effective
171 | Technological Measures. For purposes of this Public License,
172 | simply making modifications authorized by this Section 2(a)
173 | (4) never produces Adapted Material.
174 |
175 | 5. Downstream recipients.
176 |
177 | a. Offer from the Licensor -- Licensed Material. Every
178 | recipient of the Licensed Material automatically
179 | receives an offer from the Licensor to exercise the
180 | Licensed Rights under the terms and conditions of this
181 | Public License.
182 |
183 | b. No downstream restrictions. You may not offer or impose
184 | any additional or different terms or conditions on, or
185 | apply any Effective Technological Measures to, the
186 | Licensed Material if doing so restricts exercise of the
187 | Licensed Rights by any recipient of the Licensed
188 | Material.
189 |
190 | 6. No endorsement. Nothing in this Public License constitutes or
191 | may be construed as permission to assert or imply that You
192 | are, or that Your use of the Licensed Material is, connected
193 | with, or sponsored, endorsed, or granted official status by,
194 | the Licensor or others designated to receive attribution as
195 | provided in Section 3(a)(1)(A)(i).
196 |
197 | b. Other rights.
198 |
199 | 1. Moral rights, such as the right of integrity, are not
200 | licensed under this Public License, nor are publicity,
201 | privacy, and/or other similar personality rights; however, to
202 | the extent possible, the Licensor waives and/or agrees not to
203 | assert any such rights held by the Licensor to the limited
204 | extent necessary to allow You to exercise the Licensed
205 | Rights, but not otherwise.
206 |
207 | 2. Patent and trademark rights are not licensed under this
208 | Public License.
209 |
210 | 3. To the extent possible, the Licensor waives any right to
211 | collect royalties from You for the exercise of the Licensed
212 | Rights, whether directly or through a collecting society
213 | under any voluntary or waivable statutory or compulsory
214 | licensing scheme. In all other cases the Licensor expressly
215 | reserves any right to collect such royalties, including when
216 | the Licensed Material is used other than for NonCommercial
217 | purposes.
218 |
219 | Section 3 -- License Conditions.
220 |
221 | Your exercise of the Licensed Rights is expressly made subject to the
222 | following conditions.
223 |
224 | a. Attribution.
225 |
226 | 1. If You Share the Licensed Material (including in modified
227 | form), You must:
228 |
229 | a. retain the following if it is supplied by the Licensor
230 | with the Licensed Material:
231 |
232 | i. identification of the creator(s) of the Licensed
233 | Material and any others designated to receive
234 | attribution, in any reasonable manner requested by
235 | the Licensor (including by pseudonym if
236 | designated);
237 |
238 | ii. a copyright notice;
239 |
240 | iii. a notice that refers to this Public License;
241 |
242 | iv. a notice that refers to the disclaimer of
243 | warranties;
244 |
245 | v. a URI or hyperlink to the Licensed Material to the
246 | extent reasonably practicable;
247 |
248 | b. indicate if You modified the Licensed Material and
249 | retain an indication of any previous modifications; and
250 |
251 | c. indicate the Licensed Material is licensed under this
252 | Public License, and include the text of, or the URI or
253 | hyperlink to, this Public License.
254 |
255 | 2. You may satisfy the conditions in Section 3(a)(1) in any
256 | reasonable manner based on the medium, means, and context in
257 | which You Share the Licensed Material. For example, it may be
258 | reasonable to satisfy the conditions by providing a URI or
259 | hyperlink to a resource that includes the required
260 | information.
261 |
262 | 3. If requested by the Licensor, You must remove any of the
263 | information required by Section 3(a)(1)(A) to the extent
264 | reasonably practicable.
265 |
266 | 4. If You Share Adapted Material You produce, the Adapter's
267 | License You apply must not prevent recipients of the Adapted
268 | Material from complying with this Public License.
269 |
270 | Section 4 -- Sui Generis Database Rights.
271 |
272 | Where the Licensed Rights include Sui Generis Database Rights that
273 | apply to Your use of the Licensed Material:
274 |
275 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
276 | to extract, reuse, reproduce, and Share all or a substantial
277 | portion of the contents of the database for NonCommercial purposes
278 | only;
279 |
280 | b. if You include all or a substantial portion of the database
281 | contents in a database in which You have Sui Generis Database
282 | Rights, then the database in which You have Sui Generis Database
283 | Rights (but not its individual contents) is Adapted Material; and
284 |
285 | c. You must comply with the conditions in Section 3(a) if You Share
286 | all or a substantial portion of the contents of the database.
287 |
288 | For the avoidance of doubt, this Section 4 supplements and does not
289 | replace Your obligations under this Public License where the Licensed
290 | Rights include other Copyright and Similar Rights.
291 |
292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
293 |
294 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
304 |
305 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
314 |
315 | c. The disclaimer of warranties and limitation of liability provided
316 | above shall be interpreted in a manner that, to the extent
317 | possible, most closely approximates an absolute disclaimer and
318 | waiver of all liability.
319 |
320 | Section 6 -- Term and Termination.
321 |
322 | a. This Public License applies for the term of the Copyright and
323 | Similar Rights licensed here. However, if You fail to comply with
324 | this Public License, then Your rights under this Public License
325 | terminate automatically.
326 |
327 | b. Where Your right to use the Licensed Material has terminated under
328 | Section 6(a), it reinstates:
329 |
330 | 1. automatically as of the date the violation is cured, provided
331 | it is cured within 30 days of Your discovery of the
332 | violation; or
333 |
334 | 2. upon express reinstatement by the Licensor.
335 |
336 | For the avoidance of doubt, this Section 6(b) does not affect any
337 | right the Licensor may have to seek remedies for Your violations
338 | of this Public License.
339 |
340 | c. For the avoidance of doubt, the Licensor may also offer the
341 | Licensed Material under separate terms or conditions or stop
342 | distributing the Licensed Material at any time; however, doing so
343 | will not terminate this Public License.
344 |
345 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
346 | License.
347 |
348 | Section 7 -- Other Terms and Conditions.
349 |
350 | a. The Licensor shall not be bound by any additional or different
351 | terms or conditions communicated by You unless expressly agreed.
352 |
353 | b. Any arrangements, understandings, or agreements regarding the
354 | Licensed Material not stated herein are separate from and
355 | independent of the terms and conditions of this Public License.
356 |
357 | Section 8 -- Interpretation.
358 |
359 | a. For the avoidance of doubt, this Public License does not, and
360 | shall not be interpreted to, reduce, limit, restrict, or impose
361 | conditions on any use of the Licensed Material that could lawfully
362 | be made without permission under this Public License.
363 |
364 | b. To the extent possible, if any provision of this Public License is
365 | deemed unenforceable, it shall be automatically reformed to the
366 | minimum extent necessary to make it enforceable. If the provision
367 | cannot be reformed, it shall be severed from this Public License
368 | without affecting the enforceability of the remaining terms and
369 | conditions.
370 |
371 | c. No term or condition of this Public License will be waived and no
372 | failure to comply consented to unless expressly agreed to by the
373 | Licensor.
374 |
375 | d. Nothing in this Public License constitutes or may be interpreted
376 | as a limitation upon, or waiver of, any privileges and immunities
377 | that apply to the Licensor or You, including from the legal
378 | processes of any jurisdiction or authority.
379 |
380 | =======================================================================
381 |
382 | Creative Commons is not a party to its public
383 | licenses. Notwithstanding, Creative Commons may elect to apply one of
384 | its public licenses to material it publishes and in those instances
385 | will be considered the “Licensor.” The text of the Creative Commons
386 | public licenses is dedicated to the public domain under the CC0 Public
387 | Domain Dedication. Except for the limited purpose of indicating that
388 | material is shared under a Creative Commons public license or as
389 | otherwise permitted by the Creative Commons policies published at
390 | creativecommons.org/policies, Creative Commons does not authorize the
391 | use of the trademark "Creative Commons" or any other trademark or logo
392 | of Creative Commons without its prior written consent including,
393 | without limitation, in connection with any unauthorized modifications
394 | to any of its public licenses or any other arrangements,
395 | understandings, or agreements concerning use of licensed material. For
396 | the avoidance of doubt, this paragraph does not form part of the
397 | public licenses.
398 |
399 | Creative Commons may be contacted at creativecommons.org.
400 |
--------------------------------------------------------------------------------
/LICENSE-pytorch-cifar:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 liukuang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # batchboost (currently a draft)
2 | By [Maciej A. Czyzewski](https://github.com/maciejczyzewski)
3 |
4 | This repository contains the implementation used for the results in
5 | our paper (https://arxiv.org/abs/2001.07627).
6 |
7 | [](https://paperswithcode.com/sota/image-classification-on-cifar-10?p=batchboost-regularization-for-stabilizing)
8 |
9 | ---
10 |
11 | _Batchboost_ is a simple technique to accelerate ML model training by adaptively feeding mini-batches with artificial samples which are created by mixing two examples from the previous step — favoring pairs that produce difficult artificial examples.
12 |
13 |
14 |

15 |

16 |
17 |
18 | ## Introduction
19 |
20 | > **UPDATE 24/01/2020:** Thank you for your e-mails asking about _batchboost_. As promised, I will update the results soon and present comparisons with other solutions (paperswithcode.com). This is a draft, and the research needs to be continued before it is complete work; if you are interested in helping, please contact me.
21 |
22 | ### Overview
23 |
24 | In this research, we state the hypothesis that mixing many images together can
25 | be more effective than just two. To make it efficient, we propose a new method of
26 | creating mini-batches, where each sample from dataset is propagated with
27 | subsequent iterations with less and less importance until the end of learning
28 | process.
29 |
30 | Batchboost pipeline has three stages:
31 | (a) _pairing_: method of selecting two samples from previous step.
32 | (b) _mixing_: method of creating a new artificial example from two selected samples.
33 | (c) _feeding_: constructing training mini-batch with created examples and new samples from dataset (concat with ratio γ).
34 | Note that sample from dataset propagates with subsequent iterations with less and less importance until the end of training.
35 |
36 |
41 |
42 | ### Results
43 |
44 | > **COMING:** comparison of _batchboost_ applied: to different architectures, to different problems (small datasets), for training GAN-s, with/without augmentation, with different parameters {window_normal, window_boost, factor} (hyperparameter tuning).
45 |
46 | The results will be updated and saved to [`results/`](https://github.com/maciejczyzewski/batchboost/tree/master/results).
47 |
48 | Underfitting & Stabilizing Training
49 |
50 |

51 |
52 |
53 | _Figure 1:_ Evaluation on _CIFAR-10_, for _EfficientNet-b0_ and
54 | _SGD(weight-decay=10e-4, lr=0.1)_ (as recommended in the _mixup_ research), same
55 | parameters for each model. As a result, the models behave differently, although
56 | they differ only in the method of constructing the mini-batch.
57 |
58 | Overfitting (comparison to mixup)
59 |
60 |

61 |
62 |
63 | _Figure 2:_ _batchboost_ is a new state-of-the-art because it is slightly better than _mixup_ (here _mixup_ has been tuned for best parameters, _batchboost_ uses configuration from _Figure 1_).
64 |
65 | ## Requirements and Installation
66 |
67 | * A computer running macOS or Linux
68 | * For training new models, you'll also need a NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl)
69 | * Python version 3.6
70 | * A [PyTorch installation](http://pytorch.org/)
71 |
72 | ## Training
73 |
74 | Use `python train.py` to train a new model.
75 | Here is an example setting:
76 | ```bash
77 | # for batchboost
78 | $ CUDA_VISIBLE_DEVICES=0 python3 train.py --decay=1e-4 --no-augment --seed=1 \
79 | --name=batchboost --model=efficientnet-b0 --epoch=30
80 | # for mixup
81 | $ CUDA_VISIBLE_DEVICES=0 python3 train.py --decay=1e-4 --no-augment --seed=1 \
82 | --name=mixup --model=efficientnet-b0 --epoch=30
83 | ```
84 |
85 | ## Using
86 |
87 | File [`batchboost.py`](https://github.com/maciejczyzewski/batchboost/tree/master/batchboost.py) should be portable, just copy into your path and write the following:
88 |
89 | ```python3
90 | from batchboost import BatchBoost
91 |
92 | # how to calculate error per sample?
93 | def fn_error(outputs, targets):
94 | logsoftmax = nn.LogSoftmax(dim=1)
95 | return torch.sum(-outputs * logsoftmax(targets), dim=1)
96 |
97 | # how to represent target in linear form (label -> one-hot)
98 | def fn_linearize(x, num_classes=10):
99 | _x = torch.zeros(x.size(0), num_classes)
100 | _x[range(x.size(0)), x] = 1
101 | return _x
102 |
103 | # how to get back (one-hot -> label)
104 | def fn_unlinearize(x):
105 | _, _x = torch.max(x, 1)
106 | return _x
107 |
108 | BatchBoost.fn_error = fn_error
109 | BatchBoost.fn_linearize = fn_linearize
110 | BatchBoost.fn_unlinearize = fn_unlinearize
111 |
112 | # if you don't want to train everything using `batchboost` method
113 | # epoch: [... -> window_normal -> window_boost -> window_normal -> ...]
114 | # ( batches ) ( batches ) ( batches )
115 |
116 | BB = BatchBoost(
117 | alpha=args.alpha, # alpha parameter for mixup
118 | window_normal=0, # consecutive batch fits: normal
119 | window_boost=10, # : batchboost
120 | factor=1 / 2, # ratio between new information and feeded/mixed
121 | use_cuda=True,
122 | )
123 |
124 | ...
125 | ```
126 |
127 | And slightly change your training loop:
128 |
129 | ```python3
130 | ...
131 |
132 | for batch_idx, (new_inputs, new_targets) in enumerate(trainloader):
133 | if use_cuda:
134 | new_inputs, new_targets = new_inputs.cuda(), new_targets.cuda()
135 |
136 | # -----> (a) feed with new information
137 | if not BB.feed(new_inputs, new_targets):
138 | continue
139 |
140 | # -----> (b) apply concat: BB.inputs, BB.targets
141 | outputs = net(BB.inputs)
142 |
143 | # -----> (c) calculate: loss (mixup like style \lambda)
144 | loss = BB.criterion(criterion, outputs)
145 |
146 | train_loss += loss.data
147 | _, predicted = torch.max(outputs.data, 1)
148 | total += BB.inputs.size(0) # -----> remember to use concat
149 |
150 | # -----> (d) calculate: accuracy
151 | correct += BB.correct(predicted)
152 |
153 | # -----> (e) pairing & mixing
154 | BB.mixing(criterion, outputs)
155 |
156 | ...
157 | ```
158 |
159 | ## Citation
160 |
161 | If you find _batchboost_ useful in your research, please consider citing:
162 |
163 | ```bibtex
164 | @misc{czyzewski2020batchboost,
165 | title={batchboost: regularization for stabilizing training with resistance to underfitting & overfitting},
166 | author={Maciej A. Czyzewski},
167 | year={2020},
168 | eprint={2001.07627},
169 | archivePrefix={arXiv},
170 | primaryClass={cs.LG}
171 | }
172 | ```
173 |
174 | _An interesting topic for further research and discussion are
175 | combination of batchboost and existing methods._
176 |
177 | ## License
178 |
179 | Implemented as fork of ["mixup-cifar10 / facebook"](https://github.com/facebookresearch/mixup-cifar10).
180 | This project is CC-BY-NC-licensed.
181 |
182 |
183 |
--------------------------------------------------------------------------------
/batchboost.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from torch.autograd import Variable
5 |
6 |
class BatchBoost:
    """
    batchboost: regularization for stabilizing training
    with resistance to underfitting & overfitting
    Maciej A. Czyzewski
    https://arxiv.org/abs/2001.07627

    Carries a mixed "half-batch" across training iterations: feed()
    concatenates each new loader batch onto the carried one, and
    mixing() pairs the highest-error samples with the lowest-error
    ones and mixes them (mixup-style) to form the carry-over for the
    next iteration.
    """

    def __init__(
        self,
        alpha=1.0,  # parameter of the Beta(alpha, alpha) mixup distribution
        window_normal=0,  # consecutive plain-batch iterations per cycle
        window_boost=10,  # consecutive boosted iterations per cycle
        factor=1 / 3,  # fraction of the batch that gets paired & mixed
        use_cuda=False,
        debug=False,
    ):
        self.alpha = alpha
        self.window_normal = window_normal
        self.window_boost = window_boost
        self.factor = factor
        self.use_cuda = use_cuda
        self.debug = debug
        self.clear()

        if self.debug:
            print(
                f"[BatchBoost] alpha={alpha} ratio={factor} \
window_normal={window_normal} window_boost={window_boost}"
            )

    def clear(self):
        """Reset all carried state (called from __init__)."""
        if self.debug:
            print(f"[BatchBoost] resetting")
        # lambda == 1 means "no mixing yet": y1 fully weighted, y2 ignored
        self.mixup_lambda = 1
        self.inputs = None
        self.y1 = self.y2 = None
        # countdown counters for the normal/boost windows
        self.iter_normal = self.window_normal
        self.iter_boost = self.window_boost

    @staticmethod
    def mixup(x, y, index_left, index_right, mixup_lambda=1.0):
        """Returns mixed inputs, pairs of targets, and lambda
        https://arxiv.org/abs/1710.09412"""
        mixed_x = (
            mixup_lambda * x[index_left, :]
            + (1 - mixup_lambda) * x[index_right, :]
        )
        # mixed_y = (mixup_lambda * y[index_left, :] +
        #            (1 - mixup_lambda) * y[index_right, :])
        # return mixed_x, mixed_y, mixup_lambda
        y1, y2 = y[index_left], y[index_right]
        return mixed_x, y1, y2

    @staticmethod
    def fn_error(outputs, targets):
        """Per-sample error used to rank samples for pairing.

        NOTE(review): log-softmax is applied to *targets* and multiplied
        by raw *outputs*; the standard soft-label cross entropy is
        -targets * log_softmax(outputs). The operands look swapped —
        confirm this is intended before changing.
        """
        logsoftmax = nn.LogSoftmax(dim=1)
        return torch.sum(-outputs * logsoftmax(targets), dim=1)

    @staticmethod
    def fn_linearize(x, num_classes=10):
        """Convert integer class labels (N,) to one-hot (N, num_classes)."""
        _x = torch.zeros(x.size(0), num_classes)
        _x[range(x.size(0)), x] = 1
        return _x

    @staticmethod
    def fn_unlinearize(x):
        """Convert one-hot / soft labels back to integer labels (argmax)."""
        _, _x = torch.max(x, 1)
        return _x

    def criterion(self, criterion, outputs):
        """Mixup loss: lambda * loss(y1) + (1 - lambda) * loss(y2)."""
        _y1 = BatchBoost.fn_unlinearize(self.y1)
        _y2 = BatchBoost.fn_unlinearize(self.y2)
        return self.mixup_lambda * criterion(outputs, _y1) + (
            1 - self.mixup_lambda
        ) * criterion(outputs, _y2)

    def correct(self, predicted):
        """Mixup-weighted count of correct predictions."""
        _y1 = BatchBoost.fn_unlinearize(self.y1)
        _y2 = BatchBoost.fn_unlinearize(self.y2)
        return (
            self.mixup_lambda * predicted.eq(_y1).cpu().sum().float()
            + (1 - self.mixup_lambda) * predicted.eq(_y2).cpu().sum().float()
        )

    def pairing(self, errvec):
        """Pair indices of the `factor` highest-error samples with the
        `factor` lowest-error ones (second half reversed so the worst
        sample meets the best one)."""
        batch_size = errvec.size()[0]
        _, index = torch.sort(errvec, dim=0, descending=True)
        return (
            index[0 : int(batch_size * self.factor)],
            reversed(index[batch_size - int(batch_size * self.factor) :]),
        )

    def mixing(self, criterion, outputs):
        """Build the carry-over half-batch for the next iteration."""
        # restart the normal/boost cycle once both windows are exhausted
        if self.iter_boost + self.iter_normal == 0:
            self.iter_normal = self.window_normal
            self.iter_boost = self.window_boost
        if self.iter_boost > 0:
            if self.debug:
                print("[BatchBoost]: half-batch + feed-batch")
            errvec = BatchBoost.fn_error(outputs, self.targets)
            index_left, index_right = self.pairing(errvec)

            # draw a fresh mixing coefficient for this carry-over
            if self.alpha > 0:
                self.mixup_lambda = np.random.beta(self.alpha, self.alpha)
            else:
                self.mixup_lambda = 1

            self.inputs, self.y1, self.y2 = BatchBoost.mixup(
                self.inputs,
                y=self.targets,
                index_left=index_right,
                index_right=index_left,
                mixup_lambda=self.mixup_lambda,
            )
            self.iter_boost -= 1
        elif self.iter_normal > 0:
            if self.debug:
                print("[BatchBoost] normal batch")
            # drop the mixed head, keep only the fresh tail of the batch
            batch_size = self.inputs.size(0)
            self.inputs = self.inputs[int(batch_size * self.factor) :]
            self.y1 = self.y1[int(batch_size * self.factor) :]
            self.y2 = self.y2[int(batch_size * self.factor) :]
            self.mixup_lambda = 1
            self.iter_normal -= 1

    def feed(self, new_inputs, _new_targets):
        """Append a fresh loader batch to the carried half-batch.

        Returns False on the very first call (nothing carried yet, the
        caller is expected to skip this iteration), True otherwise.
        """
        new_targets = Variable(BatchBoost.fn_linearize(_new_targets))
        if self.use_cuda:
            new_targets = new_targets.cuda()
        # no mixing (first iteration)
        if self.inputs is None:
            self.inputs = Variable(new_inputs)
            self.y1 = new_targets
            self.y2 = new_targets
            return False
        # concat
        self.inputs = torch.cat([self.inputs, new_inputs], dim=0)
        self.y1 = torch.cat([self.y1, new_targets], dim=0)
        self.y2 = torch.cat([self.y2, new_targets], dim=0)
        # virtual targets (soft labels consistent with the current lambda)
        self.targets = (
            self.mixup_lambda * self.y1 + (1 - self.mixup_lambda) * self.y2
        )
        return True
152 |
--------------------------------------------------------------------------------
/debug.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | import torchvision.transforms as transforms
6 | import torchvision.datasets as datasets
7 |
8 | # FIXME: move to models and split for CIFAR-10/Fashion-MNIST and others
9 |
10 |
class ResNet100k(nn.Module):
    """Small ~100k-parameter CNN for 28x28 single-channel images.

    Despite the name this is a plain two-conv-layer network (no residual
    connections); the name is kept for backward compatibility.

    Returns raw logits of shape (batch, num_classes).
    """

    def __init__(self, num_classes=10):
        super(ResNet100k, self).__init__()
        self.num_filter1 = 8
        self.num_filter2 = 16
        self.num_padding = 2
        # input is 28x28; padding=2 gives "same" output size for k=5
        self.conv1 = nn.Conv2d(1, self.num_filter1, 5, padding=self.num_padding)
        nn.init.xavier_uniform_(self.conv1.weight)
        # feature map is 14x14 after the first 2x2 max-pool
        self.conv2 = nn.Conv2d(
            self.num_filter1, self.num_filter2, 5, padding=self.num_padding
        )
        nn.init.xavier_uniform_(self.conv2.weight)
        # feature map is 7x7 after the second 2x2 max-pool
        self.fc = nn.Linear(self.num_filter2 * 7 * 7, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # flatten per sample: anchoring on size(0) preserves the batch
        # dimension even if an unexpected spatial size slips through,
        # instead of silently folding samples together (view(-1, C*7*7))
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
        # return F.log_softmax(x, dim=1)
        # return F.softmax(x, dim=1)
38 |
39 |
class ResNet100kv2(nn.Module):
    """Small CNN (two conv-BN-pool blocks + two FC layers) for 28x28
    single-channel inputs, 10 classes.

    Despite the name this is not a residual network. Returns raw logits
    of shape (batch, 10).
    """

    def __init__(self):
        super(ResNet100kv2, self).__init__()

        # block 1: 1 -> 16 channels; padding=2 keeps 28x28 for k=5
        self.cnn1 = nn.Conv2d(
            in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2
        )
        self.relu1 = nn.ReLU()
        self.norm1 = nn.BatchNorm2d(16)
        # use the non-deprecated in-place initializer (xavier_uniform_),
        # consistent with ResNet100k above; identical behavior, no warning
        nn.init.xavier_uniform_(self.cnn1.weight)

        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # block 2: 16 -> 32 channels; padding=2 with k=3 grows 14 -> 16
        self.cnn2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=2
        )
        self.relu2 = nn.ReLU()
        self.norm2 = nn.BatchNorm2d(32)
        nn.init.xavier_uniform_(self.cnn2.weight)

        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # 32 channels * 8x8 spatial = 2048 flattened features
        self.fc1 = nn.Linear(2048, 128)
        self.fcrelu = nn.ReLU()

        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return class logits of shape (batch, 10)."""
        out = self.cnn1(x)
        out = self.relu1(out)
        out = self.norm1(out)

        out = self.maxpool1(out)

        out = self.cnn2(out)
        out = self.relu2(out)
        out = self.norm2(out)

        out = self.maxpool2(out)

        # flatten per sample, preserving the batch dimension
        out = out.view(out.size(0), -1)

        out = self.fc1(out)
        out = self.fcrelu(out)

        out = self.fc2(out)
        return out
87 |
88 |
def FashionMNIST_loaders(args):
    """Build (train, test) DataLoaders for Fashion-MNIST.

    Training data is optionally augmented (random crop + horizontal
    flip) when ``args.augment`` is truthy; the train loader uses
    ``args.batch_size`` with shuffling, the test loader a fixed batch
    size of 100 without shuffling. Data is downloaded to ./data on
    first use.
    """
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    if args.augment:
        transform_train = transforms.Compose(
            [
                transforms.RandomCrop(28, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )
    else:
        transform_train = transform
    transform_test = transform

    trainloader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(
            root="./data", train=True, download=True, transform=transform_train
        ),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=8,
    )

    testloader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(
            root="./data", train=False, download=True, transform=transform_test
        ),
        batch_size=100,
        shuffle=False,
        num_workers=8,
    )

    return trainloader, testloader
122 |
--------------------------------------------------------------------------------
/figures/batches/img_1_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_10.png
--------------------------------------------------------------------------------
/figures/batches/img_1_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_11.png
--------------------------------------------------------------------------------
/figures/batches/img_1_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_6.png
--------------------------------------------------------------------------------
/figures/batches/img_1_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_7.png
--------------------------------------------------------------------------------
/figures/batches/img_1_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_8.png
--------------------------------------------------------------------------------
/figures/batches/img_1_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_9.png
--------------------------------------------------------------------------------
/figures/batches/img_1_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_0.png
--------------------------------------------------------------------------------
/figures/batches/img_1_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_1.png
--------------------------------------------------------------------------------
/figures/batches/img_1_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_2.png
--------------------------------------------------------------------------------
/figures/batches/img_1_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_3.png
--------------------------------------------------------------------------------
/figures/batches/img_1_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_4.png
--------------------------------------------------------------------------------
/figures/batches/img_1_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_5.png
--------------------------------------------------------------------------------
/figures/batches/img_2_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_10.png
--------------------------------------------------------------------------------
/figures/batches/img_2_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_11.png
--------------------------------------------------------------------------------
/figures/batches/img_2_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_6.png
--------------------------------------------------------------------------------
/figures/batches/img_2_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_7.png
--------------------------------------------------------------------------------
/figures/batches/img_2_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_8.png
--------------------------------------------------------------------------------
/figures/batches/img_2_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_9.png
--------------------------------------------------------------------------------
/figures/batches/img_2_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_0.png
--------------------------------------------------------------------------------
/figures/batches/img_2_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_1.png
--------------------------------------------------------------------------------
/figures/batches/img_2_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_2.png
--------------------------------------------------------------------------------
/figures/batches/img_2_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_3.png
--------------------------------------------------------------------------------
/figures/batches/img_2_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_4.png
--------------------------------------------------------------------------------
/figures/batches/img_2_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_5.png
--------------------------------------------------------------------------------
/figures/batches/img_3_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_10.png
--------------------------------------------------------------------------------
/figures/batches/img_3_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_11.png
--------------------------------------------------------------------------------
/figures/batches/img_3_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_6.png
--------------------------------------------------------------------------------
/figures/batches/img_3_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_7.png
--------------------------------------------------------------------------------
/figures/batches/img_3_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_8.png
--------------------------------------------------------------------------------
/figures/batches/img_3_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_9.png
--------------------------------------------------------------------------------
/figures/batches/img_3_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_0.png
--------------------------------------------------------------------------------
/figures/batches/img_3_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_1.png
--------------------------------------------------------------------------------
/figures/batches/img_3_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_2.png
--------------------------------------------------------------------------------
/figures/batches/img_3_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_3.png
--------------------------------------------------------------------------------
/figures/batches/img_3_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_4.png
--------------------------------------------------------------------------------
/figures/batches/img_3_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_5.png
--------------------------------------------------------------------------------
/figures/batches/img_4_new_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_10.png
--------------------------------------------------------------------------------
/figures/batches/img_4_new_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_11.png
--------------------------------------------------------------------------------
/figures/batches/img_4_new_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_6.png
--------------------------------------------------------------------------------
/figures/batches/img_4_new_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_7.png
--------------------------------------------------------------------------------
/figures/batches/img_4_new_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_8.png
--------------------------------------------------------------------------------
/figures/batches/img_4_new_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_9.png
--------------------------------------------------------------------------------
/figures/batches/img_4_old_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_0.png
--------------------------------------------------------------------------------
/figures/batches/img_4_old_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_1.png
--------------------------------------------------------------------------------
/figures/batches/img_4_old_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_2.png
--------------------------------------------------------------------------------
/figures/batches/img_4_old_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_3.png
--------------------------------------------------------------------------------
/figures/batches/img_4_old_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_4.png
--------------------------------------------------------------------------------
/figures/batches/img_4_old_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_5.png
--------------------------------------------------------------------------------
/figures/data_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_1.png
--------------------------------------------------------------------------------
/figures/data_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_2.png
--------------------------------------------------------------------------------
/figures/data_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_3.png
--------------------------------------------------------------------------------
/figures/data_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_4.png
--------------------------------------------------------------------------------
/figures/data_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_5.png
--------------------------------------------------------------------------------
/figures/data_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_6.png
--------------------------------------------------------------------------------
/figures/data_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_7.png
--------------------------------------------------------------------------------
/figures/figure-1-loss-train-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-1-loss-train-without-augment.pdf
--------------------------------------------------------------------------------
/figures/figure-1-test-accuracy-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-1-test-accuracy-without-augment.pdf
--------------------------------------------------------------------------------
/figures/figure-2-test-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-2-test-accuracy-with-augment.pdf
--------------------------------------------------------------------------------
/figures/figure-2-train-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-2-train-accuracy-with-augment.pdf
--------------------------------------------------------------------------------
/figures/figure-abstract.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-abstract.pdf
--------------------------------------------------------------------------------
/figures/figure-abstract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-abstract.png
--------------------------------------------------------------------------------
/figures/figure-feeding.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-feeding.pdf
--------------------------------------------------------------------------------
/figures/figure-feeding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-feeding.png
--------------------------------------------------------------------------------
/figures/figure-multipass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-multipass.png
--------------------------------------------------------------------------------
/figures/for-repository-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/for-repository-1.png
--------------------------------------------------------------------------------
/figures/for-repository-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/for-repository-2.png
--------------------------------------------------------------------------------
/figures/pp_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/pp_logo.jpg
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
from .vgg import *
from .lenet import *
from .resnet import *
from .resnext import *
from .densenet import *
from .googlenet import *
from .mobilenet import *
from .densenet_efficient_multi_gpu import DenseNet190
# NOTE(review): this import shadows the DenseNet190 imported on the
# previous line from densenet_efficient_multi_gpu — only the densenet3
# implementation is actually exported. Confirm which one is intended
# and drop the other.
from .densenet3 import DenseNet190
10 |
--------------------------------------------------------------------------------
/models/alldnet.py:
--------------------------------------------------------------------------------
1 | '''LeNet in PyTorch.'''
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 |
class AllDNet(nn.Module):
    """LeNet-style CNN for 32x32 RGB inputs that also exposes
    intermediate activations.

    forward() returns ``(logits, activations)`` where ``activations``
    contains, in order: the flattened conv features (N, 400) and the
    post-ReLU outputs of fc1 (N, 120) and fc2 (N, 84).
    """

    def __init__(self):
        super(AllDNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return (class logits, list of tapped activations)."""
        h = F.max_pool2d(F.relu(self.conv1(x)), 2)
        h = F.max_pool2d(F.relu(self.conv2(h)), 2)
        h = h.view(h.size(0), -1)
        taps = [h]
        h = F.relu(self.fc1(h))
        taps.append(h)
        h = F.relu(self.fc2(h))
        taps.append(h)
        return self.fc3(h), taps
33 |
34 |
--------------------------------------------------------------------------------
/models/densenet.py:
--------------------------------------------------------------------------------
1 | '''DenseNet in PyTorch.'''
2 | import math
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | from torch.autograd import Variable
9 |
10 |
class Bottleneck(nn.Module):
    """DenseNet bottleneck layer: BN-ReLU-1x1conv then BN-ReLU-3x3conv,
    concatenating its growth_rate new channels in front of the input."""

    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        inner = 4*growth_rate  # bottleneck width
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, inner, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(inner)
        self.conv2 = nn.Conv2d(inner, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        y = self.conv1(F.relu(self.bn1(x)))
        y = self.conv2(F.relu(self.bn2(y)))
        # New features first, then the untouched input channels.
        return torch.cat([y, x], 1)
24 |
25 |
class Transition(nn.Module):
    """DenseNet transition layer: BN-ReLU-1x1conv to squeeze the channel
    count, then 2x2 average pooling to halve the spatial resolution."""

    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        return F.avg_pool2d(self.conv(F.relu(self.bn(x))), 2)
36 |
37 |
class DenseNet(nn.Module):
    """Densely-connected CNN for 32x32 RGB inputs: four dense stages with a
    channel-compressing Transition after each of the first three."""

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        planes = 2*growth_rate
        self.conv1 = nn.Conv2d(3, planes, kernel_size=3, padding=1, bias=False)

        # Stage 1: dense block, then compress channels by `reduction`.
        self.dense1 = self._make_dense_layers(block, planes, nblocks[0])
        planes += nblocks[0]*growth_rate
        squeezed = int(math.floor(planes*reduction))
        self.trans1 = Transition(planes, squeezed)
        planes = squeezed

        # Stage 2.
        self.dense2 = self._make_dense_layers(block, planes, nblocks[1])
        planes += nblocks[1]*growth_rate
        squeezed = int(math.floor(planes*reduction))
        self.trans2 = Transition(planes, squeezed)
        planes = squeezed

        # Stage 3.
        self.dense3 = self._make_dense_layers(block, planes, nblocks[2])
        planes += nblocks[2]*growth_rate
        squeezed = int(math.floor(planes*reduction))
        self.trans3 = Transition(planes, squeezed)
        planes = squeezed

        # Stage 4: no transition afterwards.
        self.dense4 = self._make_dense_layers(block, planes, nblocks[3])
        planes += nblocks[3]*growth_rate

        self.bn = nn.BatchNorm2d(planes)
        self.linear = nn.Linear(planes, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack `nblock` blocks; each adds growth_rate channels for the next."""
        chain = []
        for _ in range(nblock):
            chain.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*chain)

    def forward(self, x):
        h = self.conv1(x)
        h = self.trans1(self.dense1(h))
        h = self.trans2(self.dense2(h))
        h = self.trans3(self.dense3(h))
        h = self.dense4(h)
        # 4x4 pooling gives global pooling for 32x32 inputs (32 -> 16 -> 8 -> 4).
        h = F.avg_pool2d(F.relu(self.bn(h)), 4)
        return self.linear(h.view(h.size(0), -1))
87 |
# Factory helpers for the standard DenseNet configurations; the list gives
# the number of blocks per dense stage.

def DenseNet121():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)

def DenseNet169():
    return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)

def DenseNet201():
    return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)

def DenseNet161():
    return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)

def densenet_cifar():
    # Smaller growth rate (12) for CIFAR-scale experiments.
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)

def test_densenet():
    # Smoke test: one forward pass on a random CIFAR-sized input.
    net = densenet_cifar()
    x = torch.randn(1,3,32,32)
    y = net(Variable(x))
    print(y)

# test_densenet()
110 |
--------------------------------------------------------------------------------
/models/densenet3.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 |
class BasicBlock(nn.Module):
    """Pre-activation dense layer: BN-ReLU-3x3conv with optional dropout;
    output channels are concatenated after the input channels."""

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate

    def forward(self, x):
        y = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            y = F.dropout(y, p=self.droprate, training=self.training)
        # Input first, new features second (opposite order to models/densenet.py).
        return torch.cat([x, y], 1)
20 |
class BottleneckBlock(nn.Module):
    """Pre-activation bottleneck dense layer: BN-ReLU-1x1conv (4x widening)
    then BN-ReLU-3x3conv, each with optional dropout; output is concatenated
    after the input channels."""

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(BottleneckBlock, self).__init__()
        inter_planes = out_planes * 4  # bottleneck width
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=1, stride=1,
                               padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(inter_planes)
        self.conv2 = nn.Conv2d(inter_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate

    def forward(self, x):
        y = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            y = F.dropout(y, p=self.droprate, inplace=False, training=self.training)
        y = self.conv2(self.relu(self.bn2(y)))
        if self.droprate > 0:
            y = F.dropout(y, p=self.droprate, inplace=False, training=self.training)
        return torch.cat([x, y], 1)
41 |
class TransitionBlock(nn.Module):
    """Transition between dense blocks: BN-ReLU-1x1conv (optional dropout)
    followed by 2x2 average pooling."""

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super(TransitionBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
                               padding=0, bias=False)
        self.droprate = dropRate

    def forward(self, x):
        y = self.conv1(self.relu(self.bn1(x)))
        if self.droprate > 0:
            y = F.dropout(y, p=self.droprate, inplace=False, training=self.training)
        return F.avg_pool2d(y, 2)
55 |
class DenseBlock(nn.Module):
    """A stack of `nb_layers` dense layers; layer i is built with
    in_planes + i * growth_rate input channels."""

    def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0):
        super(DenseBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers, dropRate)

    def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate):
        stages = [block(in_planes + i*growth_rate, growth_rate, dropRate)
                  for i in range(nb_layers)]
        return nn.Sequential(*stages)

    def forward(self, x):
        return self.layer(x)
67 |
class DenseNet3(nn.Module):
    """DenseNet for small images: three dense blocks separated by
    transitions, then BN-ReLU, global 8x8 average pooling and a linear
    classifier (expects 32x32 inputs)."""

    def __init__(self, depth, num_classes, growth_rate=12,
                 reduction=0.5, bottleneck=True, dropRate=0.0):
        super(DenseNet3, self).__init__()
        channels = 2 * growth_rate
        # Layers per dense block; bottleneck blocks hold two convs each.
        n = (depth - 4) // 3
        if bottleneck == True:
            n = n//2
            block = BottleneckBlock
        else:
            block = BasicBlock
        # First convolution before any dense block.
        self.conv1 = nn.Conv2d(3, channels, kernel_size=3, stride=1,
                               padding=1, bias=False)
        # Dense block 1 + transition (channels compressed by `reduction`).
        self.block1 = DenseBlock(n, channels, growth_rate, block, dropRate)
        channels = int(channels + n*growth_rate)
        self.trans1 = TransitionBlock(channels, int(math.floor(channels*reduction)), dropRate=dropRate)
        channels = int(math.floor(channels*reduction))
        # Dense block 2 + transition.
        self.block2 = DenseBlock(n, channels, growth_rate, block, dropRate)
        channels = int(channels + n*growth_rate)
        self.trans2 = TransitionBlock(channels, int(math.floor(channels*reduction)), dropRate=dropRate)
        channels = int(math.floor(channels*reduction))
        # Dense block 3 (no transition afterwards).
        self.block3 = DenseBlock(n, channels, growth_rate, block, dropRate)
        channels = int(channels + n*growth_rate)
        # Classifier head.
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(channels, num_classes)
        self.in_planes = channels

        # He-style init for convs, unit-gain BatchNorm, zero linear biases.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        h = self.trans1(self.block1(self.conv1(x)))
        h = self.trans2(self.block2(h))
        h = self.relu(self.bn1(self.block3(h)))
        h = F.avg_pool2d(h, 8)  # feature map is 8x8 for 32x32 inputs
        return self.fc(h.view(-1, self.in_planes))
119 |
def DenseNet190():
    """Depth-190 DenseNet (bottleneck blocks, growth rate 40) for 10 classes."""
    return DenseNet3(190, 10, growth_rate=40)
122 |
--------------------------------------------------------------------------------
/models/googlenet.py:
--------------------------------------------------------------------------------
1 | '''GoogLeNet with PyTorch.'''
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from torch.autograd import Variable
7 |
8 |
class Inception(nn.Module):
    """GoogLeNet inception module: four parallel branches (1x1; 1x1->3x3;
    1x1->3x3->3x3 standing in for 5x5; 3x3-pool->1x1), concatenated along
    the channel dimension."""

    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()

        def unit(c_in, c_out, **kw):
            # conv -> BN -> ReLU triple shared by every branch
            return [nn.Conv2d(c_in, c_out, **kw),
                    nn.BatchNorm2d(c_out),
                    nn.ReLU(True)]

        # 1x1 conv branch
        self.b1 = nn.Sequential(*unit(in_planes, n1x1, kernel_size=1))

        # 1x1 reduce -> 3x3 conv branch
        self.b2 = nn.Sequential(*(unit(in_planes, n3x3red, kernel_size=1) +
                                  unit(n3x3red, n3x3, kernel_size=3, padding=1)))

        # 1x1 reduce -> two stacked 3x3 convs ("5x5" branch)
        self.b3 = nn.Sequential(*(unit(in_planes, n5x5red, kernel_size=1) +
                                  unit(n5x5red, n5x5, kernel_size=3, padding=1) +
                                  unit(n5x5, n5x5, kernel_size=3, padding=1)))

        # 3x3 max-pool -> 1x1 conv branch
        self.b4 = nn.Sequential(nn.MaxPool2d(3, stride=1, padding=1),
                                *unit(in_planes, pool_planes, kernel_size=1))

    def forward(self, x):
        branches = [self.b1(x), self.b2(x), self.b3(x), self.b4(x)]
        return torch.cat(branches, 1)
56 |
57 |
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) sized for 32x32 inputs, 10 output classes."""
    def __init__(self):
        super(GoogLeNet, self).__init__()
        # Stem: 3x3 conv to 192 channels (keeps the 32x32 resolution).
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        # Inception args: in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        # Shared stride-2 pool, applied twice in forward (32 -> 16 -> 8).
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        # 8x8 average pool acts as global pooling on the final feature map.
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.linear = nn.Linear(1024, 10)

    def forward(self, x):
        out = self.pre_layers(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
101 |
102 | # net = GoogLeNet()
103 | # x = torch.randn(1,3,32,32)
104 | # y = net(Variable(x))
105 | # print(y.size())
106 |
--------------------------------------------------------------------------------
/models/lenet.py:
--------------------------------------------------------------------------------
1 | '''LeNet in PyTorch.'''
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
class LeNet(nn.Module):
    """Classic LeNet-5-style network adapted to 3x32x32 inputs, 10 classes."""

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        h = F.max_pool2d(F.relu(self.conv1(x)), 2)
        h = F.max_pool2d(F.relu(self.conv2(h)), 2)
        h = h.view(h.size(0), -1)  # flatten to (batch, 400)
        h = F.relu(self.fc1(h))
        h = F.relu(self.fc2(h))
        return self.fc3(h)
24 |
--------------------------------------------------------------------------------
/models/mobilenet.py:
--------------------------------------------------------------------------------
1 | '''MobileNet in PyTorch.
2 |
3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
4 | for more details.
5 | '''
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | from torch.autograd import Variable
11 |
12 |
class Block(nn.Module):
    """Depthwise-separable convolution: depthwise 3x3 conv followed by a
    pointwise 1x1 conv, each with BatchNorm and ReLU."""

    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        # Depthwise 3x3: one filter per input channel (groups=in_planes).
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3,
                               stride=stride, padding=1, groups=in_planes,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise 1x1: mixes channels and sets the output width.
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        return F.relu(self.bn2(self.conv2(h)))
26 |
27 |
class MobileNet(nn.Module):
    """MobileNet v1 for 32x32 inputs: a 3x3 stem conv, thirteen
    depthwise-separable Blocks described by `cfg`, 2x2 average pooling and a
    linear classifier."""
    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        # Stem: regular (non-separable) 3x3 convolution to 32 channels.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        """Build the Block stack from `cfg` (int = width, tuple = (width, stride))."""
        layers = []
        for x in self.cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        # Four stride-2 blocks shrink 32x32 to 2x2, so this pool is global.
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
55 |
56 |
def test():
    """Smoke test: forward one random CIFAR-sized input and print the shape."""
    net = MobileNet()
    x = torch.randn(1,3,32,32)
    y = net(Variable(x))
    print(y.size())

# test()
64 |
--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
1 | '''ResNet in PyTorch.
2 |
3 | BasicBlock and Bottleneck module is from the original ResNet paper:
4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385
6 |
7 | PreActBlock and PreActBottleneck module is from the later paper:
8 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
9 | Identity Mappings in Deep Residual Networks. arXiv:1603.05027
10 | '''
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 |
15 | from torch.autograd import Variable
16 |
17 |
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding=1 and no bias (BN follows in all users)."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
20 |
21 |
class BasicBlock(nn.Module):
    """Two 3x3 convolutions with a residual connection (post-activation,
    ResNet v1)."""
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        # 1x1 projection shortcut whenever the residual shape changes.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = self.bn2(self.conv2(h))
        return F.relu(h + self.shortcut(x))
45 |
46 |
class PreActBlock(nn.Module):
    """BasicBlock with pre-activation ordering (BN-ReLU before each conv);
    the shortcut taps the pre-activated input."""
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)

        # Bias-free 1x1 projection only when the residual shape changes.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1,
                          stride=stride, bias=False)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        skip = self.shortcut(pre)
        h = self.conv1(pre)
        h = self.conv2(F.relu(self.bn2(h)))
        return h + skip
71 |
72 |
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block (post-activation,
    ResNet v1); output width is expansion * planes."""
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        wide = self.expansion*planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, wide, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide)

        # Projection shortcut when spatial size or channel count changes.
        if stride != 1 or in_planes != wide:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, wide, kernel_size=1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(wide),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = self.bn3(self.conv3(h))
        return F.relu(h + self.shortcut(x))
99 |
100 |
class PreActBottleneck(nn.Module):
    """Bottleneck block with pre-activation ordering (BN-ReLU before each
    conv); the shortcut taps the pre-activated input."""
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        wide = self.expansion*planes
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, wide, kernel_size=1, bias=False)

        # Bias-free projection on the pre-activated input when shapes change.
        if stride != 1 or in_planes != wide:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, wide, kernel_size=1, stride=stride,
                          bias=False)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        skip = self.shortcut(pre)
        h = self.conv1(pre)
        h = self.conv2(F.relu(self.bn2(h)))
        h = self.conv3(F.relu(self.bn3(h)))
        return h + skip
128 |
129 |
class ResNet(nn.Module):
    """Generic ResNet for 32x32 inputs: four stages of `block` with
    num_blocks[i] blocks each (widths 64/128/256/512 * block.expansion)."""
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = conv3x3(3,64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        # Only the first block of a stage downsamples; the rest use stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, lin=0, lout=5):
        # lin/lout select a consecutive range of stages to run, enabling a
        # partial forward pass; defaults run the whole network.
        out = x
        if lin < 1 and lout > -1:
            out = self.conv1(out)
            out = self.bn1(out)
            out = F.relu(out)
        if lin < 2 and lout > 0:
            out = self.layer1(out)
        if lin < 3 and lout > 1:
            out = self.layer2(out)
        if lin < 4 and lout > 2:
            out = self.layer3(out)
        if lin < 5 and lout > 3:
            out = self.layer4(out)
        if lout > 4:
            # Classifier head: 4x4 pooling (global for 32x32 inputs) + linear.
            out = F.avg_pool2d(out, 4)
            out = out.view(out.size(0), -1)
            out = self.linear(out)
        return out
170 |
171 |
# Standard depth configurations; note ResNet18 uses the pre-activation block.
def ResNet18():
    return ResNet(PreActBlock, [2,2,2,2])

def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])

def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])


def test():
    # Smoke test: forward one random CIFAR-sized input and print the shape.
    net = ResNet18()
    y = net(Variable(torch.randn(1,3,32,32)))
    print(y.size())

# test()
194 |
--------------------------------------------------------------------------------
/models/resnext.py:
--------------------------------------------------------------------------------
1 | '''ResNeXt in PyTorch.
2 |
3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
4 | '''
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from torch.autograd import Variable
10 |
11 |
class Block(nn.Module):
    """ResNeXt residual block: 1x1 reduce, grouped 3x3 conv with
    `cardinality` groups, then 1x1 expand (expansion = 2)."""
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        width = cardinality * bottleneck_width
        self.conv1 = nn.Conv2d(in_planes, width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, self.expansion*width, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*width)

        # Projection shortcut only when the residual shape changes.
        if stride == 1 and in_planes == self.expansion*width:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*width, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*width),
            )

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        h = self.bn3(self.conv3(h))
        return F.relu(h + self.shortcut(x))
40 |
41 |
class ResNeXt(nn.Module):
    """ResNeXt for 32x32 inputs: three stages of grouped-conv Blocks.

    NOTE: self.bottleneck_width is doubled by _make_layer after each stage,
    so the three stages use widths w, 2w, 4w; the final channel count is
    Block.expansion (2) * cardinality * 4w = cardinality * w * 8, which is
    what the linear layer below expects.
    """
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

    def _make_layer(self, num_blocks, stride):
        # Only the first block of a stage downsamples.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # out = self.layer4(out)
        # Feature map is 8x8 after two stride-2 stages on a 32x32 input.
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
77 |
78 |
# Named configurations: ResNeXt29_<cardinality>x<bottleneck_width>d.
def ResNeXt29_2x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)

def ResNeXt29_4x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)

def ResNeXt29_8x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)

def ResNeXt29_32x4d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)

def test_resnext():
    # Smoke test: forward one random CIFAR-sized input and print the shape.
    net = ResNeXt29_2x64d()
    x = torch.randn(1,3,32,32)
    y = net(Variable(x))
    print(y.size())

# test_resnext()
98 |
--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
1 | '''VGG11/13/16/19 in Pytorch.'''
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 |
6 |
# Per-variant layer specs: integers are 3x3-conv output widths,
# 'M' marks a 2x2 stride-2 max-pooling stage.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
13 |
14 |
class VGG(nn.Module):
    """VGG-style network built from a cfg entry; 512-d features, 10 classes."""

    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        feats = self.features(x)
        return self.classifier(feats.view(feats.size(0), -1))

    def _make_layers(self, cfg):
        """Translate a cfg list ('M' = 2x2 stride-2 max-pool, int = conv
        width) into a Sequential feature extractor."""
        modules = []
        channels = 3
        for spec in cfg:
            if spec == 'M':
                modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                modules.extend([nn.Conv2d(channels, spec, kernel_size=3, padding=1),
                                nn.BatchNorm2d(spec),
                                nn.ReLU(inplace=True)])
                channels = spec
        # Trailing 1x1/stride-1 average pool (a no-op kept for compatibility).
        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*modules)
40 |
41 | # net = VGG('VGG11')
42 | # x = torch.randn(2,3,32,32)
43 | # print(net(Variable(x)).size())
44 |
--------------------------------------------------------------------------------
/paper/abstract.txt:
--------------------------------------------------------------------------------
Overfitting, underfitting, and unstable training are important challenges in
machine learning.
3 | Current approaches for these issues are mixup, SamplePairing and BC learning.
4 | In our work, we state the hypothesis that mixing many images together can be more
5 | effective than just two.
6 | Batchboost pipeline has three stages:
7 | (a) pairing: method of selecting two samples.
8 | (b) mixing: how to create a new one from two samples.
9 | (c) feeding: combining mixed samples with new ones from dataset into batch (with ratio $\gamma$).
10 | Note that sample that appears in our batch propagates with
11 | subsequent iterations with less and less importance until the end of training.
The pairing stage calculates the error per sample, sorts the samples, and pairs
them with the strategy: hardest with the easiest one; then the mixing stage
merges two samples using mixup, $\lambda x_1 + (1-\lambda)x_2$. Finally, the
feeding stage combines new samples with the mixed ones at a ratio of 1:1.
16 | Batchboost has 0.5-3% better accuracy than the current
17 | state-of-the-art mixup regularization on CIFAR-10 & Fashion-MNIST.
Our method is slightly better than the SamplePairing technique
on small datasets (up to 5%).
Batchboost provides stable training with untuned parameters (such as weight
decay); thus it is a good method for testing the performance of different
architectures.
22 | Source code is at: https://github.com/maciejczyzewski/batchboost
23 |
24 |
--------------------------------------------------------------------------------
/paper/arxiv-abstract-shadow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/arxiv-abstract-shadow.png
--------------------------------------------------------------------------------
/paper/arxiv-abstract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/arxiv-abstract.png
--------------------------------------------------------------------------------
/paper/arxiv.sty:
--------------------------------------------------------------------------------
\NeedsTeXFormat{LaTeX2e}

\ProcessOptions\relax

% fonts: Times (ptm) for roman, Helvetica (phv) for sans serif
\renewcommand{\rmdefault}{ptm}
\renewcommand{\sfdefault}{phv}

% set page geometry (inside \AtBeginDocument so it overrides class defaults)
\usepackage[verbose=true,letterpaper]{geometry}
\AtBeginDocument{
\newgeometry{
textheight=9in,
textwidth=6.5in,
top=1in,
headheight=14pt,
headsep=25pt,
footskip=30pt
}
}

% line breaking: forbid widow/club lines, bottom-justify pages, and prefer
% loose interword spacing over overfull boxes
\widowpenalty=10000
\clubpenalty=10000
\flushbottom
\sloppy
26 |
27 |
28 |
% running-header/footer text (both default to "A Preprint")
\newcommand{\headeright}{A Preprint}
\newcommand{\undertitle}{A Preprint}

% page headers via fancyhdr: small-caps tag on the right, title centered,
% page number centered in the footer, 0.4pt rule under the header
\usepackage{fancyhdr}
\fancyhf{}
\pagestyle{fancy}
\renewcommand{\headrulewidth}{0.4pt}
\fancyheadoffset{0pt}
\rhead{\scshape \footnotesize \headeright}
\chead{\@title}
\cfoot{\thepage}


%Handling Keywords: \keywords{a \and b} prints a bold "Keywords" label and
%separates the entries with centered dots
\def\keywordname{{\bfseries \emph Keywords}}%
\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm
\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$
}\noindent\keywordname\enspace\ignorespaces#1\par}}
47 |
% font sizes with reduced leading; \normalsize and \small also tighten the
% skips above/below displayed math
\renewcommand{\normalsize}{%
\@setfontsize\normalsize\@xpt\@xipt
\abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@
\abovedisplayshortskip \z@ \@plus 3\p@
\belowdisplayskip \abovedisplayskip
\belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
}
\normalsize
\renewcommand{\small}{%
\@setfontsize\small\@ixpt\@xpt
\abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@
\abovedisplayshortskip \z@ \@plus 2\p@
\belowdisplayskip \abovedisplayskip
\belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@
}
\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
\renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
72 |
73 | % sections with less space
74 | \providecommand{\section}{}
75 | \renewcommand{\section}{%
76 | \@startsection{section}{1}{\z@}%
77 | {-2.0ex \@plus -0.5ex \@minus -0.2ex}%
78 | { 1.5ex \@plus 0.3ex \@minus 0.2ex}%
79 | {\large\bf\raggedright}%
80 | }
81 | \providecommand{\subsection}{}
82 | \renewcommand{\subsection}{%
83 | \@startsection{subsection}{2}{\z@}%
84 | {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
85 | { 0.8ex \@plus 0.2ex}%
86 | {\normalsize\bf\raggedright}%
87 | }
88 | \providecommand{\subsubsection}{}
89 | \renewcommand{\subsubsection}{%
90 | \@startsection{subsubsection}{3}{\z@}%
91 | {-1.5ex \@plus -0.5ex \@minus -0.2ex}%
92 | { 0.5ex \@plus 0.2ex}%
93 | {\normalsize\bf\raggedright}%
94 | }
95 | \providecommand{\paragraph}{}
96 | \renewcommand{\paragraph}{%
97 | \@startsection{paragraph}{4}{\z@}%
98 | {1.5ex \@plus 0.5ex \@minus 0.2ex}%
99 | {-1em}%
100 | {\normalsize\bf}%
101 | }
102 | \providecommand{\subparagraph}{}
103 | \renewcommand{\subparagraph}{%
104 | \@startsection{subparagraph}{5}{\z@}%
105 | {1.5ex \@plus 0.5ex \@minus 0.2ex}%
106 | {-1em}%
107 | {\normalsize\bf}%
108 | }
109 | \providecommand{\subsubsubsection}{}
110 | \renewcommand{\subsubsubsection}{%
111 | \vskip5pt{\noindent\normalsize\rm\raggedright}%
112 | }
113 |
114 | % float placement
115 | \renewcommand{\topfraction }{0.85}
116 | \renewcommand{\bottomfraction }{0.4}
117 | \renewcommand{\textfraction }{0.1}
118 | \renewcommand{\floatpagefraction}{0.7}
119 |
120 | \newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@}
121 | \newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@}
122 |
123 | \setlength{\abovecaptionskip}{\@abovecaptionskip}
124 | \setlength{\belowcaptionskip}{\@belowcaptionskip}
125 |
126 | % swap above/belowcaptionskip lengths for tables
127 | \renewenvironment{table}
128 | {\setlength{\abovecaptionskip}{\@belowcaptionskip}%
129 | \setlength{\belowcaptionskip}{\@abovecaptionskip}%
130 | \@float{table}}
131 | {\end@float}
132 |
133 | % footnote formatting
134 | \setlength{\footnotesep }{6.65\p@}
135 | \setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
136 | \renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
137 | \setcounter{footnote}{0}
138 |
139 | % paragraph formatting
140 | \setlength{\parindent}{\z@}
141 | \setlength{\parskip }{5.5\p@}
142 |
143 | % list formatting
144 | \setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@}
145 | \setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
146 | \setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@}
147 | \setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@}
148 | \setlength{\leftmargin }{3pc}
149 | \setlength{\leftmargini }{\leftmargin}
150 | \setlength{\leftmarginii }{2em}
151 | \setlength{\leftmarginiii}{1.5em}
152 | \setlength{\leftmarginiv }{1.0em}
153 | \setlength{\leftmarginv }{0.5em}
154 | \def\@listi {\leftmargin\leftmargini}
155 | \def\@listii {\leftmargin\leftmarginii
156 | \labelwidth\leftmarginii
157 | \advance\labelwidth-\labelsep
158 | \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@
159 | \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@
160 | \itemsep \parsep}
161 | \def\@listiii{\leftmargin\leftmarginiii
162 | \labelwidth\leftmarginiii
163 | \advance\labelwidth-\labelsep
164 | \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@
165 | \parsep \z@
166 | \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
167 | \itemsep \topsep}
168 | \def\@listiv {\leftmargin\leftmarginiv
169 | \labelwidth\leftmarginiv
170 | \advance\labelwidth-\labelsep}
171 | \def\@listv {\leftmargin\leftmarginv
172 | \labelwidth\leftmarginv
173 | \advance\labelwidth-\labelsep}
174 | \def\@listvi {\leftmargin\leftmarginvi
175 | \labelwidth\leftmarginvi
176 | \advance\labelwidth-\labelsep}
177 |
178 | % create title
179 | \providecommand{\maketitle}{}
180 | \renewcommand{\maketitle}{%
181 | \par
182 | \begingroup
183 | \renewcommand{\thefootnote}{\fnsymbol{footnote}}
184 | % for perfect author name centering
185 | \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
186 | % The footnote-mark was overlapping the footnote-text,
187 | % added the following to fix this problem (MK)
188 | \long\def\@makefntext##1{%
189 | \parindent 1em\noindent
190 | \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
191 | }
192 | \thispagestyle{empty}
193 | \@maketitle
194 | \@thanks
195 | %\@notice
196 | \endgroup
197 | \let\maketitle\relax
198 | \let\thanks\relax
199 | }
200 |
201 | % rules for title box at top of first page
202 | \newcommand{\@toptitlebar}{
203 | \hrule height 2\p@
204 | \vskip 0.25in
205 | \vskip -\parskip%
206 | }
207 | \newcommand{\@bottomtitlebar}{
208 | \vskip 0.29in
209 | \vskip -\parskip
210 | \hrule height 2\p@
211 | \vskip 0.09in%
212 | }
213 |
214 | % create title (includes both anonymized and non-anonymized versions)
215 | \providecommand{\@maketitle}{}
216 | \renewcommand{\@maketitle}{%
217 | \vbox{%
218 | \hsize\textwidth
219 | \linewidth\hsize
220 | \vskip 0.1in
221 | \@toptitlebar
222 | \centering
223 | {\LARGE\sc \@title\par}
224 | \@bottomtitlebar
225 | \textsc{\undertitle}\\
226 | \vskip 0.1in
227 | \def\And{%
228 | \end{tabular}\hfil\linebreak[0]\hfil%
229 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
230 | }
231 | \def\AND{%
232 | \end{tabular}\hfil\linebreak[4]\hfil%
233 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
234 | }
235 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
236 | \vskip 0.4in \@minus 0.1in \center{\@date} \vskip 0.2in
237 | }
238 | }
239 |
240 | % add conference notice to bottom of first page
241 | \newcommand{\ftype@noticebox}{8}
242 | \newcommand{\@notice}{%
243 | % give a bit of extra room back to authors on first page
244 | \enlargethispage{2\baselineskip}%
245 | \@float{noticebox}[b]%
246 | \footnotesize\@noticestring%
247 | \end@float%
248 | }
249 |
250 | % abstract styling
251 | \renewenvironment{abstract}
252 | {
253 | \centerline
254 | {\large \bfseries \scshape Abstract}
255 | \begin{quote}
256 | }
257 | {
258 | \end{quote}
259 | }
260 |
261 | \endinput
262 |
--------------------------------------------------------------------------------
/paper/batchboost.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/batchboost.pdf
--------------------------------------------------------------------------------
/paper/batchboost.tex:
--------------------------------------------------------------------------------
1 | \documentclass{article}
2 |
3 |
4 |
5 | \usepackage{arxiv}
6 |
7 | \usepackage[utf8]{inputenc} % allow utf-8 input
8 | \usepackage[T1]{fontenc} % use 8-bit T1 fonts
9 | \usepackage{hyperref} % hyperlinks
10 | \usepackage{url} % simple URL typesetting
11 | \usepackage{booktabs} % professional-quality tables
12 | \usepackage{amsfonts} % blackboard math symbols
13 | \usepackage{nicefrac} % compact symbols for 1/2, etc.
14 | \usepackage{microtype} % microtypography
15 | \usepackage{lipsum} % Can be removed after putting your text content
16 |
17 | \usepackage{graphicx}
18 | \usepackage{caption}
19 | \usepackage{float}
20 | \usepackage{subcaption}
21 | \usepackage{amsmath}
22 |
23 | \renewcommand{\headeright}{}
24 | \renewcommand{\undertitle}{Draft}
25 |
26 | % Version 2:
27 | % FIXME: gradient clipping (mixup more likely do explode, but why?)
28 | % FIXME: what about adam, sgd and other optimizers with different weight decay?
29 | % FIXME: what about different types of data (tabular)?
30 | % FIXME: adversarial attacks?
31 |
32 | \title{\emph{batchboost}: regularization for stabilizing training with
33 | resistance to underfitting \& overfitting}
34 |
35 | \author{
36 | Maciej A.~Czyzewski\\
37 | Institute of Computing Science\\
38 | Poznan University of Technology\\
39 | Piotrowo 2, 60-965 Poznan, Poland\\
40 | \texttt{maciejanthonyczyzewski@gmail.com} \\
41 | }
42 |
43 | \begin{document}
44 | \maketitle
45 |
46 | % BC learning: https://arxiv.org/pdf/1711.10284.pdf
47 | % EfficientNet: https://arxiv.org/pdf/1905.11946.pdf
48 | % Mixup: https://arxiv.org/pdf/1710.09412.pdf
49 | % SamplePairing: https://arxiv.org/pdf/1801.02929.pdf
50 | % ShakeDrop: https://arxiv.org/pdf/1802.02375.pdf
51 | % ShakeShake: https://arxiv.org/pdf/1705.07485.pdf
52 |
53 | \begin{abstract}
54 | Overfitting \& underfitting and stable training are important challenges in
55 | machine learning.
56 | %
57 | Current approaches for these issues are \emph{mixup}\cite{zhang2017mixup},
58 | \emph{SamplePairing}\cite{inoue2018data}
59 | and \emph{BC learning}\cite{tokozume2018between}.
60 | In our work, we state the hypothesis that mixing many images together can be more
61 | effective than just two.
62 | \emph{batchboost} pipeline has three stages:
63 | (a) pairing: method of selecting two samples.
64 | (b) mixing: how to create a new one from two samples.
65 | (c) feeding: combining mixed samples with new ones from dataset into batch (with ratio $\gamma$).
66 | Note that sample that appears in our batch propagates with
67 | subsequent iterations with less and less importance until the end of training.
68 | %
69 | Pairing stage calculates the error per sample, sorts the samples and pairs
70 | with strategy: hardest with easiest one, then mixing stage merges two samples
71 | using \emph{mixup}, $\lambda x_1 + (1-\lambda)x_2$. Finally, feeding stage combines
72 | new samples with mixed by ratio 1:1.
73 | %
74 | \emph{batchboost} has 0.5-3\% better accuracy than the current
75 | state-of-the-art \emph{mixup} regularization on
76 | CIFAR-10\cite{krizhevsky2009learning} \&
77 | Fashion-MNIST\cite{xiao2017}.
78 | %
79 | Our method is slightly better than SamplePairing technique
80 | on small datasets (up to 5\%).
81 | %
82 | \emph{batchboost} provides stable training on not tuned parameters (like weight
83 | decay), thus it's a good method to test performance of different architectures.
84 | %
85 | Source code is at: \url{https://github.com/maciejczyzewski/batchboost}
86 | \end{abstract}
87 |
88 | \keywords{regularization \and underfitting \and overfitting
89 | \and generalization \and mixup}
90 |
91 | \section{Introduction}
92 | \label{sec:introduction}
93 |
94 | In order to improve test errors, regularization methods which are processes to
95 | introduce additional information to DNN have been proposed\cite{miyato2018virtual}. Widely
96 | used regularization methods include \emph{data augmentation}, \emph{stochastic
97 | gradient descent} (SGD) \cite{zhang2016understanding}, \emph{weight decay}
98 | \cite{krogh1992simple}, \emph{batch normalization} (BN) \cite{ioffe2015batch},
99 | \emph{label
100 | smoothing}\cite{szegedy2016rethinking} and \emph{mixup}\cite{zhang2017mixup}.
101 | %
102 | Our idea comes from \emph{mixup} flaws. In a nutshell, \emph{mixup} constructs
103 | virtual training example from two samples. In term of batch construction, it
104 | simply gets some random samples from dataset and randomly mix together.
105 | %
106 | The overlapping example of many samples (more than two) has not been considered
107 | in previous work. Probably because the imposition of 3 examples significantly affects the model leading to underfitting.
108 | %
109 | It turned out that in many tasks, linear mixing (like \emph{BC learning} or
110 | \emph{mixup}) leads to underfitting (figure \ref{fig:under}). Therefore, these methods are not applicable as universal tools.
111 |
112 | \textbf{Contribution} Our work shows that the imposition of many examples in
113 | subsequent iterations (which are slowly suppressed by new overlays) can improve efficiency, but most importantly it ensures stability of training and resistance to attacks.
114 | %
115 | However, it must be done wisely: that's why we implemented two basic mechanisms:
116 | \begin{itemize}
117 | \item (a) new information is provided gradually, thus \emph{half-batch} adds
118 | new examples without mixing
119 | \item (b) mixing is carried out according to some criterion, in our case it is the
120 | best-the-worst strategy to mediate the error
121 | \end{itemize}
122 | %
123 | The whole procedure is made in three steps to make it more understandable:
124 | \begin{itemize}
125 | \item (a) \emph{pairing}: a method for selecting two samples
126 | \item (b) \emph{mixing}: how to create a new one from two samples
127 | \item (c) \emph{feeding}: to the mixed samples it supplements the batch with new examples
128 | from datasets
129 | \end{itemize}
130 | %
131 | Note that sample that appears in our batch propagates with
132 | subsequent iterations with less and less importance until the end of training.
133 | %
134 | Source code with sample implementation and experiments to verify the results
135 | we present here:
136 |
137 | \begin{center}
138 | \url{https://github.com/maciejczyzewski/batchboost}
139 | \end{center}
140 |
141 | To understand the effects of \emph{batchboost}, we conduct a
142 | thorough set of study experiments (Section \ref{sec:results}).
143 |
144 | \section{Overview}
145 | \label{sec:overview}
146 |
147 | \begin{figure}[H]
148 | \centering
149 | \includegraphics[width=\linewidth]{figure-abstract}
150 | \caption{\emph{batchboost} presented in three phases: (a) pairing by sorting
151 | error (b) mixing with \emph{mixup} (c) feeding: a mixed feed-batch and new
152 | samples in half-batch by 1:1 ratio.}
153 | \label{fig:abstract}
154 | \end{figure}
155 |
156 | Batch as input for training is a combination of two different mini-batches:
157 | \begin{itemize}
158 | \item (a) \emph{half-batch}: new samples from dataset, classical augmentation is possible here
159 | \item (b) \emph{feed-batch} (mixup): samples mixed together (in-order presented in
160 | figure \ref{fig:abstract})
161 | \end{itemize}
162 |
163 | Parameter $\gamma$ means the ratio of the number of samples in half-batch to
164 | feed-batch, in our work we have not considered other values than 1. However, we believe that this is an interesting topic for further research and discussion.
165 |
166 | \subsection{Pairing Method}
167 | \label{sec:pairing}
168 |
169 | Combining many overlapping samples may have a negative impact on our optimizer
170 | used in training. In our implementation, it calculates the error for each
171 | sample in batch. Then it sorts this vector, and pairs samples by connecting the
172 | easiest (smallest error) with the most difficult sample. The goal of this
173 | procedure is to create new artificial samples that are between classes, as
174 | described in \emph{BC learning}.
175 |
176 | However, in this case they are not random pairs, but those that 'require'
177 | additional work. In this way, the learning process is more stable because there
178 | are no cases when it mix only difficult with difficult or easy with easy (likely
179 | is at the beginning or end of the learning process).
180 | %
181 | In our case, the error was calculated using L2 metric between one-hot labels and
182 | the predictions (thus we analyzed \emph{batchboost} only on classification
183 | problems like CIFAR-10\cite{krizhevsky2009learning} or
184 | Fashion-MNIST\cite{xiao2017}). For other problems, there is probably
185 | a need to change the metric/method of error calculation.
186 | %
187 | We were also thinking about using RL to pair samples. However, it turns out to
188 | be a more complicated problem thus we leave it here for further discussion.
189 |
190 | \subsection{Mixing Method}
191 | \label{sec:mixing}
192 |
193 | Selected two samples should be combined into one.
194 | There are three methods for linearly mixing samples: \emph{SamplePairing},
195 | \emph{Mixup}, \emph{BC Learning}. Due to the simplicity of implementation and
196 | the highest scores, we used a mixup, which looks like this:
197 | %
198 | \begin{align*}
199 | \tilde{x} &= \lambda x_i + (1 - \lambda) x_j,\qquad \text{where~} x_i, x_j \text{~are~raw~input~vectors}\\
200 | \tilde{y} &= \lambda y_i + (1 - \lambda) y_j,\qquad \text{where~} y_i, y_j \text{~are~one-hot~label~encodings}
201 | \end{align*}
202 | $(x_i, y_i)$ and $(x_j, y_j)$ are two examples drawn at random from our
203 | training data, and $\lambda \in [0,1]$.
204 | Label for many samples was averaged over the last 2 labels (due to small differences in results, and large tradeoff in memory).
205 |
206 | Why it works?
207 | The good explanation is provided in BC learning research, that images and sound
208 | can be represented as waves. Mixing is an interpolation that humans don't
209 | understand but machines can interpret.
210 | However, also a good explanation of this process is: that by training on
211 | artificial samples, we supplement the training data by artificial examples between-classes
212 | (visually, it fills space between clusters in UMAP/t-SNE visualization).
213 | Thus, it generalizes problem more by aggressive cluster separation during
214 | training (the clusters are moving away from each other, because model learns
215 | artificial clusters made up by mixing).
216 | Mixing multiple classes allows for more accurate separation (higher dimensions), however model starts to depart from original problem (new distribution) losing accuracy on test dataset.
217 |
218 | The question is whether linear interpolation is good for all problems.
219 | Probably the best solution would be to use a GAN for this purpose (two inputs +
220 | noise to control). We tried to use the technique described in
221 | SinGAN\cite{shaham2019singan} but it
222 | failed in \emph{batchboost}. It was unsuccessful due to the high cost of
223 | maintaining such a structure.
224 |
225 | \subsection{Continuous Feeding}
226 | \label{sec:feeding}
227 |
228 | The final stage is for 'feeding' new artificial samples on the model's input. In
229 | the previous researches, considered were only cases with mixing two samples along
230 | batch. \emph{batchboost} does this by adding new samples with $\gamma$ ratio to
231 | mixed ones.
232 | %
233 | An interesting observation is that once we mix samples, they are in learning
234 | process till end (at each batch continuously).
235 | When applying a mixing it has only three options: (a) new sample with new sample
236 | (b) new sample with previously mixed sample (c) previously mixed sample with
237 | previously mixed sample. Pairing method cannot choose only one option for all samples
238 | because of non-zero $\gamma$ ratio.
239 |
240 | To maintain compatibility with the mixup
241 | algorithm, it chooses new $\lambda$ when constructing the batch.
242 | That is why past samples have less and less significance in training process,
243 | until they disappear completely (figure \ref{fig:feeding}).
244 |
245 | \begin{figure}[H]
246 | \hspace{0.5cm}
247 | \includegraphics[width=\linewidth]{figure-feeding}
248 | \caption{Orange squares indicates how information is propagated between
249 | batches in the \emph{batchboost} method.}
250 | \label{fig:feeding}
251 | \end{figure}
252 |
253 | We found that for problems by nature not linear, for which the mixup did poorly,
254 | it was caused by the fact that model learned at the time when very low/high
255 | $\lambda$ was assigned (i.e. model learned on a single example, without mixing).
256 | %
257 | In \emph{batchboost} it doesn't look much better. However, \emph{half-batch}
258 | contains new information, and \emph{feed-batch} has examples mixed not randomly but
259 | by pairing method. With this clues, optimizer can slightly improve the direction of
260 | optimization by better interpreting loss landscape.
261 |
262 | \section{Results}
263 | \label{sec:results}
264 |
265 | We focused on the current state-of-the-art \emph{mixup}. The architecture we
266 | used was \emph{EfficientNet-b0}\cite{tan2019efficientnet} and
267 | \emph{ResNet100k}\cite{DBLP:journals/corr/HeZRS15} (having only 100k
268 | parameters from DAWNBench\cite{coleman2017dawnbench}). The problems we've evaluated are CIFAR-10 and
269 | Fashion-MNIST.
270 | %
271 | We intend to update this work with more detailed comparisons and experiments,
272 | test on different architectures and parameters. The most interesting
273 | issue which requires additional research is artificial attacks.
274 |
275 | \subsection{Underfitting \& Stabilizing Training}
276 | \label{sec:under}
277 |
278 | We described this problem in the (section \ref{sec:feeding}). The main factors
279 | that stabilize training are: (a) the appropriate pairing of samples for mixing,
280 | i.e. by error per sample (b) propagation of new information in \emph{half-batch}.
281 |
282 | \begin{figure}[H]
283 | \centering
284 | \begin{minipage}{.3\textwidth}
285 | \hspace{-0.65cm}
286 | \includegraphics[totalheight=5.6cm]{figure-1-test-accuracy-without-augment}
287 | \end{minipage}
288 | \begin{minipage}{.65\textwidth}\vspace{-0.00cm}\hspace{0.865cm}
289 | \includegraphics[totalheight=5.6cm]{figure-1-loss-train-without-augment}
290 | \end{minipage}%
291 | \caption{Evaluation on \emph{CIFAR-10}, for \emph{EfficientNet-b0} and
292 | \emph{SGD(weight-decay=10e-4, lr=0.1)} (as
293 | recommended in the \emph{mixup} research), same parameters for each model.
294 | As a result, the models behave differently, although they differ only in the
295 | method of constructing the batch.}
296 | \label{fig:under}
297 | \end{figure}
298 |
299 | Another problem that \emph{mixup} often encounters is very unstable loss
300 | landscape. Therefore, without a well-chosen weight decay, it cannot stabilize in
301 | minimums. To solve this problem, we tune the optimizer parameters
302 | for \emph{mixup}, after that it could achieve a similar result to
303 | \emph{batchboost} (figure \ref{fig:over}).
304 |
305 | \subsection{Overfitting (comparison to \emph{mixup})}
306 | \label{sec:over}
307 |
308 | The most important observation of this section is that \emph{batchboost} retains
309 | the properties of the \emph{mixup} (similarly to \emph{SamplePairing} or
310 | \emph{BC learning}). It protects against overfitting, having slightly better results.
311 |
312 | \begin{figure}[H]
313 | \centering
314 | \begin{minipage}{.3\textwidth}
315 | \hspace{-0.65cm}
316 | \includegraphics[totalheight=5.6cm]{figure-2-train-accuracy-with-augment}
317 | \end{minipage}
318 | \begin{minipage}{.65\textwidth}\vspace{-0.00cm}\hspace{0.865cm}
319 | \includegraphics[totalheight=5.6cm]{figure-2-test-accuracy-with-augment}
320 | \end{minipage}%
321 | \caption{\emph{batchboost} is a new state-of-the-art because it is a slightly
322 | better than \emph{mixup} (here \emph{mixup} has been tuned for best
323 | parameters, \emph{batchboost} uses configuration from figure \ref{fig:under}).}
324 | \label{fig:over}
325 | \end{figure}
326 |
327 | The only difference is that the $\alpha$ coefficient from the original
328 | \emph{mixup} is weakened.
329 |
330 | \subsection{Accelerating Training \& Adversarial Attacks}
331 | \label{sec:attacks}
332 |
333 | In the early stages, it learns faster than a classic \emph{mixup}.
334 | The difference becomes significant when working on very small datasets, e.g.
335 | medical challenges on Kaggle. In this work, we have limited \emph{Fashion-MNIST}
336 | to 64 examples and compared it to the classic model and \emph{SamplePairing}. The results were better by 5\%.
337 | When the model perform well at small datasets, it means that training
338 | generalizes problem. On (figure \ref{fig:multipass}) we present samples
339 | generated during this process.
340 |
341 | \begin{figure}[H]
342 | \centering
343 | \includegraphics[width=10.5cm]{figure-multipass}
344 | \caption{More than two samples have been mixed.}
345 | \label{fig:multipass}
346 | \end{figure}
347 |
348 | We tried to modify \emph{batchboost} to generate samples similar to those of
349 | adversarial attacks (by uniformly mixing all samples backward with some Gaussian
350 | noise) without any reasonable results.
351 |
352 | \section{Conclusion}
353 | \label{sec:conclusion}
354 |
355 | Our method is easy to implement and can be used for any
356 | model as an additional BlackBox at input.
357 | It provides stability and slightly better results.
358 | Using \emph{batchboost} is certainly more important in problems with small data sets.
359 | Thanks to the property of avoiding underfitting for misconfigured parameters,
360 | this is a good regularization method for people who want to compare two
361 | architectures without parameter tuning.
362 | Retains all properties of \emph{mixup}, \emph{SamplePairing} and \emph{BC learning}.
363 |
364 | \bibliographystyle{unsrt}
365 | \bibliography{references}
366 |
367 | \end{document}
368 |
--------------------------------------------------------------------------------
/paper/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 |
5 | from watchdog.observers import Observer
6 | from watchdog.events import FileSystemEventHandler
7 |
8 |
class LatexEventHandler(FileSystemEventHandler):
    """Recompile LaTeX sources whenever a watched ``.tex`` file changes."""

    LATEX_FLAGS = "-interaction nonstopmode -halt-on-error -file-line-error"
    FILETYPE_INPUT = [".tex"]

    def on_any_event(self, event):
        """Dispatch a filesystem event: compile if a LaTeX source changed.

        Skips directory events (they never carry a compilable path) and
        deletion events (there is nothing left to compile).
        """
        if event.is_directory or event.event_type == "deleted":
            return
        # str.endswith accepts a tuple of suffixes, so one call covers
        # every registered input extension.
        if event.src_path.endswith(tuple(self.FILETYPE_INPUT)):
            self.compile(event)

    def compile(self, event):
        """Run pdflatex on the changed file and print its checksum."""
        import shlex  # local import: only needed when a compile actually fires

        # Quote the path so filenames with spaces or shell metacharacters
        # cannot break the command line (or inject into the shell).
        path = shlex.quote(event.src_path)
        print("=== LATEX ===")
        os.system(f"pdflatex {self.LATEX_FLAGS} {path}")
        # NOTE(review): `md5` is the macOS name; on Linux this would be
        # `md5sum` -- confirm the intended platform.
        os.system(f"md5 {path}")
22 |
23 |
if __name__ == "__main__":
    # Watch the directory given on the command line (default: cwd).
    watch_dir = sys.argv[1] if len(sys.argv) > 1 else "."

    watcher = Observer()
    watcher.schedule(LatexEventHandler(), watch_dir, recursive=True)
    watcher.start()

    # Idle forever; watchdog delivers events on a background thread.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
    watcher.join()
37 |
--------------------------------------------------------------------------------
/paper/figure-1-loss-train-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-1-loss-train-without-augment.pdf
--------------------------------------------------------------------------------
/paper/figure-1-test-accuracy-without-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-1-test-accuracy-without-augment.pdf
--------------------------------------------------------------------------------
/paper/figure-2-test-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-2-test-accuracy-with-augment.pdf
--------------------------------------------------------------------------------
/paper/figure-2-train-accuracy-with-augment.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-2-train-accuracy-with-augment.pdf
--------------------------------------------------------------------------------
/paper/figure-abstract.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-abstract.pdf
--------------------------------------------------------------------------------
/paper/figure-feeding.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-feeding.pdf
--------------------------------------------------------------------------------
/paper/figure-multipass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-multipass.png
--------------------------------------------------------------------------------
/paper/notes_v2.md:
--------------------------------------------------------------------------------
1 | # Research Brief (Brief intro of the research (50 + words))
2 |
3 | Batchboost is a simple technique to accelerate ML model training by adaptively feeding mini-batches with artificial samples which are created by mixing two examples from previous step - in favor of pairing those that produce the difficult one.
4 |
5 | # What’s New (What’s new in this research?)
6 |
7 | In this research, we state the hypothesis that mixing many images together can
8 | be more effective than just two. To make it efficient, we propose a new method of
9 | creating mini-batches, where each sample from dataset is propagated with
10 | subsequent iterations with less and less importance until the end of learning
11 | process.
12 |
13 | # How It Works (How this research works?)
14 |
15 | Batchboost pipeline has three stages:
16 | (a) pairing: method of selecting two samples from previous step.
17 | (b) mixing: method of creating a new artificial example from two selected samples.
18 | (c) feeding: constructing training mini-batch with created examples and new samples from dataset (concat with ratio γ).
19 | Note that sample from dataset propagates with subsequent iterations with less and less importance until the end of training.
20 |
21 | Our baseline implements pairing stage as sorting by sample error, where hardest examples are paired with easiest ones. Mixing stage
22 | merges two samples using mixup, λx1+(1−λ)x2. Feeding stage combines new samples with ratio 1:1 using concat.
23 |
24 | # Key Insights (What are the main takeaways from this research?)
25 |
26 | The results are promising. Batchboost has 0.5-3% better accuracy than the current state-of-the-art mixup regularization on CIFAR-10 (#10 place in https://paperswithcode.com/) & Fashion-MNIST.
27 | (we hope to see our method in action, for example, on Kaggle as trick to improve a bit test accuracy)
28 |
29 | # Behind The Scenes (Any interesting ideas or research tips you - would like to share with our AI Community?)
30 |
31 | There is a lot to improve in data augmentation and regularization methods.
32 |
33 | # Anything else? (Bottlenecks and future trend?)
34 |
35 | An interesting topic for further research and discussion are
36 | combination of batchboost and existing methods.
37 |
--------------------------------------------------------------------------------
/paper/references.bib:
--------------------------------------------------------------------------------
1 | @article{miyato2018virtual,
2 | title={Virtual adversarial training: a regularization method for supervised and semi-supervised learning},
3 | author={Miyato, Takeru and Maeda, Shin-ichi and Koyama, Masanori and Ishii, Shin},
4 | journal={IEEE transactions on pattern analysis and machine intelligence},
5 | volume={41},
6 | number={8},
7 | pages={1979--1993},
8 | year={2018},
9 | publisher={IEEE}
10 | }
11 |
12 | @article{zhang2016understanding,
13 | title={Understanding deep learning requires rethinking generalization},
14 | author={Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
15 | journal={arXiv preprint arXiv:1611.03530},
16 | year={2016}
17 | }
18 |
19 | @inproceedings{krogh1992simple,
20 | title={A simple weight decay can improve generalization},
21 | author={Krogh, Anders and Hertz, John A},
22 | booktitle={Advances in neural information processing systems},
23 | pages={950--957},
24 | year={1992}
25 | }
26 |
27 | @article{zhang2017mixup,
28 | title={mixup: Beyond empirical risk minimization},
29 | author={Zhang, Hongyi and Cisse, Moustapha and Dauphin, Yann N and Lopez-Paz, David},
30 | journal={arXiv preprint arXiv:1710.09412},
31 | year={2017}
32 | }
33 |
34 | @article{ioffe2015batch,
35 | title={Batch normalization: Accelerating deep network training by reducing internal covariate shift},
36 | author={Ioffe, Sergey and Szegedy, Christian},
37 | journal={arXiv preprint arXiv:1502.03167},
38 | year={2015}
39 | }
40 |
41 | @inproceedings{szegedy2016rethinking,
42 | title={Rethinking the inception architecture for computer vision},
43 | author={Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jon and Wojna, Zbigniew},
44 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
45 | pages={2818--2826},
46 | year={2016}
47 | }
48 |
49 | @article{coleman2017dawnbench,
50 | title={Dawnbench: An end-to-end deep learning benchmark and competition},
51 | author={Coleman, Cody and Narayanan, Deepak and Kang, Daniel and Zhao, Tian and Zhang, Jian and Nardi, Luigi and Bailis, Peter and Olukotun, Kunle and R{\'e}, Chris and Zaharia, Matei},
52 | journal={Training},
53 | volume={100},
54 | number={101},
55 | pages={102},
56 | year={2017}
57 | }
58 |
59 | @article{inoue2018data,
60 | title={Data augmentation by pairing samples for images classification},
61 | author={Inoue, Hiroshi},
62 | journal={arXiv preprint arXiv:1801.02929},
63 | year={2018}
64 | }
65 |
66 | @inproceedings{tokozume2018between,
67 | title={Between-class learning for image classification},
68 | author={Tokozume, Yuji and Ushiku, Yoshitaka and Harada, Tatsuya},
69 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
70 | pages={5486--5494},
71 | year={2018}
72 | }
73 |
74 | @article{xiao2017,
75 | author = {Han Xiao and Kashif Rasul and Roland Vollgraf},
76 | title = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
77 | date = {2017-08-28},
78 | year = {2017},
79 | eprintclass = {cs.LG},
80 | eprinttype = {arXiv},
81 | eprint = {cs.LG/1708.07747},
82 | }
83 |
84 | @article{krizhevsky2009learning,
85 | title={Learning multiple layers of features from tiny images},
86 | author={Krizhevsky, Alex and Hinton, Geoffrey and others},
87 | year={2009},
88 | publisher={Citeseer}
89 | }
90 |
91 | @inproceedings{shaham2019singan,
92 | title={Singan: Learning a generative model from a single natural image},
93 | author={Shaham, Tamar Rott and Dekel, Tali and Michaeli, Tomer},
94 | booktitle={Proceedings of the IEEE International Conference on Computer Vision},
95 | pages={4570--4580},
96 | year={2019}
97 | }
98 |
99 | @article{tan2019efficientnet,
100 | title={Efficientnet: Rethinking model scaling for convolutional neural networks},
101 | author={Tan, Mingxing and Le, Quoc V},
102 | journal={arXiv preprint arXiv:1905.11946},
103 | year={2019}
104 | }
105 |
106 | @article{DBLP:journals/corr/HeZRS15,
107 | author = {Kaiming He and
108 | Xiangyu Zhang and
109 | Shaoqing Ren and
110 | Jian Sun},
111 | title = {Deep Residual Learning for Image Recognition},
112 | journal = {CoRR},
113 | volume = {abs/1512.03385},
114 | year = {2015},
115 | url = {http://arxiv.org/abs/1512.03385},
116 | archivePrefix = {arXiv},
117 | eprint = {1512.03385},
118 | timestamp = {Wed, 17 Apr 2019 17:23:45 +0200},
119 | biburl = {https://dblp.org/rec/bib/journals/corr/HeZRS15},
120 | bibsource = {dblp computer science bibliography, https://dblp.org}
121 | }
122 |
--------------------------------------------------------------------------------
/paper/texput.log:
--------------------------------------------------------------------------------
1 | This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019) (preloaded format=pdflatex 2019.10.12) 21 JAN 2020 02:38
2 | entering extended mode
3 | restricted \write18 enabled.
4 | file:line:error style messages enabled.
5 | %&-line parsing enabled.
6 | **./batchboost_polish.tex
7 |
8 | ! Emergency stop.
9 | <*> ./batchboost_polish.tex
10 |
11 | Here is how much of TeX's memory you used:
12 | 3 strings out of 492616
13 | 112 string characters out of 6129480
14 | 57117 words of memory out of 5000000
15 | 4025 multiletter control sequences out of 15000+600000
16 | 3640 words of font info for 14 fonts, out of 8000000 for 9000
17 | 1141 hyphenation exceptions out of 8191
18 | 0i,0n,0p,1b,6s stack positions out of 5000i,500n,10000p,200000b,80000s
19 |
20 | ! ==> Fatal error occurred, no output PDF file produced!
21 |
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | # 1 - without augment alpha=1.0
2 | # 2 - with augment alpha=1.0
3 | # 3 - without augment alpha=0.4
4 | # 4 - with augment alpha=0.4
5 |
6 | import matplotlib.pyplot as plt
7 | import numpy as np
8 | import unidecode
9 | import csv
10 | import re
11 |
12 |
def slugify(text):
    """Turn *text* into a filesystem-friendly slug.

    Transliterates to ASCII via unidecode, lowercases, and collapses
    every run of non-alphanumeric characters (and underscores) into a
    single dash.  A trailing dash, produced e.g. by closing punctuation,
    is stripped.
    """
    text = unidecode.unidecode(text).lower()
    text = re.sub(r"[\W_]+", "-", text)
    # endswith() is safe on an empty string; the original `text[-1]`
    # raised IndexError when the input slugified to "".
    if text.endswith("-"):
        return text[:-1]
    return text
19 |
20 |
class figure:
    """Context manager that titles the current pyplot axes on entry and
    saves the module-level ``fig`` on exit.

    Output path: ``figures/figure-[<prefix>-]<slug-of-name>.pdf``.
    Relies on the module-level ``fig`` created by ``plt.subplots()`` and
    on the module-level ``slugify`` helper.
    """

    def __init__(self, name=None, prefix=None):
        self.name = name      # human-readable title, also slugified for the filename
        self.prefix = prefix  # optional filename prefix (e.g. figure group number)

    def __enter__(self):
        print("--- FIGURE ---")
        print(f"`{self.name}`")
        plt.cla()
        plt.title(self.name)
        # Return self so `with figure(...) as f:` works; the original
        # implicitly returned None, which made the `as` form useless.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Exceptions are not suppressed (implicit None return), so a
        # failure inside the `with` body still propagates after saving.
        print("--- SAVE ---")
        figure_prefix = "figure-"
        if self.prefix is not None:
            figure_prefix += f"{str(self.prefix)}-"
        fig.savefig(f"figures/{figure_prefix}{slugify(self.name)}.pdf")
38 |
39 |
# (1) better style
# NOTE: the "science"/"ieee" styles come from the third-party SciencePlots
# package, which must be installed for this call to succeed.
plt.style.use(["science", "ieee"])

# One shared figure/axes pair reused for every plot in this script:
# `fig` is saved to PDF by figure.__exit__, `ax` is drawn on by fill_between().
fig, ax = plt.subplots()
ax.autoscale(tight=True)
46 |
def read_file(path="log_EfficientNet_batchboost_1", col=5):
    """Load one numeric column from ``results/<path>.csv``.

    The header row is skipped.  Column 0 (the epoch index) is returned as
    ints; column *col* is a stringified torch scalar such as
    ``tensor(1.2345, device='cuda:0')`` and is stripped down to a float.

    Returns a pair of parallel lists ``(epochs, values)``.
    """

    def _scalar(cell):
        # Strip the torch repr wrappers, leaving just the number.
        for wrapper in (", device='cuda:0'", "tensor(", ")"):
            cell = cell.replace(wrapper, "")
        return float(cell)

    epochs, values = [], []
    with open(f"results/{path}.csv", "r") as handle:
        rows = csv.reader(handle, delimiter=",")
        next(rows, None)  # drop the header line
        for row in rows:
            epochs.append(int(row[0]))
            values.append(_scalar(row[col]))
    return epochs, values
59 |
60 |
def fill_between(X, Y, color="blue", alpha=0.05, factor=1):
    """Shade a symmetric band around Y on the shared module-level ``ax``.

    The band half-width is ``factor`` times the standard deviation of Y
    (a single scalar, since Y is one-dimensional).
    """
    spread = np.array(Y).std(axis=0)  # ls = '--'
    half_width = factor * spread
    upper = Y + half_width
    lower = Y - half_width
    ax.fill_between(X, upper, lower, facecolor=color, alpha=alpha)
64 |
65 |
### FIGURE (1): underfitting ###
# Runs at weight decay 1e-4 without data augmentation (see results/decay=1e-4/).
# Default read_file column 5 = "test acc" per the CSV headers.
# NOTE(review): the plot labels read "boostbatch" while the files and method
# are named "batchboost" -- presumably a label typo; runtime strings left
# unchanged here.

with figure("test accuracy (without augment)", prefix=1):
    x1, y1 = read_file("decay=1e-4/log_EfficientNet_batchboost_1")
    plt.plot(x1, y1, label="boostbatch (alpha=1.0)", color="darkred")

    x1, y1 = read_file("decay=1e-4/log_EfficientNet_batchboost_3")
    plt.plot(x1, y1, label="boostbatch (alpha=0.4)", color="red")

    x2, y2 = read_file("decay=1e-4/log_EfficientNet_mixup_1")
    plt.plot(x2, y2, label="mixup (alpha=1.0)", color="darkblue")

    x2, y2 = read_file("decay=1e-4/log_EfficientNet_mixup_3")
    plt.plot(x2, y2, label="mixup (alpha=0.4)", color="blue")

    x3, y3 = read_file("decay=1e-4/log_EfficientNet_baseline_13")
    plt.plot(x3, y3, label="baseline", color="black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    # legend intentionally disabled for this panel; the loss-train figure
    # below carries the shared legend
    # plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

with figure("loss train (without augment)", prefix=1):
    # col=1 is the "train loss" column of the results CSVs
    x1a, y1a = read_file("decay=1e-4/log_EfficientNet_batchboost_1", col=1)
    plt.plot(x1a, y1a, label="boostbatch (alpha=1.0)", color="darkred")

    x1b, y1b = read_file("decay=1e-4/log_EfficientNet_batchboost_3", col=1)
    plt.plot(x1b, y1b, label="boostbatch (alpha=0.4)", color="red")

    # shaded band around the mean of the two batchboost runs
    fill_between(x1a,
                 np.mean([y1a, y1b], axis=0),
                 color="red",
                 factor=1,
                 alpha=0.1)

    x2a, y2a = read_file("decay=1e-4/log_EfficientNet_mixup_1", col=1)
    plt.plot(x2a, y2a, label="mixup (alpha=1.0)", color="darkblue")

    x2b, y2b = read_file("decay=1e-4/log_EfficientNet_mixup_3", col=1)
    plt.plot(x2b, y2b, label="mixup (alpha=0.4)", color="blue")

    # matching band for the two mixup runs
    fill_between(x2a,
                 np.mean([y2a, y2b], axis=0),
                 color="blue",
                 factor=1,
                 alpha=0.1)

    x3, y3 = read_file("decay=1e-4/log_EfficientNet_baseline_13", col=1)
    plt.plot(x3, y3, label="baseline", color="black")

    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

### FIGURE (2): overfitting (comparison to mixup) ###
# Runs at weight decay 1e-5 with data augmentation (results/decay=1e-5/).

with figure("test accuracy (with augment)", prefix=2):
    x1a, y1a = read_file("decay=1e-5/log_EfficientNet_batchboost_2")
    plt.plot(x1a, y1a, label="boostbatch (alpha=1.0)", color="darkred")

    x1b, y1b = read_file("decay=1e-5/log_EfficientNet_batchboost_4")
    plt.plot(x1b, y1b, label="boostbatch (alpha=0.4)", color="red")

    fill_between(x1a,
                 np.mean([y1a, y1b], axis=0),
                 color="red",
                 factor=0.5,
                 alpha=0.1)

    x2a, y2a = read_file("decay=1e-5/log_EfficientNet_mixup_2")
    plt.plot(x2a, y2a, label="mixup (alpha=1.0)", color="darkblue")

    x2b, y2b = read_file("decay=1e-5/log_EfficientNet_mixup_4")
    plt.plot(x2b, y2b, label="mixup (alpha=0.4)", color="blue")

    fill_between(x2a,
                 np.mean([y2a, y2b], axis=0),
                 color="blue",
                 factor=0.5,
                 alpha=0.1)

    # baseline run intentionally disabled in this figure
    # x3, y3 = read_file("decay=1e-5/log_EfficientNet_baseline_24")
    # plt.plot(x3, y3, label="baseline", color="black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

with figure("train accuracy (with augment)", prefix=2):
    # col=3 is the "train acc" column of the results CSVs
    x1, y1 = read_file("decay=1e-5/log_EfficientNet_batchboost_2", col=3)
    plt.plot(x1, y1, label="boostbatch (alpha=1.0)", color="darkred")

    x1, y1 = read_file("decay=1e-5/log_EfficientNet_batchboost_4", col=3)
    plt.plot(x1, y1, label="boostbatch (alpha=0.4)", color="red")

    x2, y2 = read_file("decay=1e-5/log_EfficientNet_mixup_2", col=3)
    plt.plot(x2, y2, label="mixup (alpha=1.0)", color="darkblue")

    x2, y2 = read_file("decay=1e-5/log_EfficientNet_mixup_4", col=3)
    plt.plot(x2, y2, label="mixup (alpha=0.4)", color="blue")

    # baseline run intentionally disabled in this figure
    # x3, y3 = read_file("decay=1e-5/log_EfficientNet_baseline_24", col=3)
    # plt.plot(x3, y3, label="baseline", color="black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    # plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_baseline_13.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(2.6055, device='cuda:0')",0.0,tensor(19.8040),"tensor(2.1569, device='cuda:0')",tensor(18.9400)
3 | 1,"tensor(1.8451, device='cuda:0')",0.0,tensor(31.3720),"tensor(2.0787, device='cuda:0')",tensor(25.8400)
4 | 2,"tensor(1.7183, device='cuda:0')",0.0,tensor(36.4540),"tensor(1.8010, device='cuda:0')",tensor(35.2200)
5 | 3,"tensor(1.5847, device='cuda:0')",0.0,tensor(41.5820),"tensor(1.7851, device='cuda:0')",tensor(35.3800)
6 | 4,"tensor(1.5116, device='cuda:0')",0.0,tensor(44.7700),"tensor(1.9935, device='cuda:0')",tensor(31.7000)
7 | 5,"tensor(1.4671, device='cuda:0')",0.0,tensor(46.4440),"tensor(1.7445, device='cuda:0')",tensor(36.8900)
8 | 6,"tensor(1.4378, device='cuda:0')",0.0,tensor(47.5760),"tensor(1.5938, device='cuda:0')",tensor(43.9800)
9 | 7,"tensor(1.3857, device='cuda:0')",0.0,tensor(49.7040),"tensor(1.4793, device='cuda:0')",tensor(46.5200)
10 | 8,"tensor(1.3346, device='cuda:0')",0.0,tensor(51.8020),"tensor(1.3809, device='cuda:0')",tensor(51.0100)
11 | 9,"tensor(1.2833, device='cuda:0')",0.0,tensor(53.8740),"tensor(1.5019, device='cuda:0')",tensor(45.8600)
12 | 10,"tensor(1.2222, device='cuda:0')",0.0,tensor(56.4380),"tensor(1.4066, device='cuda:0')",tensor(49.5100)
13 | 11,"tensor(1.2230, device='cuda:0')",0.0,tensor(56.4160),"tensor(1.2856, device='cuda:0')",tensor(54.3400)
14 | 12,"tensor(1.2277, device='cuda:0')",0.0,tensor(56.4400),"tensor(1.6038, device='cuda:0')",tensor(46.1800)
15 | 13,"tensor(1.1926, device='cuda:0')",0.0,tensor(57.5240),"tensor(1.2848, device='cuda:0')",tensor(54.5300)
16 | 14,"tensor(1.1133, device='cuda:0')",0.0,tensor(60.5220),"tensor(1.4080, device='cuda:0')",tensor(51.5900)
17 | 15,"tensor(1.1373, device='cuda:0')",0.0,tensor(59.6840),"tensor(1.4613, device='cuda:0')",tensor(51.1200)
18 | 16,"tensor(1.0855, device='cuda:0')",0.0,tensor(61.9180),"tensor(1.3164, device='cuda:0')",tensor(55.0600)
19 | 17,"tensor(0.9908, device='cuda:0')",0.0,tensor(65.1260),"tensor(1.1663, device='cuda:0')",tensor(59.6400)
20 | 18,"tensor(0.9379, device='cuda:0')",0.0,tensor(67.0500),"tensor(1.1016, device='cuda:0')",tensor(61.4500)
21 | 19,"tensor(0.8914, device='cuda:0')",0.0,tensor(68.8620),"tensor(1.3252, device='cuda:0')",tensor(55.9400)
22 | 20,"tensor(0.8710, device='cuda:0')",0.0,tensor(69.7060),"tensor(0.9524, device='cuda:0')",tensor(67.3000)
23 | 21,"tensor(0.8302, device='cuda:0')",0.0,tensor(71.0920),"tensor(0.9558, device='cuda:0')",tensor(66.6600)
24 | 22,"tensor(0.7480, device='cuda:0')",0.0,tensor(73.9720),"tensor(0.8940, device='cuda:0')",tensor(68.3900)
25 | 23,"tensor(0.7158, device='cuda:0')",0.0,tensor(75.1660),"tensor(0.8884, device='cuda:0')",tensor(69.3300)
26 | 24,"tensor(0.6930, device='cuda:0')",0.0,tensor(75.9360),"tensor(0.8584, device='cuda:0')",tensor(70.1800)
27 | 25,"tensor(0.6635, device='cuda:0')",0.0,tensor(77.0360),"tensor(0.9007, device='cuda:0')",tensor(69.3000)
28 | 26,"tensor(0.6355, device='cuda:0')",0.0,tensor(77.8820),"tensor(0.8464, device='cuda:0')",tensor(71.1100)
29 | 27,"tensor(0.6146, device='cuda:0')",0.0,tensor(78.8440),"tensor(0.8866, device='cuda:0')",tensor(70.1600)
30 | 28,"tensor(0.6022, device='cuda:0')",0.0,tensor(79.0920),"tensor(0.8468, device='cuda:0')",tensor(70.8300)
31 | 29,"tensor(0.5818, device='cuda:0')",0.0,tensor(79.8040),"tensor(0.8185, device='cuda:0')",tensor(71.9300)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_baseline_24.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(2.3113, device='cuda:0')",0.0,tensor(21.0420),"tensor(2.1255, device='cuda:0')",tensor(23.4300)
3 | 1,"tensor(1.9387, device='cuda:0')",0.0,tensor(28.4900),"tensor(1.9492, device='cuda:0')",tensor(28.3900)
4 | 2,"tensor(1.8972, device='cuda:0')",0.0,tensor(29.2980),"tensor(2.0775, device='cuda:0')",tensor(24.6300)
5 | 3,"tensor(1.8820, device='cuda:0')",0.0,tensor(30.2560),"tensor(1.8191, device='cuda:0')",tensor(33.)
6 | 4,"tensor(1.8923, device='cuda:0')",0.0,tensor(30.0400),"tensor(2.2398, device='cuda:0')",tensor(21.7100)
7 | 5,"tensor(1.8451, device='cuda:0')",0.0,tensor(31.4360),"tensor(1.9829, device='cuda:0')",tensor(27.5600)
8 | 6,"tensor(1.9566, device='cuda:0')",0.0,tensor(27.0860),"tensor(2.0375, device='cuda:0')",tensor(24.5400)
9 | 7,"tensor(1.9607, device='cuda:0')",0.0,tensor(26.0460),"tensor(1.9464, device='cuda:0')",tensor(27.0500)
10 | 8,"tensor(1.9515, device='cuda:0')",0.0,tensor(26.9960),"tensor(2.2721, device='cuda:0')",tensor(16.3300)
11 | 9,"tensor(1.9365, device='cuda:0')",0.0,tensor(26.5980),"tensor(1.8653, device='cuda:0')",tensor(29.7700)
12 | 10,"tensor(1.8948, device='cuda:0')",0.0,tensor(28.5580),"tensor(2.0207, device='cuda:0')",tensor(24.9000)
13 | 11,"tensor(1.9236, device='cuda:0')",0.0,tensor(27.8180),"tensor(2.0142, device='cuda:0')",tensor(27.8200)
14 | 12,"tensor(1.8476, device='cuda:0')",0.0,tensor(31.1720),"tensor(1.8535, device='cuda:0')",tensor(29.2100)
15 | 13,"tensor(1.9081, device='cuda:0')",0.0,tensor(27.9320),"tensor(1.9645, device='cuda:0')",tensor(28.5600)
16 | 14,"tensor(1.7877, device='cuda:0')",0.0,tensor(33.2980),"tensor(2.0359, device='cuda:0')",tensor(25.7000)
17 | 15,"tensor(1.9540, device='cuda:0')",0.0,tensor(26.3880),"tensor(2.0166, device='cuda:0')",tensor(23.4300)
18 | 16,"tensor(2.0129, device='cuda:0')",0.0,tensor(22.7800),"tensor(4.3970, device='cuda:0')",tensor(11.3500)
19 | 17,"tensor(1.8895, device='cuda:0')",0.0,tensor(27.6220),"tensor(1.9178, device='cuda:0')",tensor(29.3400)
20 | 18,"tensor(1.7812, device='cuda:0')",0.0,tensor(32.4420),"tensor(1.8813, device='cuda:0')",tensor(29.7900)
21 | 19,"tensor(1.7666, device='cuda:0')",0.0,tensor(33.9020),"tensor(1.9696, device='cuda:0')",tensor(27.2200)
22 | 20,"tensor(1.7717, device='cuda:0')",0.0,tensor(33.8160),"tensor(1.9529, device='cuda:0')",tensor(28.1800)
23 | 21,"tensor(1.7940, device='cuda:0')",0.0,tensor(33.0480),"tensor(2.6551, device='cuda:0')",tensor(18.9900)
24 | 22,"tensor(1.8595, device='cuda:0')",0.0,tensor(30.2860),"tensor(1.8495, device='cuda:0')",tensor(32.4800)
25 | 23,"tensor(1.7393, device='cuda:0')",0.0,tensor(35.3400),"tensor(1.9325, device='cuda:0')",tensor(30.7600)
26 | 24,"tensor(1.8225, device='cuda:0')",0.0,tensor(32.5100),"tensor(2.7612, device='cuda:0')",tensor(16.9500)
27 | 25,"tensor(1.7819, device='cuda:0')",0.0,tensor(33.8340),"tensor(2.0891, device='cuda:0')",tensor(28.2000)
28 | 26,"tensor(1.7530, device='cuda:0')",0.0,tensor(35.2620),"tensor(3.0886, device='cuda:0')",tensor(15.7500)
29 | 27,"tensor(1.7344, device='cuda:0')",0.0,tensor(35.9620),"tensor(3.1807, device='cuda:0')",tensor(17.5200)
30 | 28,"tensor(1.7010, device='cuda:0')",0.0,tensor(36.9760),"tensor(1.8512, device='cuda:0')",tensor(32.6100)
31 | 29,"tensor(1.5543, device='cuda:0')",0.0,tensor(42.4600),"tensor(1.6730, device='cuda:0')",tensor(38.7000)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_1.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.8619, device='cuda:0')",0.0,tensor(33.8036),"tensor(1.3804, device='cuda:0')",tensor(51.5800)
3 | 1,"tensor(1.4982, device='cuda:0')",0.0,tensor(47.6485),"tensor(1.2721, device='cuda:0')",tensor(56.7600)
4 | 2,"tensor(1.3898, device='cuda:0')",0.0,tensor(52.0369),"tensor(1.1117, device='cuda:0')",tensor(64.9300)
5 | 3,"tensor(1.3058, device='cuda:0')",0.0,tensor(55.1306),"tensor(0.9486, device='cuda:0')",tensor(71.2200)
6 | 4,"tensor(1.2607, device='cuda:0')",0.0,tensor(57.0095),"tensor(1.0737, device='cuda:0')",tensor(65.9600)
7 | 5,"tensor(1.2342, device='cuda:0')",0.0,tensor(57.9863),"tensor(1.0367, device='cuda:0')",tensor(67.0500)
8 | 6,"tensor(1.2004, device='cuda:0')",0.0,tensor(59.0677),"tensor(1.0099, device='cuda:0')",tensor(68.6700)
9 | 7,"tensor(1.1755, device='cuda:0')",0.0,tensor(59.9862),"tensor(0.8343, device='cuda:0')",tensor(75.2200)
10 | 8,"tensor(1.1527, device='cuda:0')",0.0,tensor(61.4485),"tensor(0.9027, device='cuda:0')",tensor(72.7400)
11 | 9,"tensor(1.1438, device='cuda:0')",0.0,tensor(61.4357),"tensor(0.8255, device='cuda:0')",tensor(75.1500)
12 | 10,"tensor(1.1069, device='cuda:0')",0.0,tensor(63.1542),"tensor(0.8173, device='cuda:0')",tensor(74.4400)
13 | 11,"tensor(1.1128, device='cuda:0')",0.0,tensor(62.5874),"tensor(0.8894, device='cuda:0')",tensor(74.8200)
14 | 12,"tensor(1.1052, device='cuda:0')",0.0,tensor(63.1480),"tensor(0.7840, device='cuda:0')",tensor(76.5000)
15 | 13,"tensor(1.0902, device='cuda:0')",0.0,tensor(63.5684),"tensor(0.9525, device='cuda:0')",tensor(74.1600)
16 | 14,"tensor(1.0729, device='cuda:0')",0.0,tensor(64.0899),"tensor(0.7787, device='cuda:0')",tensor(75.4700)
17 | 15,"tensor(1.0845, device='cuda:0')",0.0,tensor(63.7330),"tensor(0.7887, device='cuda:0')",tensor(75.8800)
18 | 16,"tensor(1.0568, device='cuda:0')",0.0,tensor(64.9602),"tensor(0.8254, device='cuda:0')",tensor(77.1600)
19 | 17,"tensor(1.0605, device='cuda:0')",0.0,tensor(64.6255),"tensor(0.7816, device='cuda:0')",tensor(75.3900)
20 | 18,"tensor(1.0434, device='cuda:0')",0.0,tensor(65.2123),"tensor(0.8675, device='cuda:0')",tensor(74.2000)
21 | 19,"tensor(1.0560, device='cuda:0')",0.0,tensor(64.7336),"tensor(0.7365, device='cuda:0')",tensor(77.9200)
22 | 20,"tensor(1.0462, device='cuda:0')",0.0,tensor(65.3066),"tensor(0.7319, device='cuda:0')",tensor(77.6900)
23 | 21,"tensor(1.0569, device='cuda:0')",0.0,tensor(64.3693),"tensor(0.8474, device='cuda:0')",tensor(73.5000)
24 | 22,"tensor(1.0264, device='cuda:0')",0.0,tensor(65.7328),"tensor(0.7160, device='cuda:0')",tensor(78.1200)
25 | 23,"tensor(1.0208, device='cuda:0')",0.0,tensor(66.2139),"tensor(0.7352, device='cuda:0')",tensor(76.9700)
26 | 24,"tensor(1.0331, device='cuda:0')",0.0,tensor(65.7051),"tensor(0.7542, device='cuda:0')",tensor(76.3700)
27 | 25,"tensor(1.0031, device='cuda:0')",0.0,tensor(66.7205),"tensor(0.7212, device='cuda:0')",tensor(77.5600)
28 | 26,"tensor(1.0169, device='cuda:0')",0.0,tensor(66.5317),"tensor(0.7610, device='cuda:0')",tensor(77.4400)
29 | 27,"tensor(1.0109, device='cuda:0')",0.0,tensor(66.3433),"tensor(0.8155, device='cuda:0')",tensor(74.9300)
30 | 28,"tensor(1.0128, device='cuda:0')",0.0,tensor(66.4252),"tensor(0.7375, device='cuda:0')",tensor(78.9300)
31 | 29,"tensor(1.0011, device='cuda:0')",0.0,tensor(67.0343),"tensor(0.7180, device='cuda:0')",tensor(78.8500)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_2.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.7333, device='cuda:0')",0.0,tensor(39.3799),"tensor(1.1499, device='cuda:0')",tensor(61.7400)
3 | 1,"tensor(1.3994, device='cuda:0')",0.0,tensor(51.3792),"tensor(0.9294, device='cuda:0')",tensor(69.9400)
4 | 2,"tensor(1.3037, device='cuda:0')",0.0,tensor(55.3760),"tensor(0.8431, device='cuda:0')",tensor(72.7200)
5 | 3,"tensor(1.2557, device='cuda:0')",0.0,tensor(56.8880),"tensor(0.9415, device='cuda:0')",tensor(70.2800)
6 | 4,"tensor(1.2112, device='cuda:0')",0.0,tensor(58.3391),"tensor(0.8276, device='cuda:0')",tensor(74.7100)
7 | 5,"tensor(1.1966, device='cuda:0')",0.0,tensor(58.7091),"tensor(0.9025, device='cuda:0')",tensor(76.0500)
8 | 6,"tensor(1.1778, device='cuda:0')",0.0,tensor(59.5382),"tensor(0.8087, device='cuda:0')",tensor(76.6200)
9 | 7,"tensor(1.1527, device='cuda:0')",0.0,tensor(60.4451),"tensor(0.8012, device='cuda:0')",tensor(76.3000)
10 | 8,"tensor(1.1488, device='cuda:0')",0.0,tensor(60.5774),"tensor(0.7704, device='cuda:0')",tensor(77.2800)
11 | 9,"tensor(1.1375, device='cuda:0')",0.0,tensor(61.0278),"tensor(0.8168, device='cuda:0')",tensor(75.4900)
12 | 10,"tensor(1.1269, device='cuda:0')",0.0,tensor(61.5662),"tensor(0.6920, device='cuda:0')",tensor(79.1700)
13 | 11,"tensor(1.1198, device='cuda:0')",0.0,tensor(61.5416),"tensor(0.8677, device='cuda:0')",tensor(75.0100)
14 | 12,"tensor(1.1440, device='cuda:0')",0.0,tensor(60.5554),"tensor(0.8204, device='cuda:0')",tensor(77.2300)
15 | 13,"tensor(1.1188, device='cuda:0')",0.0,tensor(61.5085),"tensor(0.8043, device='cuda:0')",tensor(79.0300)
16 | 14,"tensor(1.1159, device='cuda:0')",0.0,tensor(61.5262),"tensor(0.7955, device='cuda:0')",tensor(78.3400)
17 | 15,"tensor(1.1178, device='cuda:0')",0.0,tensor(61.5627),"tensor(0.8294, device='cuda:0')",tensor(78.3200)
18 | 16,"tensor(1.1095, device='cuda:0')",0.0,tensor(61.8756),"tensor(0.7635, device='cuda:0')",tensor(80.1700)
19 | 17,"tensor(1.0935, device='cuda:0')",0.0,tensor(62.4646),"tensor(0.9610, device='cuda:0')",tensor(77.0600)
20 | 18,"tensor(1.0986, device='cuda:0')",0.0,tensor(62.2386),"tensor(0.7500, device='cuda:0')",tensor(78.7500)
21 | 19,"tensor(1.1026, device='cuda:0')",0.0,tensor(62.0449),"tensor(0.9191, device='cuda:0')",tensor(74.9000)
22 | 20,"tensor(1.1038, device='cuda:0')",0.0,tensor(61.9949),"tensor(0.7838, device='cuda:0')",tensor(80.7900)
23 | 21,"tensor(1.0884, device='cuda:0')",0.0,tensor(62.6618),"tensor(0.7176, device='cuda:0')",tensor(79.9900)
24 | 22,"tensor(1.0818, device='cuda:0')",0.0,tensor(63.2573),"tensor(0.7445, device='cuda:0')",tensor(80.0500)
25 | 23,"tensor(1.0898, device='cuda:0')",0.0,tensor(62.4544),"tensor(0.7628, device='cuda:0')",tensor(77.1100)
26 | 24,"tensor(1.0846, device='cuda:0')",0.0,tensor(62.7730),"tensor(0.6846, device='cuda:0')",tensor(79.1600)
27 | 25,"tensor(1.0942, device='cuda:0')",0.0,tensor(62.2764),"tensor(0.7224, device='cuda:0')",tensor(82.3300)
28 | 26,"tensor(1.0587, device='cuda:0')",0.0,tensor(63.9383),"tensor(0.8021, device='cuda:0')",tensor(79.6800)
29 | 27,"tensor(1.0782, device='cuda:0')",0.0,tensor(62.7933),"tensor(0.6434, device='cuda:0')",tensor(81.9000)
30 | 28,"tensor(1.0600, device='cuda:0')",0.0,tensor(63.8398),"tensor(0.7467, device='cuda:0')",tensor(81.3600)
31 | 29,"tensor(1.0518, device='cuda:0')",0.0,tensor(64.3532),"tensor(0.6855, device='cuda:0')",tensor(81.1100)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_3.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.8809, device='cuda:0')",0.0,tensor(34.8699),"tensor(1.3911, device='cuda:0')",tensor(52.6000)
3 | 1,"tensor(1.5065, device='cuda:0')",0.0,tensor(48.1545),"tensor(1.3242, device='cuda:0')",tensor(57.3100)
4 | 2,"tensor(1.3962, device='cuda:0')",0.0,tensor(52.7664),"tensor(1.1488, device='cuda:0')",tensor(62.4500)
5 | 3,"tensor(1.3444, device='cuda:0')",0.0,tensor(55.5993),"tensor(1.0894, device='cuda:0')",tensor(66.3500)
6 | 4,"tensor(1.2820, device='cuda:0')",0.0,tensor(57.0650),"tensor(1.2615, device='cuda:0')",tensor(57.5000)
7 | 5,"tensor(1.2676, device='cuda:0')",0.0,tensor(57.9336),"tensor(0.9373, device='cuda:0')",tensor(71.1500)
8 | 6,"tensor(1.2373, device='cuda:0')",0.0,tensor(58.6707),"tensor(1.1060, device='cuda:0')",tensor(68.0700)
9 | 7,"tensor(1.1939, device='cuda:0')",0.0,tensor(60.6067),"tensor(1.0972, device='cuda:0')",tensor(67.8900)
10 | 8,"tensor(1.1939, device='cuda:0')",0.0,tensor(60.5827),"tensor(0.9282, device='cuda:0')",tensor(73.8600)
11 | 9,"tensor(1.1518, device='cuda:0')",0.0,tensor(62.1907),"tensor(0.9273, device='cuda:0')",tensor(72.4700)
12 | 10,"tensor(1.1649, device='cuda:0')",0.0,tensor(61.7164),"tensor(0.9758, device='cuda:0')",tensor(69.7300)
13 | 11,"tensor(1.1431, device='cuda:0')",0.0,tensor(62.9341),"tensor(0.9097, device='cuda:0')",tensor(74.4200)
14 | 12,"tensor(1.1476, device='cuda:0')",0.0,tensor(62.7877),"tensor(0.9629, device='cuda:0')",tensor(73.8700)
15 | 13,"tensor(1.1407, device='cuda:0')",0.0,tensor(63.4019),"tensor(0.8294, device='cuda:0')",tensor(75.0800)
16 | 14,"tensor(1.1065, device='cuda:0')",0.0,tensor(64.3079),"tensor(1.1083, device='cuda:0')",tensor(65.9900)
17 | 15,"tensor(1.1262, device='cuda:0')",0.0,tensor(63.1819),"tensor(0.9628, device='cuda:0')",tensor(69.4500)
18 | 16,"tensor(1.1226, device='cuda:0')",0.0,tensor(64.0045),"tensor(0.8510, device='cuda:0')",tensor(77.3200)
19 | 17,"tensor(1.0906, device='cuda:0')",0.0,tensor(64.7494),"tensor(0.9986, device='cuda:0')",tensor(74.8900)
20 | 18,"tensor(1.1165, device='cuda:0')",0.0,tensor(63.7127),"tensor(0.8170, device='cuda:0')",tensor(76.5200)
21 | 19,"tensor(1.0888, device='cuda:0')",0.0,tensor(65.2943),"tensor(0.7733, device='cuda:0')",tensor(78.0600)
22 | 20,"tensor(1.0663, device='cuda:0')",0.0,tensor(65.6215),"tensor(0.9420, device='cuda:0')",tensor(75.6900)
23 | 21,"tensor(1.0590, device='cuda:0')",0.0,tensor(66.2072),"tensor(0.9275, device='cuda:0')",tensor(74.3500)
24 | 22,"tensor(1.0551, device='cuda:0')",0.0,tensor(66.5175),"tensor(0.7751, device='cuda:0')",tensor(76.9400)
25 | 23,"tensor(1.0443, device='cuda:0')",0.0,tensor(66.5285),"tensor(0.8900, device='cuda:0')",tensor(74.5400)
26 | 24,"tensor(1.0459, device='cuda:0')",0.0,tensor(67.2152),"tensor(0.8153, device='cuda:0')",tensor(76.8400)
27 | 25,"tensor(1.0383, device='cuda:0')",0.0,tensor(66.9208),"tensor(0.8578, device='cuda:0')",tensor(78.5000)
28 | 26,"tensor(1.0275, device='cuda:0')",0.0,tensor(66.9116),"tensor(0.8216, device='cuda:0')",tensor(76.6500)
29 | 27,"tensor(1.0292, device='cuda:0')",0.0,tensor(67.0629),"tensor(0.8585, device='cuda:0')",tensor(75.7100)
30 | 28,"tensor(1.0197, device='cuda:0')",0.0,tensor(67.4291),"tensor(0.8267, device='cuda:0')",tensor(77.0400)
31 | 29,"tensor(1.0209, device='cuda:0')",0.0,tensor(67.4399),"tensor(0.8536, device='cuda:0')",tensor(77.8800)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_batchboost_4.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.8195, device='cuda:0')",0.0,tensor(37.0720),"tensor(1.2780, device='cuda:0')",tensor(57.6400)
3 | 1,"tensor(1.4345, device='cuda:0')",0.0,tensor(50.7976),"tensor(1.5002, device='cuda:0')",tensor(53.0400)
4 | 2,"tensor(1.3146, device='cuda:0')",0.0,tensor(55.4534),"tensor(1.0494, device='cuda:0')",tensor(69.7100)
5 | 3,"tensor(1.2690, device='cuda:0')",0.0,tensor(56.8110),"tensor(0.9850, device='cuda:0')",tensor(72.5800)
6 | 4,"tensor(1.2196, device='cuda:0')",0.0,tensor(58.6434),"tensor(0.9985, device='cuda:0')",tensor(75.2000)
7 | 5,"tensor(1.2061, device='cuda:0')",0.0,tensor(58.8863),"tensor(1.4348, device='cuda:0')",tensor(57.9500)
8 | 6,"tensor(1.1976, device='cuda:0')",0.0,tensor(59.3146),"tensor(1.0232, device='cuda:0')",tensor(72.1400)
9 | 7,"tensor(1.1794, device='cuda:0')",0.0,tensor(59.9166),"tensor(0.9893, device='cuda:0')",tensor(73.5000)
10 | 8,"tensor(1.1581, device='cuda:0')",0.0,tensor(60.9214),"tensor(1.0609, device='cuda:0')",tensor(73.5500)
11 | 9,"tensor(1.1470, device='cuda:0')",0.0,tensor(61.0844),"tensor(0.8182, device='cuda:0')",tensor(77.2100)
12 | 10,"tensor(1.1350, device='cuda:0')",0.0,tensor(61.7524),"tensor(0.9231, device='cuda:0')",tensor(76.4500)
13 | 11,"tensor(1.1393, device='cuda:0')",0.0,tensor(61.7179),"tensor(0.8716, device='cuda:0')",tensor(75.6600)
14 | 12,"tensor(1.1489, device='cuda:0')",0.0,tensor(60.7491),"tensor(0.9923, device='cuda:0')",tensor(73.5100)
15 | 13,"tensor(1.1502, device='cuda:0')",0.0,tensor(61.0766),"tensor(1.0965, device='cuda:0')",tensor(66.0500)
16 | 14,"tensor(1.1292, device='cuda:0')",0.0,tensor(61.5851),"tensor(0.9852, device='cuda:0')",tensor(76.9900)
17 | 15,"tensor(1.1075, device='cuda:0')",0.0,tensor(62.6161),"tensor(0.7420, device='cuda:0')",tensor(79.5800)
18 | 16,"tensor(1.1258, device='cuda:0')",0.0,tensor(62.1972),"tensor(1.0127, device='cuda:0')",tensor(74.4300)
19 | 17,"tensor(1.1076, device='cuda:0')",0.0,tensor(62.2725),"tensor(0.7605, device='cuda:0')",tensor(81.1600)
20 | 18,"tensor(1.0996, device='cuda:0')",0.0,tensor(62.6641),"tensor(0.7235, device='cuda:0')",tensor(81.5700)
21 | 19,"tensor(1.1162, device='cuda:0')",0.0,tensor(62.1934),"tensor(0.7670, device='cuda:0')",tensor(78.8400)
22 | 20,"tensor(1.0989, device='cuda:0')",0.0,tensor(62.4432),"tensor(0.9550, device='cuda:0')",tensor(76.0800)
23 | 21,"tensor(1.1023, device='cuda:0')",0.0,tensor(62.6439),"tensor(0.6734, device='cuda:0')",tensor(81.0800)
24 | 22,"tensor(1.1070, device='cuda:0')",0.0,tensor(61.7068),"tensor(0.7493, device='cuda:0')",tensor(80.0400)
25 | 23,"tensor(1.0937, device='cuda:0')",0.0,tensor(62.6318),"tensor(0.7509, device='cuda:0')",tensor(78.7200)
26 | 24,"tensor(1.0963, device='cuda:0')",0.0,tensor(63.1412),"tensor(0.7149, device='cuda:0')",tensor(78.7300)
27 | 25,"tensor(1.0731, device='cuda:0')",0.0,tensor(63.3816),"tensor(0.7929, device='cuda:0')",tensor(78.1600)
28 | 26,"tensor(1.0687, device='cuda:0')",0.0,tensor(63.5898),"tensor(0.7151, device='cuda:0')",tensor(80.9300)
29 | 27,"tensor(1.0815, device='cuda:0')",0.0,tensor(63.2100),"tensor(0.6483, device='cuda:0')",tensor(81.8100)
30 | 28,"tensor(1.0759, device='cuda:0')",0.0,tensor(63.6988),"tensor(0.7087, device='cuda:0')",tensor(81.4700)
31 | 29,"tensor(1.0640, device='cuda:0')",0.0,tensor(64.1290),"tensor(0.7718, device='cuda:0')",tensor(80.0900)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_1.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(2.2877, device='cuda:0')",0.0,tensor(21.4552),"tensor(1.8986, device='cuda:0')",tensor(30.5100)
3 | 1,"tensor(1.9524, device='cuda:0')",0.0,tensor(30.9192),"tensor(1.7501, device='cuda:0')",tensor(38.8000)
4 | 2,"tensor(1.8855, device='cuda:0')",0.0,tensor(34.1426),"tensor(1.8834, device='cuda:0')",tensor(33.3300)
5 | 3,"tensor(1.8253, device='cuda:0')",0.0,tensor(37.0241),"tensor(1.6134, device='cuda:0')",tensor(45.4500)
6 | 4,"tensor(1.9348, device='cuda:0')",0.0,tensor(31.7827),"tensor(1.9646, device='cuda:0')",tensor(27.4900)
7 | 5,"tensor(1.8398, device='cuda:0')",0.0,tensor(36.3115),"tensor(1.5600, device='cuda:0')",tensor(49.8100)
8 | 6,"tensor(1.7886, device='cuda:0')",0.0,tensor(38.7104),"tensor(1.6014, device='cuda:0')",tensor(44.6800)
9 | 7,"tensor(1.7828, device='cuda:0')",0.0,tensor(39.3375),"tensor(1.5151, device='cuda:0')",tensor(49.4600)
10 | 8,"tensor(1.7540, device='cuda:0')",0.0,tensor(40.5212),"tensor(1.6683, device='cuda:0')",tensor(44.1300)
11 | 9,"tensor(1.7623, device='cuda:0')",0.0,tensor(39.9576),"tensor(1.6402, device='cuda:0')",tensor(43.8600)
12 | 10,"tensor(1.7885, device='cuda:0')",0.0,tensor(38.9384),"tensor(2.4766, device='cuda:0')",tensor(15.3800)
13 | 11,"tensor(1.8959, device='cuda:0')",0.0,tensor(33.9427),"tensor(1.6782, device='cuda:0')",tensor(41.1700)
14 | 12,"tensor(1.8126, device='cuda:0')",0.0,tensor(37.8754),"tensor(1.5495, device='cuda:0')",tensor(46.3900)
15 | 13,"tensor(1.7411, device='cuda:0')",0.0,tensor(41.1904),"tensor(1.5361, device='cuda:0')",tensor(48.6700)
16 | 14,"tensor(1.7836, device='cuda:0')",0.0,tensor(39.3387),"tensor(1.5355, device='cuda:0')",tensor(50.3900)
17 | 15,"tensor(1.7489, device='cuda:0')",0.0,tensor(40.8408),"tensor(1.5699, device='cuda:0')",tensor(45.6300)
18 | 16,"tensor(1.7543, device='cuda:0')",0.0,tensor(40.5420),"tensor(2.2198, device='cuda:0')",tensor(43.3000)
19 | 17,"tensor(1.7222, device='cuda:0')",0.0,tensor(42.4087),"tensor(1.4898, device='cuda:0')",tensor(51.3100)
20 | 18,"tensor(1.8517, device='cuda:0')",0.0,tensor(36.3109),"tensor(1.5565, device='cuda:0')",tensor(46.4700)
21 | 19,"tensor(1.8607, device='cuda:0')",0.0,tensor(35.7716),"tensor(1.7921, device='cuda:0')",tensor(37.4400)
22 | 20,"tensor(1.7999, device='cuda:0')",0.0,tensor(38.6506),"tensor(1.4950, device='cuda:0')",tensor(49.7900)
23 | 21,"tensor(1.7446, device='cuda:0')",0.0,tensor(41.0905),"tensor(1.4899, device='cuda:0')",tensor(49.6300)
24 | 22,"tensor(1.6839, device='cuda:0')",0.0,tensor(43.4721),"tensor(1.5370, device='cuda:0')",tensor(48.2100)
25 | 23,"tensor(1.7687, device='cuda:0')",0.0,tensor(40.1902),"tensor(1.4819, device='cuda:0')",tensor(50.0900)
26 | 24,"tensor(1.7312, device='cuda:0')",0.0,tensor(41.9068),"tensor(1.4197, device='cuda:0')",tensor(53.6700)
27 | 25,"tensor(1.7332, device='cuda:0')",0.0,tensor(41.6716),"tensor(1.2940, device='cuda:0')",tensor(58.0100)
28 | 26,"tensor(1.6708, device='cuda:0')",0.0,tensor(44.7846),"tensor(1.3072, device='cuda:0')",tensor(57.9900)
29 | 27,"tensor(1.6245, device='cuda:0')",0.0,tensor(46.8689),"tensor(1.2321, device='cuda:0')",tensor(61.3700)
30 | 28,"tensor(1.6105, device='cuda:0')",0.0,tensor(47.2492),"tensor(1.6507, device='cuda:0')",tensor(42.4700)
31 | 29,"tensor(1.6017, device='cuda:0')",0.0,tensor(47.6939),"tensor(1.2439, device='cuda:0')",tensor(61.4700)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_2.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(2.5988, device='cuda:0')",0.0,tensor(16.4150),"tensor(2.1065, device='cuda:0')",tensor(24.0700)
3 | 1,"tensor(2.1234, device='cuda:0')",0.0,tensor(22.1954),"tensor(1.9791, device='cuda:0')",tensor(26.8100)
4 | 2,"tensor(2.0674, device='cuda:0')",0.0,tensor(24.8577),"tensor(1.9732, device='cuda:0')",tensor(28.4500)
5 | 3,"tensor(2.0405, device='cuda:0')",0.0,tensor(26.0568),"tensor(2.0376, device='cuda:0')",tensor(22.8200)
6 | 4,"tensor(1.9996, device='cuda:0')",0.0,tensor(28.6330),"tensor(1.8742, device='cuda:0')",tensor(32.5600)
7 | 5,"tensor(1.9977, device='cuda:0')",0.0,tensor(28.8430),"tensor(2.1198, device='cuda:0')",tensor(20.6700)
8 | 6,"tensor(2.0493, device='cuda:0')",0.0,tensor(25.7799),"tensor(2.2385, device='cuda:0')",tensor(19.0900)
9 | 7,"tensor(2.0589, device='cuda:0')",0.0,tensor(25.2904),"tensor(3.0567, device='cuda:0')",tensor(13.4100)
10 | 8,"tensor(2.0175, device='cuda:0')",0.0,tensor(27.3789),"tensor(1.8305, device='cuda:0')",tensor(35.8500)
11 | 9,"tensor(2.0298, device='cuda:0')",0.0,tensor(26.2188),"tensor(1.9093, device='cuda:0')",tensor(28.9200)
12 | 10,"tensor(2.0976, device='cuda:0')",0.0,tensor(22.3591),"tensor(1.9503, device='cuda:0')",tensor(28.7000)
13 | 11,"tensor(2.0748, device='cuda:0')",0.0,tensor(24.0372),"tensor(2.0584, device='cuda:0')",tensor(23.9000)
14 | 12,"tensor(2.0370, device='cuda:0')",0.0,tensor(25.6861),"tensor(1.7837, device='cuda:0')",tensor(35.7100)
15 | 13,"tensor(2.0515, device='cuda:0')",0.0,tensor(24.8760),"tensor(1.9619, device='cuda:0')",tensor(31.7300)
16 | 14,"tensor(2.0787, device='cuda:0')",0.0,tensor(23.7376),"tensor(2.0201, device='cuda:0')",tensor(26.3000)
17 | 15,"tensor(2.0419, device='cuda:0')",0.0,tensor(25.5730),"tensor(1.8605, device='cuda:0')",tensor(28.4800)
18 | 16,"tensor(2.0032, device='cuda:0')",0.0,tensor(27.5435),"tensor(2.5678, device='cuda:0')",tensor(20.5900)
19 | 17,"tensor(2.0786, device='cuda:0')",0.0,tensor(23.8160),"tensor(1.9983, device='cuda:0')",tensor(25.7300)
20 | 18,"tensor(2.0086, device='cuda:0')",0.0,tensor(27.0779),"tensor(1.8201, device='cuda:0')",tensor(34.4400)
21 | 19,"tensor(1.9817, device='cuda:0')",0.0,tensor(28.8634),"tensor(1.7823, device='cuda:0')",tensor(36.3100)
22 | 20,"tensor(1.9784, device='cuda:0')",0.0,tensor(28.6708),"tensor(2.1509, device='cuda:0')",tensor(21.9000)
23 | 21,"tensor(1.9974, device='cuda:0')",0.0,tensor(28.1345),"tensor(1.8063, device='cuda:0')",tensor(35.5100)
24 | 22,"tensor(1.9424, device='cuda:0')",0.0,tensor(30.4217),"tensor(1.7292, device='cuda:0')",tensor(39.7000)
25 | 23,"tensor(1.9676, device='cuda:0')",0.0,tensor(29.5585),"tensor(1.7449, device='cuda:0')",tensor(36.6700)
26 | 24,"tensor(1.9825, device='cuda:0')",0.0,tensor(28.9549),"tensor(2.0882, device='cuda:0')",tensor(24.8400)
27 | 25,"tensor(2.0198, device='cuda:0')",0.0,tensor(27.3775),"tensor(1.8630, device='cuda:0')",tensor(33.4500)
28 | 26,"tensor(1.9847, device='cuda:0')",0.0,tensor(28.6669),"tensor(1.8658, device='cuda:0')",tensor(35.1400)
29 | 27,"tensor(1.9420, device='cuda:0')",0.0,tensor(30.4228),"tensor(1.6885, device='cuda:0')",tensor(40.3000)
30 | 28,"tensor(1.9979, device='cuda:0')",0.0,tensor(27.8036),"tensor(1.9690, device='cuda:0')",tensor(26.8700)
31 | 29,"tensor(2.0084, device='cuda:0')",0.0,tensor(26.5612),"tensor(1.9115, device='cuda:0')",tensor(30.1100)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_3.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(2.2490, device='cuda:0')",0.0,tensor(21.8189),"tensor(2.0380, device='cuda:0')",tensor(27.1200)
3 | 1,"tensor(1.9387, device='cuda:0')",0.0,tensor(29.3909),"tensor(1.9303, device='cuda:0')",tensor(31.1300)
4 | 2,"tensor(1.8109, device='cuda:0')",0.0,tensor(36.0116),"tensor(1.6713, device='cuda:0')",tensor(39.9300)
5 | 3,"tensor(1.7671, device='cuda:0')",0.0,tensor(38.3374),"tensor(1.6934, device='cuda:0')",tensor(40.4100)
6 | 4,"tensor(1.7966, device='cuda:0')",0.0,tensor(37.2334),"tensor(1.8982, device='cuda:0')",tensor(31.0800)
7 | 5,"tensor(1.7525, device='cuda:0')",0.0,tensor(38.9498),"tensor(1.7087, device='cuda:0')",tensor(39.7500)
8 | 6,"tensor(1.7477, device='cuda:0')",0.0,tensor(39.3013),"tensor(1.9661, device='cuda:0')",tensor(27.9700)
9 | 7,"tensor(1.9798, device='cuda:0')",0.0,tensor(27.6844),"tensor(2.6448, device='cuda:0')",tensor(15.8900)
10 | 8,"tensor(1.9137, device='cuda:0')",0.0,tensor(30.8651),"tensor(1.8522, device='cuda:0')",tensor(32.0600)
11 | 9,"tensor(1.8364, device='cuda:0')",0.0,tensor(34.7881),"tensor(1.7626, device='cuda:0')",tensor(35.0900)
12 | 10,"tensor(1.7537, device='cuda:0')",0.0,tensor(38.3701),"tensor(1.8617, device='cuda:0')",tensor(32.2400)
13 | 11,"tensor(1.7696, device='cuda:0')",0.0,tensor(38.2061),"tensor(2.3542, device='cuda:0')",tensor(21.3600)
14 | 12,"tensor(1.7851, device='cuda:0')",0.0,tensor(37.1738),"tensor(1.8924, device='cuda:0')",tensor(31.7200)
15 | 13,"tensor(1.8270, device='cuda:0')",0.0,tensor(35.4236),"tensor(2.1773, device='cuda:0')",tensor(21.2600)
16 | 14,"tensor(1.8555, device='cuda:0')",0.0,tensor(33.7255),"tensor(2.0021, device='cuda:0')",tensor(28.4100)
17 | 15,"tensor(1.7799, device='cuda:0')",0.0,tensor(37.4785),"tensor(2.4709, device='cuda:0')",tensor(19.6500)
18 | 16,"tensor(1.8668, device='cuda:0')",0.0,tensor(33.5824),"tensor(1.7088, device='cuda:0')",tensor(39.9200)
19 | 17,"tensor(1.8082, device='cuda:0')",0.0,tensor(36.2666),"tensor(1.9701, device='cuda:0')",tensor(34.8700)
20 | 18,"tensor(1.7453, device='cuda:0')",0.0,tensor(39.2025),"tensor(1.6074, device='cuda:0')",tensor(41.0800)
21 | 19,"tensor(1.7906, device='cuda:0')",0.0,tensor(37.2583),"tensor(1.8329, device='cuda:0')",tensor(33.3000)
22 | 20,"tensor(1.8071, device='cuda:0')",0.0,tensor(36.7620),"tensor(1.7982, device='cuda:0')",tensor(36.2900)
23 | 21,"tensor(1.8230, device='cuda:0')",0.0,tensor(35.6204),"tensor(1.9685, device='cuda:0')",tensor(28.9000)
24 | 22,"tensor(1.7635, device='cuda:0')",0.0,tensor(38.3622),"tensor(1.7303, device='cuda:0')",tensor(38.0600)
25 | 23,"tensor(1.7182, device='cuda:0')",0.0,tensor(40.4985),"tensor(1.8462, device='cuda:0')",tensor(34.1400)
26 | 24,"tensor(1.6942, device='cuda:0')",0.0,tensor(41.4110),"tensor(1.5842, device='cuda:0')",tensor(45.4500)
27 | 25,"tensor(1.6960, device='cuda:0')",0.0,tensor(41.3809),"tensor(1.6349, device='cuda:0')",tensor(42.6300)
28 | 26,"tensor(1.7126, device='cuda:0')",0.0,tensor(40.4428),"tensor(1.9265, device='cuda:0')",tensor(29.4000)
29 | 27,"tensor(1.6109, device='cuda:0')",0.0,tensor(45.3281),"tensor(1.5128, device='cuda:0')",tensor(49.6700)
30 | 28,"tensor(1.5759, device='cuda:0')",0.0,tensor(46.9403),"tensor(1.3375, device='cuda:0')",tensor(53.4300)
31 | 29,"tensor(1.5298, device='cuda:0')",0.0,tensor(48.8387),"tensor(2.6476, device='cuda:0')",tensor(15.5000)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/log_EfficientNet_mixup_4.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(2.5290, device='cuda:0')",0.0,tensor(13.9961),"tensor(2.4831, device='cuda:0')",tensor(12.3400)
3 | 1,"tensor(2.1623, device='cuda:0')",0.0,tensor(19.7158),"tensor(2.0269, device='cuda:0')",tensor(25.4800)
4 | 2,"tensor(2.0835, device='cuda:0')",0.0,tensor(23.1430),"tensor(2.1210, device='cuda:0')",tensor(22.5500)
5 | 3,"tensor(2.0396, device='cuda:0')",0.0,tensor(25.3663),"tensor(1.9627, device='cuda:0')",tensor(26.9200)
6 | 4,"tensor(1.9988, device='cuda:0')",0.0,tensor(26.9953),"tensor(1.9151, device='cuda:0')",tensor(31.6100)
7 | 5,"tensor(1.9487, device='cuda:0')",0.0,tensor(29.6776),"tensor(1.8142, device='cuda:0')",tensor(33.3800)
8 | 6,"tensor(1.9233, device='cuda:0')",0.0,tensor(30.8187),"tensor(1.7458, device='cuda:0')",tensor(36.6100)
9 | 7,"tensor(1.8914, device='cuda:0')",0.0,tensor(32.2555),"tensor(2.1836, device='cuda:0')",tensor(23.9400)
10 | 8,"tensor(1.9096, device='cuda:0')",0.0,tensor(31.4838),"tensor(1.9482, device='cuda:0')",tensor(29.3700)
11 | 9,"tensor(1.8889, device='cuda:0')",0.0,tensor(32.2597),"tensor(2.0828, device='cuda:0')",tensor(24.2100)
12 | 10,"tensor(1.8749, device='cuda:0')",0.0,tensor(32.6502),"tensor(1.7281, device='cuda:0')",tensor(39.9700)
13 | 11,"tensor(1.8640, device='cuda:0')",0.0,tensor(33.4449),"tensor(1.7451, device='cuda:0')",tensor(36.1600)
14 | 12,"tensor(1.8373, device='cuda:0')",0.0,tensor(34.6387),"tensor(1.6598, device='cuda:0')",tensor(41.2200)
15 | 13,"tensor(1.8216, device='cuda:0')",0.0,tensor(35.6718),"tensor(1.8085, device='cuda:0')",tensor(35.6300)
16 | 14,"tensor(1.8988, device='cuda:0')",0.0,tensor(32.2875),"tensor(2.0386, device='cuda:0')",tensor(32.9800)
17 | 15,"tensor(1.9598, device='cuda:0')",0.0,tensor(28.9572),"tensor(1.8898, device='cuda:0')",tensor(27.9000)
18 | 16,"tensor(1.9162, device='cuda:0')",0.0,tensor(31.3641),"tensor(2.2362, device='cuda:0')",tensor(21.0600)
19 | 17,"tensor(1.9672, device='cuda:0')",0.0,tensor(28.2926),"tensor(2.4693, device='cuda:0')",tensor(19.2000)
20 | 18,"tensor(1.9095, device='cuda:0')",0.0,tensor(30.7386),"tensor(1.8700, device='cuda:0')",tensor(31.7700)
21 | 19,"tensor(2.0299, device='cuda:0')",0.0,tensor(26.0223),"tensor(2.3860, device='cuda:0')",tensor(17.8100)
22 | 20,"tensor(2.1327, device='cuda:0')",0.0,tensor(21.0535),"tensor(2.1328, device='cuda:0')",tensor(23.5400)
23 | 21,"tensor(2.0320, device='cuda:0')",0.0,tensor(25.6313),"tensor(1.8574, device='cuda:0')",tensor(31.4700)
24 | 22,"tensor(1.9575, device='cuda:0')",0.0,tensor(28.9095),"tensor(1.9248, device='cuda:0')",tensor(30.7500)
25 | 23,"tensor(1.9607, device='cuda:0')",0.0,tensor(29.1441),"tensor(1.8371, device='cuda:0')",tensor(33.9000)
26 | 24,"tensor(1.9145, device='cuda:0')",0.0,tensor(30.7228),"tensor(3.1385, device='cuda:0')",tensor(13.3300)
27 | 25,"tensor(1.9261, device='cuda:0')",0.0,tensor(30.4129),"tensor(1.8249, device='cuda:0')",tensor(30.9200)
28 | 26,"tensor(1.9393, device='cuda:0')",0.0,tensor(30.0298),"tensor(1.9120, device='cuda:0')",tensor(31.6800)
29 | 27,"tensor(1.9288, device='cuda:0')",0.0,tensor(30.3117),"tensor(2.2512, device='cuda:0')",tensor(17.3600)
30 | 28,"tensor(1.9740, device='cuda:0')",0.0,tensor(26.8164),"tensor(1.7792, device='cuda:0')",tensor(33.3700)
31 | 29,"tensor(1.9213, device='cuda:0')",0.0,tensor(30.0489),"tensor(1.7236, device='cuda:0')",tensor(37.6600)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-4/loss-test-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/loss-test-with-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-4/loss-test-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/loss-test-without-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-4/test-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/test-accuracy-with-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-4/test-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/test-accuracy-without-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-4/train-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/train-accuracy-with-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-4/train-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/train-accuracy-without-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_baseline_13.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.1174, device='cuda:0')",0.0,tensor(62.1440),"tensor(1.0998, device='cuda:0')",tensor(65.9500)
3 | 1,"tensor(0.6976, device='cuda:0')",0.0,tensor(76.6220),"tensor(0.7366, device='cuda:0')",tensor(76.8200)
4 | 2,"tensor(0.5567, device='cuda:0')",0.0,tensor(81.5380),"tensor(0.6900, device='cuda:0')",tensor(77.7300)
5 | 3,"tensor(0.4774, device='cuda:0')",0.0,tensor(84.0140),"tensor(0.6509, device='cuda:0')",tensor(79.5800)
6 | 4,"tensor(0.4144, device='cuda:0')",0.0,tensor(86.1160),"tensor(0.7071, device='cuda:0')",tensor(79.2800)
7 | 5,"tensor(0.3718, device='cuda:0')",0.0,tensor(87.4440),"tensor(0.8875, device='cuda:0')",tensor(75.5800)
8 | 6,"tensor(0.3258, device='cuda:0')",0.0,tensor(88.8200),"tensor(0.7190, device='cuda:0')",tensor(79.7200)
9 | 7,"tensor(0.2987, device='cuda:0')",0.0,tensor(89.7940),"tensor(0.6827, device='cuda:0')",tensor(80.2900)
10 | 8,"tensor(0.2675, device='cuda:0')",0.0,tensor(90.8120),"tensor(0.6776, device='cuda:0')",tensor(80.4700)
11 | 9,"tensor(0.2377, device='cuda:0')",0.0,tensor(91.9300),"tensor(0.6298, device='cuda:0')",tensor(82.1400)
12 | 10,"tensor(0.2226, device='cuda:0')",0.0,tensor(92.3820),"tensor(0.6568, device='cuda:0')",tensor(81.3000)
13 | 11,"tensor(0.1994, device='cuda:0')",0.0,tensor(93.2340),"tensor(0.6691, device='cuda:0')",tensor(81.8800)
14 | 12,"tensor(0.1838, device='cuda:0')",0.0,tensor(93.7300),"tensor(0.7464, device='cuda:0')",tensor(80.8300)
15 | 13,"tensor(0.1758, device='cuda:0')",0.0,tensor(94.0700),"tensor(0.7183, device='cuda:0')",tensor(81.5600)
16 | 14,"tensor(0.1605, device='cuda:0')",0.0,tensor(94.5300),"tensor(0.7699, device='cuda:0')",tensor(81.1300)
17 | 15,"tensor(0.1528, device='cuda:0')",0.0,tensor(94.8600),"tensor(0.6828, device='cuda:0')",tensor(82.4600)
18 | 16,"tensor(0.1449, device='cuda:0')",0.0,tensor(95.0100),"tensor(0.7295, device='cuda:0')",tensor(82.0800)
19 | 17,"tensor(0.1357, device='cuda:0')",0.0,tensor(95.3840),"tensor(0.6751, device='cuda:0')",tensor(82.7300)
20 | 18,"tensor(0.1281, device='cuda:0')",0.0,tensor(95.6720),"tensor(0.7107, device='cuda:0')",tensor(82.9000)
21 | 19,"tensor(0.1224, device='cuda:0')",0.0,tensor(95.7900),"tensor(0.7698, device='cuda:0')",tensor(81.5900)
22 | 20,"tensor(0.1124, device='cuda:0')",0.0,tensor(96.1800),"tensor(0.8016, device='cuda:0')",tensor(80.9400)
23 | 21,"tensor(0.1125, device='cuda:0')",0.0,tensor(96.2200),"tensor(0.7759, device='cuda:0')",tensor(81.8900)
24 | 22,"tensor(0.1068, device='cuda:0')",0.0,tensor(96.3560),"tensor(0.7618, device='cuda:0')",tensor(82.1800)
25 | 23,"tensor(0.1021, device='cuda:0')",0.0,tensor(96.5300),"tensor(0.8303, device='cuda:0')",tensor(81.1600)
26 | 24,"tensor(0.0936, device='cuda:0')",0.0,tensor(96.8120),"tensor(0.8282, device='cuda:0')",tensor(81.7000)
27 | 25,"tensor(0.0946, device='cuda:0')",0.0,tensor(96.7540),"tensor(0.8808, device='cuda:0')",tensor(81.1200)
28 | 26,"tensor(0.0903, device='cuda:0')",0.0,tensor(96.9500),"tensor(0.8434, device='cuda:0')",tensor(82.0400)
29 | 27,"tensor(0.0870, device='cuda:0')",0.0,tensor(97.0700),"tensor(0.8061, device='cuda:0')",tensor(82.3100)
30 | 28,"tensor(0.0831, device='cuda:0')",0.0,tensor(97.1400),"tensor(0.7824, device='cuda:0')",tensor(82.5900)
31 | 29,"tensor(0.0821, device='cuda:0')",0.0,tensor(97.2800),"tensor(0.7807, device='cuda:0')",tensor(82.9600)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_baseline_24.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.2443, device='cuda:0')",0.0,tensor(57.3480),"tensor(1.0896, device='cuda:0')",tensor(69.4200)
3 | 1,"tensor(0.8371, device='cuda:0')",0.0,tensor(72.3440),"tensor(0.8776, device='cuda:0')",tensor(72.6300)
4 | 2,"tensor(0.7215, device='cuda:0')",0.0,tensor(75.8100),"tensor(0.7997, device='cuda:0')",tensor(74.9500)
5 | 3,"tensor(0.6623, device='cuda:0')",0.0,tensor(77.9160),"tensor(0.7626, device='cuda:0')",tensor(74.9000)
6 | 4,"tensor(0.6108, device='cuda:0')",0.0,tensor(79.4600),"tensor(0.6899, device='cuda:0')",tensor(78.3100)
7 | 5,"tensor(0.5772, device='cuda:0')",0.0,tensor(80.5560),"tensor(0.6956, device='cuda:0')",tensor(77.9600)
8 | 6,"tensor(0.5470, device='cuda:0')",0.0,tensor(81.6640),"tensor(0.5464, device='cuda:0')",tensor(82.1200)
9 | 7,"tensor(0.5215, device='cuda:0')",0.0,tensor(82.3800),"tensor(0.6220, device='cuda:0')",tensor(79.6600)
10 | 8,"tensor(0.4999, device='cuda:0')",0.0,tensor(83.1800),"tensor(0.5817, device='cuda:0')",tensor(80.9500)
11 | 9,"tensor(0.4810, device='cuda:0')",0.0,tensor(83.7660),"tensor(0.5612, device='cuda:0')",tensor(81.9400)
12 | 10,"tensor(0.4606, device='cuda:0')",0.0,tensor(84.4180),"tensor(0.5870, device='cuda:0')",tensor(80.0800)
13 | 11,"tensor(0.4426, device='cuda:0')",0.0,tensor(85.0140),"tensor(0.6265, device='cuda:0')",tensor(80.0200)
14 | 12,"tensor(0.4267, device='cuda:0')",0.0,tensor(85.5280),"tensor(0.5357, device='cuda:0')",tensor(82.2200)
15 | 13,"tensor(0.4118, device='cuda:0')",0.0,tensor(85.8580),"tensor(0.5398, device='cuda:0')",tensor(82.6000)
16 | 14,"tensor(0.4024, device='cuda:0')",0.0,tensor(86.0660),"tensor(0.5989, device='cuda:0')",tensor(80.7400)
17 | 15,"tensor(0.3909, device='cuda:0')",0.0,tensor(86.6220),"tensor(0.5086, device='cuda:0')",tensor(83.2600)
18 | 16,"tensor(0.3736, device='cuda:0')",0.0,tensor(87.2380),"tensor(0.5118, device='cuda:0')",tensor(83.7200)
19 | 17,"tensor(0.3650, device='cuda:0')",0.0,tensor(87.5220),"tensor(0.5101, device='cuda:0')",tensor(83.6400)
20 | 18,"tensor(0.3528, device='cuda:0')",0.0,tensor(87.8760),"tensor(0.4698, device='cuda:0')",tensor(85.1200)
21 | 19,"tensor(0.3448, device='cuda:0')",0.0,tensor(88.1500),"tensor(0.5130, device='cuda:0')",tensor(83.4900)
22 | 20,"tensor(0.3308, device='cuda:0')",0.0,tensor(88.6460),"tensor(0.5559, device='cuda:0')",tensor(83.1100)
23 | 21,"tensor(0.3232, device='cuda:0')",0.0,tensor(88.9540),"tensor(0.5446, device='cuda:0')",tensor(82.6800)
24 | 22,"tensor(0.3126, device='cuda:0')",0.0,tensor(89.3020),"tensor(0.4592, device='cuda:0')",tensor(85.2100)
25 | 23,"tensor(0.3053, device='cuda:0')",0.0,tensor(89.3980),"tensor(0.5744, device='cuda:0')",tensor(82.6400)
26 | 24,"tensor(0.2983, device='cuda:0')",0.0,tensor(89.7100),"tensor(0.4908, device='cuda:0')",tensor(84.1000)
27 | 25,"tensor(0.2849, device='cuda:0')",0.0,tensor(90.1560),"tensor(0.5206, device='cuda:0')",tensor(83.9400)
28 | 26,"tensor(0.2834, device='cuda:0')",0.0,tensor(90.0800),"tensor(0.4873, device='cuda:0')",tensor(85.2400)
29 | 27,"tensor(0.2689, device='cuda:0')",0.0,tensor(90.5860),"tensor(0.4795, device='cuda:0')",tensor(85.2500)
30 | 28,"tensor(0.2666, device='cuda:0')",0.0,tensor(90.7340),"tensor(0.5007, device='cuda:0')",tensor(85.1100)
31 | 29,"tensor(0.2569, device='cuda:0')",0.0,tensor(91.1420),"tensor(0.5082, device='cuda:0')",tensor(84.2000)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_1.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.5489, device='cuda:0')",0.0,tensor(46.8700),"tensor(1.0325, device='cuda:0')",tensor(70.1200)
3 | 1,"tensor(1.2682, device='cuda:0')",0.0,tensor(57.2556),"tensor(0.7428, device='cuda:0')",tensor(75.8100)
4 | 2,"tensor(1.1674, device='cuda:0')",0.0,tensor(60.8273),"tensor(0.6898, device='cuda:0')",tensor(78.9600)
5 | 3,"tensor(1.1057, device='cuda:0')",0.0,tensor(63.5734),"tensor(0.6742, device='cuda:0')",tensor(78.3000)
6 | 4,"tensor(1.0625, device='cuda:0')",0.0,tensor(65.0731),"tensor(0.6306, device='cuda:0')",tensor(80.7000)
7 | 5,"tensor(1.0479, device='cuda:0')",0.0,tensor(65.2568),"tensor(0.6129, device='cuda:0')",tensor(81.7700)
8 | 6,"tensor(1.0239, device='cuda:0')",0.0,tensor(65.9444),"tensor(0.6028, device='cuda:0')",tensor(81.2800)
9 | 7,"tensor(0.9993, device='cuda:0')",0.0,tensor(66.7977),"tensor(0.6886, device='cuda:0')",tensor(79.3700)
10 | 8,"tensor(0.9763, device='cuda:0')",0.0,tensor(67.9500),"tensor(0.6598, device='cuda:0')",tensor(81.3100)
11 | 9,"tensor(0.9645, device='cuda:0')",0.0,tensor(68.0379),"tensor(0.7042, device='cuda:0')",tensor(79.8300)
12 | 10,"tensor(0.9515, device='cuda:0')",0.0,tensor(68.5777),"tensor(0.6745, device='cuda:0')",tensor(81.1900)
13 | 11,"tensor(0.9238, device='cuda:0')",0.0,tensor(69.6884),"tensor(0.8189, device='cuda:0')",tensor(78.4200)
14 | 12,"tensor(0.9414, device='cuda:0')",0.0,tensor(68.5351),"tensor(0.6701, device='cuda:0')",tensor(82.0300)
15 | 13,"tensor(0.9389, device='cuda:0')",0.0,tensor(68.8745),"tensor(0.5832, device='cuda:0')",tensor(83.6000)
16 | 14,"tensor(0.9155, device='cuda:0')",0.0,tensor(69.9621),"tensor(0.6897, device='cuda:0')",tensor(81.4700)
17 | 15,"tensor(0.9061, device='cuda:0')",0.0,tensor(70.3822),"tensor(0.6672, device='cuda:0')",tensor(82.3000)
18 | 16,"tensor(0.9020, device='cuda:0')",0.0,tensor(70.4629),"tensor(0.6488, device='cuda:0')",tensor(82.9000)
19 | 17,"tensor(0.8953, device='cuda:0')",0.0,tensor(70.4594),"tensor(0.6244, device='cuda:0')",tensor(82.6800)
20 | 18,"tensor(0.8896, device='cuda:0')",0.0,tensor(70.6783),"tensor(0.6720, device='cuda:0')",tensor(82.1500)
21 | 19,"tensor(0.8901, device='cuda:0')",0.0,tensor(70.6816),"tensor(0.6343, device='cuda:0')",tensor(83.1400)
22 | 20,"tensor(0.8715, device='cuda:0')",0.0,tensor(71.9698),"tensor(0.6502, device='cuda:0')",tensor(83.8300)
23 | 21,"tensor(0.8699, device='cuda:0')",0.0,tensor(71.6310),"tensor(0.6583, device='cuda:0')",tensor(82.7900)
24 | 22,"tensor(0.8638, device='cuda:0')",0.0,tensor(71.6807),"tensor(0.6244, device='cuda:0')",tensor(83.9400)
25 | 23,"tensor(0.8714, device='cuda:0')",0.0,tensor(71.4544),"tensor(0.7419, device='cuda:0')",tensor(82.3300)
26 | 24,"tensor(0.8673, device='cuda:0')",0.0,tensor(71.8646),"tensor(0.7027, device='cuda:0')",tensor(82.9300)
27 | 25,"tensor(0.8617, device='cuda:0')",0.0,tensor(71.8118),"tensor(0.6836, device='cuda:0')",tensor(82.6300)
28 | 26,"tensor(0.8752, device='cuda:0')",0.0,tensor(71.3329),"tensor(0.6698, device='cuda:0')",tensor(83.4400)
29 | 27,"tensor(0.8760, device='cuda:0')",0.0,tensor(71.2223),"tensor(0.6813, device='cuda:0')",tensor(83.0900)
30 | 28,"tensor(0.8627, device='cuda:0')",0.0,tensor(72.0735),"tensor(0.6109, device='cuda:0')",tensor(83.7200)
31 | 29,"tensor(0.8564, device='cuda:0')",0.0,tensor(71.9139),"tensor(0.6498, device='cuda:0')",tensor(83.1300)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_2.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.6238, device='cuda:0')",0.0,tensor(43.7457),"tensor(1.0339, device='cuda:0')",tensor(68.1200)
3 | 1,"tensor(1.3345, device='cuda:0')",0.0,tensor(54.3368),"tensor(0.9366, device='cuda:0')",tensor(70.6600)
4 | 2,"tensor(1.2593, device='cuda:0')",0.0,tensor(56.6156),"tensor(0.6572, device='cuda:0')",tensor(78.8400)
5 | 3,"tensor(1.2066, device='cuda:0')",0.0,tensor(58.5272),"tensor(0.6226, device='cuda:0')",tensor(79.5300)
6 | 4,"tensor(1.1670, device='cuda:0')",0.0,tensor(60.1677),"tensor(0.6359, device='cuda:0')",tensor(79.3300)
7 | 5,"tensor(1.1355, device='cuda:0')",0.0,tensor(61.4253),"tensor(0.6235, device='cuda:0')",tensor(81.0400)
8 | 6,"tensor(1.1357, device='cuda:0')",0.0,tensor(60.6668),"tensor(0.5750, device='cuda:0')",tensor(82.2400)
9 | 7,"tensor(1.1101, device='cuda:0')",0.0,tensor(61.9495),"tensor(0.5551, device='cuda:0')",tensor(82.6200)
10 | 8,"tensor(1.0871, device='cuda:0')",0.0,tensor(62.7897),"tensor(0.5791, device='cuda:0')",tensor(81.8900)
11 | 9,"tensor(1.0692, device='cuda:0')",0.0,tensor(63.2579),"tensor(0.6077, device='cuda:0')",tensor(81.1100)
12 | 10,"tensor(1.0782, device='cuda:0')",0.0,tensor(62.9700),"tensor(0.5328, device='cuda:0')",tensor(83.2500)
13 | 11,"tensor(1.0514, device='cuda:0')",0.0,tensor(64.1702),"tensor(0.5064, device='cuda:0')",tensor(83.9100)
14 | 12,"tensor(1.0488, device='cuda:0')",0.0,tensor(63.8546),"tensor(0.5531, device='cuda:0')",tensor(83.2200)
15 | 13,"tensor(1.0729, device='cuda:0')",0.0,tensor(62.7652),"tensor(0.5439, device='cuda:0')",tensor(82.8400)
16 | 14,"tensor(1.0357, device='cuda:0')",0.0,tensor(64.4369),"tensor(0.5264, device='cuda:0')",tensor(83.7300)
17 | 15,"tensor(1.0342, device='cuda:0')",0.0,tensor(64.5673),"tensor(0.5263, device='cuda:0')",tensor(83.7500)
18 | 16,"tensor(1.0292, device='cuda:0')",0.0,tensor(64.6994),"tensor(0.5558, device='cuda:0')",tensor(83.1900)
19 | 17,"tensor(1.0299, device='cuda:0')",0.0,tensor(64.6908),"tensor(0.5957, device='cuda:0')",tensor(82.6200)
20 | 18,"tensor(1.0225, device='cuda:0')",0.0,tensor(64.9278),"tensor(0.6015, device='cuda:0')",tensor(82.0700)
21 | 19,"tensor(1.0114, device='cuda:0')",0.0,tensor(65.4504),"tensor(0.5689, device='cuda:0')",tensor(82.4400)
22 | 20,"tensor(1.0239, device='cuda:0')",0.0,tensor(64.8615),"tensor(0.5855, device='cuda:0')",tensor(82.6400)
23 | 21,"tensor(1.0217, device='cuda:0')",0.0,tensor(64.8218),"tensor(0.5125, device='cuda:0')",tensor(84.2600)
24 | 22,"tensor(0.9914, device='cuda:0')",0.0,tensor(66.0869),"tensor(0.4711, device='cuda:0')",tensor(85.5200)
25 | 23,"tensor(1.0080, device='cuda:0')",0.0,tensor(65.1572),"tensor(0.5901, device='cuda:0')",tensor(82.2200)
26 | 24,"tensor(0.9865, device='cuda:0')",0.0,tensor(66.1673),"tensor(0.6213, device='cuda:0')",tensor(84.8400)
27 | 25,"tensor(0.9787, device='cuda:0')",0.0,tensor(66.4097),"tensor(0.5190, device='cuda:0')",tensor(84.2700)
28 | 26,"tensor(0.9911, device='cuda:0')",0.0,tensor(66.1076),"tensor(0.5294, device='cuda:0')",tensor(83.2000)
29 | 27,"tensor(0.9689, device='cuda:0')",0.0,tensor(67.0958),"tensor(0.5068, device='cuda:0')",tensor(84.8300)
30 | 28,"tensor(0.9989, device='cuda:0')",0.0,tensor(65.1615),"tensor(0.5430, device='cuda:0')",tensor(83.9000)
31 | 29,"tensor(0.9745, device='cuda:0')",0.0,tensor(66.6269),"tensor(0.5172, device='cuda:0')",tensor(84.7300)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_3.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.5592, device='cuda:0')",0.0,tensor(47.6954),"tensor(1.0072, device='cuda:0')",tensor(69.8600)
3 | 1,"tensor(1.2895, device='cuda:0')",0.0,tensor(57.9795),"tensor(1.0657, device='cuda:0')",tensor(66.3500)
4 | 2,"tensor(1.1896, device='cuda:0')",0.0,tensor(61.5622),"tensor(0.7285, device='cuda:0')",tensor(76.8400)
5 | 3,"tensor(1.1315, device='cuda:0')",0.0,tensor(63.4467),"tensor(0.7342, device='cuda:0')",tensor(77.6600)
6 | 4,"tensor(1.0813, device='cuda:0')",0.0,tensor(65.5488),"tensor(0.6676, device='cuda:0')",tensor(80.4200)
7 | 5,"tensor(1.0439, device='cuda:0')",0.0,tensor(67.3616),"tensor(0.6663, device='cuda:0')",tensor(80.1300)
8 | 6,"tensor(1.0427, device='cuda:0')",0.0,tensor(67.1914),"tensor(0.6639, device='cuda:0')",tensor(80.0600)
9 | 7,"tensor(1.0141, device='cuda:0')",0.0,tensor(67.3071),"tensor(0.6258, device='cuda:0')",tensor(82.3600)
10 | 8,"tensor(0.9851, device='cuda:0')",0.0,tensor(68.5185),"tensor(0.6693, device='cuda:0')",tensor(80.8600)
11 | 9,"tensor(0.9848, device='cuda:0')",0.0,tensor(69.1618),"tensor(0.6606, device='cuda:0')",tensor(81.1300)
12 | 10,"tensor(0.9466, device='cuda:0')",0.0,tensor(69.9205),"tensor(0.7316, device='cuda:0')",tensor(80.4600)
13 | 11,"tensor(0.9591, device='cuda:0')",0.0,tensor(69.1466),"tensor(0.6989, device='cuda:0')",tensor(81.5800)
14 | 12,"tensor(0.9271, device='cuda:0')",0.0,tensor(71.0775),"tensor(0.6770, device='cuda:0')",tensor(81.7100)
15 | 13,"tensor(0.9348, device='cuda:0')",0.0,tensor(71.0280),"tensor(0.7440, device='cuda:0')",tensor(79.7700)
16 | 14,"tensor(0.9291, device='cuda:0')",0.0,tensor(70.5768),"tensor(0.6676, device='cuda:0')",tensor(82.1900)
17 | 15,"tensor(0.9004, device='cuda:0')",0.0,tensor(71.5198),"tensor(0.6743, device='cuda:0')",tensor(82.0400)
18 | 16,"tensor(0.9146, device='cuda:0')",0.0,tensor(71.4545),"tensor(0.6879, device='cuda:0')",tensor(81.6000)
19 | 17,"tensor(0.9019, device='cuda:0')",0.0,tensor(71.4323),"tensor(0.7090, device='cuda:0')",tensor(81.7600)
20 | 18,"tensor(0.8996, device='cuda:0')",0.0,tensor(71.2211),"tensor(0.6541, device='cuda:0')",tensor(82.9500)
21 | 19,"tensor(0.8976, device='cuda:0')",0.0,tensor(71.3187),"tensor(0.7200, device='cuda:0')",tensor(82.4200)
22 | 20,"tensor(0.8834, device='cuda:0')",0.0,tensor(72.5813),"tensor(0.6829, device='cuda:0')",tensor(82.7700)
23 | 21,"tensor(0.8742, device='cuda:0')",0.0,tensor(72.3386),"tensor(0.6557, device='cuda:0')",tensor(84.1600)
24 | 22,"tensor(0.8692, device='cuda:0')",0.0,tensor(73.2286),"tensor(0.6941, device='cuda:0')",tensor(82.3000)
25 | 23,"tensor(0.8712, device='cuda:0')",0.0,tensor(72.0759),"tensor(0.7901, device='cuda:0')",tensor(80.2200)
26 | 24,"tensor(0.8599, device='cuda:0')",0.0,tensor(73.2842),"tensor(0.7298, device='cuda:0')",tensor(81.8300)
27 | 25,"tensor(0.8663, device='cuda:0')",0.0,tensor(72.8395),"tensor(0.7081, device='cuda:0')",tensor(81.4800)
28 | 26,"tensor(0.8650, device='cuda:0')",0.0,tensor(72.3468),"tensor(0.7650, device='cuda:0')",tensor(80.4900)
29 | 27,"tensor(0.8632, device='cuda:0')",0.0,tensor(72.6353),"tensor(0.6907, device='cuda:0')",tensor(82.5700)
30 | 28,"tensor(0.8460, device='cuda:0')",0.0,tensor(73.5868),"tensor(0.6731, device='cuda:0')",tensor(83.1100)
31 | 29,"tensor(0.8814, device='cuda:0')",0.0,tensor(72.5378),"tensor(0.7574, device='cuda:0')",tensor(82.3700)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_batchboost_4.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.6433, device='cuda:0')",0.0,tensor(43.3735),"tensor(1.2930, device='cuda:0')",tensor(64.3100)
3 | 1,"tensor(1.3486, device='cuda:0')",0.0,tensor(54.5961),"tensor(0.8134, device='cuda:0')",tensor(74.3800)
4 | 2,"tensor(1.2502, device='cuda:0')",0.0,tensor(57.8238),"tensor(0.8502, device='cuda:0')",tensor(73.4500)
5 | 3,"tensor(1.2160, device='cuda:0')",0.0,tensor(59.2377),"tensor(0.7695, device='cuda:0')",tensor(75.6300)
6 | 4,"tensor(1.1928, device='cuda:0')",0.0,tensor(59.9123),"tensor(0.6782, device='cuda:0')",tensor(78.4400)
7 | 5,"tensor(1.1497, device='cuda:0')",0.0,tensor(61.3737),"tensor(0.7590, device='cuda:0')",tensor(76.6500)
8 | 6,"tensor(1.1221, device='cuda:0')",0.0,tensor(62.2608),"tensor(0.5987, device='cuda:0')",tensor(80.9100)
9 | 7,"tensor(1.0959, device='cuda:0')",0.0,tensor(63.0191),"tensor(0.5961, device='cuda:0')",tensor(82.1600)
10 | 8,"tensor(1.0877, device='cuda:0')",0.0,tensor(62.8546),"tensor(0.6956, device='cuda:0')",tensor(81.2600)
11 | 9,"tensor(1.0709, device='cuda:0')",0.0,tensor(63.7188),"tensor(0.6949, device='cuda:0')",tensor(79.6000)
12 | 10,"tensor(1.0627, device='cuda:0')",0.0,tensor(64.1292),"tensor(0.6470, device='cuda:0')",tensor(81.0400)
13 | 11,"tensor(1.0550, device='cuda:0')",0.0,tensor(64.2661),"tensor(0.5805, device='cuda:0')",tensor(82.6400)
14 | 12,"tensor(1.0621, device='cuda:0')",0.0,tensor(63.8284),"tensor(0.6214, device='cuda:0')",tensor(82.5600)
15 | 13,"tensor(1.0382, device='cuda:0')",0.0,tensor(64.9477),"tensor(0.6153, device='cuda:0')",tensor(82.1300)
16 | 14,"tensor(1.0352, device='cuda:0')",0.0,tensor(64.9522),"tensor(0.7908, device='cuda:0')",tensor(81.0800)
17 | 15,"tensor(1.0338, device='cuda:0')",0.0,tensor(64.9852),"tensor(0.5995, device='cuda:0')",tensor(83.2200)
18 | 16,"tensor(1.0070, device='cuda:0')",0.0,tensor(65.8552),"tensor(0.7508, device='cuda:0')",tensor(83.)
19 | 17,"tensor(1.0091, device='cuda:0')",0.0,tensor(65.8721),"tensor(0.5977, device='cuda:0')",tensor(83.7400)
20 | 18,"tensor(0.9970, device='cuda:0')",0.0,tensor(66.2594),"tensor(0.6023, device='cuda:0')",tensor(84.2600)
21 | 19,"tensor(1.0157, device='cuda:0')",0.0,tensor(65.6327),"tensor(0.6868, device='cuda:0')",tensor(83.2400)
22 | 20,"tensor(1.0010, device='cuda:0')",0.0,tensor(65.8058),"tensor(0.5992, device='cuda:0')",tensor(84.3700)
23 | 21,"tensor(0.9925, device='cuda:0')",0.0,tensor(66.2249),"tensor(0.5742, device='cuda:0')",tensor(84.0400)
24 | 22,"tensor(0.9776, device='cuda:0')",0.0,tensor(66.9663),"tensor(0.5625, device='cuda:0')",tensor(85.3400)
25 | 23,"tensor(0.9731, device='cuda:0')",0.0,tensor(66.9961),"tensor(0.6398, device='cuda:0')",tensor(82.9700)
26 | 24,"tensor(0.9685, device='cuda:0')",0.0,tensor(67.0790),"tensor(0.6912, device='cuda:0')",tensor(83.5300)
27 | 25,"tensor(0.9792, device='cuda:0')",0.0,tensor(66.3659),"tensor(0.5675, device='cuda:0')",tensor(84.5900)
28 | 26,"tensor(0.9639, device='cuda:0')",0.0,tensor(66.8584),"tensor(0.6594, device='cuda:0')",tensor(82.6600)
29 | 27,"tensor(0.9792, device='cuda:0')",0.0,tensor(66.7141),"tensor(0.6313, device='cuda:0')",tensor(84.2700)
30 | 28,"tensor(0.9402, device='cuda:0')",0.0,tensor(68.3393),"tensor(0.7023, device='cuda:0')",tensor(84.2200)
31 | 29,"tensor(0.9429, device='cuda:0')",0.0,tensor(68.2936),"tensor(0.5501, device='cuda:0')",tensor(83.8200)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_1.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.8401, device='cuda:0')",0.0,tensor(38.9253),"tensor(1.2284, device='cuda:0')",tensor(59.7100)
3 | 1,"tensor(1.5538, device='cuda:0')",0.0,tensor(51.7325),"tensor(0.9910, device='cuda:0')",tensor(67.6500)
4 | 2,"tensor(1.4499, device='cuda:0')",0.0,tensor(55.7960),"tensor(0.9132, device='cuda:0')",tensor(71.8100)
5 | 3,"tensor(1.3914, device='cuda:0')",0.0,tensor(57.5044),"tensor(0.9560, device='cuda:0')",tensor(69.8500)
6 | 4,"tensor(1.3419, device='cuda:0')",0.0,tensor(59.3992),"tensor(0.8385, device='cuda:0')",tensor(75.3200)
7 | 5,"tensor(1.3383, device='cuda:0')",0.0,tensor(59.3667),"tensor(0.7956, device='cuda:0')",tensor(77.3500)
8 | 6,"tensor(1.2864, device='cuda:0')",0.0,tensor(61.4852),"tensor(0.7863, device='cuda:0')",tensor(77.5400)
9 | 7,"tensor(1.2726, device='cuda:0')",0.0,tensor(61.9529),"tensor(0.6963, device='cuda:0')",tensor(80.4200)
10 | 8,"tensor(1.2563, device='cuda:0')",0.0,tensor(62.3388),"tensor(0.7173, device='cuda:0')",tensor(80.2200)
11 | 9,"tensor(1.2053, device='cuda:0')",0.0,tensor(63.9532),"tensor(0.6878, device='cuda:0')",tensor(80.1900)
12 | 10,"tensor(1.1979, device='cuda:0')",0.0,tensor(64.5873),"tensor(0.7827, device='cuda:0')",tensor(78.1600)
13 | 11,"tensor(1.1863, device='cuda:0')",0.0,tensor(64.5691),"tensor(0.6630, device='cuda:0')",tensor(81.8200)
14 | 12,"tensor(1.1745, device='cuda:0')",0.0,tensor(65.2032),"tensor(0.7188, device='cuda:0')",tensor(80.1400)
15 | 13,"tensor(1.1348, device='cuda:0')",0.0,tensor(66.7967),"tensor(0.6287, device='cuda:0')",tensor(82.4100)
16 | 14,"tensor(1.1840, device='cuda:0')",0.0,tensor(64.6255),"tensor(0.6893, device='cuda:0')",tensor(81.7900)
17 | 15,"tensor(1.1713, device='cuda:0')",0.0,tensor(65.3257),"tensor(0.6450, device='cuda:0')",tensor(82.2600)
18 | 16,"tensor(1.1160, device='cuda:0')",0.0,tensor(67.3888),"tensor(0.6603, device='cuda:0')",tensor(81.3100)
19 | 17,"tensor(1.1688, device='cuda:0')",0.0,tensor(65.2723),"tensor(0.6629, device='cuda:0')",tensor(82.1300)
20 | 18,"tensor(1.1515, device='cuda:0')",0.0,tensor(65.9117),"tensor(0.6897, device='cuda:0')",tensor(81.3000)
21 | 19,"tensor(1.1372, device='cuda:0')",0.0,tensor(66.8916),"tensor(0.7103, device='cuda:0')",tensor(81.3300)
22 | 20,"tensor(1.1010, device='cuda:0')",0.0,tensor(67.7973),"tensor(0.7031, device='cuda:0')",tensor(80.9700)
23 | 21,"tensor(1.1538, device='cuda:0')",0.0,tensor(65.6317),"tensor(0.7909, device='cuda:0')",tensor(76.7500)
24 | 22,"tensor(1.1011, device='cuda:0')",0.0,tensor(67.7370),"tensor(0.6757, device='cuda:0')",tensor(81.4900)
25 | 23,"tensor(1.1160, device='cuda:0')",0.0,tensor(67.2205),"tensor(0.6688, device='cuda:0')",tensor(82.1600)
26 | 24,"tensor(1.1190, device='cuda:0')",0.0,tensor(67.3427),"tensor(0.6924, device='cuda:0')",tensor(81.7000)
27 | 25,"tensor(1.1283, device='cuda:0')",0.0,tensor(66.5068),"tensor(0.6460, device='cuda:0')",tensor(82.2600)
28 | 26,"tensor(1.1064, device='cuda:0')",0.0,tensor(66.7692),"tensor(0.6949, device='cuda:0')",tensor(80.3300)
29 | 27,"tensor(1.0782, device='cuda:0')",0.0,tensor(68.4394),"tensor(0.6394, device='cuda:0')",tensor(83.2100)
30 | 28,"tensor(1.0723, device='cuda:0')",0.0,tensor(69.0931),"tensor(0.6562, device='cuda:0')",tensor(82.2100)
31 | 29,"tensor(1.0710, device='cuda:0')",0.0,tensor(68.6733),"tensor(0.6450, device='cuda:0')",tensor(82.4100)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_2.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.8520, device='cuda:0')",0.0,tensor(38.1330),"tensor(1.1646, device='cuda:0')",tensor(62.2000)
3 | 1,"tensor(1.5934, device='cuda:0')",0.0,tensor(50.0632),"tensor(1.2364, device='cuda:0')",tensor(61.6900)
4 | 2,"tensor(1.5097, device='cuda:0')",0.0,tensor(53.3947),"tensor(0.8179, device='cuda:0')",tensor(75.3000)
5 | 3,"tensor(1.4606, device='cuda:0')",0.0,tensor(55.3491),"tensor(0.8479, device='cuda:0')",tensor(74.7500)
6 | 4,"tensor(1.4019, device='cuda:0')",0.0,tensor(56.9812),"tensor(0.7395, device='cuda:0')",tensor(77.9400)
7 | 5,"tensor(1.3898, device='cuda:0')",0.0,tensor(57.3724),"tensor(0.8446, device='cuda:0')",tensor(74.3800)
8 | 6,"tensor(1.3492, device='cuda:0')",0.0,tensor(58.9751),"tensor(0.8912, device='cuda:0')",tensor(74.4000)
9 | 7,"tensor(1.3546, device='cuda:0')",0.0,tensor(58.6492),"tensor(0.7657, device='cuda:0')",tensor(77.7300)
10 | 8,"tensor(1.3673, device='cuda:0')",0.0,tensor(58.3711),"tensor(0.7878, device='cuda:0')",tensor(76.7400)
11 | 9,"tensor(1.3872, device='cuda:0')",0.0,tensor(57.2477),"tensor(0.8376, device='cuda:0')",tensor(74.4900)
12 | 10,"tensor(1.3261, device='cuda:0')",0.0,tensor(59.6321),"tensor(0.6846, device='cuda:0')",tensor(79.8800)
13 | 11,"tensor(1.3214, device='cuda:0')",0.0,tensor(59.6932),"tensor(0.6900, device='cuda:0')",tensor(79.9700)
14 | 12,"tensor(1.3452, device='cuda:0')",0.0,tensor(58.7390),"tensor(0.7950, device='cuda:0')",tensor(76.3100)
15 | 13,"tensor(1.2936, device='cuda:0')",0.0,tensor(60.6500),"tensor(0.7583, device='cuda:0')",tensor(78.3400)
16 | 14,"tensor(1.3206, device='cuda:0')",0.0,tensor(59.7003),"tensor(0.7124, device='cuda:0')",tensor(80.5700)
17 | 15,"tensor(1.3420, device='cuda:0')",0.0,tensor(58.6945),"tensor(0.7584, device='cuda:0')",tensor(77.7100)
18 | 16,"tensor(1.3114, device='cuda:0')",0.0,tensor(59.9868),"tensor(0.8013, device='cuda:0')",tensor(75.6700)
19 | 17,"tensor(1.2843, device='cuda:0')",0.0,tensor(60.8332),"tensor(0.6785, device='cuda:0')",tensor(81.8400)
20 | 18,"tensor(1.3101, device='cuda:0')",0.0,tensor(59.7496),"tensor(0.7049, device='cuda:0')",tensor(81.2700)
21 | 19,"tensor(1.3010, device='cuda:0')",0.0,tensor(60.2414),"tensor(0.7181, device='cuda:0')",tensor(80.5100)
22 | 20,"tensor(1.2733, device='cuda:0')",0.0,tensor(61.4875),"tensor(0.6511, device='cuda:0')",tensor(82.2900)
23 | 21,"tensor(1.2929, device='cuda:0')",0.0,tensor(60.6969),"tensor(0.7355, device='cuda:0')",tensor(78.9600)
24 | 22,"tensor(1.2743, device='cuda:0')",0.0,tensor(61.4696),"tensor(0.7093, device='cuda:0')",tensor(81.5200)
25 | 23,"tensor(1.2770, device='cuda:0')",0.0,tensor(61.1682),"tensor(0.7249, device='cuda:0')",tensor(79.7100)
26 | 24,"tensor(1.2967, device='cuda:0')",0.0,tensor(60.5049),"tensor(0.7013, device='cuda:0')",tensor(81.3100)
27 | 25,"tensor(1.2891, device='cuda:0')",0.0,tensor(60.6730),"tensor(0.7856, device='cuda:0')",tensor(78.5100)
28 | 26,"tensor(1.2615, device='cuda:0')",0.0,tensor(61.9244),"tensor(0.7153, device='cuda:0')",tensor(80.0600)
29 | 27,"tensor(1.2511, device='cuda:0')",0.0,tensor(61.9265),"tensor(0.6894, device='cuda:0')",tensor(81.4500)
30 | 28,"tensor(1.2783, device='cuda:0')",0.0,tensor(61.2908),"tensor(0.7402, device='cuda:0')",tensor(79.2100)
31 | 29,"tensor(1.2632, device='cuda:0')",0.0,tensor(61.6038),"tensor(0.7070, device='cuda:0')",tensor(80.5200)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_3.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.5852, device='cuda:0')",0.0,tensor(49.3322),"tensor(0.9136, device='cuda:0')",tensor(72.1700)
3 | 1,"tensor(1.2562, device='cuda:0')",0.0,tensor(63.1246),"tensor(0.7735, device='cuda:0')",tensor(75.3300)
4 | 2,"tensor(1.1676, device='cuda:0')",0.0,tensor(66.3281),"tensor(0.7485, device='cuda:0')",tensor(76.2400)
5 | 3,"tensor(1.1023, device='cuda:0')",0.0,tensor(68.3585),"tensor(0.8598, device='cuda:0')",tensor(73.6100)
6 | 4,"tensor(1.0596, device='cuda:0')",0.0,tensor(69.9488),"tensor(0.6650, device='cuda:0')",tensor(79.8500)
7 | 5,"tensor(1.0365, device='cuda:0')",0.0,tensor(70.5446),"tensor(0.7256, device='cuda:0')",tensor(78.5300)
8 | 6,"tensor(0.9986, device='cuda:0')",0.0,tensor(71.8226),"tensor(0.6412, device='cuda:0')",tensor(80.4700)
9 | 7,"tensor(0.9505, device='cuda:0')",0.0,tensor(73.5068),"tensor(0.6535, device='cuda:0')",tensor(80.2000)
10 | 8,"tensor(0.9408, device='cuda:0')",0.0,tensor(73.3418),"tensor(0.6131, device='cuda:0')",tensor(81.1600)
11 | 9,"tensor(0.9354, device='cuda:0')",0.0,tensor(73.9009),"tensor(0.5872, device='cuda:0')",tensor(82.3400)
12 | 10,"tensor(0.9187, device='cuda:0')",0.0,tensor(74.3704),"tensor(0.6324, device='cuda:0')",tensor(80.2100)
13 | 11,"tensor(0.9036, device='cuda:0')",0.0,tensor(74.8706),"tensor(0.6270, device='cuda:0')",tensor(80.6800)
14 | 12,"tensor(0.9332, device='cuda:0')",0.0,tensor(74.1035),"tensor(0.6375, device='cuda:0')",tensor(82.4800)
15 | 13,"tensor(0.8700, device='cuda:0')",0.0,tensor(75.8445),"tensor(0.6200, device='cuda:0')",tensor(81.8900)
16 | 14,"tensor(0.8629, device='cuda:0')",0.0,tensor(76.1854),"tensor(0.6110, device='cuda:0')",tensor(82.0400)
17 | 15,"tensor(0.8222, device='cuda:0')",0.0,tensor(77.5554),"tensor(0.5758, device='cuda:0')",tensor(82.9600)
18 | 16,"tensor(0.8364, device='cuda:0')",0.0,tensor(77.2764),"tensor(0.6242, device='cuda:0')",tensor(81.4800)
19 | 17,"tensor(0.8431, device='cuda:0')",0.0,tensor(77.0262),"tensor(0.6181, device='cuda:0')",tensor(81.6900)
20 | 18,"tensor(0.8176, device='cuda:0')",0.0,tensor(77.7661),"tensor(0.6617, device='cuda:0')",tensor(79.7000)
21 | 19,"tensor(0.7869, device='cuda:0')",0.0,tensor(78.5704),"tensor(0.5973, device='cuda:0')",tensor(83.1200)
22 | 20,"tensor(0.8909, device='cuda:0')",0.0,tensor(75.0715),"tensor(0.6385, device='cuda:0')",tensor(81.9800)
23 | 21,"tensor(0.8334, device='cuda:0')",0.0,tensor(77.2383),"tensor(0.5729, device='cuda:0')",tensor(83.1800)
24 | 22,"tensor(0.8060, device='cuda:0')",0.0,tensor(78.0292),"tensor(0.5911, device='cuda:0')",tensor(83.3900)
25 | 23,"tensor(0.8087, device='cuda:0')",0.0,tensor(78.1002),"tensor(0.6059, device='cuda:0')",tensor(82.7800)
26 | 24,"tensor(0.7846, device='cuda:0')",0.0,tensor(78.7637),"tensor(0.5727, device='cuda:0')",tensor(83.8000)
27 | 25,"tensor(0.7515, device='cuda:0')",0.0,tensor(79.7625),"tensor(0.5971, device='cuda:0')",tensor(83.2400)
28 | 26,"tensor(0.7517, device='cuda:0')",0.0,tensor(79.7170),"tensor(0.5974, device='cuda:0')",tensor(82.0700)
29 | 27,"tensor(0.7605, device='cuda:0')",0.0,tensor(79.5556),"tensor(0.5745, device='cuda:0')",tensor(83.7200)
30 | 28,"tensor(0.7874, device='cuda:0')",0.0,tensor(78.7155),"tensor(0.6164, device='cuda:0')",tensor(83.2200)
31 | 29,"tensor(0.7087, device='cuda:0')",0.0,tensor(81.2600),"tensor(0.5618, device='cuda:0')",tensor(83.8500)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/log_EfficientNet_mixup_4.csv:
--------------------------------------------------------------------------------
1 | epoch,train loss,reg loss,train acc,test loss,test acc
2 | 0,"tensor(1.7396, device='cuda:0')",0.0,tensor(42.0760),"tensor(1.3003, device='cuda:0')",tensor(59.5700)
3 | 1,"tensor(1.4385, device='cuda:0')",0.0,tensor(56.0643),"tensor(1.0127, device='cuda:0')",tensor(67.5900)
4 | 2,"tensor(1.3099, device='cuda:0')",0.0,tensor(60.8499),"tensor(0.8299, device='cuda:0')",tensor(72.1900)
5 | 3,"tensor(1.2170, device='cuda:0')",0.0,tensor(63.8931),"tensor(0.7399, device='cuda:0')",tensor(77.3000)
6 | 4,"tensor(1.1810, device='cuda:0')",0.0,tensor(64.8712),"tensor(0.7139, device='cuda:0')",tensor(78.7800)
7 | 5,"tensor(1.1764, device='cuda:0')",0.0,tensor(65.1266),"tensor(0.6911, device='cuda:0')",tensor(79.9600)
8 | 6,"tensor(1.1081, device='cuda:0')",0.0,tensor(67.2872),"tensor(0.7118, device='cuda:0')",tensor(77.5100)
9 | 7,"tensor(1.1507, device='cuda:0')",0.0,tensor(65.9127),"tensor(0.7807, device='cuda:0')",tensor(75.6400)
10 | 8,"tensor(1.1161, device='cuda:0')",0.0,tensor(66.8880),"tensor(0.6945, device='cuda:0')",tensor(78.5200)
11 | 9,"tensor(1.0964, device='cuda:0')",0.0,tensor(67.6024),"tensor(0.6623, device='cuda:0')",tensor(80.1500)
12 | 10,"tensor(1.1009, device='cuda:0')",0.0,tensor(67.1994),"tensor(0.6494, device='cuda:0')",tensor(80.6700)
13 | 11,"tensor(1.1054, device='cuda:0')",0.0,tensor(67.0964),"tensor(0.7119, device='cuda:0')",tensor(78.2400)
14 | 12,"tensor(1.0592, device='cuda:0')",0.0,tensor(68.9080),"tensor(0.6337, device='cuda:0')",tensor(80.5600)
15 | 13,"tensor(1.0479, device='cuda:0')",0.0,tensor(69.0810),"tensor(0.6223, device='cuda:0')",tensor(81.3000)
16 | 14,"tensor(1.0443, device='cuda:0')",0.0,tensor(68.9712),"tensor(0.6930, device='cuda:0')",tensor(78.9500)
17 | 15,"tensor(1.0767, device='cuda:0')",0.0,tensor(68.1365),"tensor(0.6540, device='cuda:0')",tensor(80.5200)
18 | 16,"tensor(1.0659, device='cuda:0')",0.0,tensor(68.1313),"tensor(0.6185, device='cuda:0')",tensor(81.2800)
19 | 17,"tensor(1.0341, device='cuda:0')",0.0,tensor(69.6075),"tensor(0.6211, device='cuda:0')",tensor(80.8700)
20 | 18,"tensor(1.0334, device='cuda:0')",0.0,tensor(69.8162),"tensor(0.5778, device='cuda:0')",tensor(82.3300)
21 | 19,"tensor(1.0137, device='cuda:0')",0.0,tensor(69.9117),"tensor(0.6499, device='cuda:0')",tensor(81.0500)
22 | 20,"tensor(1.0188, device='cuda:0')",0.0,tensor(69.6947),"tensor(0.5940, device='cuda:0')",tensor(82.2600)
23 | 21,"tensor(0.9738, device='cuda:0')",0.0,tensor(71.7237),"tensor(0.6454, device='cuda:0')",tensor(81.3700)
24 | 22,"tensor(1.0088, device='cuda:0')",0.0,tensor(70.4746),"tensor(0.6181, device='cuda:0')",tensor(81.5900)
25 | 23,"tensor(1.0384, device='cuda:0')",0.0,tensor(69.2434),"tensor(0.6176, device='cuda:0')",tensor(82.4300)
26 | 24,"tensor(0.9576, device='cuda:0')",0.0,tensor(71.7219),"tensor(0.5998, device='cuda:0')",tensor(82.6100)
27 | 25,"tensor(0.9787, device='cuda:0')",0.0,tensor(71.2283),"tensor(0.5762, device='cuda:0')",tensor(82.7300)
28 | 26,"tensor(0.9771, device='cuda:0')",0.0,tensor(71.1901),"tensor(0.5692, device='cuda:0')",tensor(83.8800)
29 | 27,"tensor(0.9588, device='cuda:0')",0.0,tensor(71.6358),"tensor(0.6037, device='cuda:0')",tensor(82.)
30 | 28,"tensor(0.9769, device='cuda:0')",0.0,tensor(71.1616),"tensor(0.6231, device='cuda:0')",tensor(81.2100)
31 | 29,"tensor(0.9710, device='cuda:0')",0.0,tensor(71.7734),"tensor(0.5814, device='cuda:0')",tensor(83.0300)
32 |
--------------------------------------------------------------------------------
/results/decay=1e-5/loss-test-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/loss-test-with-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-5/loss-test-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/loss-test-without-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-5/test-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/test-accuracy-with-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-5/test-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/test-accuracy-without-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-5/train-accuracy-with-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/train-accuracy-with-augment-.pdf
--------------------------------------------------------------------------------
/results/decay=1e-5/train-accuracy-without-augment-.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/train-accuracy-without-augment-.pdf
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 | # Copyright (c) 2017-present, Facebook, Inc. (mixup)
3 | # Copyright (c) 2020-present, Maciej A. Czyzewski (batchboost)
4 | # All rights reserved.
5 | #
6 | # This source code is licensed under the license found in the LICENSE file in
7 | # the root directory of this source tree.
8 | from __future__ import print_function
9 |
10 | import argparse
11 | import csv
12 | import os
13 |
14 | import numpy as np
15 | import torch
16 | from torch.autograd import Variable
17 | import torch.backends.cudnn as cudnn
18 | import torch.nn as nn
19 | import torch.optim as optim
20 | import torchvision.transforms as transforms
21 | import torchvision.datasets as datasets
22 |
23 | """
24 | !pip install efficientnet_pytorch
25 | from google.colab import drive
26 | drive.mount('/content/gdrive', force_remount=True)
27 | !cp gdrive/My\ Drive//utils.py .
28 | !cp gdrive/My\ Drive//train.py .
29 | !nvcc --version
30 | !pip3 install --upgrade --force-reinstall torch torchvision
31 | import torch
32 | print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
33 | print('Device:', torch.device('cuda:0'), torch.cuda.is_available())
34 | # --- START ---
35 | !python3 train.py --decay=1e-5 --no-augment --seed=1 \
36 | --name=batchboost --model=efficientnet-b0 --epoch=30
37 | """
38 |
# FIXME: rewrite it clean
import debug
from utils import progress_bar

try:
    import models

    COLAB = False
except ImportError:
    # The local `models` package is absent on Google Colab, where only the
    # flat train/utils/batchboost files are copied (see module docstring).
    # FIX: narrowed the bare `except:` to ImportError so that
    # KeyboardInterrupt/SystemExit (and real bugs inside models/__init__)
    # are no longer silently misreported as "Colab environment".
    # FIXME: detect environment?
    print("=== GOOGLE COLAB ENVIRONMENT ===")
    COLAB = True
51 |
# Command-line interface; defaults reproduce the paper's CIFAR-10 setup.
parser = argparse.ArgumentParser(description="PyTorch CIFAR10 Training")
parser.add_argument("--lr", default=0.1, type=float, help="learning rate")
parser.add_argument("--resume", "-r", action="store_true",
                    help="resume from checkpoint")
parser.add_argument("--model", default="ResNet18", type=str,
                    help="model type (default: ResNet18)")
parser.add_argument("--name", default="0", type=str, help="name of run")
parser.add_argument("--seed", default=0, type=int, help="random seed")
parser.add_argument("--batch-size", default=128, type=int, help="batch size")
parser.add_argument("--epoch", default=200, type=int,
                    help="total epochs to run")
# --no-augment stores False into args.augment (augmentation on by default).
parser.add_argument("--no-augment", dest="augment", action="store_false",
                    help="use standard augmentation (default: True)")
parser.add_argument("--optimizer", type=str, default="lamb",
                    choices=["lamb", "adam"],
                    help="which optimizer to use")
parser.add_argument("--decay", default=1e-5, type=float, help="weight decay")
parser.add_argument("--alpha", default=1.0, type=float,
                    help="mixup interpolation coefficient (default: 1)")
parser.add_argument("--debug", "-d", action="store_true",
                    help="debug on FashionMNIST and ResNet100k network")
args = parser.parse_args()
96 |
use_cuda = torch.cuda.is_available()

best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Seed 0 means "unseeded": only a non-zero --seed pins the torch RNG.
if args.seed != 0:
    torch.manual_seed(args.seed)

# Data
print("==> Preparing data..")
num_classes = 10

if args.debug:
    # Small-scale debug run: FashionMNIST loaders provided by debug.py.
    trainloader, testloader = debug.FashionMNIST_loaders(args)
else:
    if args.augment:
        # Standard CIFAR-10 augmentation: pad-and-crop plus horizontal flip.
        transform_train = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                # CIFAR-10 per-channel mean / std.
                transforms.Normalize(
                    (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
                ),
            ]
        )
    else:
        # --no-augment: normalization only.
        transform_train = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(
                    (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
                ),
            ]
        )

    # Test images are only normalized, never augmented.
    transform_test = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(
                (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
            ),
        ]
    )

    trainset = datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform_train
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=True, num_workers=8
    )

    testset = datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform_test
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=8
    )
155 |
# Model
if args.resume:
    # Load checkpoint.
    print("==> Resuming from checkpoint..")
    assert os.path.isdir("checkpoint"), "Error: no checkpoint directory found!"
    # Checkpoint filename encodes the run name and seed.
    checkpoint = torch.load(
        "./checkpoint/ckpt.t7" + args.name + "_" + str(args.seed)
    )
    net = checkpoint["net"]
    best_acc = checkpoint["acc"]
    start_epoch = checkpoint["epoch"] + 1
    # Restore the RNG so shuffling/augmentation continue deterministically.
    rng_state = checkpoint["rng_state"]
    torch.set_rng_state(rng_state)
else:
    print("==> Building model..")
    if args.model.startswith("efficientnet"):
        # Lazy import: efficientnet_pytorch is only needed for this branch.
        from efficientnet_pytorch import EfficientNet

        net = EfficientNet.from_pretrained(args.model, num_classes=num_classes)
    elif args.debug:
        net = debug.ResNet100k()
    else:
        # Look the architecture class up by name in the local models package.
        net = models.__dict__[args.model]()

if not os.path.isdir("results"):
    os.mkdir("results")
# CSV log path, e.g. results/log_EfficientNet_batchboost_1.csv.
# Computed before DataParallel wraps the net, otherwise the class name
# would read "DataParallel".
logname = (
    "results/log_"
    + net.__class__.__name__
    + "_"
    + args.name
    + "_"
    + str(args.seed)
    + ".csv"
)

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net)
    print("device_count =", torch.cuda.device_count())
    cudnn.benchmark = True  # fixed input sizes -> let cuDNN autotune kernels
    print("Using CUDA...")

criterion = nn.CrossEntropyLoss()

# NOTE(review): --optimizer advertises lamb/adam choices but SGD is used
# unconditionally here -- confirm whether the flag is vestigial.
optimizer = optim.SGD(
    net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay
)
204 |
205 | ### MIXUP ######################################################################
206 |
207 |
def mixup_data(x, y, index_left, index_right, alpha=1.0, use_cuda=True):
    """Return mixed inputs, both target selections, and the mixing weight.

    A single lambda ~ Beta(alpha, alpha) blends x[index_left] with
    x[index_right]; alpha <= 0 disables mixing (lambda = 1).  The
    `use_cuda` argument is accepted for interface compatibility but is
    not used by this function.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    left = x[index_left, :]
    right = x[index_right, :]
    mixed_x = lam * left + (1 - lam) * right
    return mixed_x, y[index_left], y[index_right], lam
218 |
219 |
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Convex combination of the criterion against both mixed targets."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b
222 |
223 |
def train_mixup(epoch):
    """Run one epoch of mixup training over the global `trainloader`.

    Returns a tuple (mean train loss, mean reg loss, train accuracy %),
    averaged over all batches of the epoch.  Accuracy is the
    lambda-weighted agreement with both mixed targets, following the
    original mixup reference code.  `reg_loss` is always 0 here but kept
    so the CSV log schema stays stable across training modes.
    """
    print("MIXUP")
    print("\nEpoch: %d" % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        batch_size = inputs.shape[0]
        # Random pairing: sample i is mixed with sample index[i].
        if use_cuda:
            index = torch.randperm(batch_size).cuda()
        else:
            index = torch.randperm(batch_size)

        inputs, targets_a, targets_b, lam = mixup_data(
            inputs, targets, range(batch_size), index, args.alpha, use_cuda
        )
        # (Legacy `Variable` wrapping removed: a no-op since PyTorch 0.4.)
        outputs = net(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += inputs.size(0)
        # Weight matches against each of the two mixed labels by lambda.
        correct += (
            lam * predicted.eq(targets_a.data).cpu().sum().float()
            + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float()
        )

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
        optimizer.step()

        progress_bar(
            batch_idx,
            len(trainloader),
            "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)"
            % (
                train_loss / (batch_idx + 1),
                reg_loss / (batch_idx + 1),
                100.0 * correct / total,
                correct,
                total,
            ),
        )
    # FIX: average over the number of batches (batch_idx + 1), not the last
    # index.  The original divided by batch_idx, an off-by-one that skews
    # the logged means and raises ZeroDivisionError for a one-batch loader.
    return (
        train_loss / (batch_idx + 1),
        reg_loss / (batch_idx + 1),
        100.0 * correct / total,
    )
280 |
281 |
282 | ### BATCHBOOST #################################################################
283 |
284 | from batchboost import BatchBoost
285 |
286 |
def fn_error(outputs, targets):
    """Per-sample soft cross-entropy-style error, summed over classes.

    NOTE(review): log-softmax is applied to `targets` (not `outputs`),
    i.e. the arguments are swapped relative to the usual soft-label
    cross entropy -- confirm against how batchboost.py invokes this hook.
    """
    log_probs = nn.LogSoftmax(dim=1)(targets)
    return torch.sum(-outputs * log_probs, dim=1)
290 |
291 |
def fn_linearize(x, num_classes=10):
    """One-hot encode a 1-D tensor of class indices into float rows.

    The result is allocated on CPU; assumes `x` is a CPU LongTensor of
    shape (N,) -- TODO confirm against batchboost.py callers.
    """
    one_hot = torch.zeros(x.size(0), num_classes)
    one_hot.scatter_(1, x.view(-1, 1), 1)
    return one_hot
296 |
297 |
def fn_unlinearize(x):
    """Inverse of fn_linearize: recover class indices via row-wise argmax."""
    return x.argmax(dim=1)
301 |
302 |
# Attach the task-specific callbacks BatchBoost needs (error ranking and
# label one-hot encode/decode) as class attributes.
# NOTE(review): plain functions assigned as class attributes become bound
# methods on instances in Python 3 (an implicit `self` is prepended on
# instance calls) -- verify batchboost.py invokes these in a way that is
# compatible with their self-less signatures.
BatchBoost.fn_error = fn_error
BatchBoost.fn_linearize = fn_linearize
BatchBoost.fn_unlinearize = fn_unlinearize

# FIXME: add arguments to command-line
# Shared BatchBoost instance used by train_batchboost() below.
BB = BatchBoost(
    alpha=args.alpha,
    window_normal=0,
    window_boost=10,
    factor=1 / 2,
    use_cuda=use_cuda,
)
315 |
316 |
def train_batchboost(epoch):
    """Train one epoch with the BatchBoost feed/mix/boost pipeline.

    Args:
        epoch: current epoch number (for logging only).

    Returns:
        (avg train loss, avg reg loss, train accuracy in percent).

    Relies on module-level globals: net, trainloader, optimizer,
    criterion, use_cuda, BB, progress_bar.
    """
    # NOTE: the old `global inputs, targets_a, targets_b, lam` was a dead
    # leftover (never assigned here) and has been removed.
    print("BATCHBOOST")
    print("\nEpoch: %d" % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0

    BB.clear()
    for batch_idx, (new_inputs, new_targets) in enumerate(trainloader):
        if use_cuda:
            new_inputs, new_targets = new_inputs.cuda(), new_targets.cuda()

        # -----> (a) feed with new information
        if not BB.feed(new_inputs, new_targets):
            continue

        # -----> (b) apply concat: BB.inputs, BB.targets
        outputs = net(BB.inputs)

        # -----> (c) calculate: loss (mixup like style \lambda)
        loss = BB.criterion(criterion, outputs)

        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += BB.inputs.size(0)  # -----> remember to use concat

        # -----> (d) calculate: accuracy
        correct += BB.correct(predicted)

        # -----> (e) pairing & mixing
        BB.mixing(criterion, outputs)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
        optimizer.step()

        progress_bar(
            batch_idx,
            len(trainloader),
            "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)"
            % (
                train_loss / (batch_idx + 1),
                reg_loss / (batch_idx + 1),
                100.0 * correct / total,
                correct,
                total,
            ),
        )
    # BUG FIX: the old guard `total = len(batch_size)` raised NameError
    # (batch_size is not defined in this scope, and len() of an int is
    # invalid anyway). If BB.feed() absorbed every batch no step ran;
    # fall back to 1 so the division below stays well-defined.
    if total == 0:
        total = 1
    # BUG FIX: average over the batch count (batch_idx + 1), not the last
    # index, which also avoids ZeroDivisionError for a single batch.
    return (
        train_loss / (batch_idx + 1),
        reg_loss / (batch_idx + 1),
        100.0 * correct / total,
    )
376 |
377 |
378 | ### BASELINE ###################################################################
379 |
380 |
def train_baseline(epoch):
    """Train one epoch with plain cross-entropy (no augmentation).

    Args:
        epoch: current epoch number (for logging only).

    Returns:
        (avg train loss, avg reg loss, train accuracy in percent).

    Relies on module-level globals: net, trainloader, optimizer,
    criterion, use_cuda, progress_bar.
    """
    print("BASELINE")
    print("\nEpoch: %d" % epoch)
    net.train()
    train_loss = 0
    reg_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        outputs = net(inputs)
        loss = criterion(outputs, targets)
        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += inputs.size(0)
        correct += predicted.eq(targets.data).cpu().sum().float()

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 1)
        optimizer.step()

        progress_bar(
            batch_idx,
            len(trainloader),
            "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)"
            % (
                train_loss / (batch_idx + 1),
                reg_loss / (batch_idx + 1),
                100.0 * correct / total,
                correct,
                total,
            ),
        )
    # BUG FIX: average over the batch count (batch_idx + 1), not the last
    # index (batch_idx); the old form over-weighted the mean and raised
    # ZeroDivisionError when the loader yielded a single batch.
    return (
        train_loss / (batch_idx + 1),
        reg_loss / (batch_idx + 1),
        100.0 * correct / total,
    )
422 |
423 |
def test(epoch):
    """Evaluate the model on the test set; checkpoint on improvement.

    Args:
        epoch: current epoch number.

    Returns:
        (avg test loss, test accuracy in percent).

    Side effects: updates global best_acc and writes a checkpoint when a
    new best accuracy is reached or the final epoch is done. Relies on
    module-level globals: net, testloader, criterion, use_cuda, args,
    start_epoch, best_acc, progress_bar, checkpoint.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.data
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            progress_bar(
                batch_idx,
                len(testloader),
                "Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (
                    test_loss / (batch_idx + 1),
                    100.0 * correct / total,
                    correct,
                    total,
                ),
            )
    acc = 100.0 * correct / total
    # Save on a new best accuracy, and always on the final epoch.
    if epoch == start_epoch + args.epoch - 1 or acc > best_acc:
        checkpoint(acc, epoch)
    if acc > best_acc:
        best_acc = acc
    # BUG FIX: divide by the batch count (batch_idx + 1), not the last
    # index; the old form inflated the mean and raised ZeroDivisionError
    # when the test loader held a single batch.
    return (test_loss / (batch_idx + 1), 100.0 * correct / total)
460 |
461 |
def checkpoint(acc, epoch):
    """Persist the model, accuracy, epoch and RNG state under ./checkpoint.

    Args:
        acc: accuracy value to store alongside the model.
        epoch: epoch number at which the snapshot was taken.

    Uses module-level globals: net, args.
    """
    print("Saving..")
    # Ensure the target directory exists before writing.
    os.makedirs("checkpoint", exist_ok=True)
    snapshot = {
        "net": net,
        "acc": acc,
        "epoch": epoch,
        "rng_state": torch.get_rng_state(),
    }
    torch.save(snapshot, "./checkpoint/ckpt.t7" + args.name + "_" + str(args.seed))
474 |
475 |
def adjust_learning_rate(optimizer, epoch, base_lr=None):
    """Step-decay schedule: divide the base LR by 10 at epochs 100 and 150.

    Args:
        optimizer: optimizer whose param groups are updated in place.
        epoch: current epoch number.
        base_lr: starting learning rate. Defaults to None, which falls
            back to args.lr -- existing call sites keep their behavior;
            passing it explicitly decouples the schedule from the global
            args object.
    """
    lr = args.lr if base_lr is None else base_lr
    if epoch >= 100:
        lr /= 10
    if epoch >= 150:
        lr /= 10
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
485 |
486 |
# Create the CSV results log with a header row on first run; each epoch
# appends a row to it below.
if not os.path.exists(logname):
    with open(logname, "w") as logfile:
        logwriter = csv.writer(logfile, delimiter=",")
        logwriter.writerow(
            [
                "epoch",
                "train loss",
                "reg loss",
                "train acc",
                "test loss",
                "test acc",
            ]
        )

# Pick the per-epoch training routine from args.name
# (batchboost / mixup / anything else falls back to baseline).
if args.name == "batchboost":
    train_func = train_batchboost
elif args.name == "mixup":
    train_func = train_mixup
else:
    train_func = train_baseline

# Main loop: train one epoch, evaluate, decay the LR, append metrics
# to the CSV log.
for epoch in range(start_epoch, args.epoch):
    train_loss, reg_loss, train_acc = train_func(epoch)
    test_loss, test_acc = test(epoch)
    adjust_learning_rate(optimizer, epoch)
    with open(logname, "a") as logfile:
        logwriter = csv.writer(logfile, delimiter=",")
        logwriter.writerow(
            [epoch, train_loss, reg_loss, train_acc, test_loss, test_acc]
        )
517 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | """Some helper functions for PyTorch, including:
2 | - progress_bar: progress bar mimic xlua.progress.
3 | """
4 | import os
5 | import sys
6 | import time
7 | import math
8 |
9 | import torch
10 | import torch.nn as nn
11 |
# Determine the terminal width for the progress bar; fall back to 80
# columns when `stty` is unavailable (no tty, e.g. piped/CI output).
try:
    _, term_width = os.popen("stty size", "r").read().split()
except Exception:
    # BUG FIX: was `term_with = "80"` (typo), which left term_width
    # undefined on the fallback path and crashed at int(term_width).
    # Also narrowed the bare `except:` so KeyboardInterrupt propagates.
    term_width = "80"
term_width = int(term_width)

TOTAL_BAR_LENGTH = 86.0
last_time = time.time()
begin_time = last_time
21 |
22 |
def progress_bar(current, total, msg=None):
    """Render an in-place console progress bar (mimics xlua.progress).

    Args:
        current: zero-based index of the step just finished.
        total: total number of steps.
        msg: optional status text appended after the timing info.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    # Split the fixed-width bar into a done part and a remaining part.
    cur_len = int(TOTAL_BAR_LENGTH * current / total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1

    sys.stdout.write(" [")
    for i in range(cur_len):
        sys.stdout.write("=")
    sys.stdout.write(">")
    for i in range(rest_len):
        sys.stdout.write(".")
    sys.stdout.write("]")

    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time

    L = []
    L.append(" Step: %s" % format_time(step_time))
    L.append(" | Tot: %s" % format_time(tot_time))
    if msg:
        L.append(" | " + msg)

    msg = "".join(L)
    sys.stdout.write(msg)
    # Pad with spaces so leftovers from a longer previous line are erased.
    for i in range(term_width - int(TOTAL_BAR_LENGTH) - len(msg) - 3):
        sys.stdout.write(" ")

    # Go back to the center of the bar.
    for i in range(term_width - int(TOTAL_BAR_LENGTH / 2)):
        sys.stdout.write("\b")
    sys.stdout.write(" %d/%d " % (current + 1, total))

    # Carriage-return so the next call overwrites this line; newline once
    # the bar is complete.
    if current < total - 1:
        sys.stdout.write("\r")
    else:
        sys.stdout.write("\n")
    sys.stdout.flush()
65 |
66 |
def format_time(seconds):
    """Render a duration in seconds as a compact string like '1m5s'.

    At most the two most significant non-zero units are shown, drawn from
    days (D), hours (h), minutes (m), whole seconds (s) and
    milliseconds (ms). A zero duration renders as '0ms'.
    """
    remaining = seconds
    components = []
    for unit_seconds, suffix in ((86400, "D"), (3600, "h"), (60, "m")):
        amount = int(remaining / unit_seconds)
        remaining -= amount * unit_seconds
        components.append((amount, suffix))
    whole_secs = int(remaining)
    components.append((whole_secs, "s"))
    components.append((int((remaining - whole_secs) * 1000), "ms"))

    out = ""
    shown = 0
    for amount, suffix in components:
        # Keep only the first two non-zero units, like the original.
        if amount > 0 and shown < 2:
            out += str(amount) + suffix
            shown += 1
    return out if out else "0ms"
98 |
--------------------------------------------------------------------------------