├── .gitignore ├── LICENSE ├── LICENSE-pytorch-cifar ├── README.md ├── batchboost.py ├── debug.py ├── figures ├── batches │ ├── img_1_new_10.png │ ├── img_1_new_11.png │ ├── img_1_new_6.png │ ├── img_1_new_7.png │ ├── img_1_new_8.png │ ├── img_1_new_9.png │ ├── img_1_old_0.png │ ├── img_1_old_1.png │ ├── img_1_old_2.png │ ├── img_1_old_3.png │ ├── img_1_old_4.png │ ├── img_1_old_5.png │ ├── img_2_new_10.png │ ├── img_2_new_11.png │ ├── img_2_new_6.png │ ├── img_2_new_7.png │ ├── img_2_new_8.png │ ├── img_2_new_9.png │ ├── img_2_old_0.png │ ├── img_2_old_1.png │ ├── img_2_old_2.png │ ├── img_2_old_3.png │ ├── img_2_old_4.png │ ├── img_2_old_5.png │ ├── img_3_new_10.png │ ├── img_3_new_11.png │ ├── img_3_new_6.png │ ├── img_3_new_7.png │ ├── img_3_new_8.png │ ├── img_3_new_9.png │ ├── img_3_old_0.png │ ├── img_3_old_1.png │ ├── img_3_old_2.png │ ├── img_3_old_3.png │ ├── img_3_old_4.png │ ├── img_3_old_5.png │ ├── img_4_new_10.png │ ├── img_4_new_11.png │ ├── img_4_new_6.png │ ├── img_4_new_7.png │ ├── img_4_new_8.png │ ├── img_4_new_9.png │ ├── img_4_old_0.png │ ├── img_4_old_1.png │ ├── img_4_old_2.png │ ├── img_4_old_3.png │ ├── img_4_old_4.png │ └── img_4_old_5.png ├── data_1.png ├── data_2.png ├── data_3.png ├── data_4.png ├── data_5.png ├── data_6.png ├── data_7.png ├── figure-1-loss-train-without-augment.pdf ├── figure-1-test-accuracy-without-augment.pdf ├── figure-2-test-accuracy-with-augment.pdf ├── figure-2-train-accuracy-with-augment.pdf ├── figure-abstract.pdf ├── figure-abstract.png ├── figure-abstract.svg ├── figure-feeding.pdf ├── figure-feeding.png ├── figure-feeding.svg ├── figure-multipass.png ├── for-repository-1.png ├── for-repository-2.png └── pp_logo.jpg ├── models ├── __init__.py ├── alldnet.py ├── densenet.py ├── densenet3.py ├── densenet_efficient_multi_gpu.py ├── googlenet.py ├── lenet.py ├── mobilenet.py ├── resnet.py ├── resnext.py └── vgg.py ├── paper ├── abstract.txt ├── arxiv-abstract-shadow.png ├── arxiv-abstract.png ├── arxiv.sty ├── 
batchboost.pdf ├── batchboost.tex ├── build.py ├── figure-1-loss-train-without-augment.pdf ├── figure-1-test-accuracy-without-augment.pdf ├── figure-2-test-accuracy-with-augment.pdf ├── figure-2-train-accuracy-with-augment.pdf ├── figure-abstract.pdf ├── figure-feeding.pdf ├── figure-multipass.png ├── notes_v2.md ├── references.bib └── texput.log ├── plot.py ├── results ├── decay=1e-4 │ ├── log_EfficientNet_baseline_13.csv │ ├── log_EfficientNet_baseline_24.csv │ ├── log_EfficientNet_batchboost_1.csv │ ├── log_EfficientNet_batchboost_2.csv │ ├── log_EfficientNet_batchboost_3.csv │ ├── log_EfficientNet_batchboost_4.csv │ ├── log_EfficientNet_mixup_1.csv │ ├── log_EfficientNet_mixup_2.csv │ ├── log_EfficientNet_mixup_3.csv │ ├── log_EfficientNet_mixup_4.csv │ ├── loss-test-with-augment-.pdf │ ├── loss-test-without-augment-.pdf │ ├── test-accuracy-with-augment-.pdf │ ├── test-accuracy-without-augment-.pdf │ ├── train-accuracy-with-augment-.pdf │ └── train-accuracy-without-augment-.pdf └── decay=1e-5 │ ├── log_EfficientNet_baseline_13.csv │ ├── log_EfficientNet_baseline_24.csv │ ├── log_EfficientNet_batchboost_1.csv │ ├── log_EfficientNet_batchboost_2.csv │ ├── log_EfficientNet_batchboost_3.csv │ ├── log_EfficientNet_batchboost_4.csv │ ├── log_EfficientNet_mixup_1.csv │ ├── log_EfficientNet_mixup_2.csv │ ├── log_EfficientNet_mixup_3.csv │ ├── log_EfficientNet_mixup_4.csv │ ├── loss-test-with-augment-.pdf │ ├── loss-test-without-augment-.pdf │ ├── test-accuracy-with-augment-.pdf │ ├── test-accuracy-without-augment-.pdf │ ├── train-accuracy-with-augment-.pdf │ └── train-accuracy-without-augment-.pdf ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # project 2 | .DS_Store 3 | __pycache__ 4 | *.pyc 5 | checkpoint 6 | data/ 7 | 8 | # paper 9 | *.aux 10 | *.log 11 | *.out 12 | *.bbl 13 | *.blg 14 | 15 | # results 16 | results/* 17 | !results/decay=1e-4 18 | !results/decay=1e-5 19 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. 
This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 
69 | 70 | Section 1 -- Definitions. 71 | 72 | a. Adapted Material means material subject to Copyright and Similar 73 | Rights that is derived from or based upon the Licensed Material 74 | and in which the Licensed Material is translated, altered, 75 | arranged, transformed, or otherwise modified in a manner requiring 76 | permission under the Copyright and Similar Rights held by the 77 | Licensor. For purposes of this Public License, where the Licensed 78 | Material is a musical work, performance, or sound recording, 79 | Adapted Material is always produced where the Licensed Material is 80 | synched in timed relation with a moving image. 81 | 82 | b. Adapter's License means the license You apply to Your Copyright 83 | and Similar Rights in Your contributions to Adapted Material in 84 | accordance with the terms and conditions of this Public License. 85 | 86 | c. Copyright and Similar Rights means copyright and/or similar rights 87 | closely related to copyright including, without limitation, 88 | performance, broadcast, sound recording, and Sui Generis Database 89 | Rights, without regard to how the rights are labeled or 90 | categorized. For purposes of this Public License, the rights 91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 92 | Rights. 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. 
Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. NonCommercial means not primarily intended for or directed towards 116 | commercial advantage or monetary compensation. For purposes of 117 | this Public License, the exchange of the Licensed Material for 118 | other material subject to Copyright and Similar Rights by digital 119 | file-sharing or similar means is NonCommercial provided there is 120 | no payment of monetary compensation in connection with the 121 | exchange. 122 | 123 | j. Share means to provide material to the public by any means or 124 | process that requires permission under the Licensed Rights, such 125 | as reproduction, public display, public performance, distribution, 126 | dissemination, communication, or importation, and to make material 127 | available to the public including in ways that members of the 128 | public may access the material from a place and at a time 129 | individually chosen by them. 130 | 131 | k. Sui Generis Database Rights means rights other than copyright 132 | resulting from Directive 96/9/EC of the European Parliament and of 133 | the Council of 11 March 1996 on the legal protection of databases, 134 | as amended and/or succeeded, as well as other essentially 135 | equivalent rights anywhere in the world. 136 | 137 | l. You means the individual or entity exercising the Licensed Rights 138 | under this Public License. Your has a corresponding meaning. 139 | 140 | Section 2 -- Scope. 141 | 142 | a. License grant. 143 | 144 | 1. 
Subject to the terms and conditions of this Public License, 145 | the Licensor hereby grants You a worldwide, royalty-free, 146 | non-sublicensable, non-exclusive, irrevocable license to 147 | exercise the Licensed Rights in the Licensed Material to: 148 | 149 | a. reproduce and Share the Licensed Material, in whole or 150 | in part, for NonCommercial purposes only; and 151 | 152 | b. produce, reproduce, and Share Adapted Material for 153 | NonCommercial purposes only. 154 | 155 | 2. Exceptions and Limitations. For the avoidance of doubt, where 156 | Exceptions and Limitations apply to Your use, this Public 157 | License does not apply, and You do not need to comply with 158 | its terms and conditions. 159 | 160 | 3. Term. The term of this Public License is specified in Section 161 | 6(a). 162 | 163 | 4. Media and formats; technical modifications allowed. The 164 | Licensor authorizes You to exercise the Licensed Rights in 165 | all media and formats whether now known or hereafter created, 166 | and to make technical modifications necessary to do so. The 167 | Licensor waives and/or agrees not to assert any right or 168 | authority to forbid You from making technical modifications 169 | necessary to exercise the Licensed Rights, including 170 | technical modifications necessary to circumvent Effective 171 | Technological Measures. For purposes of this Public License, 172 | simply making modifications authorized by this Section 2(a) 173 | (4) never produces Adapted Material. 174 | 175 | 5. Downstream recipients. 176 | 177 | a. Offer from the Licensor -- Licensed Material. Every 178 | recipient of the Licensed Material automatically 179 | receives an offer from the Licensor to exercise the 180 | Licensed Rights under the terms and conditions of this 181 | Public License. 182 | 183 | b. No downstream restrictions. 
You may not offer or impose 184 | any additional or different terms or conditions on, or 185 | apply any Effective Technological Measures to, the 186 | Licensed Material if doing so restricts exercise of the 187 | Licensed Rights by any recipient of the Licensed 188 | Material. 189 | 190 | 6. No endorsement. Nothing in this Public License constitutes or 191 | may be construed as permission to assert or imply that You 192 | are, or that Your use of the Licensed Material is, connected 193 | with, or sponsored, endorsed, or granted official status by, 194 | the Licensor or others designated to receive attribution as 195 | provided in Section 3(a)(1)(A)(i). 196 | 197 | b. Other rights. 198 | 199 | 1. Moral rights, such as the right of integrity, are not 200 | licensed under this Public License, nor are publicity, 201 | privacy, and/or other similar personality rights; however, to 202 | the extent possible, the Licensor waives and/or agrees not to 203 | assert any such rights held by the Licensor to the limited 204 | extent necessary to allow You to exercise the Licensed 205 | Rights, but not otherwise. 206 | 207 | 2. Patent and trademark rights are not licensed under this 208 | Public License. 209 | 210 | 3. To the extent possible, the Licensor waives any right to 211 | collect royalties from You for the exercise of the Licensed 212 | Rights, whether directly or through a collecting society 213 | under any voluntary or waivable statutory or compulsory 214 | licensing scheme. In all other cases the Licensor expressly 215 | reserves any right to collect such royalties, including when 216 | the Licensed Material is used other than for NonCommercial 217 | purposes. 218 | 219 | Section 3 -- License Conditions. 220 | 221 | Your exercise of the Licensed Rights is expressly made subject to the 222 | following conditions. 223 | 224 | a. Attribution. 225 | 226 | 1. If You Share the Licensed Material (including in modified 227 | form), You must: 228 | 229 | a. 
retain the following if it is supplied by the Licensor 230 | with the Licensed Material: 231 | 232 | i. identification of the creator(s) of the Licensed 233 | Material and any others designated to receive 234 | attribution, in any reasonable manner requested by 235 | the Licensor (including by pseudonym if 236 | designated); 237 | 238 | ii. a copyright notice; 239 | 240 | iii. a notice that refers to this Public License; 241 | 242 | iv. a notice that refers to the disclaimer of 243 | warranties; 244 | 245 | v. a URI or hyperlink to the Licensed Material to the 246 | extent reasonably practicable; 247 | 248 | b. indicate if You modified the Licensed Material and 249 | retain an indication of any previous modifications; and 250 | 251 | c. indicate the Licensed Material is licensed under this 252 | Public License, and include the text of, or the URI or 253 | hyperlink to, this Public License. 254 | 255 | 2. You may satisfy the conditions in Section 3(a)(1) in any 256 | reasonable manner based on the medium, means, and context in 257 | which You Share the Licensed Material. For example, it may be 258 | reasonable to satisfy the conditions by providing a URI or 259 | hyperlink to a resource that includes the required 260 | information. 261 | 262 | 3. If requested by the Licensor, You must remove any of the 263 | information required by Section 3(a)(1)(A) to the extent 264 | reasonably practicable. 265 | 266 | 4. If You Share Adapted Material You produce, the Adapter's 267 | License You apply must not prevent recipients of the Adapted 268 | Material from complying with this Public License. 269 | 270 | Section 4 -- Sui Generis Database Rights. 271 | 272 | Where the Licensed Rights include Sui Generis Database Rights that 273 | apply to Your use of the Licensed Material: 274 | 275 | a. 
for the avoidance of doubt, Section 2(a)(1) grants You the right 276 | to extract, reuse, reproduce, and Share all or a substantial 277 | portion of the contents of the database for NonCommercial purposes 278 | only; 279 | 280 | b. if You include all or a substantial portion of the database 281 | contents in a database in which You have Sui Generis Database 282 | Rights, then the database in which You have Sui Generis Database 283 | Rights (but not its individual contents) is Adapted Material; and 284 | 285 | c. You must comply with the conditions in Section 3(a) if You Share 286 | all or a substantial portion of the contents of the database. 287 | 288 | For the avoidance of doubt, this Section 4 supplements and does not 289 | replace Your obligations under this Public License where the Licensed 290 | Rights include other Copyright and Similar Rights. 291 | 292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 293 | 294 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 304 | 305 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 314 | 315 | c. The disclaimer of warranties and limitation of liability provided 316 | above shall be interpreted in a manner that, to the extent 317 | possible, most closely approximates an absolute disclaimer and 318 | waiver of all liability. 319 | 320 | Section 6 -- Term and Termination. 321 | 322 | a. This Public License applies for the term of the Copyright and 323 | Similar Rights licensed here. However, if You fail to comply with 324 | this Public License, then Your rights under this Public License 325 | terminate automatically. 326 | 327 | b. Where Your right to use the Licensed Material has terminated under 328 | Section 6(a), it reinstates: 329 | 330 | 1. automatically as of the date the violation is cured, provided 331 | it is cured within 30 days of Your discovery of the 332 | violation; or 333 | 334 | 2. upon express reinstatement by the Licensor. 335 | 336 | For the avoidance of doubt, this Section 6(b) does not affect any 337 | right the Licensor may have to seek remedies for Your violations 338 | of this Public License. 339 | 340 | c. For the avoidance of doubt, the Licensor may also offer the 341 | Licensed Material under separate terms or conditions or stop 342 | distributing the Licensed Material at any time; however, doing so 343 | will not terminate this Public License. 344 | 345 | d. 
Sections 1, 5, 6, 7, and 8 survive termination of this Public 346 | License. 347 | 348 | Section 7 -- Other Terms and Conditions. 349 | 350 | a. The Licensor shall not be bound by any additional or different 351 | terms or conditions communicated by You unless expressly agreed. 352 | 353 | b. Any arrangements, understandings, or agreements regarding the 354 | Licensed Material not stated herein are separate from and 355 | independent of the terms and conditions of this Public License. 356 | 357 | Section 8 -- Interpretation. 358 | 359 | a. For the avoidance of doubt, this Public License does not, and 360 | shall not be interpreted to, reduce, limit, restrict, or impose 361 | conditions on any use of the Licensed Material that could lawfully 362 | be made without permission under this Public License. 363 | 364 | b. To the extent possible, if any provision of this Public License is 365 | deemed unenforceable, it shall be automatically reformed to the 366 | minimum extent necessary to make it enforceable. If the provision 367 | cannot be reformed, it shall be severed from this Public License 368 | without affecting the enforceability of the remaining terms and 369 | conditions. 370 | 371 | c. No term or condition of this Public License will be waived and no 372 | failure to comply consented to unless expressly agreed to by the 373 | Licensor. 374 | 375 | d. Nothing in this Public License constitutes or may be interpreted 376 | as a limitation upon, or waiver of, any privileges and immunities 377 | that apply to the Licensor or You, including from the legal 378 | processes of any jurisdiction or authority. 379 | 380 | ======================================================================= 381 | 382 | Creative Commons is not a party to its public 383 | licenses. 
Notwithstanding, Creative Commons may elect to apply one of 384 | its public licenses to material it publishes and in those instances 385 | will be considered the “Licensor.” The text of the Creative Commons 386 | public licenses is dedicated to the public domain under the CC0 Public 387 | Domain Dedication. Except for the limited purpose of indicating that 388 | material is shared under a Creative Commons public license or as 389 | otherwise permitted by the Creative Commons policies published at 390 | creativecommons.org/policies, Creative Commons does not authorize the 391 | use of the trademark "Creative Commons" or any other trademark or logo 392 | of Creative Commons without its prior written consent including, 393 | without limitation, in connection with any unauthorized modifications 394 | to any of its public licenses or any other arrangements, 395 | understandings, or agreements concerning use of licensed material. For 396 | the avoidance of doubt, this paragraph does not form part of the 397 | public licenses. 398 | 399 | Creative Commons may be contacted at creativecommons.org. 400 | -------------------------------------------------------------------------------- /LICENSE-pytorch-cifar: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 liukuang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # batchboost (currently a draft) 2 | By [Maciej A. Czyzewski](https://github.com/maciejczyzewski) 3 | 4 | This repository contains the implementation used for the results in 5 | our paper (https://arxiv.org/abs/2001.07627). 6 | 7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/batchboost-regularization-for-stabilizing/image-classification-on-cifar-10)](https://paperswithcode.com/sota/image-classification-on-cifar-10?p=batchboost-regularization-for-stabilizing) 8 | 9 | --- 10 | 11 | _Batchboost_ is a simple technique to accelerate ML model training by adaptively feeding mini-batches with artificial samples which are created by mixing two examples from previous step - in favor of pairing those that produce the difficult one. 12 | 13 |
14 | 15 | 16 |
17 | 18 | ## Introduction 19 | 20 | > **UPDATE 24/01/2020:** Thank you for your e-mails asking about _batchboost_. As promised, I will update the results soon and present comparisons with other solutions (paperswithcode.com). This is a draft, and the research needs to be continued before it is a complete work; if you are interested in helping, please contact me. 21 | 22 | ### Overview 23 | 24 | In this research, we state the hypothesis that mixing many images together can 25 | be more effective than just two. To make it efficient, we propose a new method of 26 | creating mini-batches, where each sample from the dataset is propagated with 27 | subsequent iterations with less and less importance until the end of the learning 28 | process. 29 | 30 | The batchboost pipeline has three stages: 31 | (a) _pairing_: method of selecting two samples from the previous step. 32 | (b) _mixing_: method of creating a new artificial example from two selected samples. 33 | (c) _feeding_: constructing a training mini-batch with the created examples and new samples from the dataset (concat with ratio γ). 34 | Note that a sample from the dataset propagates through subsequent iterations with less and less importance until the end of training. 35 | 36 |
37 | 38 | 39 | 40 |
41 | 42 | ### Results 43 | 44 | > **COMING:** comparison of _batchboost_ applied: to different architectures, to different problems (small datasets), for training GAN-s, with/without augmentation, with different parameters {window_normal, window_boost, factor} (hyperparameter tuning). 45 | 46 | The results will be updated and saved to [`results/`](https://github.com/maciejczyzewski/batchboost/tree/master/results). 47 | 48 | Underfitting & Stabilizing Training 49 |
50 | 51 |
52 | 53 | _Figure 1:_ Evaluation on _CIFAR-10_, for _EfficientNet-b0_ and 54 | _SGD(weight-decay=1e-4, lr=0.1)_ (as recommended in the _mixup_ research), same 55 | parameters for each model. As a result, the models behave differently, although 56 | they differ only in the method of constructing the mini-batch. 57 | 58 | Overfitting (comparison to mixup) 59 |
60 | 61 |
62 | 63 | _Figure 2:_ _batchboost_ is a new state of the art because it is slightly better than _mixup_ (here _mixup_ has been tuned for best parameters, _batchboost_ uses the configuration from _Figure 1_). 64 | 65 | ## Requirements and Installation 66 | 67 | * A computer running macOS or Linux 68 | * For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl) 69 | * Python version 3.6 70 | * A [PyTorch installation](http://pytorch.org/) 71 | 72 | ## Training 73 | 74 | Use `python train.py` to train a new model. 75 | Here is an example setting: 76 | ```bash 77 | # for batchboost 78 | $ CUDA_VISIBLE_DEVICES=0 python3 train.py --decay=1e-4 --no-augment --seed=1 \ 79 | --name=batchboost --model=efficientnet-b0 --epoch=30 80 | # for mixup 81 | $ CUDA_VISIBLE_DEVICES=0 python3 train.py --decay=1e-4 --no-augment --seed=1 \ 82 | --name=mixup --model=efficientnet-b0 --epoch=30 83 | ``` 84 | 85 | ## Using 86 | 87 | File [`batchboost.py`](https://github.com/maciejczyzewski/batchboost/tree/master/batchboost.py) should be portable, just copy into your path and write the following: 88 | 89 | ```python3 90 | from batchboost import BatchBoost 91 | 92 | # how to calculate error per sample? 93 | def fn_error(outputs, targets): 94 | logsoftmax = nn.LogSoftmax(dim=1) 95 | return torch.sum(-outputs * logsoftmax(targets), dim=1) 96 | 97 | # how to represent target in linear form (label -> one-hot) 98 | def fn_linearize(x, num_classes=10): 99 | _x = torch.zeros(x.size(0), num_classes) 100 | _x[range(x.size(0)), x] = 1 101 | return _x 102 | 103 | # how to get back (one-hot -> label) 104 | def fn_unlinearize(x): 105 | _, _x = torch.max(x, 1) 106 | return _x 107 | 108 | BatchBoost.fn_error = fn_error 109 | BatchBoost.fn_linearize = fn_linearize 110 | BatchBoost.fn_unlinearize = fn_unlinearize 111 | 112 | # if you don't want to train everything using `batchboost` method 113 | # epoch: [... -> window_normal -> window_boost -> window_normal -> ...] 
114 | # ( batches ) ( batches ) ( batches ) 115 | 116 | BB = BatchBoost( 117 | alpha=args.alpha, # alpha parameter for mixup 118 | window_normal=0, # consecutive batch fits: normal 119 | window_boost=10, # : batchboost 120 | factor=1 / 2, # ratio between new information and fed/mixed 121 | use_cuda=True, 122 | ) 123 | 124 | ... 125 | ``` 126 | 127 | And slightly change your training loop: 128 | 129 | ```python3 130 | ... 131 | 132 | for batch_idx, (new_inputs, new_targets) in enumerate(trainloader): 133 | if use_cuda: 134 | new_inputs, new_targets = new_inputs.cuda(), new_targets.cuda() 135 | 136 | # -----> (a) feed with new information 137 | if not BB.feed(new_inputs, new_targets): 138 | continue 139 | 140 | # -----> (b) apply concat: BB.inputs, BB.targets 141 | outputs = net(BB.inputs) 142 | 143 | # -----> (c) calculate: loss (mixup like style \lambda) 144 | loss = BB.criterion(criterion, outputs) 145 | 146 | train_loss += loss.data 147 | _, predicted = torch.max(outputs.data, 1) 148 | total += BB.inputs.size(0) # -----> remember to use concat 149 | 150 | # -----> (d) calculate: accuracy 151 | correct += BB.correct(predicted) 152 | 153 | # -----> (e) pairing & mixing 154 | BB.mixing(criterion, outputs) 155 | 156 | ... 157 | ``` 158 | 159 | ## Citation 160 | 161 | If you find _batchboost_ useful in your research, please consider citing: 162 | 163 | ```bibtex 164 | @misc{czyzewski2020batchboost, 165 | title={batchboost: regularization for stabilizing training with resistance to underfitting & overfitting}, 166 | author={Maciej A. Czyzewski}, 167 | year={2020}, 168 | eprint={2001.07627}, 169 | archivePrefix={arXiv}, 170 | primaryClass={cs.LG} 171 | } 172 | ``` 173 | 174 | _An interesting topic for further research and discussion is the 175 | combination of batchboost with existing methods._ 176 | 177 | ## License 178 | 179 | Implemented as a fork of ["mixup-cifar10 / facebook"](https://github.com/facebookresearch/mixup-cifar10). 180 | This project is CC-BY-NC-licensed. 
class BatchBoost:
    """
    batchboost: regularization for stabilizing training
    with resistance to underfitting & overfitting
    Maciej A. Czyzewski
    https://arxiv.org/abs/2001.07627

    Intended call order per training batch:

    1. ``feed(new_inputs, new_targets)`` -- append fresh samples to the
       samples kept from the previous step (returns ``False`` on the very
       first call, when there is nothing to concatenate with yet);
    2. run the model on ``self.inputs``;
    3. ``criterion(...)`` / ``correct(...)`` -- lambda-weighted loss/accuracy;
    4. ``mixing(...)`` -- build the samples carried into the next step.

    State kept on the instance:
        inputs:  current batch of inputs (``None`` until the first feed).
        y1, y2:  one-hot target pairs for every row of ``inputs``.
        targets: ``lambda * y1 + (1 - lambda) * y2`` ("virtual" targets),
                 refreshed by ``feed``; ``None`` until a feed returns True.
        mixup_lambda: mixing coefficient used for the stored pairs.
        iter_normal, iter_boost: remaining steps in the current window.
    """

    def __init__(
        self,
        alpha=1.0,
        window_normal=0,
        window_boost=10,
        factor=1 / 3,
        use_cuda=False,
        debug=False,
        num_classes=10,
    ):
        """
        Args:
            alpha: Beta(alpha, alpha) parameter for sampling lambda;
                ``alpha <= 0`` disables sampling (lambda is fixed to 1).
            window_normal: number of consecutive "normal" steps.
            window_boost: number of consecutive "boost" (mixing) steps.
            factor: fraction of the concatenated batch used on each side of
                the pairing (and dropped from the front on normal steps).
            use_cuda: move the one-hot targets to CUDA inside ``feed``.
            debug: print progress messages.
            num_classes: width of the one-hot target encoding.
        """
        self.alpha = alpha
        self.window_normal = window_normal
        self.window_boost = window_boost
        self.factor = factor
        self.use_cuda = use_cuda
        self.debug = debug
        self.num_classes = num_classes
        self.clear()

        if self.debug:
            print(
                f"[BatchBoost] alpha={alpha} ratio={factor} "
                f"window_normal={window_normal} window_boost={window_boost}"
            )

    def clear(self):
        """Reset the carried batch, the lambda and both window counters."""
        if self.debug:
            print("[BatchBoost] resetting")
        self.mixup_lambda = 1
        self.inputs = None
        # Defined up front so that reading it before the first successful
        # feed() yields None instead of raising AttributeError.
        self.targets = None
        self.y1 = self.y2 = None
        self.iter_normal = self.window_normal
        self.iter_boost = self.window_boost

    @staticmethod
    def mixup(x, y, index_left, index_right, mixup_lambda=1.0):
        """Return mixed inputs and the two target sets they were built from.

        ``mixed_x = lambda * x[index_left] + (1 - lambda) * x[index_right]``;
        the targets are NOT blended here, they are returned as the pair
        ``(y[index_left], y[index_right])``.
        https://arxiv.org/abs/1710.09412
        """
        mixed_x = (
            mixup_lambda * x[index_left, :]
            + (1 - mixup_lambda) * x[index_right, :]
        )
        y1, y2 = y[index_left], y[index_right]
        return mixed_x, y1, y2

    @staticmethod
    def fn_error(outputs, targets):
        """Per-sample score used only to rank samples in ``pairing``.

        NOTE(review): this computes ``sum(-outputs * log_softmax(targets))``.
        A soft-target cross-entropy would be
        ``sum(-targets * log_softmax(outputs))``; the operands look swapped.
        Left as-is because changing it alters the ranking the method was
        published with -- confirm with the author before touching.
        """
        logsoftmax = nn.LogSoftmax(dim=1)
        return torch.sum(-outputs * logsoftmax(targets), dim=1)

    @staticmethod
    def fn_linearize(x, num_classes=10):
        """One-hot encode integer class indices ``x`` into ``(len(x), num_classes)``.

        The result is allocated on the same device as ``x`` so that CUDA
        targets can be encoded without a device mismatch.
        """
        rows = torch.arange(x.size(0), device=x.device)
        _x = torch.zeros(x.size(0), num_classes, device=x.device)
        _x[rows, x] = 1
        return _x

    @staticmethod
    def fn_unlinearize(x):
        """Inverse of ``fn_linearize``: argmax over dim 1."""
        _, _x = torch.max(x, 1)
        return _x

    def criterion(self, criterion, outputs):
        """Lambda-weighted loss of ``outputs`` against both stored targets.

        Args:
            criterion: callable ``(outputs, class_indices) -> loss``.
            outputs: model outputs for ``self.inputs``.
        """
        _y1 = BatchBoost.fn_unlinearize(self.y1)
        _y2 = BatchBoost.fn_unlinearize(self.y2)
        return self.mixup_lambda * criterion(outputs, _y1) + (
            1 - self.mixup_lambda
        ) * criterion(outputs, _y2)

    def correct(self, predicted):
        """Lambda-weighted count of predictions matching y1 / y2 (float tensor)."""
        _y1 = BatchBoost.fn_unlinearize(self.y1)
        _y2 = BatchBoost.fn_unlinearize(self.y2)
        return (
            self.mixup_lambda * predicted.eq(_y1).cpu().sum().float()
            + (1 - self.mixup_lambda) * predicted.eq(_y2).cpu().sum().float()
        )

    def pairing(self, errvec):
        """Pick the two index sets that will be mixed together.

        Sorts ``errvec`` in descending order and returns
        ``(top_k, bottom_k_reversed)`` where ``k = int(batch * factor)``:
        the k highest-scoring indices (highest first) and the k
        lowest-scoring indices (lowest first).
        """
        batch_size = errvec.size()[0]
        k = int(batch_size * self.factor)
        _, index = torch.sort(errvec, dim=0, descending=True)
        return index[0:k], index[batch_size - k:].flip(0)

    def mixing(self, criterion, outputs):
        """Prepare the samples carried over to the next ``feed``.

        While boost steps remain, samples are ranked with ``fn_error``,
        paired via ``pairing`` and blended with ``mixup``; otherwise, while
        normal steps remain, the first ``int(batch * factor)`` rows are
        dropped and lambda is reset to 1. When both counters reach zero they
        are re-armed from the configured window sizes.

        Args:
            criterion: unused; kept for interface compatibility.
            outputs: model outputs for the current ``self.inputs``.
        """
        if self.iter_boost + self.iter_normal == 0:
            self.iter_normal = self.window_normal
            self.iter_boost = self.window_boost
        if self.iter_boost > 0:
            if self.debug:
                print("[BatchBoost]: half-batch + feed-batch")
            errvec = BatchBoost.fn_error(outputs, self.targets)
            index_left, index_right = self.pairing(errvec)

            if self.alpha > 0:
                self.mixup_lambda = np.random.beta(self.alpha, self.alpha)
            else:
                self.mixup_lambda = 1

            # The two index sets are passed crossed over on purpose
            # (as in the original): the low-score side is weighted by
            # lambda, the high-score side by (1 - lambda).
            self.inputs, self.y1, self.y2 = BatchBoost.mixup(
                self.inputs,
                y=self.targets,
                index_left=index_right,
                index_right=index_left,
                mixup_lambda=self.mixup_lambda,
            )
            self.iter_boost -= 1
        elif self.iter_normal > 0:
            if self.debug:
                print("[BatchBoost] normal batch")
            batch_size = self.inputs.size(0)
            cut = int(batch_size * self.factor)
            self.inputs = self.inputs[cut:]
            self.y1 = self.y1[cut:]
            self.y2 = self.y2[cut:]
            self.mixup_lambda = 1
            self.iter_normal -= 1

    def feed(self, new_inputs, _new_targets):
        """Add a fresh batch to the carried samples.

        Args:
            new_inputs: input tensor for the new samples.
            _new_targets: integer class indices for the new samples.

        Returns:
            ``False`` on the first call after ``clear()`` (the batch is only
            stored, nothing to train on yet); ``True`` once ``inputs``,
            ``y1``, ``y2`` and ``targets`` describe the concatenated batch.
        """
        new_targets = BatchBoost.fn_linearize(_new_targets, self.num_classes)
        if self.use_cuda:
            new_targets = new_targets.cuda()
        # no mixing (first iteration)
        if self.inputs is None:
            self.inputs = new_inputs
            self.y1 = new_targets
            self.y2 = new_targets
            return False
        # concat
        self.inputs = torch.cat([self.inputs, new_inputs], dim=0)
        self.y1 = torch.cat([self.y1, new_targets], dim=0)
        self.y2 = torch.cat([self.y2, new_targets], dim=0)
        # virtual targets
        self.targets = (
            self.mixup_lambda * self.y1 + (1 - self.mixup_lambda) * self.y2
        )
        return True
class ResNet100kv2(nn.Module):
    """Small two-stage CNN: (conv -> ReLU -> BatchNorm -> max-pool) x 2, then two linear layers.

    ``fc1`` expects 2048 flattened features. With the layer settings below
    that corresponds to a 1x28x28 input:
    28 -(5x5, pad 2)-> 28 -(pool 2)-> 14 -(3x3, pad 2)-> 16 -(pool 2)-> 8,
    and 32 channels * 8 * 8 = 2048. Other input sizes will not match ``fc1``.

    Args:
        num_classes: number of output logits (default 10, as before).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        self.cnn1 = nn.Conv2d(
            in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2
        )
        self.relu1 = nn.ReLU()
        self.norm1 = nn.BatchNorm2d(16)
        # In-place initialiser; the un-suffixed xavier_uniform is deprecated.
        nn.init.xavier_uniform_(self.cnn1.weight)

        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        self.cnn2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=2
        )
        self.relu2 = nn.ReLU()
        self.norm2 = nn.BatchNorm2d(32)
        nn.init.xavier_uniform_(self.cnn2.weight)

        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(2048, 128)
        self.fcrelu = nn.ReLU()

        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)``."""
        out = self.cnn1(x)
        out = self.relu1(out)
        out = self.norm1(out)

        out = self.maxpool1(out)

        out = self.cnn2(out)
        out = self.relu2(out)
        out = self.norm2(out)

        out = self.maxpool2(out)

        # flatten everything except the batch dimension
        out = out.view(out.size(0), -1)

        out = self.fc1(out)
        out = self.fcrelu(out)

        out = self.fc2(out)
        return out
transform 105 | transform_test = transform 106 | 107 | trainset = datasets.FashionMNIST( 108 | root="./data", train=True, download=True, transform=transform_train 109 | ) 110 | trainloader = torch.utils.data.DataLoader( 111 | trainset, batch_size=args.batch_size, shuffle=True, num_workers=8 112 | ) 113 | 114 | testset = datasets.FashionMNIST( 115 | root="./data", train=False, download=True, transform=transform_test 116 | ) 117 | testloader = torch.utils.data.DataLoader( 118 | testset, batch_size=100, shuffle=False, num_workers=8 119 | ) 120 | 121 | return trainloader, testloader 122 | -------------------------------------------------------------------------------- /figures/batches/img_1_new_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_10.png -------------------------------------------------------------------------------- /figures/batches/img_1_new_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_11.png -------------------------------------------------------------------------------- /figures/batches/img_1_new_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_6.png -------------------------------------------------------------------------------- /figures/batches/img_1_new_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_7.png -------------------------------------------------------------------------------- 
/figures/batches/img_1_new_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_8.png -------------------------------------------------------------------------------- /figures/batches/img_1_new_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_new_9.png -------------------------------------------------------------------------------- /figures/batches/img_1_old_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_0.png -------------------------------------------------------------------------------- /figures/batches/img_1_old_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_1.png -------------------------------------------------------------------------------- /figures/batches/img_1_old_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_2.png -------------------------------------------------------------------------------- /figures/batches/img_1_old_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_3.png -------------------------------------------------------------------------------- 
/figures/batches/img_1_old_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_4.png -------------------------------------------------------------------------------- /figures/batches/img_1_old_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_1_old_5.png -------------------------------------------------------------------------------- /figures/batches/img_2_new_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_10.png -------------------------------------------------------------------------------- /figures/batches/img_2_new_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_11.png -------------------------------------------------------------------------------- /figures/batches/img_2_new_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_6.png -------------------------------------------------------------------------------- /figures/batches/img_2_new_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_7.png -------------------------------------------------------------------------------- 
/figures/batches/img_2_new_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_8.png -------------------------------------------------------------------------------- /figures/batches/img_2_new_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_new_9.png -------------------------------------------------------------------------------- /figures/batches/img_2_old_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_0.png -------------------------------------------------------------------------------- /figures/batches/img_2_old_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_1.png -------------------------------------------------------------------------------- /figures/batches/img_2_old_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_2.png -------------------------------------------------------------------------------- /figures/batches/img_2_old_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_3.png -------------------------------------------------------------------------------- 
/figures/batches/img_2_old_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_4.png -------------------------------------------------------------------------------- /figures/batches/img_2_old_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_2_old_5.png -------------------------------------------------------------------------------- /figures/batches/img_3_new_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_10.png -------------------------------------------------------------------------------- /figures/batches/img_3_new_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_11.png -------------------------------------------------------------------------------- /figures/batches/img_3_new_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_6.png -------------------------------------------------------------------------------- /figures/batches/img_3_new_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_7.png -------------------------------------------------------------------------------- 
/figures/batches/img_3_new_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_8.png -------------------------------------------------------------------------------- /figures/batches/img_3_new_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_new_9.png -------------------------------------------------------------------------------- /figures/batches/img_3_old_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_0.png -------------------------------------------------------------------------------- /figures/batches/img_3_old_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_1.png -------------------------------------------------------------------------------- /figures/batches/img_3_old_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_2.png -------------------------------------------------------------------------------- /figures/batches/img_3_old_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_3.png -------------------------------------------------------------------------------- 
/figures/batches/img_3_old_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_4.png -------------------------------------------------------------------------------- /figures/batches/img_3_old_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_3_old_5.png -------------------------------------------------------------------------------- /figures/batches/img_4_new_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_10.png -------------------------------------------------------------------------------- /figures/batches/img_4_new_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_11.png -------------------------------------------------------------------------------- /figures/batches/img_4_new_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_6.png -------------------------------------------------------------------------------- /figures/batches/img_4_new_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_7.png -------------------------------------------------------------------------------- 
/figures/batches/img_4_new_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_8.png -------------------------------------------------------------------------------- /figures/batches/img_4_new_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_new_9.png -------------------------------------------------------------------------------- /figures/batches/img_4_old_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_0.png -------------------------------------------------------------------------------- /figures/batches/img_4_old_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_1.png -------------------------------------------------------------------------------- /figures/batches/img_4_old_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_2.png -------------------------------------------------------------------------------- /figures/batches/img_4_old_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_3.png -------------------------------------------------------------------------------- 
/figures/batches/img_4_old_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_4.png -------------------------------------------------------------------------------- /figures/batches/img_4_old_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/batches/img_4_old_5.png -------------------------------------------------------------------------------- /figures/data_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_1.png -------------------------------------------------------------------------------- /figures/data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_2.png -------------------------------------------------------------------------------- /figures/data_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_3.png -------------------------------------------------------------------------------- /figures/data_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_4.png -------------------------------------------------------------------------------- /figures/data_5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_5.png -------------------------------------------------------------------------------- /figures/data_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_6.png -------------------------------------------------------------------------------- /figures/data_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/data_7.png -------------------------------------------------------------------------------- /figures/figure-1-loss-train-without-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-1-loss-train-without-augment.pdf -------------------------------------------------------------------------------- /figures/figure-1-test-accuracy-without-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-1-test-accuracy-without-augment.pdf -------------------------------------------------------------------------------- /figures/figure-2-test-accuracy-with-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-2-test-accuracy-with-augment.pdf -------------------------------------------------------------------------------- /figures/figure-2-train-accuracy-with-augment.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-2-train-accuracy-with-augment.pdf -------------------------------------------------------------------------------- /figures/figure-abstract.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-abstract.pdf -------------------------------------------------------------------------------- /figures/figure-abstract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-abstract.png -------------------------------------------------------------------------------- /figures/figure-feeding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-feeding.pdf -------------------------------------------------------------------------------- /figures/figure-feeding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-feeding.png -------------------------------------------------------------------------------- /figures/figure-multipass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/figures/figure-multipass.png -------------------------------------------------------------------------------- /figures/for-repository-1.png: 
class AllDNet(nn.Module):
    """LeNet-style CNN that also returns its intermediate activations.

    Two 5x5 convolutions (each followed by ReLU and 2x2 max-pooling) feed
    three fully connected layers. ``fc1`` expects ``16 * 5 * 5`` features,
    which is what a 3x32x32 input produces with these layers.

    Args:
        num_classes: width of the final linear layer (default 10, as before).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Run the network.

        Returns:
            ``(logits, activations)`` where ``activations`` is a list of
            three tensors, in order: the flattened output of the second
            pooling stage, the ReLU output of ``fc1`` and the ReLU output
            of ``fc2``.
        """
        out = F.max_pool2d(F.relu(self.conv1(x)), 2)
        out = F.max_pool2d(F.relu(self.conv2(out)), 2)

        flat = out.view(out.size(0), -1)
        hidden1 = F.relu(self.fc1(flat))
        hidden2 = F.relu(self.fc2(hidden1))

        activations = [flat, hidden1, hidden2]
        return self.fc3(hidden2), activations
def DenseNet121():
    '''Build DenseNet-121: Bottleneck blocks in stages of 6/12/24/16, growth rate 32.'''
    blocks_per_stage = [6, 12, 24, 16]
    return DenseNet(Bottleneck, blocks_per_stage, growth_rate=32)
class BasicBlock(nn.Module):
    '''Dense layer: BN -> ReLU -> 3x3 conv, output concatenated after the input.

    The result has ``in_planes + out_planes`` channels; the first
    ``in_planes`` channels are the untouched input.
    '''

    def __init__(self, in_planes, out_planes, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        # Dropout probability applied to the new features (0 disables it).
        self.droprate = dropRate

    def forward(self, x):
        normalized = self.relu(self.bn1(x))
        new_features = self.conv1(normalized)
        if self.droprate > 0:
            new_features = F.dropout(
                new_features, p=self.droprate, training=self.training
            )
        return torch.cat([x, new_features], 1)
class DenseBlock(nn.Module):
    '''A sequential stack of ``nb_layers`` dense layers built from ``block``.

    Layer ``i`` is constructed as
    ``block(in_planes + i * growth_rate, growth_rate, dropRate)``, i.e. each
    layer is told that its input is ``growth_rate`` channels wider than the
    previous layer's input.
    '''

    def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0):
        super().__init__()
        self.layer = self._make_layer(
            block, in_planes, growth_rate, nb_layers, dropRate
        )

    def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate):
        '''Instantiate the layers in order and wrap them in ``nn.Sequential``.'''
        stacked = (
            block(in_planes + depth * growth_rate, growth_rate, dropRate)
            for depth in range(nb_layers)
        )
        return nn.Sequential(*stacked)

    def forward(self, x):
        return self.layer(x)
class Inception(nn.Module):
    '''Inception module: four parallel branches concatenated along channels.

    Branches, in output order:
      b1 - 1x1 conv
      b2 - 1x1 conv -> 3x3 conv
      b3 - 1x1 conv -> 3x3 conv -> 3x3 conv (stands in for the 5x5 branch)
      b4 - 3x3 max-pool -> 1x1 conv
    Every convolution is followed by batch norm and an in-place ReLU.
    The output has ``n1x1 + n3x3 + n5x5 + pool_planes`` channels.
    '''

    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super().__init__()

        def unit(c_in, c_out, **conv_kwargs):
            # One conv -> batch norm -> ReLU triple, as a flat list of layers.
            return [
                nn.Conv2d(c_in, c_out, **conv_kwargs),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        # 1x1 conv branch
        self.b1 = nn.Sequential(*unit(in_planes, n1x1, kernel_size=1))

        # 1x1 conv -> 3x3 conv branch
        self.b2 = nn.Sequential(
            *unit(in_planes, n3x3red, kernel_size=1),
            *unit(n3x3red, n3x3, kernel_size=3, padding=1)
        )

        # 1x1 conv -> two stacked 3x3 convs ("5x5" branch)
        self.b3 = nn.Sequential(
            *unit(in_planes, n5x5red, kernel_size=1),
            *unit(n5x5red, n5x5, kernel_size=3, padding=1),
            *unit(n5x5, n5x5, kernel_size=3, padding=1)
        )

        # 3x3 pool -> 1x1 conv branch
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            *unit(in_planes, pool_planes, kernel_size=1)
        )

    def forward(self, x):
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], 1)
class LeNet(nn.Module):
    '''LeNet-style CNN for 3-channel 32x32 images.

    Args:
        num_classes: size of the output layer. Defaults to 10, which keeps
            the previous hard-coded behaviour; exposed so the model can be
            used on datasets with a different number of classes, like the
            other models in this package.
    '''

    def __init__(self, num_classes=10):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 channels of 5x5 remain after two (5x5 conv, 2x2 pool) stages
        # applied to a 32x32 input.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        '''Return class logits of shape ``(batch, num_classes)``.'''
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
class Block(nn.Module):
    '''Depthwise 3x3 conv followed by pointwise 1x1 conv, each with BN + ReLU.

    The depthwise conv keeps ``in_planes`` channels (one group per channel)
    and applies ``stride``; the pointwise conv maps to ``out_planes``.
    '''

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()
        # Depthwise stage: groups == channels, so each channel is filtered alone.
        self.conv1 = nn.Conv2d(
            in_planes,
            in_planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_planes,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise stage: 1x1 conv mixes channels.
        self.conv2 = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        stages = ((self.conv1, self.bn1), (self.conv2, self.bn2))
        out = x
        for conv, norm in stages:
            out = F.relu(norm(conv(out)))
        return out
def conv3x3(in_planes, out_planes, stride=1):
    '''Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride.'''
    conv = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return conv
class Bottleneck(nn.Module):
    '''Residual bottleneck: 1x1 -> 3x3 -> 1x1 convs, each followed by BN.

    The block outputs ``expansion * planes`` channels. The shortcut is the
    identity unless the stride or the channel count changes, in which case
    it is a strided 1x1 conv followed by batch norm.
    '''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Only the 3x3 conv carries the stride.
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            # Project the input so it can be added to the main path.
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, out_planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(out_planes),
            )
        else:
            # Empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        residual = self.shortcut(x)
        out += residual
        return F.relu(out)
class ResNet(nn.Module):
    '''ResNet with a 3x3 stem, four residual stages and a linear classifier.

    ``forward`` can run only a slice of the network through its ``lin`` /
    ``lout`` arguments (see that method).
    '''

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Channel count fed to the next block; advanced by _make_layer().
        self.in_planes = 64

        # Stem.
        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        # Residual stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        # Classifier.
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``, the rest use 1.'''
        per_block_strides = [stride] + [1] * (num_blocks - 1)
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x, lin=0, lout=5):
        '''Run the sub-network selected by ``lin`` and ``lout``.

        Stages are numbered 0 (stem: conv, BN, ReLU) to 4 (``layer4``).
        Stage ``i`` runs when ``lin < i + 1`` and ``lout > i - 1``. The
        pooling + linear head runs only when ``lout > 4``. With the defaults
        the whole network is executed.
        '''
        def stem(t):
            return F.relu(self.bn1(self.conv1(t)))

        stages = (stem, self.layer1, self.layer2, self.layer3, self.layer4)
        out = x
        for idx, stage in enumerate(stages):
            if lin < idx + 1 and lout > idx - 1:
                out = stage(out)
        if lout > 4:
            pooled = F.avg_pool2d(out, 4)
            out = self.linear(pooled.view(pooled.size(0), -1))
        return out
class Block(nn.Module):
    '''Grouped convolution block.

    1x1 conv to ``cardinality * bottleneck_width`` channels, grouped 3x3 conv
    (``cardinality`` groups, carries the stride), then 1x1 conv to
    ``expansion`` times that width. Each conv is followed by batch norm. The
    shortcut is the identity unless stride or channel count changes.
    '''
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super().__init__()
        group_width = cardinality * bottleneck_width
        out_planes = self.expansion * group_width

        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=cardinality,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride != 1 or in_planes != out_planes:
            # Projection so the input matches the main path's output shape.
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, out_planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(out_planes),
            )
        else:
            # Empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        skip = self.shortcut(x)
        out += skip
        return F.relu(out)
def ResNeXt29_2x64d():
    '''Build ResNeXt-29 (2x64d): three stages of 3 blocks, cardinality 2, width 64.'''
    stage_depths = [3, 3, 3]
    return ResNeXt(num_blocks=stage_depths, cardinality=2, bottleneck_width=64)
class VGG(nn.Module):
    '''VGG-style CNN built from a named entry of the module-level ``cfg``.

    Args:
        vgg_name: key into ``cfg`` (e.g. ``'VGG11'``); an unknown name raises
            ``KeyError``.
        num_classes: size of the output layer. Defaults to 10, which keeps
            the previous hard-coded behaviour; exposed so the model can be
            used on datasets with a different number of classes, like the
            other models in this package.

    The classifier takes 512 flattened features, so the feature extractor
    must end with 512 channels at 1x1 resolution (true for the bundled
    configurations on 32x32 inputs: five 2x2 poolings, last width 512).
    '''

    def __init__(self, vgg_name, num_classes=10):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        '''Return class logits of shape ``(batch, num_classes)``.'''
        out = self.features(x)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        '''Translate a configuration list into an ``nn.Sequential``.

        ``'M'`` becomes a 2x2 max-pool; an integer ``c`` becomes a
        3x3 conv to ``c`` channels followed by batch norm and ReLU. The
        input is assumed to have 3 channels.
        '''
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        # A 1x1, stride-1 average pool leaves the tensor unchanged; kept so
        # the layer indices of ``features`` stay the same as before.
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
The pairing stage calculates the error per sample, sorts the samples and pairs
them with the strategy: hardest with easiest one; then the mixing stage merges two samples
using mixup, $x_1 + (1-\lambda)x_2$. Finally, the feeding stage combines
new samples with mixed ones at a 1:1 ratio.
Batchboost has 0.5-3% better accuracy than the current
state-of-the-art mixup regularization on CIFAR-10 & Fashion-MNIST.
Our method is slightly better than the SamplePairing technique
on small datasets (up to 5%).
Batchboost provides stable training with untuned parameters (like weight
decay), thus it is a good method to test the performance of different architectures.
Source code is at: https://github.com/maciejczyzewski/batchboost
% Handling keywords.
% \keywordname: the label printed in front of the keyword list, in bold and
% emphasized. \emph takes exactly one argument, so the word must be braced;
% written as "\emph Keywords" only the first letter was emphasized.
\def\keywordname{{\bfseries \emph{Keywords}}}%
% \keywords{<list>}: start a new paragraph with some vertical space, print
% \keywordname followed by the list, and end the paragraph. Within the list,
% \and is locally redefined to print a centered-dot separator.
\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm
\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$
}\noindent\keywordname\enspace\ignorespaces#1\par}}
% \subsection: second-level heading with reduced vertical space.
% \providecommand defines \subsection as empty only if it does not exist yet,
% so that the \renewcommand below cannot fail.
% \@startsection arguments, in order: sectioning name, level, indentation
% (\z@ = 0pt), skip before the heading, skip after the heading, heading style.
% NOTE(review): per the LaTeX kernel convention, a negative "before" skip
% suppresses indentation of the paragraph that follows the heading, and a
% positive "after" skip puts the heading on its own line - confirm against
% the \@startsection documentation.
% NOTE(review): \bf is the obsolete LaTeX 2.09 font switch (\bfseries is the
% LaTeX2e form); left unchanged here.
\providecommand{\subsection}{}
\renewcommand{\subsection}{%
  \@startsection{subsection}{2}{\z@}%
                {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
                { 0.8ex \@plus 0.2ex}%
                {\normalsize\bf\raggedright}%
}
\setcounter{footnote}{0} 138 | 139 | % paragraph formatting 140 | \setlength{\parindent}{\z@} 141 | \setlength{\parskip }{5.5\p@} 142 | 143 | % list formatting 144 | \setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@} 145 | \setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@} 146 | \setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} 147 | \setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} 148 | \setlength{\leftmargin }{3pc} 149 | \setlength{\leftmargini }{\leftmargin} 150 | \setlength{\leftmarginii }{2em} 151 | \setlength{\leftmarginiii}{1.5em} 152 | \setlength{\leftmarginiv }{1.0em} 153 | \setlength{\leftmarginv }{0.5em} 154 | \def\@listi {\leftmargin\leftmargini} 155 | \def\@listii {\leftmargin\leftmarginii 156 | \labelwidth\leftmarginii 157 | \advance\labelwidth-\labelsep 158 | \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@ 159 | \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ 160 | \itemsep \parsep} 161 | \def\@listiii{\leftmargin\leftmarginiii 162 | \labelwidth\leftmarginiii 163 | \advance\labelwidth-\labelsep 164 | \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ 165 | \parsep \z@ 166 | \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@ 167 | \itemsep \topsep} 168 | \def\@listiv {\leftmargin\leftmarginiv 169 | \labelwidth\leftmarginiv 170 | \advance\labelwidth-\labelsep} 171 | \def\@listv {\leftmargin\leftmarginv 172 | \labelwidth\leftmarginv 173 | \advance\labelwidth-\labelsep} 174 | \def\@listvi {\leftmargin\leftmarginvi 175 | \labelwidth\leftmarginvi 176 | \advance\labelwidth-\labelsep} 177 | 178 | % create title 179 | \providecommand{\maketitle}{} 180 | \renewcommand{\maketitle}{% 181 | \par 182 | \begingroup 183 | \renewcommand{\thefootnote}{\fnsymbol{footnote}} 184 | % for perfect author name centering 185 | \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}} 186 | % The footnote-mark was overlapping the footnote-text, 187 | % added the following to fix this problem (MK) 188 | \long\def\@makefntext##1{% 189 | \parindent 1em\noindent 190 | \hbox to 
1.8em{\hss $\m@th ^{\@thefnmark}$}##1 191 | } 192 | \thispagestyle{empty} 193 | \@maketitle 194 | \@thanks 195 | %\@notice 196 | \endgroup 197 | \let\maketitle\relax 198 | \let\thanks\relax 199 | } 200 | 201 | % rules for title box at top of first page 202 | \newcommand{\@toptitlebar}{ 203 | \hrule height 2\p@ 204 | \vskip 0.25in 205 | \vskip -\parskip% 206 | } 207 | \newcommand{\@bottomtitlebar}{ 208 | \vskip 0.29in 209 | \vskip -\parskip 210 | \hrule height 2\p@ 211 | \vskip 0.09in% 212 | } 213 | 214 | % create title (includes both anonymized and non-anonymized versions) 215 | \providecommand{\@maketitle}{} 216 | \renewcommand{\@maketitle}{% 217 | \vbox{% 218 | \hsize\textwidth 219 | \linewidth\hsize 220 | \vskip 0.1in 221 | \@toptitlebar 222 | \centering 223 | {\LARGE\sc \@title\par} 224 | \@bottomtitlebar 225 | \textsc{\undertitle}\\ 226 | \vskip 0.1in 227 | \def\And{% 228 | \end{tabular}\hfil\linebreak[0]\hfil% 229 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% 230 | } 231 | \def\AND{% 232 | \end{tabular}\hfil\linebreak[4]\hfil% 233 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% 234 | } 235 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}% 236 | \vskip 0.4in \@minus 0.1in \center{\@date} \vskip 0.2in 237 | } 238 | } 239 | 240 | % add conference notice to bottom of first page 241 | \newcommand{\ftype@noticebox}{8} 242 | \newcommand{\@notice}{% 243 | % give a bit of extra room back to authors on first page 244 | \enlargethispage{2\baselineskip}% 245 | \@float{noticebox}[b]% 246 | \footnotesize\@noticestring% 247 | \end@float% 248 | } 249 | 250 | % abstract styling 251 | \renewenvironment{abstract} 252 | { 253 | \centerline 254 | {\large \bfseries \scshape Abstract} 255 | \begin{quote} 256 | } 257 | { 258 | \end{quote} 259 | } 260 | 261 | \endinput 262 | -------------------------------------------------------------------------------- /paper/batchboost.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/batchboost.pdf -------------------------------------------------------------------------------- /paper/batchboost.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | 4 | 5 | \usepackage{arxiv} 6 | 7 | \usepackage[utf8]{inputenc} % allow utf-8 input 8 | \usepackage[T1]{fontenc} % use 8-bit T1 fonts 9 | \usepackage{hyperref} % hyperlinks 10 | \usepackage{url} % simple URL typesetting 11 | \usepackage{booktabs} % professional-quality tables 12 | \usepackage{amsfonts} % blackboard math symbols 13 | \usepackage{nicefrac} % compact symbols for 1/2, etc. 14 | \usepackage{microtype} % microtypography 15 | \usepackage{lipsum} % Can be removed after putting your text content 16 | 17 | \usepackage{graphicx} 18 | \usepackage{caption} 19 | \usepackage{float} 20 | \usepackage{subcaption} 21 | \usepackage{amsmath} 22 | 23 | \renewcommand{\headeright}{} 24 | \renewcommand{\undertitle}{Draft} 25 | 26 | % Version 2: 27 | % FIXME: gradient clipping (mixup more likely do explode, but why?) 28 | % FIXME: what about adam, sgd and other optimizers with different weight decay? 29 | % FIXME: what about different types of data (tabular)? 30 | % FIXME: adversarial attacks? 
31 | 32 | \title{\emph{batchboost}: regularization for stabilizing training with 33 | resistance to underfitting \& overfitting} 34 | 35 | \author{ 36 | Maciej A.~Czyzewski\\ 37 | Institute of Computing Science\\ 38 | Poznan University of Technology\\ 39 | Piotrowo 2, 60-965 Poznan, Poland\\ 40 | \texttt{maciejanthonyczyzewski@gmail.com} \\ 41 | } 42 | 43 | \begin{document} 44 | \maketitle 45 | 46 | % BC learning: https://arxiv.org/pdf/1711.10284.pdf 47 | % EfficientNet: https://arxiv.org/pdf/1905.11946.pdf 48 | % Mixup: https://arxiv.org/pdf/1710.09412.pdf 49 | % SamplePairing: https://arxiv.org/pdf/1801.02929.pdf 50 | % ShakeDrop: https://arxiv.org/pdf/1802.02375.pdf 51 | % ShakeShake: https://arxiv.org/pdf/1705.07485.pdf 52 | 53 | \begin{abstract} 54 | Overfitting \& underfitting and stable training are important challenges in 55 | machine learning. 56 | % 57 | Current approaches for these issues are \emph{mixup}\cite{zhang2017mixup}, 58 | \emph{SamplePairing}\cite{inoue2018data} 59 | and \emph{BC learning}\cite{tokozume2018between}. 60 | In our work, we state the hypothesis that mixing many images together can be more 61 | effective than just two. 62 | \emph{batchboost} pipeline has three stages: 63 | (a) pairing: method of selecting two samples. 64 | (b) mixing: how to create a new one from two samples. 65 | (c) feeding: combining mixed samples with new ones from dataset into batch (with ratio $\gamma$). 66 | Note that a sample that appears in our batch propagates with 67 | subsequent iterations with less and less importance until the end of training. 68 | % 69 | Pairing stage calculates the error per sample, sorts the samples and pairs 70 | with strategy: hardest with easiest one, then the mixing stage merges two samples 71 | using \emph{mixup}, $\lambda x_1 + (1-\lambda)x_2$. Finally, feeding stage combines 72 | new samples with mixed by ratio 1:1. 
73 | % 74 | \emph{batchboost} has 0.5-3\% better accuracy than the current 75 | state-of-the-art \emph{mixup} regularization on 76 | CIFAR-10\cite{krizhevsky2009learning} \& 77 | Fashion-MNIST\cite{xiao2017}. 78 | % 79 | Our method is slightly better than SamplePairing technique 80 | on small datasets (up to 5\%). 81 | % 82 | \emph{batchboost} provides stable training with untuned parameters (like weight 83 | decay), thus it is a good method to test performance of different architectures. 84 | % 85 | Source code is at: \url{https://github.com/maciejczyzewski/batchboost} 86 | \end{abstract} 87 | 88 | \keywords{regularization \and underfitting \and overfitting \ 89 | \and generalization \and mixup} 90 | 91 | \section{Introduction} 92 | \label{sec:introduction} 93 | 94 | In order to improve test errors, regularization methods which are processes to 95 | introduce additional information to DNN have been proposed\cite{miyato2018virtual}. Widely 96 | used regularization methods include \emph{data augmentation}, \emph{stochastic 97 | gradient descent} (SGD) \cite{zhang2016understanding}, \emph{weight decay} 98 | \cite{krogh1992simple}, \emph{batch normalization} (BN) \cite{ioffe2015batch}, 99 | \emph{label 100 | smoothing}\cite{szegedy2016rethinking} and \emph{mixup}\cite{zhang2017mixup}. 101 | % 102 | Our idea comes from \emph{mixup} flaws. In a nutshell, \emph{mixup} constructs 103 | a virtual training example from two samples. In terms of batch construction, it 104 | simply gets some random samples from the dataset and randomly mixes them together. 105 | % 106 | The overlapping example of many samples (more than two) has not been considered 107 | in previous work. Probably because the imposition of 3 examples significantly affects the model leading to underfitting. 108 | % 109 | It turned out that in many tasks, linear mixing (like \emph{BC learning} or 110 | \emph{mixup}) leads to underfitting (figure \ref{fig:under}). Therefore, these methods are not applicable as universal tools. 
111 | 112 | \textbf{Contribution} Our work shows that the imposition of many examples in 113 | subsequent iterations (which are slowly suppressed by new overlays) can improve efficiency, but most importantly it ensures stability of training and resistance to attacks. 114 | % 115 | However, it must be done wisely: that's why we implemented two basic mechanisms: 116 | \begin{itemize} 117 | \item (a) new information is provided gradually, thus \emph{half-batch} adds 118 | new examples without mixing 119 | \item (b) mixing is carried out according to some criterion, in our case it is the 120 | best-the-worst strategy to mediate the error 121 | \end{itemize} 122 | % 123 | The whole procedure is made in three steps to make it more understandable: 124 | \begin{itemize} 125 | \item (a) \emph{pairing}: a method for selecting two samples 126 | \item (b) \emph{mixing}: how to create a new one from two samples 127 | \item (c) \emph{feeding}: supplementing the mixed samples in the batch with new examples 128 | from the dataset 129 | \end{itemize} 130 | % 131 | Note that a sample that appears in our batch propagates with 132 | subsequent iterations with less and less importance until the end of training. 133 | % 134 | Source code with sample implementation and experiments to verify the results 135 | we present here: 136 | 137 | \begin{center} 138 | \url{https://github.com/maciejczyzewski/batchboost} 139 | \end{center} 140 | 141 | To understand the effects of \emph{batchboost}, we conduct a 142 | thorough set of experiments (Section \ref{sec:results}). 
143 | 144 | \section{Overview} 145 | \label{sec:overview} 146 | 147 | \begin{figure}[H] 148 | \centering 149 | \includegraphics[width=\linewidth]{figure-abstract} 150 | \caption{\emph{batchboost} presented in three phases: (a) pairing by sorting 151 | error (b) mixing with \emph{mixup} (c) feeding: a mixed feed-batch and new 152 | samples in half-batch by 1:1 ratio.} 153 | \label{fig:abstract} 154 | \end{figure} 155 | 156 | Batch as input for training is a combination of two different mini-batches: 157 | \begin{itemize} 158 | \item (a) \emph{half-batch}: new samples from dataset, classical augmentation is possible here 159 | \item (b) \emph{feed-batch} (mixup): samples mixed together (in the order presented in 160 | figure \ref{fig:abstract}) 161 | \end{itemize} 162 | 163 | Parameter $\gamma$ means the ratio of the number of samples in half-batch to 164 | feed-batch; in our work we have not considered other values than 1. However, we believe that this is an interesting topic for further research and discussion. 165 | 166 | \subsection{Pairing Method} 167 | \label{sec:pairing} 168 | 169 | Combining many overlapping samples may have a negative impact on our optimizer 170 | used in training. In our implementation, the pairing stage calculates the error for each 171 | sample in batch. Then it sorts this vector, and pairs samples by connecting the 172 | easiest (smallest error) with the most difficult sample. The goal of this 173 | procedure is to create new artificial samples that are between classes, as 174 | described in \emph{BC learning}. 175 | 176 | However, in this case they are not random pairs, but those that 'require' 177 | additional work. In this way, the learning process is more stable because there 178 | are no cases when it mixes only difficult with difficult or easy with easy (which is 179 | likely at the beginning or end of the learning process). 
180 | % 181 | In our case, the error was calculated using L2 metric between one-hot labels and 182 | the predictions (thus we analyzed \emph{batchboost} only on classification 183 | problems like CIFAR-10\cite{krizhevsky2009learning} or 184 | Fashion-MNIST\cite{xiao2017}). For other problems, there is probably 185 | a need to change the metric/method of error calculation. 186 | % 187 | We were also thinking about using RL to pair samples. However, it turns out to 188 | be a more complicated problem thus we leave it here for further discussion. 189 | 190 | \subsection{Mixing Method} 191 | \label{sec:mixing} 192 | 193 | The two selected samples should be combined into one. 194 | There are three methods for linearly mixing samples: \emph{SamplePairing}, 195 | \emph{Mixup}, \emph{BC Learning}. Due to the simplicity of implementation and 196 | the highest scores, we used \emph{mixup}, which looks like this: 197 | % 198 | \begin{align*} 199 | \tilde{x} &= \lambda x_i + (1 - \lambda) x_j,\qquad \text{where~} x_i, x_j \text{~are~raw~input~vectors}\\ 200 | \tilde{y} &= \lambda y_i + (1 - \lambda) y_j,\qquad \text{where~} y_i, y_j \text{~are~one-hot~label~encodings} 201 | \end{align*} 202 | $(x_i, y_i)$ and $(x_j, y_j)$ are two examples drawn at random from our 203 | training data, and $\lambda \in [0,1]$. 204 | Label for many samples was averaged over the last 2 labels (due to small differences in results, and large trade-off in memory). 205 | 206 | Why does it work? 207 | A good explanation is provided in BC learning research, that images and sound 208 | can be represented as waves. Mixing is an interpolation that humans don't 209 | understand but a machine could interpret. 210 | Another good explanation of this process is that by training on 211 | artificial samples, we supplement the training data by artificial examples between-classes 212 | (visually, it fills space between clusters in UMAP/t-SNE visualization). 
213 | Thus, it generalizes the problem more by aggressive cluster separation during 214 | training (the clusters are moving away from each other, because the model learns 215 | artificial clusters made up by mixing). 216 | Mixing multiple classes allows for more accurate separation (higher dimensions), however the model starts to depart from the original problem (new distribution), losing accuracy on the test dataset. 217 | 218 | The question is whether linear interpolation is good for all problems. 219 | Probably the best solution would be to use a GAN for this purpose (two inputs + 220 | noise to control). We tried to use the technique described in 221 | SinGAN\cite{shaham2019singan} but it 222 | failed in \emph{batchboost}. It was unsuccessful due to the high cost of 223 | maintaining such a structure. 224 | 225 | \subsection{Continuous Feeding} 226 | \label{sec:feeding} 227 | 228 | The final stage is for 'feeding' new artificial samples on the model's input. 229 | Previous research considered only the case of mixing two samples within a 230 | batch. \emph{batchboost} does this by adding new samples with $\gamma$ ratio to 231 | mixed ones. 232 | % 233 | An interesting observation is that once we mix samples, they stay in the learning 234 | process until the end (at each batch continuously). 235 | When applying a mixing it has only three options: (a) new sample with new sample 236 | (b) new sample with previously mixed sample (c) previously mixed sample with 237 | previously mixed sample. Pairing method cannot choose only one option for all samples 238 | because of non-zero $\gamma$ ratio. 239 | 240 | To maintain compatibility with the mixup 241 | algorithm, it chooses new $\lambda$ when constructing the batch. 242 | That is why past samples have less and less significance in training process, 243 | until they disappear completely (figure \ref{fig:feeding}). 
244 | 245 | \begin{figure}[H] 246 | \hspace{0.5cm} 247 | \includegraphics[width=\linewidth]{figure-feeding} 248 | \caption{Orange squares indicate how information is propagated between 249 | batches in the \emph{batchboost} method.} 250 | \label{fig:feeding} 251 | \end{figure} 252 | 253 | We found that for problems by nature not linear, for which the mixup did poorly, 254 | it was caused by the fact that model learned at the time when very low/high 255 | $\lambda$ was assigned (i.e. model learned on a single example, without mixing). 256 | % 257 | In \emph{batchboost} it doesn't look much better. However, \emph{half-batch} 258 | contains new information, and \emph{feed-batch} has examples mixed not randomly but 259 | by pairing method. With these clues, the optimizer can slightly improve the direction of 260 | optimization by better interpreting loss landscape. 261 | 262 | \section{Results} 263 | \label{sec:results} 264 | 265 | We focused on the current state-of-the-art \emph{mixup}. The architectures we 266 | used were \emph{EfficientNet-b0}\cite{tan2019efficientnet} and 267 | \emph{ResNet100k}\cite{DBLP:journals/corr/HeZRS15} (having only 100k 268 | parameters from DAWNBench\cite{coleman2017dawnbench}). The problems we evaluated are CIFAR-10 and 269 | Fashion-MNIST. 270 | % 271 | We intend to update this work with more detailed comparisons and experiments, 272 | test on different architectures and parameters. The most interesting 273 | issue which requires additional research is adversarial attacks. 
281 | 282 | \begin{figure}[H] 283 | \centering 284 | \begin{minipage}{.3\textwidth} 285 | \hspace{-0.65cm} 286 | \includegraphics[totalheight=5.6cm]{figure-1-test-accuracy-without-augment} 287 | \end{minipage} 288 | \begin{minipage}{.65\textwidth}\vspace{-0.00cm}\hspace{0.865cm} 289 | \includegraphics[totalheight=5.6cm]{figure-1-loss-train-without-augment} 290 | \end{minipage}% 291 | \caption{Evaluation on \emph{CIFAR-10}, for \emph{EfficientNet-b0} and 292 | \emph{SGD(weight-decay=10e-4, lr=0.1)} (as 293 | recommended in the \emph{mixup} research), same parameters for each model. 294 | As a result, the models behave differently, although they differ only in the 295 | method of constructing the batch.} 296 | \label{fig:under} 297 | \end{figure} 298 | 299 | Another problem that \emph{mixup} often encounters is very unstable loss 300 | landscape. Therefore, without a well-chosen weight decay, it cannot stabilize in 301 | minimums. To solve this problem, we tune the optimizer parameters 302 | for \emph{mixup}, after that it could achieve a similar result to 303 | \emph{batchboost} (figure \ref{fig:over}). 304 | 305 | \subsection{Overfitting (comparison to \emph{mixup})} 306 | \label{sec:over} 307 | 308 | The most important observation of this section is that \emph{batchboost} retains 309 | the properties of the \emph{mixup} (similarly to \emph{SamplePairing} or 310 | \emph{BC learning}). It protects against overfitting, having slightly better results. 
311 | 312 | \begin{figure}[H] 313 | \centering 314 | \begin{minipage}{.3\textwidth} 315 | \hspace{-0.65cm} 316 | \includegraphics[totalheight=5.6cm]{figure-2-train-accuracy-with-augment} 317 | \end{minipage} 318 | \begin{minipage}{.65\textwidth}\vspace{-0.00cm}\hspace{0.865cm} 319 | \includegraphics[totalheight=5.6cm]{figure-2-test-accuracy-with-augment} 320 | \end{minipage}% 321 | \caption{\emph{batchboost} is a new state-of-the-art because it is slightly 322 | better than \emph{mixup} (here \emph{mixup} has been tuned for best 323 | parameters, \emph{batchboost} uses configuration from figure \ref{fig:under}).} 324 | \label{fig:over} 325 | \end{figure} 326 | 327 | The only difference is that the $\alpha$ coefficient from the original 328 | \emph{mixup} is weakened. 329 | 330 | \subsection{Accelerating Training \& Adversarial Attacks} 331 | \label{sec:attacks} 332 | 333 | In the early stages, it learns faster than a classic \emph{mixup}. 334 | The difference becomes significant when working on very small datasets, e.g. 335 | medical challenges on Kaggle. In this work, we have limited \emph{Fashion-MNIST} 336 | to 64 examples and compared \emph{batchboost} to the classic model and \emph{SamplePairing}. The results were better by 5\%. 337 | When the model performs well on small datasets, it means that training 338 | generalizes the problem. In figure \ref{fig:multipass} we present samples 339 | generated during this process. 340 | 341 | \begin{figure}[H] 342 | \centering 343 | \includegraphics[width=10.5cm]{figure-multipass} 344 | \caption{More than two samples have been mixed.} 345 | \label{fig:multipass} 346 | \end{figure} 347 | 348 | We tried to modify \emph{batchboost} to generate samples similar to those of 349 | adversarial attacks (by uniformly mixing all samples backward with some Gaussian 350 | noise) without any reasonable results. 
class LatexEventHandler(FileSystemEventHandler):
    """Rebuild a LaTeX source with ``pdflatex`` when it changes on disk.

    The handler is meant to be scheduled on a watchdog ``Observer``. Every
    filesystem event is routed through :meth:`on_any_event`, which filters
    out everything that is not a content change of an existing input file
    and hands the rest to :meth:`compile`.
    """

    # Flags forwarded to pdflatex (split on whitespace before use).
    LATEX_FLAGS = "-interaction nonstopmode -halt-on-error -file-line-error"
    # File name suffixes that trigger a rebuild.
    FILETYPE_INPUT = [".tex"]
    # Event types treated as "the file content changed". Deletions and moves
    # leave no file at ``src_path`` to compile.
    # NOTE(review): some watchdog versions also report "opened"/"closed"
    # events; reacting to those could re-trigger a build when pdflatex itself
    # reads the source -- confirm against the installed watchdog version.
    REBUILD_EVENTS = ("created", "modified")

    def on_any_event(self, event):
        """Compile ``event.src_path`` if the event is a change to an input file.

        Args:
            event: watchdog filesystem event; ``src_path``, ``event_type``
                and ``is_directory`` are read.
        """
        if getattr(event, "is_directory", False):
            return
        if getattr(event, "event_type", None) not in self.REBUILD_EVENTS:
            return
        # str.endswith accepts a tuple, so one call covers every suffix and
        # the file is compiled at most once per event.
        if not event.src_path.endswith(tuple(self.FILETYPE_INPUT)):
            return
        # The file may already be gone again (editor temp files, races);
        # running pdflatex on a missing input only produces an emergency stop.
        if not os.path.isfile(event.src_path):
            return
        self.compile(event)

    def compile(self, event):
        """Run pdflatex on ``event.src_path`` and print the source's MD5.

        The command is passed to the OS as an argument list instead of a
        shell string, so paths containing spaces or shell metacharacters are
        handled literally. The MD5 is only a change fingerprint for the
        console output, not a security measure.

        Args:
            event: watchdog filesystem event whose ``src_path`` is compiled.
        """
        # Imported locally so this class does not depend on edits to the
        # module's import header.
        import hashlib
        import shlex
        import subprocess

        source = event.src_path
        print("=== LATEX ===")
        command = ["pdflatex", *shlex.split(self.LATEX_FLAGS), source]
        try:
            # A failing build must not stop the watcher, hence check=False.
            subprocess.run(command, check=False)
        except FileNotFoundError:
            print("pdflatex executable not found on PATH")
            return

        try:
            with open(source, "rb") as handle:
                digest = hashlib.md5(handle.read()).hexdigest()
        except OSError as err:
            # The source vanished or became unreadable after the build.
            print(f"cannot checksum {source}: {err}")
            return
        print(f"MD5 ({source}) = {digest}")
26 | 27 | observer = Observer() 28 | observer.schedule(LatexEventHandler(), path, recursive=True) 29 | observer.start() 30 | 31 | try: 32 | while True: 33 | time.sleep(1) 34 | except KeyboardInterrupt: 35 | observer.stop() 36 | observer.join() 37 | -------------------------------------------------------------------------------- /paper/figure-1-loss-train-without-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-1-loss-train-without-augment.pdf -------------------------------------------------------------------------------- /paper/figure-1-test-accuracy-without-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-1-test-accuracy-without-augment.pdf -------------------------------------------------------------------------------- /paper/figure-2-test-accuracy-with-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-2-test-accuracy-with-augment.pdf -------------------------------------------------------------------------------- /paper/figure-2-train-accuracy-with-augment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-2-train-accuracy-with-augment.pdf -------------------------------------------------------------------------------- /paper/figure-abstract.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-abstract.pdf -------------------------------------------------------------------------------- /paper/figure-feeding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-feeding.pdf -------------------------------------------------------------------------------- /paper/figure-multipass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/paper/figure-multipass.png -------------------------------------------------------------------------------- /paper/notes_v2.md: -------------------------------------------------------------------------------- 1 | # Research Brief (Brief intro of the research (50 + words)) 2 | 3 | Batchboost is a simple technique to accelerate ML model training by adaptively feeding mini-batches with artificial samples which are created by mixing two examples from previous step - in favor of pairing those that produce the difficult one. 4 | 5 | # What’s New (What’s new in this research?) 6 | 7 | In this research, we state the hypothesis that mixing many images together can 8 | be more effective than just two. To make it efficient, we propose a new method of 9 | creating mini-batches, where each sample from dataset is propagated with 10 | subsequent iterations with less and less importance until the end of learning 11 | process. 12 | 13 | # How It Works (How this research works?) 14 | 15 | Batchboost pipeline has three stages: 16 | (a) pairing: method of selecting two samples from previous step. 17 | (b) mixing: method of creating a new artificial example from two selected samples. 
18 | (c) feeding: constructing training mini-batch with created examples and new samples from dataset (concat with ratio γ). 19 | Note that a sample from the dataset propagates with subsequent iterations with less and less importance until the end of training. 20 | 21 | Our baseline implements pairing stage as sorting by sample error, where hardest examples are paired with easiest ones. Mixing stage 22 | merges two samples using mixup, λx1+(1−λ)x2. Feeding stage combines new samples with mixed ones at ratio 1:1 using concat. 23 | 24 | # Key Insights (What are the main takeaways from this research?) 25 | 26 | The results are promising. Batchboost has 0.5-3% better accuracy than the current state-of-the-art mixup regularization on CIFAR-10 (#10 place in https://paperswithcode.com/) & Fashion-MNIST. 27 | (we hope to see our method in action, for example, on Kaggle as a trick to slightly improve test accuracy) 28 | 29 | # Behind The Scenes (Any interesting ideas or research tips you - would like to share with our AI Community?) 30 | 31 | There is a lot to improve in data augmentation and regularization methods. 32 | 33 | # Anything else? (Bottlenecks and future trend?) 34 | 35 | An interesting topic for further research and discussion is the 36 | combination of batchboost and existing methods. 
37 | -------------------------------------------------------------------------------- /paper/references.bib: -------------------------------------------------------------------------------- 1 | @article{miyato2018virtual, 2 | title={Virtual adversarial training: a regularization method for supervised and semi-supervised learning}, 3 | author={Miyato, Takeru and Maeda, Shin-ichi and Koyama, Masanori and Ishii, Shin}, 4 | journal={IEEE transactions on pattern analysis and machine intelligence}, 5 | volume={41}, 6 | number={8}, 7 | pages={1979--1993}, 8 | year={2018}, 9 | publisher={IEEE} 10 | } 11 | 12 | @article{zhang2016understanding, 13 | title={Understanding deep learning requires rethinking generalization}, 14 | author={Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol}, 15 | journal={arXiv preprint arXiv:1611.03530}, 16 | year={2016} 17 | } 18 | 19 | @inproceedings{krogh1992simple, 20 | title={A simple weight decay can improve generalization}, 21 | author={Krogh, Anders and Hertz, John A}, 22 | booktitle={Advances in neural information processing systems}, 23 | pages={950--957}, 24 | year={1992} 25 | } 26 | 27 | @article{zhang2017mixup, 28 | title={mixup: Beyond empirical risk minimization}, 29 | author={Zhang, Hongyi and Cisse, Moustapha and Dauphin, Yann N and Lopez-Paz, David}, 30 | journal={arXiv preprint arXiv:1710.09412}, 31 | year={2017} 32 | } 33 | 34 | @article{ioffe2015batch, 35 | title={Batch normalization: Accelerating deep network training by reducing internal covariate shift}, 36 | author={Ioffe, Sergey and Szegedy, Christian}, 37 | journal={arXiv preprint arXiv:1502.03167}, 38 | year={2015} 39 | } 40 | 41 | @inproceedings{szegedy2016rethinking, 42 | title={Rethinking the inception architecture for computer vision}, 43 | author={Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jon and Wojna, Zbigniew}, 44 | booktitle={Proceedings of the IEEE conference on computer vision and pattern 
recognition}, 45 | pages={2818--2826}, 46 | year={2016} 47 | } 48 | 49 | @article{coleman2017dawnbench, 50 | title={Dawnbench: An end-to-end deep learning benchmark and competition}, 51 | author={Coleman, Cody and Narayanan, Deepak and Kang, Daniel and Zhao, Tian and Zhang, Jian and Nardi, Luigi and Bailis, Peter and Olukotun, Kunle and R{\'e}, Chris and Zaharia, Matei}, 52 | journal={Training}, 53 | volume={100}, 54 | number={101}, 55 | pages={102}, 56 | year={2017} 57 | } 58 | 59 | @article{inoue2018data, 60 | title={Data augmentation by pairing samples for images classification}, 61 | author={Inoue, Hiroshi}, 62 | journal={arXiv preprint arXiv:1801.02929}, 63 | year={2018} 64 | } 65 | 66 | @inproceedings{tokozume2018between, 67 | title={Between-class learning for image classification}, 68 | author={Tokozume, Yuji and Ushiku, Yoshitaka and Harada, Tatsuya}, 69 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 70 | pages={5486--5494}, 71 | year={2018} 72 | } 73 | 74 | @article{xiao2017, 75 | author = {Han Xiao and Kashif Rasul and Roland Vollgraf}, 76 | title = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms}, 77 | date = {2017-08-28}, 78 | year = {2017}, 79 | eprintclass = {cs.LG}, 80 | eprinttype = {arXiv}, 81 | eprint = {cs.LG/1708.07747}, 82 | } 83 | 84 | @article{krizhevsky2009learning, 85 | title={Learning multiple layers of features from tiny images}, 86 | author={Krizhevsky, Alex and Hinton, Geoffrey and others}, 87 | year={2009}, 88 | publisher={Citeseer} 89 | } 90 | 91 | @inproceedings{shaham2019singan, 92 | title={Singan: Learning a generative model from a single natural image}, 93 | author={Shaham, Tamar Rott and Dekel, Tali and Michaeli, Tomer}, 94 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 95 | pages={4570--4580}, 96 | year={2019} 97 | } 98 | 99 | @article{tan2019efficientnet, 100 | title={Efficientnet: Rethinking model scaling for 
convolutional neural networks}, 101 | author={Tan, Mingxing and Le, Quoc V}, 102 | journal={arXiv preprint arXiv:1905.11946}, 103 | year={2019} 104 | } 105 | 106 | @article{DBLP:journals/corr/HeZRS15, 107 | author = {Kaiming He and 108 | Xiangyu Zhang and 109 | Shaoqing Ren and 110 | Jian Sun}, 111 | title = {Deep Residual Learning for Image Recognition}, 112 | journal = {CoRR}, 113 | volume = {abs/1512.03385}, 114 | year = {2015}, 115 | url = {http://arxiv.org/abs/1512.03385}, 116 | archivePrefix = {arXiv}, 117 | eprint = {1512.03385}, 118 | timestamp = {Wed, 17 Apr 2019 17:23:45 +0200}, 119 | biburl = {https://dblp.org/rec/bib/journals/corr/HeZRS15}, 120 | bibsource = {dblp computer science bibliography, https://dblp.org} 121 | } 122 | -------------------------------------------------------------------------------- /paper/texput.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019) (preloaded format=pdflatex 2019.10.12) 21 JAN 2020 02:38 2 | entering extended mode 3 | restricted \write18 enabled. 4 | file:line:error style messages enabled. 5 | %&-line parsing enabled. 6 | **./batchboost_polish.tex 7 | 8 | ! Emergency stop. 9 | <*> ./batchboost_polish.tex 10 | 11 | Here is how much of TeX's memory you used: 12 | 3 strings out of 492616 13 | 112 string characters out of 6129480 14 | 57117 words of memory out of 5000000 15 | 4025 multiletter control sequences out of 15000+600000 16 | 3640 words of font info for 14 fonts, out of 8000000 for 9000 17 | 1141 hyphenation exceptions out of 8191 18 | 0i,0n,0p,1b,6s stack positions out of 5000i,500n,10000p,200000b,80000s 19 | 20 | ! ==> Fatal error occurred, no output PDF file produced! 
"""plot.py -- draw the training/test curves stored in ``results/*.csv``.

Each figure is written to ``figures/figure-<prefix>-<slug>.pdf``.

Numeric suffix of the log files that are plotted below:
    1 - without augment, alpha=1.0
    2 - with augment, alpha=1.0
    3 - without augment, alpha=0.4
    4 - with augment, alpha=0.4
"""

import csv
import re

import matplotlib.pyplot as plt
import numpy as np
import unidecode

# Numeric payload of one logged cell: either a bare number ("0.0") or a
# printed tensor such as "tensor(1.2345, device='cuda:0')" / "tensor(33.)".
_LOGGED_VALUE = re.compile(r"^\s*(?:tensor\()?\s*([^,)\s]+)")


def slugify(text):
    """Return a lower-case, dash-separated slug of *text*.

    The text is passed through ``unidecode`` and lower-cased, then every run
    of non-word characters or underscores is collapsed into a single ``-``.
    A trailing dash is dropped.  An empty result is returned as ``""``
    (the former ``text[-1]`` probe raised ``IndexError`` in that case).
    """
    text = unidecode.unidecode(text).lower()
    text = re.sub(r"[\W_]+", "-", text)
    # The substitution leaves at most one trailing dash, so rstrip() is
    # equivalent to the old single-character trim for non-empty input.
    return text.rstrip("-")


class figure:
    """Context manager: reset and title the shared axes, save them on exit.

    Args:
        name: figure title; its slug also becomes part of the file name.
        prefix: optional value inserted into the file name
            (``figure-<prefix>-<slug>.pdf``).
    """

    def __init__(self, name=None, prefix=None):
        self.name = name
        self.prefix = prefix

    def __enter__(self):
        print("--- FIGURE ---")
        print(f"`{self.name}`")
        # All figures reuse the module-level axes, so clear them first.
        plt.cla()
        plt.title(self.name)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            # Do not overwrite an existing PDF with a half-drawn figure;
            # returning False lets the exception propagate unchanged.
            return False
        print("--- SAVE ---")
        figure_prefix = "figure-"
        if self.prefix is not None:
            figure_prefix += f"{str(self.prefix)}-"
        fig.savefig(f"figures/{figure_prefix}{slugify(self.name)}.pdf")
        return False


# (1) better style
plt.style.use(["science", "ieee"])

# One figure/axes pair shared by every plot below (see ``figure``).
fig, ax = plt.subplots()
ax.autoscale(tight=True)


def read_file(path="log_EfficientNet_batchboost_1", col=5):
    """Read one column of ``results/<path>.csv``.

    The first row is treated as a header and skipped.  Column 0 is parsed as
    an integer (the epoch); column *col* may hold a bare number or a printed
    tensor such as ``tensor(1.23, device='cuda:0')`` -- any device or extra
    tensor argument is accepted, not only ``cuda:0``.

    Args:
        path: log name relative to ``results/``, without the ``.csv`` suffix.
        col: index of the column to read (5 is "test acc" in these logs).

    Returns:
        ``(X, Y)``: list of epochs and list of float values.

    Raises:
        ValueError: if a cell does not contain a parsable number.
    """
    X, Y = [], []
    # newline="" is what the csv module expects for files it parses.
    with open(f"results/{path}.csv", "r", newline="") as csvfile:
        rows = csv.reader(csvfile, delimiter=",")
        next(rows, None)  # header
        for row in rows:
            if not row:  # tolerate blank lines, e.g. at the end of the file
                continue
            cell = row[col]
            match = _LOGGED_VALUE.match(cell)
            if match is None:
                raise ValueError(f"cannot parse logged value: {cell!r}")
            X.append(int(row[0]))
            Y.append(float(match.group(1)))
    return X, Y


def fill_between(X, Y, color="blue", alpha=0.05, factor=1):
    """Shade the band ``Y +/- factor * std(Y)`` on the shared axes.

    ``Y`` is converted to a float array first, so a plain list works too
    (``list + float`` used to raise ``TypeError``).

    NOTE(review): sigma is the standard deviation of the values of ``Y``
    itself along axis 0 (a single scalar for a 1-D curve), not a per-epoch
    deviation between runs -- confirm that this is the intended band.
    """
    values = np.asarray(Y, dtype=float)
    sigma = factor * values.std(axis=0)
    ax.fill_between(X,
                    values + sigma,
                    values - sigma,
                    facecolor=color,
                    alpha=alpha)


def _plot_log(log, label, color, col=5):
    """Plot column *col* of ``results/<log>.csv`` and return ``(x, y)``."""
    x, y = read_file(log, col=col)
    plt.plot(x, y, label=label, color=color)
    return x, y


def _shade_mean(x, y_a, y_b, color, factor):
    """Shade a band around the element-wise mean of two curves."""
    fill_between(x,
                 np.mean([y_a, y_b], axis=0),
                 color=color,
                 factor=factor,
                 alpha=0.1)


# Legend labels use the method's actual name, "batchboost" (they used to
# read "boostbatch").

### FIGURE (1): underfitting ###

with figure("test accuracy (without augment)", prefix=1):
    _plot_log("decay=1e-4/log_EfficientNet_batchboost_1",
              "batchboost (alpha=1.0)", "darkred")
    _plot_log("decay=1e-4/log_EfficientNet_batchboost_3",
              "batchboost (alpha=0.4)", "red")
    _plot_log("decay=1e-4/log_EfficientNet_mixup_1",
              "mixup (alpha=1.0)", "darkblue")
    _plot_log("decay=1e-4/log_EfficientNet_mixup_3",
              "mixup (alpha=0.4)", "blue")
    _plot_log("decay=1e-4/log_EfficientNet_baseline_13",
              "baseline", "black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    # Legend disabled for this panel:
    # plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

with figure("loss train (without augment)", prefix=1):
    x_boost, y_boost_a = _plot_log(
        "decay=1e-4/log_EfficientNet_batchboost_1",
        "batchboost (alpha=1.0)", "darkred", col=1)
    _, y_boost_b = _plot_log(
        "decay=1e-4/log_EfficientNet_batchboost_3",
        "batchboost (alpha=0.4)", "red", col=1)
    _shade_mean(x_boost, y_boost_a, y_boost_b, color="red", factor=1)

    x_mixup, y_mixup_a = _plot_log(
        "decay=1e-4/log_EfficientNet_mixup_1",
        "mixup (alpha=1.0)", "darkblue", col=1)
    _, y_mixup_b = _plot_log(
        "decay=1e-4/log_EfficientNet_mixup_3",
        "mixup (alpha=0.4)", "blue", col=1)
    _shade_mean(x_mixup, y_mixup_a, y_mixup_b, color="blue", factor=1)

    _plot_log("decay=1e-4/log_EfficientNet_baseline_13",
              "baseline", "black", col=1)

    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

### FIGURE (2): overfitting (comparison to mixup) ###

with figure("test accuracy (with augment)", prefix=2):
    x_boost, y_boost_a = _plot_log(
        "decay=1e-5/log_EfficientNet_batchboost_2",
        "batchboost (alpha=1.0)", "darkred")
    _, y_boost_b = _plot_log(
        "decay=1e-5/log_EfficientNet_batchboost_4",
        "batchboost (alpha=0.4)", "red")
    _shade_mean(x_boost, y_boost_a, y_boost_b, color="red", factor=0.5)

    x_mixup, y_mixup_a = _plot_log(
        "decay=1e-5/log_EfficientNet_mixup_2",
        "mixup (alpha=1.0)", "darkblue")
    _, y_mixup_b = _plot_log(
        "decay=1e-5/log_EfficientNet_mixup_4",
        "mixup (alpha=0.4)", "blue")
    _shade_mean(x_mixup, y_mixup_a, y_mixup_b, color="blue", factor=0.5)

    # Baseline curve disabled for this panel:
    # _plot_log("decay=1e-5/log_EfficientNet_baseline_24",
    #           "baseline", "black")

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))

with figure("train accuracy (with augment)", prefix=2):
    _plot_log("decay=1e-5/log_EfficientNet_batchboost_2",
              "batchboost (alpha=1.0)", "darkred", col=3)
    _plot_log("decay=1e-5/log_EfficientNet_batchboost_4",
              "batchboost (alpha=0.4)", "red", col=3)
    _plot_log("decay=1e-5/log_EfficientNet_mixup_2",
              "mixup (alpha=1.0)", "darkblue", col=3)
    _plot_log("decay=1e-5/log_EfficientNet_mixup_4",
              "mixup (alpha=0.4)", "blue", col=3)

    # Baseline curve disabled for this panel:
    # _plot_log("decay=1e-5/log_EfficientNet_baseline_24",
    #           "baseline", "black", col=3)

    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    # The legend call stays disabled for this panel as well:
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5)) 173 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_baseline_13.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(2.6055, device='cuda:0')",0.0,tensor(19.8040),"tensor(2.1569, device='cuda:0')",tensor(18.9400) 3 | 1,"tensor(1.8451, device='cuda:0')",0.0,tensor(31.3720),"tensor(2.0787, device='cuda:0')",tensor(25.8400) 4 | 2,"tensor(1.7183, device='cuda:0')",0.0,tensor(36.4540),"tensor(1.8010, device='cuda:0')",tensor(35.2200) 5 | 3,"tensor(1.5847, device='cuda:0')",0.0,tensor(41.5820),"tensor(1.7851, device='cuda:0')",tensor(35.3800) 6 | 4,"tensor(1.5116, device='cuda:0')",0.0,tensor(44.7700),"tensor(1.9935, device='cuda:0')",tensor(31.7000) 7 | 5,"tensor(1.4671, device='cuda:0')",0.0,tensor(46.4440),"tensor(1.7445, device='cuda:0')",tensor(36.8900) 8 | 6,"tensor(1.4378, device='cuda:0')",0.0,tensor(47.5760),"tensor(1.5938, device='cuda:0')",tensor(43.9800) 9 | 7,"tensor(1.3857, device='cuda:0')",0.0,tensor(49.7040),"tensor(1.4793, device='cuda:0')",tensor(46.5200) 10 | 8,"tensor(1.3346, device='cuda:0')",0.0,tensor(51.8020),"tensor(1.3809, device='cuda:0')",tensor(51.0100) 11 | 9,"tensor(1.2833, device='cuda:0')",0.0,tensor(53.8740),"tensor(1.5019, device='cuda:0')",tensor(45.8600) 12 | 10,"tensor(1.2222, device='cuda:0')",0.0,tensor(56.4380),"tensor(1.4066, device='cuda:0')",tensor(49.5100) 13 | 11,"tensor(1.2230, device='cuda:0')",0.0,tensor(56.4160),"tensor(1.2856, device='cuda:0')",tensor(54.3400) 14 | 12,"tensor(1.2277, device='cuda:0')",0.0,tensor(56.4400),"tensor(1.6038, device='cuda:0')",tensor(46.1800) 15 | 13,"tensor(1.1926, device='cuda:0')",0.0,tensor(57.5240),"tensor(1.2848, device='cuda:0')",tensor(54.5300) 16 | 14,"tensor(1.1133, device='cuda:0')",0.0,tensor(60.5220),"tensor(1.4080, device='cuda:0')",tensor(51.5900) 
17 | 15,"tensor(1.1373, device='cuda:0')",0.0,tensor(59.6840),"tensor(1.4613, device='cuda:0')",tensor(51.1200) 18 | 16,"tensor(1.0855, device='cuda:0')",0.0,tensor(61.9180),"tensor(1.3164, device='cuda:0')",tensor(55.0600) 19 | 17,"tensor(0.9908, device='cuda:0')",0.0,tensor(65.1260),"tensor(1.1663, device='cuda:0')",tensor(59.6400) 20 | 18,"tensor(0.9379, device='cuda:0')",0.0,tensor(67.0500),"tensor(1.1016, device='cuda:0')",tensor(61.4500) 21 | 19,"tensor(0.8914, device='cuda:0')",0.0,tensor(68.8620),"tensor(1.3252, device='cuda:0')",tensor(55.9400) 22 | 20,"tensor(0.8710, device='cuda:0')",0.0,tensor(69.7060),"tensor(0.9524, device='cuda:0')",tensor(67.3000) 23 | 21,"tensor(0.8302, device='cuda:0')",0.0,tensor(71.0920),"tensor(0.9558, device='cuda:0')",tensor(66.6600) 24 | 22,"tensor(0.7480, device='cuda:0')",0.0,tensor(73.9720),"tensor(0.8940, device='cuda:0')",tensor(68.3900) 25 | 23,"tensor(0.7158, device='cuda:0')",0.0,tensor(75.1660),"tensor(0.8884, device='cuda:0')",tensor(69.3300) 26 | 24,"tensor(0.6930, device='cuda:0')",0.0,tensor(75.9360),"tensor(0.8584, device='cuda:0')",tensor(70.1800) 27 | 25,"tensor(0.6635, device='cuda:0')",0.0,tensor(77.0360),"tensor(0.9007, device='cuda:0')",tensor(69.3000) 28 | 26,"tensor(0.6355, device='cuda:0')",0.0,tensor(77.8820),"tensor(0.8464, device='cuda:0')",tensor(71.1100) 29 | 27,"tensor(0.6146, device='cuda:0')",0.0,tensor(78.8440),"tensor(0.8866, device='cuda:0')",tensor(70.1600) 30 | 28,"tensor(0.6022, device='cuda:0')",0.0,tensor(79.0920),"tensor(0.8468, device='cuda:0')",tensor(70.8300) 31 | 29,"tensor(0.5818, device='cuda:0')",0.0,tensor(79.8040),"tensor(0.8185, device='cuda:0')",tensor(71.9300) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_baseline_24.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(2.3113, 
device='cuda:0')",0.0,tensor(21.0420),"tensor(2.1255, device='cuda:0')",tensor(23.4300) 3 | 1,"tensor(1.9387, device='cuda:0')",0.0,tensor(28.4900),"tensor(1.9492, device='cuda:0')",tensor(28.3900) 4 | 2,"tensor(1.8972, device='cuda:0')",0.0,tensor(29.2980),"tensor(2.0775, device='cuda:0')",tensor(24.6300) 5 | 3,"tensor(1.8820, device='cuda:0')",0.0,tensor(30.2560),"tensor(1.8191, device='cuda:0')",tensor(33.) 6 | 4,"tensor(1.8923, device='cuda:0')",0.0,tensor(30.0400),"tensor(2.2398, device='cuda:0')",tensor(21.7100) 7 | 5,"tensor(1.8451, device='cuda:0')",0.0,tensor(31.4360),"tensor(1.9829, device='cuda:0')",tensor(27.5600) 8 | 6,"tensor(1.9566, device='cuda:0')",0.0,tensor(27.0860),"tensor(2.0375, device='cuda:0')",tensor(24.5400) 9 | 7,"tensor(1.9607, device='cuda:0')",0.0,tensor(26.0460),"tensor(1.9464, device='cuda:0')",tensor(27.0500) 10 | 8,"tensor(1.9515, device='cuda:0')",0.0,tensor(26.9960),"tensor(2.2721, device='cuda:0')",tensor(16.3300) 11 | 9,"tensor(1.9365, device='cuda:0')",0.0,tensor(26.5980),"tensor(1.8653, device='cuda:0')",tensor(29.7700) 12 | 10,"tensor(1.8948, device='cuda:0')",0.0,tensor(28.5580),"tensor(2.0207, device='cuda:0')",tensor(24.9000) 13 | 11,"tensor(1.9236, device='cuda:0')",0.0,tensor(27.8180),"tensor(2.0142, device='cuda:0')",tensor(27.8200) 14 | 12,"tensor(1.8476, device='cuda:0')",0.0,tensor(31.1720),"tensor(1.8535, device='cuda:0')",tensor(29.2100) 15 | 13,"tensor(1.9081, device='cuda:0')",0.0,tensor(27.9320),"tensor(1.9645, device='cuda:0')",tensor(28.5600) 16 | 14,"tensor(1.7877, device='cuda:0')",0.0,tensor(33.2980),"tensor(2.0359, device='cuda:0')",tensor(25.7000) 17 | 15,"tensor(1.9540, device='cuda:0')",0.0,tensor(26.3880),"tensor(2.0166, device='cuda:0')",tensor(23.4300) 18 | 16,"tensor(2.0129, device='cuda:0')",0.0,tensor(22.7800),"tensor(4.3970, device='cuda:0')",tensor(11.3500) 19 | 17,"tensor(1.8895, device='cuda:0')",0.0,tensor(27.6220),"tensor(1.9178, device='cuda:0')",tensor(29.3400) 20 | 18,"tensor(1.7812, 
device='cuda:0')",0.0,tensor(32.4420),"tensor(1.8813, device='cuda:0')",tensor(29.7900) 21 | 19,"tensor(1.7666, device='cuda:0')",0.0,tensor(33.9020),"tensor(1.9696, device='cuda:0')",tensor(27.2200) 22 | 20,"tensor(1.7717, device='cuda:0')",0.0,tensor(33.8160),"tensor(1.9529, device='cuda:0')",tensor(28.1800) 23 | 21,"tensor(1.7940, device='cuda:0')",0.0,tensor(33.0480),"tensor(2.6551, device='cuda:0')",tensor(18.9900) 24 | 22,"tensor(1.8595, device='cuda:0')",0.0,tensor(30.2860),"tensor(1.8495, device='cuda:0')",tensor(32.4800) 25 | 23,"tensor(1.7393, device='cuda:0')",0.0,tensor(35.3400),"tensor(1.9325, device='cuda:0')",tensor(30.7600) 26 | 24,"tensor(1.8225, device='cuda:0')",0.0,tensor(32.5100),"tensor(2.7612, device='cuda:0')",tensor(16.9500) 27 | 25,"tensor(1.7819, device='cuda:0')",0.0,tensor(33.8340),"tensor(2.0891, device='cuda:0')",tensor(28.2000) 28 | 26,"tensor(1.7530, device='cuda:0')",0.0,tensor(35.2620),"tensor(3.0886, device='cuda:0')",tensor(15.7500) 29 | 27,"tensor(1.7344, device='cuda:0')",0.0,tensor(35.9620),"tensor(3.1807, device='cuda:0')",tensor(17.5200) 30 | 28,"tensor(1.7010, device='cuda:0')",0.0,tensor(36.9760),"tensor(1.8512, device='cuda:0')",tensor(32.6100) 31 | 29,"tensor(1.5543, device='cuda:0')",0.0,tensor(42.4600),"tensor(1.6730, device='cuda:0')",tensor(38.7000) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_batchboost_1.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.8619, device='cuda:0')",0.0,tensor(33.8036),"tensor(1.3804, device='cuda:0')",tensor(51.5800) 3 | 1,"tensor(1.4982, device='cuda:0')",0.0,tensor(47.6485),"tensor(1.2721, device='cuda:0')",tensor(56.7600) 4 | 2,"tensor(1.3898, device='cuda:0')",0.0,tensor(52.0369),"tensor(1.1117, device='cuda:0')",tensor(64.9300) 5 | 3,"tensor(1.3058, 
device='cuda:0')",0.0,tensor(55.1306),"tensor(0.9486, device='cuda:0')",tensor(71.2200) 6 | 4,"tensor(1.2607, device='cuda:0')",0.0,tensor(57.0095),"tensor(1.0737, device='cuda:0')",tensor(65.9600) 7 | 5,"tensor(1.2342, device='cuda:0')",0.0,tensor(57.9863),"tensor(1.0367, device='cuda:0')",tensor(67.0500) 8 | 6,"tensor(1.2004, device='cuda:0')",0.0,tensor(59.0677),"tensor(1.0099, device='cuda:0')",tensor(68.6700) 9 | 7,"tensor(1.1755, device='cuda:0')",0.0,tensor(59.9862),"tensor(0.8343, device='cuda:0')",tensor(75.2200) 10 | 8,"tensor(1.1527, device='cuda:0')",0.0,tensor(61.4485),"tensor(0.9027, device='cuda:0')",tensor(72.7400) 11 | 9,"tensor(1.1438, device='cuda:0')",0.0,tensor(61.4357),"tensor(0.8255, device='cuda:0')",tensor(75.1500) 12 | 10,"tensor(1.1069, device='cuda:0')",0.0,tensor(63.1542),"tensor(0.8173, device='cuda:0')",tensor(74.4400) 13 | 11,"tensor(1.1128, device='cuda:0')",0.0,tensor(62.5874),"tensor(0.8894, device='cuda:0')",tensor(74.8200) 14 | 12,"tensor(1.1052, device='cuda:0')",0.0,tensor(63.1480),"tensor(0.7840, device='cuda:0')",tensor(76.5000) 15 | 13,"tensor(1.0902, device='cuda:0')",0.0,tensor(63.5684),"tensor(0.9525, device='cuda:0')",tensor(74.1600) 16 | 14,"tensor(1.0729, device='cuda:0')",0.0,tensor(64.0899),"tensor(0.7787, device='cuda:0')",tensor(75.4700) 17 | 15,"tensor(1.0845, device='cuda:0')",0.0,tensor(63.7330),"tensor(0.7887, device='cuda:0')",tensor(75.8800) 18 | 16,"tensor(1.0568, device='cuda:0')",0.0,tensor(64.9602),"tensor(0.8254, device='cuda:0')",tensor(77.1600) 19 | 17,"tensor(1.0605, device='cuda:0')",0.0,tensor(64.6255),"tensor(0.7816, device='cuda:0')",tensor(75.3900) 20 | 18,"tensor(1.0434, device='cuda:0')",0.0,tensor(65.2123),"tensor(0.8675, device='cuda:0')",tensor(74.2000) 21 | 19,"tensor(1.0560, device='cuda:0')",0.0,tensor(64.7336),"tensor(0.7365, device='cuda:0')",tensor(77.9200) 22 | 20,"tensor(1.0462, device='cuda:0')",0.0,tensor(65.3066),"tensor(0.7319, device='cuda:0')",tensor(77.6900) 23 | 
21,"tensor(1.0569, device='cuda:0')",0.0,tensor(64.3693),"tensor(0.8474, device='cuda:0')",tensor(73.5000) 24 | 22,"tensor(1.0264, device='cuda:0')",0.0,tensor(65.7328),"tensor(0.7160, device='cuda:0')",tensor(78.1200) 25 | 23,"tensor(1.0208, device='cuda:0')",0.0,tensor(66.2139),"tensor(0.7352, device='cuda:0')",tensor(76.9700) 26 | 24,"tensor(1.0331, device='cuda:0')",0.0,tensor(65.7051),"tensor(0.7542, device='cuda:0')",tensor(76.3700) 27 | 25,"tensor(1.0031, device='cuda:0')",0.0,tensor(66.7205),"tensor(0.7212, device='cuda:0')",tensor(77.5600) 28 | 26,"tensor(1.0169, device='cuda:0')",0.0,tensor(66.5317),"tensor(0.7610, device='cuda:0')",tensor(77.4400) 29 | 27,"tensor(1.0109, device='cuda:0')",0.0,tensor(66.3433),"tensor(0.8155, device='cuda:0')",tensor(74.9300) 30 | 28,"tensor(1.0128, device='cuda:0')",0.0,tensor(66.4252),"tensor(0.7375, device='cuda:0')",tensor(78.9300) 31 | 29,"tensor(1.0011, device='cuda:0')",0.0,tensor(67.0343),"tensor(0.7180, device='cuda:0')",tensor(78.8500) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_batchboost_2.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.7333, device='cuda:0')",0.0,tensor(39.3799),"tensor(1.1499, device='cuda:0')",tensor(61.7400) 3 | 1,"tensor(1.3994, device='cuda:0')",0.0,tensor(51.3792),"tensor(0.9294, device='cuda:0')",tensor(69.9400) 4 | 2,"tensor(1.3037, device='cuda:0')",0.0,tensor(55.3760),"tensor(0.8431, device='cuda:0')",tensor(72.7200) 5 | 3,"tensor(1.2557, device='cuda:0')",0.0,tensor(56.8880),"tensor(0.9415, device='cuda:0')",tensor(70.2800) 6 | 4,"tensor(1.2112, device='cuda:0')",0.0,tensor(58.3391),"tensor(0.8276, device='cuda:0')",tensor(74.7100) 7 | 5,"tensor(1.1966, device='cuda:0')",0.0,tensor(58.7091),"tensor(0.9025, device='cuda:0')",tensor(76.0500) 8 | 6,"tensor(1.1778, 
device='cuda:0')",0.0,tensor(59.5382),"tensor(0.8087, device='cuda:0')",tensor(76.6200) 9 | 7,"tensor(1.1527, device='cuda:0')",0.0,tensor(60.4451),"tensor(0.8012, device='cuda:0')",tensor(76.3000) 10 | 8,"tensor(1.1488, device='cuda:0')",0.0,tensor(60.5774),"tensor(0.7704, device='cuda:0')",tensor(77.2800) 11 | 9,"tensor(1.1375, device='cuda:0')",0.0,tensor(61.0278),"tensor(0.8168, device='cuda:0')",tensor(75.4900) 12 | 10,"tensor(1.1269, device='cuda:0')",0.0,tensor(61.5662),"tensor(0.6920, device='cuda:0')",tensor(79.1700) 13 | 11,"tensor(1.1198, device='cuda:0')",0.0,tensor(61.5416),"tensor(0.8677, device='cuda:0')",tensor(75.0100) 14 | 12,"tensor(1.1440, device='cuda:0')",0.0,tensor(60.5554),"tensor(0.8204, device='cuda:0')",tensor(77.2300) 15 | 13,"tensor(1.1188, device='cuda:0')",0.0,tensor(61.5085),"tensor(0.8043, device='cuda:0')",tensor(79.0300) 16 | 14,"tensor(1.1159, device='cuda:0')",0.0,tensor(61.5262),"tensor(0.7955, device='cuda:0')",tensor(78.3400) 17 | 15,"tensor(1.1178, device='cuda:0')",0.0,tensor(61.5627),"tensor(0.8294, device='cuda:0')",tensor(78.3200) 18 | 16,"tensor(1.1095, device='cuda:0')",0.0,tensor(61.8756),"tensor(0.7635, device='cuda:0')",tensor(80.1700) 19 | 17,"tensor(1.0935, device='cuda:0')",0.0,tensor(62.4646),"tensor(0.9610, device='cuda:0')",tensor(77.0600) 20 | 18,"tensor(1.0986, device='cuda:0')",0.0,tensor(62.2386),"tensor(0.7500, device='cuda:0')",tensor(78.7500) 21 | 19,"tensor(1.1026, device='cuda:0')",0.0,tensor(62.0449),"tensor(0.9191, device='cuda:0')",tensor(74.9000) 22 | 20,"tensor(1.1038, device='cuda:0')",0.0,tensor(61.9949),"tensor(0.7838, device='cuda:0')",tensor(80.7900) 23 | 21,"tensor(1.0884, device='cuda:0')",0.0,tensor(62.6618),"tensor(0.7176, device='cuda:0')",tensor(79.9900) 24 | 22,"tensor(1.0818, device='cuda:0')",0.0,tensor(63.2573),"tensor(0.7445, device='cuda:0')",tensor(80.0500) 25 | 23,"tensor(1.0898, device='cuda:0')",0.0,tensor(62.4544),"tensor(0.7628, device='cuda:0')",tensor(77.1100) 26 | 
24,"tensor(1.0846, device='cuda:0')",0.0,tensor(62.7730),"tensor(0.6846, device='cuda:0')",tensor(79.1600) 27 | 25,"tensor(1.0942, device='cuda:0')",0.0,tensor(62.2764),"tensor(0.7224, device='cuda:0')",tensor(82.3300) 28 | 26,"tensor(1.0587, device='cuda:0')",0.0,tensor(63.9383),"tensor(0.8021, device='cuda:0')",tensor(79.6800) 29 | 27,"tensor(1.0782, device='cuda:0')",0.0,tensor(62.7933),"tensor(0.6434, device='cuda:0')",tensor(81.9000) 30 | 28,"tensor(1.0600, device='cuda:0')",0.0,tensor(63.8398),"tensor(0.7467, device='cuda:0')",tensor(81.3600) 31 | 29,"tensor(1.0518, device='cuda:0')",0.0,tensor(64.3532),"tensor(0.6855, device='cuda:0')",tensor(81.1100) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_batchboost_3.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.8809, device='cuda:0')",0.0,tensor(34.8699),"tensor(1.3911, device='cuda:0')",tensor(52.6000) 3 | 1,"tensor(1.5065, device='cuda:0')",0.0,tensor(48.1545),"tensor(1.3242, device='cuda:0')",tensor(57.3100) 4 | 2,"tensor(1.3962, device='cuda:0')",0.0,tensor(52.7664),"tensor(1.1488, device='cuda:0')",tensor(62.4500) 5 | 3,"tensor(1.3444, device='cuda:0')",0.0,tensor(55.5993),"tensor(1.0894, device='cuda:0')",tensor(66.3500) 6 | 4,"tensor(1.2820, device='cuda:0')",0.0,tensor(57.0650),"tensor(1.2615, device='cuda:0')",tensor(57.5000) 7 | 5,"tensor(1.2676, device='cuda:0')",0.0,tensor(57.9336),"tensor(0.9373, device='cuda:0')",tensor(71.1500) 8 | 6,"tensor(1.2373, device='cuda:0')",0.0,tensor(58.6707),"tensor(1.1060, device='cuda:0')",tensor(68.0700) 9 | 7,"tensor(1.1939, device='cuda:0')",0.0,tensor(60.6067),"tensor(1.0972, device='cuda:0')",tensor(67.8900) 10 | 8,"tensor(1.1939, device='cuda:0')",0.0,tensor(60.5827),"tensor(0.9282, device='cuda:0')",tensor(73.8600) 11 | 9,"tensor(1.1518, 
device='cuda:0')",0.0,tensor(62.1907),"tensor(0.9273, device='cuda:0')",tensor(72.4700) 12 | 10,"tensor(1.1649, device='cuda:0')",0.0,tensor(61.7164),"tensor(0.9758, device='cuda:0')",tensor(69.7300) 13 | 11,"tensor(1.1431, device='cuda:0')",0.0,tensor(62.9341),"tensor(0.9097, device='cuda:0')",tensor(74.4200) 14 | 12,"tensor(1.1476, device='cuda:0')",0.0,tensor(62.7877),"tensor(0.9629, device='cuda:0')",tensor(73.8700) 15 | 13,"tensor(1.1407, device='cuda:0')",0.0,tensor(63.4019),"tensor(0.8294, device='cuda:0')",tensor(75.0800) 16 | 14,"tensor(1.1065, device='cuda:0')",0.0,tensor(64.3079),"tensor(1.1083, device='cuda:0')",tensor(65.9900) 17 | 15,"tensor(1.1262, device='cuda:0')",0.0,tensor(63.1819),"tensor(0.9628, device='cuda:0')",tensor(69.4500) 18 | 16,"tensor(1.1226, device='cuda:0')",0.0,tensor(64.0045),"tensor(0.8510, device='cuda:0')",tensor(77.3200) 19 | 17,"tensor(1.0906, device='cuda:0')",0.0,tensor(64.7494),"tensor(0.9986, device='cuda:0')",tensor(74.8900) 20 | 18,"tensor(1.1165, device='cuda:0')",0.0,tensor(63.7127),"tensor(0.8170, device='cuda:0')",tensor(76.5200) 21 | 19,"tensor(1.0888, device='cuda:0')",0.0,tensor(65.2943),"tensor(0.7733, device='cuda:0')",tensor(78.0600) 22 | 20,"tensor(1.0663, device='cuda:0')",0.0,tensor(65.6215),"tensor(0.9420, device='cuda:0')",tensor(75.6900) 23 | 21,"tensor(1.0590, device='cuda:0')",0.0,tensor(66.2072),"tensor(0.9275, device='cuda:0')",tensor(74.3500) 24 | 22,"tensor(1.0551, device='cuda:0')",0.0,tensor(66.5175),"tensor(0.7751, device='cuda:0')",tensor(76.9400) 25 | 23,"tensor(1.0443, device='cuda:0')",0.0,tensor(66.5285),"tensor(0.8900, device='cuda:0')",tensor(74.5400) 26 | 24,"tensor(1.0459, device='cuda:0')",0.0,tensor(67.2152),"tensor(0.8153, device='cuda:0')",tensor(76.8400) 27 | 25,"tensor(1.0383, device='cuda:0')",0.0,tensor(66.9208),"tensor(0.8578, device='cuda:0')",tensor(78.5000) 28 | 26,"tensor(1.0275, device='cuda:0')",0.0,tensor(66.9116),"tensor(0.8216, device='cuda:0')",tensor(76.6500) 29 | 
27,"tensor(1.0292, device='cuda:0')",0.0,tensor(67.0629),"tensor(0.8585, device='cuda:0')",tensor(75.7100) 30 | 28,"tensor(1.0197, device='cuda:0')",0.0,tensor(67.4291),"tensor(0.8267, device='cuda:0')",tensor(77.0400) 31 | 29,"tensor(1.0209, device='cuda:0')",0.0,tensor(67.4399),"tensor(0.8536, device='cuda:0')",tensor(77.8800) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_batchboost_4.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.8195, device='cuda:0')",0.0,tensor(37.0720),"tensor(1.2780, device='cuda:0')",tensor(57.6400) 3 | 1,"tensor(1.4345, device='cuda:0')",0.0,tensor(50.7976),"tensor(1.5002, device='cuda:0')",tensor(53.0400) 4 | 2,"tensor(1.3146, device='cuda:0')",0.0,tensor(55.4534),"tensor(1.0494, device='cuda:0')",tensor(69.7100) 5 | 3,"tensor(1.2690, device='cuda:0')",0.0,tensor(56.8110),"tensor(0.9850, device='cuda:0')",tensor(72.5800) 6 | 4,"tensor(1.2196, device='cuda:0')",0.0,tensor(58.6434),"tensor(0.9985, device='cuda:0')",tensor(75.2000) 7 | 5,"tensor(1.2061, device='cuda:0')",0.0,tensor(58.8863),"tensor(1.4348, device='cuda:0')",tensor(57.9500) 8 | 6,"tensor(1.1976, device='cuda:0')",0.0,tensor(59.3146),"tensor(1.0232, device='cuda:0')",tensor(72.1400) 9 | 7,"tensor(1.1794, device='cuda:0')",0.0,tensor(59.9166),"tensor(0.9893, device='cuda:0')",tensor(73.5000) 10 | 8,"tensor(1.1581, device='cuda:0')",0.0,tensor(60.9214),"tensor(1.0609, device='cuda:0')",tensor(73.5500) 11 | 9,"tensor(1.1470, device='cuda:0')",0.0,tensor(61.0844),"tensor(0.8182, device='cuda:0')",tensor(77.2100) 12 | 10,"tensor(1.1350, device='cuda:0')",0.0,tensor(61.7524),"tensor(0.9231, device='cuda:0')",tensor(76.4500) 13 | 11,"tensor(1.1393, device='cuda:0')",0.0,tensor(61.7179),"tensor(0.8716, device='cuda:0')",tensor(75.6600) 14 | 12,"tensor(1.1489, 
device='cuda:0')",0.0,tensor(60.7491),"tensor(0.9923, device='cuda:0')",tensor(73.5100) 15 | 13,"tensor(1.1502, device='cuda:0')",0.0,tensor(61.0766),"tensor(1.0965, device='cuda:0')",tensor(66.0500) 16 | 14,"tensor(1.1292, device='cuda:0')",0.0,tensor(61.5851),"tensor(0.9852, device='cuda:0')",tensor(76.9900) 17 | 15,"tensor(1.1075, device='cuda:0')",0.0,tensor(62.6161),"tensor(0.7420, device='cuda:0')",tensor(79.5800) 18 | 16,"tensor(1.1258, device='cuda:0')",0.0,tensor(62.1972),"tensor(1.0127, device='cuda:0')",tensor(74.4300) 19 | 17,"tensor(1.1076, device='cuda:0')",0.0,tensor(62.2725),"tensor(0.7605, device='cuda:0')",tensor(81.1600) 20 | 18,"tensor(1.0996, device='cuda:0')",0.0,tensor(62.6641),"tensor(0.7235, device='cuda:0')",tensor(81.5700) 21 | 19,"tensor(1.1162, device='cuda:0')",0.0,tensor(62.1934),"tensor(0.7670, device='cuda:0')",tensor(78.8400) 22 | 20,"tensor(1.0989, device='cuda:0')",0.0,tensor(62.4432),"tensor(0.9550, device='cuda:0')",tensor(76.0800) 23 | 21,"tensor(1.1023, device='cuda:0')",0.0,tensor(62.6439),"tensor(0.6734, device='cuda:0')",tensor(81.0800) 24 | 22,"tensor(1.1070, device='cuda:0')",0.0,tensor(61.7068),"tensor(0.7493, device='cuda:0')",tensor(80.0400) 25 | 23,"tensor(1.0937, device='cuda:0')",0.0,tensor(62.6318),"tensor(0.7509, device='cuda:0')",tensor(78.7200) 26 | 24,"tensor(1.0963, device='cuda:0')",0.0,tensor(63.1412),"tensor(0.7149, device='cuda:0')",tensor(78.7300) 27 | 25,"tensor(1.0731, device='cuda:0')",0.0,tensor(63.3816),"tensor(0.7929, device='cuda:0')",tensor(78.1600) 28 | 26,"tensor(1.0687, device='cuda:0')",0.0,tensor(63.5898),"tensor(0.7151, device='cuda:0')",tensor(80.9300) 29 | 27,"tensor(1.0815, device='cuda:0')",0.0,tensor(63.2100),"tensor(0.6483, device='cuda:0')",tensor(81.8100) 30 | 28,"tensor(1.0759, device='cuda:0')",0.0,tensor(63.6988),"tensor(0.7087, device='cuda:0')",tensor(81.4700) 31 | 29,"tensor(1.0640, device='cuda:0')",0.0,tensor(64.1290),"tensor(0.7718, device='cuda:0')",tensor(80.0900) 32 | 
-------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_mixup_1.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(2.2877, device='cuda:0')",0.0,tensor(21.4552),"tensor(1.8986, device='cuda:0')",tensor(30.5100) 3 | 1,"tensor(1.9524, device='cuda:0')",0.0,tensor(30.9192),"tensor(1.7501, device='cuda:0')",tensor(38.8000) 4 | 2,"tensor(1.8855, device='cuda:0')",0.0,tensor(34.1426),"tensor(1.8834, device='cuda:0')",tensor(33.3300) 5 | 3,"tensor(1.8253, device='cuda:0')",0.0,tensor(37.0241),"tensor(1.6134, device='cuda:0')",tensor(45.4500) 6 | 4,"tensor(1.9348, device='cuda:0')",0.0,tensor(31.7827),"tensor(1.9646, device='cuda:0')",tensor(27.4900) 7 | 5,"tensor(1.8398, device='cuda:0')",0.0,tensor(36.3115),"tensor(1.5600, device='cuda:0')",tensor(49.8100) 8 | 6,"tensor(1.7886, device='cuda:0')",0.0,tensor(38.7104),"tensor(1.6014, device='cuda:0')",tensor(44.6800) 9 | 7,"tensor(1.7828, device='cuda:0')",0.0,tensor(39.3375),"tensor(1.5151, device='cuda:0')",tensor(49.4600) 10 | 8,"tensor(1.7540, device='cuda:0')",0.0,tensor(40.5212),"tensor(1.6683, device='cuda:0')",tensor(44.1300) 11 | 9,"tensor(1.7623, device='cuda:0')",0.0,tensor(39.9576),"tensor(1.6402, device='cuda:0')",tensor(43.8600) 12 | 10,"tensor(1.7885, device='cuda:0')",0.0,tensor(38.9384),"tensor(2.4766, device='cuda:0')",tensor(15.3800) 13 | 11,"tensor(1.8959, device='cuda:0')",0.0,tensor(33.9427),"tensor(1.6782, device='cuda:0')",tensor(41.1700) 14 | 12,"tensor(1.8126, device='cuda:0')",0.0,tensor(37.8754),"tensor(1.5495, device='cuda:0')",tensor(46.3900) 15 | 13,"tensor(1.7411, device='cuda:0')",0.0,tensor(41.1904),"tensor(1.5361, device='cuda:0')",tensor(48.6700) 16 | 14,"tensor(1.7836, device='cuda:0')",0.0,tensor(39.3387),"tensor(1.5355, device='cuda:0')",tensor(50.3900) 17 | 15,"tensor(1.7489, 
device='cuda:0')",0.0,tensor(40.8408),"tensor(1.5699, device='cuda:0')",tensor(45.6300) 18 | 16,"tensor(1.7543, device='cuda:0')",0.0,tensor(40.5420),"tensor(2.2198, device='cuda:0')",tensor(43.3000) 19 | 17,"tensor(1.7222, device='cuda:0')",0.0,tensor(42.4087),"tensor(1.4898, device='cuda:0')",tensor(51.3100) 20 | 18,"tensor(1.8517, device='cuda:0')",0.0,tensor(36.3109),"tensor(1.5565, device='cuda:0')",tensor(46.4700) 21 | 19,"tensor(1.8607, device='cuda:0')",0.0,tensor(35.7716),"tensor(1.7921, device='cuda:0')",tensor(37.4400) 22 | 20,"tensor(1.7999, device='cuda:0')",0.0,tensor(38.6506),"tensor(1.4950, device='cuda:0')",tensor(49.7900) 23 | 21,"tensor(1.7446, device='cuda:0')",0.0,tensor(41.0905),"tensor(1.4899, device='cuda:0')",tensor(49.6300) 24 | 22,"tensor(1.6839, device='cuda:0')",0.0,tensor(43.4721),"tensor(1.5370, device='cuda:0')",tensor(48.2100) 25 | 23,"tensor(1.7687, device='cuda:0')",0.0,tensor(40.1902),"tensor(1.4819, device='cuda:0')",tensor(50.0900) 26 | 24,"tensor(1.7312, device='cuda:0')",0.0,tensor(41.9068),"tensor(1.4197, device='cuda:0')",tensor(53.6700) 27 | 25,"tensor(1.7332, device='cuda:0')",0.0,tensor(41.6716),"tensor(1.2940, device='cuda:0')",tensor(58.0100) 28 | 26,"tensor(1.6708, device='cuda:0')",0.0,tensor(44.7846),"tensor(1.3072, device='cuda:0')",tensor(57.9900) 29 | 27,"tensor(1.6245, device='cuda:0')",0.0,tensor(46.8689),"tensor(1.2321, device='cuda:0')",tensor(61.3700) 30 | 28,"tensor(1.6105, device='cuda:0')",0.0,tensor(47.2492),"tensor(1.6507, device='cuda:0')",tensor(42.4700) 31 | 29,"tensor(1.6017, device='cuda:0')",0.0,tensor(47.6939),"tensor(1.2439, device='cuda:0')",tensor(61.4700) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_mixup_2.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(2.5988, 
device='cuda:0')",0.0,tensor(16.4150),"tensor(2.1065, device='cuda:0')",tensor(24.0700) 3 | 1,"tensor(2.1234, device='cuda:0')",0.0,tensor(22.1954),"tensor(1.9791, device='cuda:0')",tensor(26.8100) 4 | 2,"tensor(2.0674, device='cuda:0')",0.0,tensor(24.8577),"tensor(1.9732, device='cuda:0')",tensor(28.4500) 5 | 3,"tensor(2.0405, device='cuda:0')",0.0,tensor(26.0568),"tensor(2.0376, device='cuda:0')",tensor(22.8200) 6 | 4,"tensor(1.9996, device='cuda:0')",0.0,tensor(28.6330),"tensor(1.8742, device='cuda:0')",tensor(32.5600) 7 | 5,"tensor(1.9977, device='cuda:0')",0.0,tensor(28.8430),"tensor(2.1198, device='cuda:0')",tensor(20.6700) 8 | 6,"tensor(2.0493, device='cuda:0')",0.0,tensor(25.7799),"tensor(2.2385, device='cuda:0')",tensor(19.0900) 9 | 7,"tensor(2.0589, device='cuda:0')",0.0,tensor(25.2904),"tensor(3.0567, device='cuda:0')",tensor(13.4100) 10 | 8,"tensor(2.0175, device='cuda:0')",0.0,tensor(27.3789),"tensor(1.8305, device='cuda:0')",tensor(35.8500) 11 | 9,"tensor(2.0298, device='cuda:0')",0.0,tensor(26.2188),"tensor(1.9093, device='cuda:0')",tensor(28.9200) 12 | 10,"tensor(2.0976, device='cuda:0')",0.0,tensor(22.3591),"tensor(1.9503, device='cuda:0')",tensor(28.7000) 13 | 11,"tensor(2.0748, device='cuda:0')",0.0,tensor(24.0372),"tensor(2.0584, device='cuda:0')",tensor(23.9000) 14 | 12,"tensor(2.0370, device='cuda:0')",0.0,tensor(25.6861),"tensor(1.7837, device='cuda:0')",tensor(35.7100) 15 | 13,"tensor(2.0515, device='cuda:0')",0.0,tensor(24.8760),"tensor(1.9619, device='cuda:0')",tensor(31.7300) 16 | 14,"tensor(2.0787, device='cuda:0')",0.0,tensor(23.7376),"tensor(2.0201, device='cuda:0')",tensor(26.3000) 17 | 15,"tensor(2.0419, device='cuda:0')",0.0,tensor(25.5730),"tensor(1.8605, device='cuda:0')",tensor(28.4800) 18 | 16,"tensor(2.0032, device='cuda:0')",0.0,tensor(27.5435),"tensor(2.5678, device='cuda:0')",tensor(20.5900) 19 | 17,"tensor(2.0786, device='cuda:0')",0.0,tensor(23.8160),"tensor(1.9983, device='cuda:0')",tensor(25.7300) 20 | 18,"tensor(2.0086, 
device='cuda:0')",0.0,tensor(27.0779),"tensor(1.8201, device='cuda:0')",tensor(34.4400) 21 | 19,"tensor(1.9817, device='cuda:0')",0.0,tensor(28.8634),"tensor(1.7823, device='cuda:0')",tensor(36.3100) 22 | 20,"tensor(1.9784, device='cuda:0')",0.0,tensor(28.6708),"tensor(2.1509, device='cuda:0')",tensor(21.9000) 23 | 21,"tensor(1.9974, device='cuda:0')",0.0,tensor(28.1345),"tensor(1.8063, device='cuda:0')",tensor(35.5100) 24 | 22,"tensor(1.9424, device='cuda:0')",0.0,tensor(30.4217),"tensor(1.7292, device='cuda:0')",tensor(39.7000) 25 | 23,"tensor(1.9676, device='cuda:0')",0.0,tensor(29.5585),"tensor(1.7449, device='cuda:0')",tensor(36.6700) 26 | 24,"tensor(1.9825, device='cuda:0')",0.0,tensor(28.9549),"tensor(2.0882, device='cuda:0')",tensor(24.8400) 27 | 25,"tensor(2.0198, device='cuda:0')",0.0,tensor(27.3775),"tensor(1.8630, device='cuda:0')",tensor(33.4500) 28 | 26,"tensor(1.9847, device='cuda:0')",0.0,tensor(28.6669),"tensor(1.8658, device='cuda:0')",tensor(35.1400) 29 | 27,"tensor(1.9420, device='cuda:0')",0.0,tensor(30.4228),"tensor(1.6885, device='cuda:0')",tensor(40.3000) 30 | 28,"tensor(1.9979, device='cuda:0')",0.0,tensor(27.8036),"tensor(1.9690, device='cuda:0')",tensor(26.8700) 31 | 29,"tensor(2.0084, device='cuda:0')",0.0,tensor(26.5612),"tensor(1.9115, device='cuda:0')",tensor(30.1100) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_mixup_3.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(2.2490, device='cuda:0')",0.0,tensor(21.8189),"tensor(2.0380, device='cuda:0')",tensor(27.1200) 3 | 1,"tensor(1.9387, device='cuda:0')",0.0,tensor(29.3909),"tensor(1.9303, device='cuda:0')",tensor(31.1300) 4 | 2,"tensor(1.8109, device='cuda:0')",0.0,tensor(36.0116),"tensor(1.6713, device='cuda:0')",tensor(39.9300) 5 | 3,"tensor(1.7671, 
device='cuda:0')",0.0,tensor(38.3374),"tensor(1.6934, device='cuda:0')",tensor(40.4100) 6 | 4,"tensor(1.7966, device='cuda:0')",0.0,tensor(37.2334),"tensor(1.8982, device='cuda:0')",tensor(31.0800) 7 | 5,"tensor(1.7525, device='cuda:0')",0.0,tensor(38.9498),"tensor(1.7087, device='cuda:0')",tensor(39.7500) 8 | 6,"tensor(1.7477, device='cuda:0')",0.0,tensor(39.3013),"tensor(1.9661, device='cuda:0')",tensor(27.9700) 9 | 7,"tensor(1.9798, device='cuda:0')",0.0,tensor(27.6844),"tensor(2.6448, device='cuda:0')",tensor(15.8900) 10 | 8,"tensor(1.9137, device='cuda:0')",0.0,tensor(30.8651),"tensor(1.8522, device='cuda:0')",tensor(32.0600) 11 | 9,"tensor(1.8364, device='cuda:0')",0.0,tensor(34.7881),"tensor(1.7626, device='cuda:0')",tensor(35.0900) 12 | 10,"tensor(1.7537, device='cuda:0')",0.0,tensor(38.3701),"tensor(1.8617, device='cuda:0')",tensor(32.2400) 13 | 11,"tensor(1.7696, device='cuda:0')",0.0,tensor(38.2061),"tensor(2.3542, device='cuda:0')",tensor(21.3600) 14 | 12,"tensor(1.7851, device='cuda:0')",0.0,tensor(37.1738),"tensor(1.8924, device='cuda:0')",tensor(31.7200) 15 | 13,"tensor(1.8270, device='cuda:0')",0.0,tensor(35.4236),"tensor(2.1773, device='cuda:0')",tensor(21.2600) 16 | 14,"tensor(1.8555, device='cuda:0')",0.0,tensor(33.7255),"tensor(2.0021, device='cuda:0')",tensor(28.4100) 17 | 15,"tensor(1.7799, device='cuda:0')",0.0,tensor(37.4785),"tensor(2.4709, device='cuda:0')",tensor(19.6500) 18 | 16,"tensor(1.8668, device='cuda:0')",0.0,tensor(33.5824),"tensor(1.7088, device='cuda:0')",tensor(39.9200) 19 | 17,"tensor(1.8082, device='cuda:0')",0.0,tensor(36.2666),"tensor(1.9701, device='cuda:0')",tensor(34.8700) 20 | 18,"tensor(1.7453, device='cuda:0')",0.0,tensor(39.2025),"tensor(1.6074, device='cuda:0')",tensor(41.0800) 21 | 19,"tensor(1.7906, device='cuda:0')",0.0,tensor(37.2583),"tensor(1.8329, device='cuda:0')",tensor(33.3000) 22 | 20,"tensor(1.8071, device='cuda:0')",0.0,tensor(36.7620),"tensor(1.7982, device='cuda:0')",tensor(36.2900) 23 | 
21,"tensor(1.8230, device='cuda:0')",0.0,tensor(35.6204),"tensor(1.9685, device='cuda:0')",tensor(28.9000) 24 | 22,"tensor(1.7635, device='cuda:0')",0.0,tensor(38.3622),"tensor(1.7303, device='cuda:0')",tensor(38.0600) 25 | 23,"tensor(1.7182, device='cuda:0')",0.0,tensor(40.4985),"tensor(1.8462, device='cuda:0')",tensor(34.1400) 26 | 24,"tensor(1.6942, device='cuda:0')",0.0,tensor(41.4110),"tensor(1.5842, device='cuda:0')",tensor(45.4500) 27 | 25,"tensor(1.6960, device='cuda:0')",0.0,tensor(41.3809),"tensor(1.6349, device='cuda:0')",tensor(42.6300) 28 | 26,"tensor(1.7126, device='cuda:0')",0.0,tensor(40.4428),"tensor(1.9265, device='cuda:0')",tensor(29.4000) 29 | 27,"tensor(1.6109, device='cuda:0')",0.0,tensor(45.3281),"tensor(1.5128, device='cuda:0')",tensor(49.6700) 30 | 28,"tensor(1.5759, device='cuda:0')",0.0,tensor(46.9403),"tensor(1.3375, device='cuda:0')",tensor(53.4300) 31 | 29,"tensor(1.5298, device='cuda:0')",0.0,tensor(48.8387),"tensor(2.6476, device='cuda:0')",tensor(15.5000) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/log_EfficientNet_mixup_4.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(2.5290, device='cuda:0')",0.0,tensor(13.9961),"tensor(2.4831, device='cuda:0')",tensor(12.3400) 3 | 1,"tensor(2.1623, device='cuda:0')",0.0,tensor(19.7158),"tensor(2.0269, device='cuda:0')",tensor(25.4800) 4 | 2,"tensor(2.0835, device='cuda:0')",0.0,tensor(23.1430),"tensor(2.1210, device='cuda:0')",tensor(22.5500) 5 | 3,"tensor(2.0396, device='cuda:0')",0.0,tensor(25.3663),"tensor(1.9627, device='cuda:0')",tensor(26.9200) 6 | 4,"tensor(1.9988, device='cuda:0')",0.0,tensor(26.9953),"tensor(1.9151, device='cuda:0')",tensor(31.6100) 7 | 5,"tensor(1.9487, device='cuda:0')",0.0,tensor(29.6776),"tensor(1.8142, device='cuda:0')",tensor(33.3800) 8 | 6,"tensor(1.9233, 
device='cuda:0')",0.0,tensor(30.8187),"tensor(1.7458, device='cuda:0')",tensor(36.6100) 9 | 7,"tensor(1.8914, device='cuda:0')",0.0,tensor(32.2555),"tensor(2.1836, device='cuda:0')",tensor(23.9400) 10 | 8,"tensor(1.9096, device='cuda:0')",0.0,tensor(31.4838),"tensor(1.9482, device='cuda:0')",tensor(29.3700) 11 | 9,"tensor(1.8889, device='cuda:0')",0.0,tensor(32.2597),"tensor(2.0828, device='cuda:0')",tensor(24.2100) 12 | 10,"tensor(1.8749, device='cuda:0')",0.0,tensor(32.6502),"tensor(1.7281, device='cuda:0')",tensor(39.9700) 13 | 11,"tensor(1.8640, device='cuda:0')",0.0,tensor(33.4449),"tensor(1.7451, device='cuda:0')",tensor(36.1600) 14 | 12,"tensor(1.8373, device='cuda:0')",0.0,tensor(34.6387),"tensor(1.6598, device='cuda:0')",tensor(41.2200) 15 | 13,"tensor(1.8216, device='cuda:0')",0.0,tensor(35.6718),"tensor(1.8085, device='cuda:0')",tensor(35.6300) 16 | 14,"tensor(1.8988, device='cuda:0')",0.0,tensor(32.2875),"tensor(2.0386, device='cuda:0')",tensor(32.9800) 17 | 15,"tensor(1.9598, device='cuda:0')",0.0,tensor(28.9572),"tensor(1.8898, device='cuda:0')",tensor(27.9000) 18 | 16,"tensor(1.9162, device='cuda:0')",0.0,tensor(31.3641),"tensor(2.2362, device='cuda:0')",tensor(21.0600) 19 | 17,"tensor(1.9672, device='cuda:0')",0.0,tensor(28.2926),"tensor(2.4693, device='cuda:0')",tensor(19.2000) 20 | 18,"tensor(1.9095, device='cuda:0')",0.0,tensor(30.7386),"tensor(1.8700, device='cuda:0')",tensor(31.7700) 21 | 19,"tensor(2.0299, device='cuda:0')",0.0,tensor(26.0223),"tensor(2.3860, device='cuda:0')",tensor(17.8100) 22 | 20,"tensor(2.1327, device='cuda:0')",0.0,tensor(21.0535),"tensor(2.1328, device='cuda:0')",tensor(23.5400) 23 | 21,"tensor(2.0320, device='cuda:0')",0.0,tensor(25.6313),"tensor(1.8574, device='cuda:0')",tensor(31.4700) 24 | 22,"tensor(1.9575, device='cuda:0')",0.0,tensor(28.9095),"tensor(1.9248, device='cuda:0')",tensor(30.7500) 25 | 23,"tensor(1.9607, device='cuda:0')",0.0,tensor(29.1441),"tensor(1.8371, device='cuda:0')",tensor(33.9000) 26 | 
24,"tensor(1.9145, device='cuda:0')",0.0,tensor(30.7228),"tensor(3.1385, device='cuda:0')",tensor(13.3300) 27 | 25,"tensor(1.9261, device='cuda:0')",0.0,tensor(30.4129),"tensor(1.8249, device='cuda:0')",tensor(30.9200) 28 | 26,"tensor(1.9393, device='cuda:0')",0.0,tensor(30.0298),"tensor(1.9120, device='cuda:0')",tensor(31.6800) 29 | 27,"tensor(1.9288, device='cuda:0')",0.0,tensor(30.3117),"tensor(2.2512, device='cuda:0')",tensor(17.3600) 30 | 28,"tensor(1.9740, device='cuda:0')",0.0,tensor(26.8164),"tensor(1.7792, device='cuda:0')",tensor(33.3700) 31 | 29,"tensor(1.9213, device='cuda:0')",0.0,tensor(30.0489),"tensor(1.7236, device='cuda:0')",tensor(37.6600) 32 | -------------------------------------------------------------------------------- /results/decay=1e-4/loss-test-with-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/loss-test-with-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-4/loss-test-without-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/loss-test-without-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-4/test-accuracy-with-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/test-accuracy-with-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-4/test-accuracy-without-augment-.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/test-accuracy-without-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-4/train-accuracy-with-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/train-accuracy-with-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-4/train-accuracy-without-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-4/train-accuracy-without-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_baseline_13.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.1174, device='cuda:0')",0.0,tensor(62.1440),"tensor(1.0998, device='cuda:0')",tensor(65.9500) 3 | 1,"tensor(0.6976, device='cuda:0')",0.0,tensor(76.6220),"tensor(0.7366, device='cuda:0')",tensor(76.8200) 4 | 2,"tensor(0.5567, device='cuda:0')",0.0,tensor(81.5380),"tensor(0.6900, device='cuda:0')",tensor(77.7300) 5 | 3,"tensor(0.4774, device='cuda:0')",0.0,tensor(84.0140),"tensor(0.6509, device='cuda:0')",tensor(79.5800) 6 | 4,"tensor(0.4144, device='cuda:0')",0.0,tensor(86.1160),"tensor(0.7071, device='cuda:0')",tensor(79.2800) 7 | 5,"tensor(0.3718, device='cuda:0')",0.0,tensor(87.4440),"tensor(0.8875, device='cuda:0')",tensor(75.5800) 8 | 6,"tensor(0.3258, device='cuda:0')",0.0,tensor(88.8200),"tensor(0.7190, device='cuda:0')",tensor(79.7200) 9 | 7,"tensor(0.2987, 
device='cuda:0')",0.0,tensor(89.7940),"tensor(0.6827, device='cuda:0')",tensor(80.2900) 10 | 8,"tensor(0.2675, device='cuda:0')",0.0,tensor(90.8120),"tensor(0.6776, device='cuda:0')",tensor(80.4700) 11 | 9,"tensor(0.2377, device='cuda:0')",0.0,tensor(91.9300),"tensor(0.6298, device='cuda:0')",tensor(82.1400) 12 | 10,"tensor(0.2226, device='cuda:0')",0.0,tensor(92.3820),"tensor(0.6568, device='cuda:0')",tensor(81.3000) 13 | 11,"tensor(0.1994, device='cuda:0')",0.0,tensor(93.2340),"tensor(0.6691, device='cuda:0')",tensor(81.8800) 14 | 12,"tensor(0.1838, device='cuda:0')",0.0,tensor(93.7300),"tensor(0.7464, device='cuda:0')",tensor(80.8300) 15 | 13,"tensor(0.1758, device='cuda:0')",0.0,tensor(94.0700),"tensor(0.7183, device='cuda:0')",tensor(81.5600) 16 | 14,"tensor(0.1605, device='cuda:0')",0.0,tensor(94.5300),"tensor(0.7699, device='cuda:0')",tensor(81.1300) 17 | 15,"tensor(0.1528, device='cuda:0')",0.0,tensor(94.8600),"tensor(0.6828, device='cuda:0')",tensor(82.4600) 18 | 16,"tensor(0.1449, device='cuda:0')",0.0,tensor(95.0100),"tensor(0.7295, device='cuda:0')",tensor(82.0800) 19 | 17,"tensor(0.1357, device='cuda:0')",0.0,tensor(95.3840),"tensor(0.6751, device='cuda:0')",tensor(82.7300) 20 | 18,"tensor(0.1281, device='cuda:0')",0.0,tensor(95.6720),"tensor(0.7107, device='cuda:0')",tensor(82.9000) 21 | 19,"tensor(0.1224, device='cuda:0')",0.0,tensor(95.7900),"tensor(0.7698, device='cuda:0')",tensor(81.5900) 22 | 20,"tensor(0.1124, device='cuda:0')",0.0,tensor(96.1800),"tensor(0.8016, device='cuda:0')",tensor(80.9400) 23 | 21,"tensor(0.1125, device='cuda:0')",0.0,tensor(96.2200),"tensor(0.7759, device='cuda:0')",tensor(81.8900) 24 | 22,"tensor(0.1068, device='cuda:0')",0.0,tensor(96.3560),"tensor(0.7618, device='cuda:0')",tensor(82.1800) 25 | 23,"tensor(0.1021, device='cuda:0')",0.0,tensor(96.5300),"tensor(0.8303, device='cuda:0')",tensor(81.1600) 26 | 24,"tensor(0.0936, device='cuda:0')",0.0,tensor(96.8120),"tensor(0.8282, device='cuda:0')",tensor(81.7000) 27 | 
25,"tensor(0.0946, device='cuda:0')",0.0,tensor(96.7540),"tensor(0.8808, device='cuda:0')",tensor(81.1200) 28 | 26,"tensor(0.0903, device='cuda:0')",0.0,tensor(96.9500),"tensor(0.8434, device='cuda:0')",tensor(82.0400) 29 | 27,"tensor(0.0870, device='cuda:0')",0.0,tensor(97.0700),"tensor(0.8061, device='cuda:0')",tensor(82.3100) 30 | 28,"tensor(0.0831, device='cuda:0')",0.0,tensor(97.1400),"tensor(0.7824, device='cuda:0')",tensor(82.5900) 31 | 29,"tensor(0.0821, device='cuda:0')",0.0,tensor(97.2800),"tensor(0.7807, device='cuda:0')",tensor(82.9600) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_baseline_24.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.2443, device='cuda:0')",0.0,tensor(57.3480),"tensor(1.0896, device='cuda:0')",tensor(69.4200) 3 | 1,"tensor(0.8371, device='cuda:0')",0.0,tensor(72.3440),"tensor(0.8776, device='cuda:0')",tensor(72.6300) 4 | 2,"tensor(0.7215, device='cuda:0')",0.0,tensor(75.8100),"tensor(0.7997, device='cuda:0')",tensor(74.9500) 5 | 3,"tensor(0.6623, device='cuda:0')",0.0,tensor(77.9160),"tensor(0.7626, device='cuda:0')",tensor(74.9000) 6 | 4,"tensor(0.6108, device='cuda:0')",0.0,tensor(79.4600),"tensor(0.6899, device='cuda:0')",tensor(78.3100) 7 | 5,"tensor(0.5772, device='cuda:0')",0.0,tensor(80.5560),"tensor(0.6956, device='cuda:0')",tensor(77.9600) 8 | 6,"tensor(0.5470, device='cuda:0')",0.0,tensor(81.6640),"tensor(0.5464, device='cuda:0')",tensor(82.1200) 9 | 7,"tensor(0.5215, device='cuda:0')",0.0,tensor(82.3800),"tensor(0.6220, device='cuda:0')",tensor(79.6600) 10 | 8,"tensor(0.4999, device='cuda:0')",0.0,tensor(83.1800),"tensor(0.5817, device='cuda:0')",tensor(80.9500) 11 | 9,"tensor(0.4810, device='cuda:0')",0.0,tensor(83.7660),"tensor(0.5612, device='cuda:0')",tensor(81.9400) 12 | 10,"tensor(0.4606, 
device='cuda:0')",0.0,tensor(84.4180),"tensor(0.5870, device='cuda:0')",tensor(80.0800) 13 | 11,"tensor(0.4426, device='cuda:0')",0.0,tensor(85.0140),"tensor(0.6265, device='cuda:0')",tensor(80.0200) 14 | 12,"tensor(0.4267, device='cuda:0')",0.0,tensor(85.5280),"tensor(0.5357, device='cuda:0')",tensor(82.2200) 15 | 13,"tensor(0.4118, device='cuda:0')",0.0,tensor(85.8580),"tensor(0.5398, device='cuda:0')",tensor(82.6000) 16 | 14,"tensor(0.4024, device='cuda:0')",0.0,tensor(86.0660),"tensor(0.5989, device='cuda:0')",tensor(80.7400) 17 | 15,"tensor(0.3909, device='cuda:0')",0.0,tensor(86.6220),"tensor(0.5086, device='cuda:0')",tensor(83.2600) 18 | 16,"tensor(0.3736, device='cuda:0')",0.0,tensor(87.2380),"tensor(0.5118, device='cuda:0')",tensor(83.7200) 19 | 17,"tensor(0.3650, device='cuda:0')",0.0,tensor(87.5220),"tensor(0.5101, device='cuda:0')",tensor(83.6400) 20 | 18,"tensor(0.3528, device='cuda:0')",0.0,tensor(87.8760),"tensor(0.4698, device='cuda:0')",tensor(85.1200) 21 | 19,"tensor(0.3448, device='cuda:0')",0.0,tensor(88.1500),"tensor(0.5130, device='cuda:0')",tensor(83.4900) 22 | 20,"tensor(0.3308, device='cuda:0')",0.0,tensor(88.6460),"tensor(0.5559, device='cuda:0')",tensor(83.1100) 23 | 21,"tensor(0.3232, device='cuda:0')",0.0,tensor(88.9540),"tensor(0.5446, device='cuda:0')",tensor(82.6800) 24 | 22,"tensor(0.3126, device='cuda:0')",0.0,tensor(89.3020),"tensor(0.4592, device='cuda:0')",tensor(85.2100) 25 | 23,"tensor(0.3053, device='cuda:0')",0.0,tensor(89.3980),"tensor(0.5744, device='cuda:0')",tensor(82.6400) 26 | 24,"tensor(0.2983, device='cuda:0')",0.0,tensor(89.7100),"tensor(0.4908, device='cuda:0')",tensor(84.1000) 27 | 25,"tensor(0.2849, device='cuda:0')",0.0,tensor(90.1560),"tensor(0.5206, device='cuda:0')",tensor(83.9400) 28 | 26,"tensor(0.2834, device='cuda:0')",0.0,tensor(90.0800),"tensor(0.4873, device='cuda:0')",tensor(85.2400) 29 | 27,"tensor(0.2689, device='cuda:0')",0.0,tensor(90.5860),"tensor(0.4795, device='cuda:0')",tensor(85.2500) 30 | 
28,"tensor(0.2666, device='cuda:0')",0.0,tensor(90.7340),"tensor(0.5007, device='cuda:0')",tensor(85.1100) 31 | 29,"tensor(0.2569, device='cuda:0')",0.0,tensor(91.1420),"tensor(0.5082, device='cuda:0')",tensor(84.2000) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_batchboost_1.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.5489, device='cuda:0')",0.0,tensor(46.8700),"tensor(1.0325, device='cuda:0')",tensor(70.1200) 3 | 1,"tensor(1.2682, device='cuda:0')",0.0,tensor(57.2556),"tensor(0.7428, device='cuda:0')",tensor(75.8100) 4 | 2,"tensor(1.1674, device='cuda:0')",0.0,tensor(60.8273),"tensor(0.6898, device='cuda:0')",tensor(78.9600) 5 | 3,"tensor(1.1057, device='cuda:0')",0.0,tensor(63.5734),"tensor(0.6742, device='cuda:0')",tensor(78.3000) 6 | 4,"tensor(1.0625, device='cuda:0')",0.0,tensor(65.0731),"tensor(0.6306, device='cuda:0')",tensor(80.7000) 7 | 5,"tensor(1.0479, device='cuda:0')",0.0,tensor(65.2568),"tensor(0.6129, device='cuda:0')",tensor(81.7700) 8 | 6,"tensor(1.0239, device='cuda:0')",0.0,tensor(65.9444),"tensor(0.6028, device='cuda:0')",tensor(81.2800) 9 | 7,"tensor(0.9993, device='cuda:0')",0.0,tensor(66.7977),"tensor(0.6886, device='cuda:0')",tensor(79.3700) 10 | 8,"tensor(0.9763, device='cuda:0')",0.0,tensor(67.9500),"tensor(0.6598, device='cuda:0')",tensor(81.3100) 11 | 9,"tensor(0.9645, device='cuda:0')",0.0,tensor(68.0379),"tensor(0.7042, device='cuda:0')",tensor(79.8300) 12 | 10,"tensor(0.9515, device='cuda:0')",0.0,tensor(68.5777),"tensor(0.6745, device='cuda:0')",tensor(81.1900) 13 | 11,"tensor(0.9238, device='cuda:0')",0.0,tensor(69.6884),"tensor(0.8189, device='cuda:0')",tensor(78.4200) 14 | 12,"tensor(0.9414, device='cuda:0')",0.0,tensor(68.5351),"tensor(0.6701, device='cuda:0')",tensor(82.0300) 15 | 13,"tensor(0.9389, 
device='cuda:0')",0.0,tensor(68.8745),"tensor(0.5832, device='cuda:0')",tensor(83.6000) 16 | 14,"tensor(0.9155, device='cuda:0')",0.0,tensor(69.9621),"tensor(0.6897, device='cuda:0')",tensor(81.4700) 17 | 15,"tensor(0.9061, device='cuda:0')",0.0,tensor(70.3822),"tensor(0.6672, device='cuda:0')",tensor(82.3000) 18 | 16,"tensor(0.9020, device='cuda:0')",0.0,tensor(70.4629),"tensor(0.6488, device='cuda:0')",tensor(82.9000) 19 | 17,"tensor(0.8953, device='cuda:0')",0.0,tensor(70.4594),"tensor(0.6244, device='cuda:0')",tensor(82.6800) 20 | 18,"tensor(0.8896, device='cuda:0')",0.0,tensor(70.6783),"tensor(0.6720, device='cuda:0')",tensor(82.1500) 21 | 19,"tensor(0.8901, device='cuda:0')",0.0,tensor(70.6816),"tensor(0.6343, device='cuda:0')",tensor(83.1400) 22 | 20,"tensor(0.8715, device='cuda:0')",0.0,tensor(71.9698),"tensor(0.6502, device='cuda:0')",tensor(83.8300) 23 | 21,"tensor(0.8699, device='cuda:0')",0.0,tensor(71.6310),"tensor(0.6583, device='cuda:0')",tensor(82.7900) 24 | 22,"tensor(0.8638, device='cuda:0')",0.0,tensor(71.6807),"tensor(0.6244, device='cuda:0')",tensor(83.9400) 25 | 23,"tensor(0.8714, device='cuda:0')",0.0,tensor(71.4544),"tensor(0.7419, device='cuda:0')",tensor(82.3300) 26 | 24,"tensor(0.8673, device='cuda:0')",0.0,tensor(71.8646),"tensor(0.7027, device='cuda:0')",tensor(82.9300) 27 | 25,"tensor(0.8617, device='cuda:0')",0.0,tensor(71.8118),"tensor(0.6836, device='cuda:0')",tensor(82.6300) 28 | 26,"tensor(0.8752, device='cuda:0')",0.0,tensor(71.3329),"tensor(0.6698, device='cuda:0')",tensor(83.4400) 29 | 27,"tensor(0.8760, device='cuda:0')",0.0,tensor(71.2223),"tensor(0.6813, device='cuda:0')",tensor(83.0900) 30 | 28,"tensor(0.8627, device='cuda:0')",0.0,tensor(72.0735),"tensor(0.6109, device='cuda:0')",tensor(83.7200) 31 | 29,"tensor(0.8564, device='cuda:0')",0.0,tensor(71.9139),"tensor(0.6498, device='cuda:0')",tensor(83.1300) 32 | -------------------------------------------------------------------------------- 
/results/decay=1e-5/log_EfficientNet_batchboost_2.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.6238, device='cuda:0')",0.0,tensor(43.7457),"tensor(1.0339, device='cuda:0')",tensor(68.1200) 3 | 1,"tensor(1.3345, device='cuda:0')",0.0,tensor(54.3368),"tensor(0.9366, device='cuda:0')",tensor(70.6600) 4 | 2,"tensor(1.2593, device='cuda:0')",0.0,tensor(56.6156),"tensor(0.6572, device='cuda:0')",tensor(78.8400) 5 | 3,"tensor(1.2066, device='cuda:0')",0.0,tensor(58.5272),"tensor(0.6226, device='cuda:0')",tensor(79.5300) 6 | 4,"tensor(1.1670, device='cuda:0')",0.0,tensor(60.1677),"tensor(0.6359, device='cuda:0')",tensor(79.3300) 7 | 5,"tensor(1.1355, device='cuda:0')",0.0,tensor(61.4253),"tensor(0.6235, device='cuda:0')",tensor(81.0400) 8 | 6,"tensor(1.1357, device='cuda:0')",0.0,tensor(60.6668),"tensor(0.5750, device='cuda:0')",tensor(82.2400) 9 | 7,"tensor(1.1101, device='cuda:0')",0.0,tensor(61.9495),"tensor(0.5551, device='cuda:0')",tensor(82.6200) 10 | 8,"tensor(1.0871, device='cuda:0')",0.0,tensor(62.7897),"tensor(0.5791, device='cuda:0')",tensor(81.8900) 11 | 9,"tensor(1.0692, device='cuda:0')",0.0,tensor(63.2579),"tensor(0.6077, device='cuda:0')",tensor(81.1100) 12 | 10,"tensor(1.0782, device='cuda:0')",0.0,tensor(62.9700),"tensor(0.5328, device='cuda:0')",tensor(83.2500) 13 | 11,"tensor(1.0514, device='cuda:0')",0.0,tensor(64.1702),"tensor(0.5064, device='cuda:0')",tensor(83.9100) 14 | 12,"tensor(1.0488, device='cuda:0')",0.0,tensor(63.8546),"tensor(0.5531, device='cuda:0')",tensor(83.2200) 15 | 13,"tensor(1.0729, device='cuda:0')",0.0,tensor(62.7652),"tensor(0.5439, device='cuda:0')",tensor(82.8400) 16 | 14,"tensor(1.0357, device='cuda:0')",0.0,tensor(64.4369),"tensor(0.5264, device='cuda:0')",tensor(83.7300) 17 | 15,"tensor(1.0342, device='cuda:0')",0.0,tensor(64.5673),"tensor(0.5263, device='cuda:0')",tensor(83.7500) 18 | 16,"tensor(1.0292, 
device='cuda:0')",0.0,tensor(64.6994),"tensor(0.5558, device='cuda:0')",tensor(83.1900) 19 | 17,"tensor(1.0299, device='cuda:0')",0.0,tensor(64.6908),"tensor(0.5957, device='cuda:0')",tensor(82.6200) 20 | 18,"tensor(1.0225, device='cuda:0')",0.0,tensor(64.9278),"tensor(0.6015, device='cuda:0')",tensor(82.0700) 21 | 19,"tensor(1.0114, device='cuda:0')",0.0,tensor(65.4504),"tensor(0.5689, device='cuda:0')",tensor(82.4400) 22 | 20,"tensor(1.0239, device='cuda:0')",0.0,tensor(64.8615),"tensor(0.5855, device='cuda:0')",tensor(82.6400) 23 | 21,"tensor(1.0217, device='cuda:0')",0.0,tensor(64.8218),"tensor(0.5125, device='cuda:0')",tensor(84.2600) 24 | 22,"tensor(0.9914, device='cuda:0')",0.0,tensor(66.0869),"tensor(0.4711, device='cuda:0')",tensor(85.5200) 25 | 23,"tensor(1.0080, device='cuda:0')",0.0,tensor(65.1572),"tensor(0.5901, device='cuda:0')",tensor(82.2200) 26 | 24,"tensor(0.9865, device='cuda:0')",0.0,tensor(66.1673),"tensor(0.6213, device='cuda:0')",tensor(84.8400) 27 | 25,"tensor(0.9787, device='cuda:0')",0.0,tensor(66.4097),"tensor(0.5190, device='cuda:0')",tensor(84.2700) 28 | 26,"tensor(0.9911, device='cuda:0')",0.0,tensor(66.1076),"tensor(0.5294, device='cuda:0')",tensor(83.2000) 29 | 27,"tensor(0.9689, device='cuda:0')",0.0,tensor(67.0958),"tensor(0.5068, device='cuda:0')",tensor(84.8300) 30 | 28,"tensor(0.9989, device='cuda:0')",0.0,tensor(65.1615),"tensor(0.5430, device='cuda:0')",tensor(83.9000) 31 | 29,"tensor(0.9745, device='cuda:0')",0.0,tensor(66.6269),"tensor(0.5172, device='cuda:0')",tensor(84.7300) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_batchboost_3.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.5592, device='cuda:0')",0.0,tensor(47.6954),"tensor(1.0072, device='cuda:0')",tensor(69.8600) 3 | 1,"tensor(1.2895, 
device='cuda:0')",0.0,tensor(57.9795),"tensor(1.0657, device='cuda:0')",tensor(66.3500) 4 | 2,"tensor(1.1896, device='cuda:0')",0.0,tensor(61.5622),"tensor(0.7285, device='cuda:0')",tensor(76.8400) 5 | 3,"tensor(1.1315, device='cuda:0')",0.0,tensor(63.4467),"tensor(0.7342, device='cuda:0')",tensor(77.6600) 6 | 4,"tensor(1.0813, device='cuda:0')",0.0,tensor(65.5488),"tensor(0.6676, device='cuda:0')",tensor(80.4200) 7 | 5,"tensor(1.0439, device='cuda:0')",0.0,tensor(67.3616),"tensor(0.6663, device='cuda:0')",tensor(80.1300) 8 | 6,"tensor(1.0427, device='cuda:0')",0.0,tensor(67.1914),"tensor(0.6639, device='cuda:0')",tensor(80.0600) 9 | 7,"tensor(1.0141, device='cuda:0')",0.0,tensor(67.3071),"tensor(0.6258, device='cuda:0')",tensor(82.3600) 10 | 8,"tensor(0.9851, device='cuda:0')",0.0,tensor(68.5185),"tensor(0.6693, device='cuda:0')",tensor(80.8600) 11 | 9,"tensor(0.9848, device='cuda:0')",0.0,tensor(69.1618),"tensor(0.6606, device='cuda:0')",tensor(81.1300) 12 | 10,"tensor(0.9466, device='cuda:0')",0.0,tensor(69.9205),"tensor(0.7316, device='cuda:0')",tensor(80.4600) 13 | 11,"tensor(0.9591, device='cuda:0')",0.0,tensor(69.1466),"tensor(0.6989, device='cuda:0')",tensor(81.5800) 14 | 12,"tensor(0.9271, device='cuda:0')",0.0,tensor(71.0775),"tensor(0.6770, device='cuda:0')",tensor(81.7100) 15 | 13,"tensor(0.9348, device='cuda:0')",0.0,tensor(71.0280),"tensor(0.7440, device='cuda:0')",tensor(79.7700) 16 | 14,"tensor(0.9291, device='cuda:0')",0.0,tensor(70.5768),"tensor(0.6676, device='cuda:0')",tensor(82.1900) 17 | 15,"tensor(0.9004, device='cuda:0')",0.0,tensor(71.5198),"tensor(0.6743, device='cuda:0')",tensor(82.0400) 18 | 16,"tensor(0.9146, device='cuda:0')",0.0,tensor(71.4545),"tensor(0.6879, device='cuda:0')",tensor(81.6000) 19 | 17,"tensor(0.9019, device='cuda:0')",0.0,tensor(71.4323),"tensor(0.7090, device='cuda:0')",tensor(81.7600) 20 | 18,"tensor(0.8996, device='cuda:0')",0.0,tensor(71.2211),"tensor(0.6541, device='cuda:0')",tensor(82.9500) 21 | 
19,"tensor(0.8976, device='cuda:0')",0.0,tensor(71.3187),"tensor(0.7200, device='cuda:0')",tensor(82.4200) 22 | 20,"tensor(0.8834, device='cuda:0')",0.0,tensor(72.5813),"tensor(0.6829, device='cuda:0')",tensor(82.7700) 23 | 21,"tensor(0.8742, device='cuda:0')",0.0,tensor(72.3386),"tensor(0.6557, device='cuda:0')",tensor(84.1600) 24 | 22,"tensor(0.8692, device='cuda:0')",0.0,tensor(73.2286),"tensor(0.6941, device='cuda:0')",tensor(82.3000) 25 | 23,"tensor(0.8712, device='cuda:0')",0.0,tensor(72.0759),"tensor(0.7901, device='cuda:0')",tensor(80.2200) 26 | 24,"tensor(0.8599, device='cuda:0')",0.0,tensor(73.2842),"tensor(0.7298, device='cuda:0')",tensor(81.8300) 27 | 25,"tensor(0.8663, device='cuda:0')",0.0,tensor(72.8395),"tensor(0.7081, device='cuda:0')",tensor(81.4800) 28 | 26,"tensor(0.8650, device='cuda:0')",0.0,tensor(72.3468),"tensor(0.7650, device='cuda:0')",tensor(80.4900) 29 | 27,"tensor(0.8632, device='cuda:0')",0.0,tensor(72.6353),"tensor(0.6907, device='cuda:0')",tensor(82.5700) 30 | 28,"tensor(0.8460, device='cuda:0')",0.0,tensor(73.5868),"tensor(0.6731, device='cuda:0')",tensor(83.1100) 31 | 29,"tensor(0.8814, device='cuda:0')",0.0,tensor(72.5378),"tensor(0.7574, device='cuda:0')",tensor(82.3700) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_batchboost_4.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.6433, device='cuda:0')",0.0,tensor(43.3735),"tensor(1.2930, device='cuda:0')",tensor(64.3100) 3 | 1,"tensor(1.3486, device='cuda:0')",0.0,tensor(54.5961),"tensor(0.8134, device='cuda:0')",tensor(74.3800) 4 | 2,"tensor(1.2502, device='cuda:0')",0.0,tensor(57.8238),"tensor(0.8502, device='cuda:0')",tensor(73.4500) 5 | 3,"tensor(1.2160, device='cuda:0')",0.0,tensor(59.2377),"tensor(0.7695, device='cuda:0')",tensor(75.6300) 6 | 4,"tensor(1.1928, 
device='cuda:0')",0.0,tensor(59.9123),"tensor(0.6782, device='cuda:0')",tensor(78.4400) 7 | 5,"tensor(1.1497, device='cuda:0')",0.0,tensor(61.3737),"tensor(0.7590, device='cuda:0')",tensor(76.6500) 8 | 6,"tensor(1.1221, device='cuda:0')",0.0,tensor(62.2608),"tensor(0.5987, device='cuda:0')",tensor(80.9100) 9 | 7,"tensor(1.0959, device='cuda:0')",0.0,tensor(63.0191),"tensor(0.5961, device='cuda:0')",tensor(82.1600) 10 | 8,"tensor(1.0877, device='cuda:0')",0.0,tensor(62.8546),"tensor(0.6956, device='cuda:0')",tensor(81.2600) 11 | 9,"tensor(1.0709, device='cuda:0')",0.0,tensor(63.7188),"tensor(0.6949, device='cuda:0')",tensor(79.6000) 12 | 10,"tensor(1.0627, device='cuda:0')",0.0,tensor(64.1292),"tensor(0.6470, device='cuda:0')",tensor(81.0400) 13 | 11,"tensor(1.0550, device='cuda:0')",0.0,tensor(64.2661),"tensor(0.5805, device='cuda:0')",tensor(82.6400) 14 | 12,"tensor(1.0621, device='cuda:0')",0.0,tensor(63.8284),"tensor(0.6214, device='cuda:0')",tensor(82.5600) 15 | 13,"tensor(1.0382, device='cuda:0')",0.0,tensor(64.9477),"tensor(0.6153, device='cuda:0')",tensor(82.1300) 16 | 14,"tensor(1.0352, device='cuda:0')",0.0,tensor(64.9522),"tensor(0.7908, device='cuda:0')",tensor(81.0800) 17 | 15,"tensor(1.0338, device='cuda:0')",0.0,tensor(64.9852),"tensor(0.5995, device='cuda:0')",tensor(83.2200) 18 | 16,"tensor(1.0070, device='cuda:0')",0.0,tensor(65.8552),"tensor(0.7508, device='cuda:0')",tensor(83.) 
19 | 17,"tensor(1.0091, device='cuda:0')",0.0,tensor(65.8721),"tensor(0.5977, device='cuda:0')",tensor(83.7400) 20 | 18,"tensor(0.9970, device='cuda:0')",0.0,tensor(66.2594),"tensor(0.6023, device='cuda:0')",tensor(84.2600) 21 | 19,"tensor(1.0157, device='cuda:0')",0.0,tensor(65.6327),"tensor(0.6868, device='cuda:0')",tensor(83.2400) 22 | 20,"tensor(1.0010, device='cuda:0')",0.0,tensor(65.8058),"tensor(0.5992, device='cuda:0')",tensor(84.3700) 23 | 21,"tensor(0.9925, device='cuda:0')",0.0,tensor(66.2249),"tensor(0.5742, device='cuda:0')",tensor(84.0400) 24 | 22,"tensor(0.9776, device='cuda:0')",0.0,tensor(66.9663),"tensor(0.5625, device='cuda:0')",tensor(85.3400) 25 | 23,"tensor(0.9731, device='cuda:0')",0.0,tensor(66.9961),"tensor(0.6398, device='cuda:0')",tensor(82.9700) 26 | 24,"tensor(0.9685, device='cuda:0')",0.0,tensor(67.0790),"tensor(0.6912, device='cuda:0')",tensor(83.5300) 27 | 25,"tensor(0.9792, device='cuda:0')",0.0,tensor(66.3659),"tensor(0.5675, device='cuda:0')",tensor(84.5900) 28 | 26,"tensor(0.9639, device='cuda:0')",0.0,tensor(66.8584),"tensor(0.6594, device='cuda:0')",tensor(82.6600) 29 | 27,"tensor(0.9792, device='cuda:0')",0.0,tensor(66.7141),"tensor(0.6313, device='cuda:0')",tensor(84.2700) 30 | 28,"tensor(0.9402, device='cuda:0')",0.0,tensor(68.3393),"tensor(0.7023, device='cuda:0')",tensor(84.2200) 31 | 29,"tensor(0.9429, device='cuda:0')",0.0,tensor(68.2936),"tensor(0.5501, device='cuda:0')",tensor(83.8200) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_mixup_1.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.8401, device='cuda:0')",0.0,tensor(38.9253),"tensor(1.2284, device='cuda:0')",tensor(59.7100) 3 | 1,"tensor(1.5538, device='cuda:0')",0.0,tensor(51.7325),"tensor(0.9910, device='cuda:0')",tensor(67.6500) 4 | 2,"tensor(1.4499, 
device='cuda:0')",0.0,tensor(55.7960),"tensor(0.9132, device='cuda:0')",tensor(71.8100) 5 | 3,"tensor(1.3914, device='cuda:0')",0.0,tensor(57.5044),"tensor(0.9560, device='cuda:0')",tensor(69.8500) 6 | 4,"tensor(1.3419, device='cuda:0')",0.0,tensor(59.3992),"tensor(0.8385, device='cuda:0')",tensor(75.3200) 7 | 5,"tensor(1.3383, device='cuda:0')",0.0,tensor(59.3667),"tensor(0.7956, device='cuda:0')",tensor(77.3500) 8 | 6,"tensor(1.2864, device='cuda:0')",0.0,tensor(61.4852),"tensor(0.7863, device='cuda:0')",tensor(77.5400) 9 | 7,"tensor(1.2726, device='cuda:0')",0.0,tensor(61.9529),"tensor(0.6963, device='cuda:0')",tensor(80.4200) 10 | 8,"tensor(1.2563, device='cuda:0')",0.0,tensor(62.3388),"tensor(0.7173, device='cuda:0')",tensor(80.2200) 11 | 9,"tensor(1.2053, device='cuda:0')",0.0,tensor(63.9532),"tensor(0.6878, device='cuda:0')",tensor(80.1900) 12 | 10,"tensor(1.1979, device='cuda:0')",0.0,tensor(64.5873),"tensor(0.7827, device='cuda:0')",tensor(78.1600) 13 | 11,"tensor(1.1863, device='cuda:0')",0.0,tensor(64.5691),"tensor(0.6630, device='cuda:0')",tensor(81.8200) 14 | 12,"tensor(1.1745, device='cuda:0')",0.0,tensor(65.2032),"tensor(0.7188, device='cuda:0')",tensor(80.1400) 15 | 13,"tensor(1.1348, device='cuda:0')",0.0,tensor(66.7967),"tensor(0.6287, device='cuda:0')",tensor(82.4100) 16 | 14,"tensor(1.1840, device='cuda:0')",0.0,tensor(64.6255),"tensor(0.6893, device='cuda:0')",tensor(81.7900) 17 | 15,"tensor(1.1713, device='cuda:0')",0.0,tensor(65.3257),"tensor(0.6450, device='cuda:0')",tensor(82.2600) 18 | 16,"tensor(1.1160, device='cuda:0')",0.0,tensor(67.3888),"tensor(0.6603, device='cuda:0')",tensor(81.3100) 19 | 17,"tensor(1.1688, device='cuda:0')",0.0,tensor(65.2723),"tensor(0.6629, device='cuda:0')",tensor(82.1300) 20 | 18,"tensor(1.1515, device='cuda:0')",0.0,tensor(65.9117),"tensor(0.6897, device='cuda:0')",tensor(81.3000) 21 | 19,"tensor(1.1372, device='cuda:0')",0.0,tensor(66.8916),"tensor(0.7103, device='cuda:0')",tensor(81.3300) 22 | 
20,"tensor(1.1010, device='cuda:0')",0.0,tensor(67.7973),"tensor(0.7031, device='cuda:0')",tensor(80.9700) 23 | 21,"tensor(1.1538, device='cuda:0')",0.0,tensor(65.6317),"tensor(0.7909, device='cuda:0')",tensor(76.7500) 24 | 22,"tensor(1.1011, device='cuda:0')",0.0,tensor(67.7370),"tensor(0.6757, device='cuda:0')",tensor(81.4900) 25 | 23,"tensor(1.1160, device='cuda:0')",0.0,tensor(67.2205),"tensor(0.6688, device='cuda:0')",tensor(82.1600) 26 | 24,"tensor(1.1190, device='cuda:0')",0.0,tensor(67.3427),"tensor(0.6924, device='cuda:0')",tensor(81.7000) 27 | 25,"tensor(1.1283, device='cuda:0')",0.0,tensor(66.5068),"tensor(0.6460, device='cuda:0')",tensor(82.2600) 28 | 26,"tensor(1.1064, device='cuda:0')",0.0,tensor(66.7692),"tensor(0.6949, device='cuda:0')",tensor(80.3300) 29 | 27,"tensor(1.0782, device='cuda:0')",0.0,tensor(68.4394),"tensor(0.6394, device='cuda:0')",tensor(83.2100) 30 | 28,"tensor(1.0723, device='cuda:0')",0.0,tensor(69.0931),"tensor(0.6562, device='cuda:0')",tensor(82.2100) 31 | 29,"tensor(1.0710, device='cuda:0')",0.0,tensor(68.6733),"tensor(0.6450, device='cuda:0')",tensor(82.4100) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_mixup_2.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.8520, device='cuda:0')",0.0,tensor(38.1330),"tensor(1.1646, device='cuda:0')",tensor(62.2000) 3 | 1,"tensor(1.5934, device='cuda:0')",0.0,tensor(50.0632),"tensor(1.2364, device='cuda:0')",tensor(61.6900) 4 | 2,"tensor(1.5097, device='cuda:0')",0.0,tensor(53.3947),"tensor(0.8179, device='cuda:0')",tensor(75.3000) 5 | 3,"tensor(1.4606, device='cuda:0')",0.0,tensor(55.3491),"tensor(0.8479, device='cuda:0')",tensor(74.7500) 6 | 4,"tensor(1.4019, device='cuda:0')",0.0,tensor(56.9812),"tensor(0.7395, device='cuda:0')",tensor(77.9400) 7 | 5,"tensor(1.3898, 
device='cuda:0')",0.0,tensor(57.3724),"tensor(0.8446, device='cuda:0')",tensor(74.3800) 8 | 6,"tensor(1.3492, device='cuda:0')",0.0,tensor(58.9751),"tensor(0.8912, device='cuda:0')",tensor(74.4000) 9 | 7,"tensor(1.3546, device='cuda:0')",0.0,tensor(58.6492),"tensor(0.7657, device='cuda:0')",tensor(77.7300) 10 | 8,"tensor(1.3673, device='cuda:0')",0.0,tensor(58.3711),"tensor(0.7878, device='cuda:0')",tensor(76.7400) 11 | 9,"tensor(1.3872, device='cuda:0')",0.0,tensor(57.2477),"tensor(0.8376, device='cuda:0')",tensor(74.4900) 12 | 10,"tensor(1.3261, device='cuda:0')",0.0,tensor(59.6321),"tensor(0.6846, device='cuda:0')",tensor(79.8800) 13 | 11,"tensor(1.3214, device='cuda:0')",0.0,tensor(59.6932),"tensor(0.6900, device='cuda:0')",tensor(79.9700) 14 | 12,"tensor(1.3452, device='cuda:0')",0.0,tensor(58.7390),"tensor(0.7950, device='cuda:0')",tensor(76.3100) 15 | 13,"tensor(1.2936, device='cuda:0')",0.0,tensor(60.6500),"tensor(0.7583, device='cuda:0')",tensor(78.3400) 16 | 14,"tensor(1.3206, device='cuda:0')",0.0,tensor(59.7003),"tensor(0.7124, device='cuda:0')",tensor(80.5700) 17 | 15,"tensor(1.3420, device='cuda:0')",0.0,tensor(58.6945),"tensor(0.7584, device='cuda:0')",tensor(77.7100) 18 | 16,"tensor(1.3114, device='cuda:0')",0.0,tensor(59.9868),"tensor(0.8013, device='cuda:0')",tensor(75.6700) 19 | 17,"tensor(1.2843, device='cuda:0')",0.0,tensor(60.8332),"tensor(0.6785, device='cuda:0')",tensor(81.8400) 20 | 18,"tensor(1.3101, device='cuda:0')",0.0,tensor(59.7496),"tensor(0.7049, device='cuda:0')",tensor(81.2700) 21 | 19,"tensor(1.3010, device='cuda:0')",0.0,tensor(60.2414),"tensor(0.7181, device='cuda:0')",tensor(80.5100) 22 | 20,"tensor(1.2733, device='cuda:0')",0.0,tensor(61.4875),"tensor(0.6511, device='cuda:0')",tensor(82.2900) 23 | 21,"tensor(1.2929, device='cuda:0')",0.0,tensor(60.6969),"tensor(0.7355, device='cuda:0')",tensor(78.9600) 24 | 22,"tensor(1.2743, device='cuda:0')",0.0,tensor(61.4696),"tensor(0.7093, device='cuda:0')",tensor(81.5200) 25 | 
23,"tensor(1.2770, device='cuda:0')",0.0,tensor(61.1682),"tensor(0.7249, device='cuda:0')",tensor(79.7100) 26 | 24,"tensor(1.2967, device='cuda:0')",0.0,tensor(60.5049),"tensor(0.7013, device='cuda:0')",tensor(81.3100) 27 | 25,"tensor(1.2891, device='cuda:0')",0.0,tensor(60.6730),"tensor(0.7856, device='cuda:0')",tensor(78.5100) 28 | 26,"tensor(1.2615, device='cuda:0')",0.0,tensor(61.9244),"tensor(0.7153, device='cuda:0')",tensor(80.0600) 29 | 27,"tensor(1.2511, device='cuda:0')",0.0,tensor(61.9265),"tensor(0.6894, device='cuda:0')",tensor(81.4500) 30 | 28,"tensor(1.2783, device='cuda:0')",0.0,tensor(61.2908),"tensor(0.7402, device='cuda:0')",tensor(79.2100) 31 | 29,"tensor(1.2632, device='cuda:0')",0.0,tensor(61.6038),"tensor(0.7070, device='cuda:0')",tensor(80.5200) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_mixup_3.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.5852, device='cuda:0')",0.0,tensor(49.3322),"tensor(0.9136, device='cuda:0')",tensor(72.1700) 3 | 1,"tensor(1.2562, device='cuda:0')",0.0,tensor(63.1246),"tensor(0.7735, device='cuda:0')",tensor(75.3300) 4 | 2,"tensor(1.1676, device='cuda:0')",0.0,tensor(66.3281),"tensor(0.7485, device='cuda:0')",tensor(76.2400) 5 | 3,"tensor(1.1023, device='cuda:0')",0.0,tensor(68.3585),"tensor(0.8598, device='cuda:0')",tensor(73.6100) 6 | 4,"tensor(1.0596, device='cuda:0')",0.0,tensor(69.9488),"tensor(0.6650, device='cuda:0')",tensor(79.8500) 7 | 5,"tensor(1.0365, device='cuda:0')",0.0,tensor(70.5446),"tensor(0.7256, device='cuda:0')",tensor(78.5300) 8 | 6,"tensor(0.9986, device='cuda:0')",0.0,tensor(71.8226),"tensor(0.6412, device='cuda:0')",tensor(80.4700) 9 | 7,"tensor(0.9505, device='cuda:0')",0.0,tensor(73.5068),"tensor(0.6535, device='cuda:0')",tensor(80.2000) 10 | 8,"tensor(0.9408, 
device='cuda:0')",0.0,tensor(73.3418),"tensor(0.6131, device='cuda:0')",tensor(81.1600) 11 | 9,"tensor(0.9354, device='cuda:0')",0.0,tensor(73.9009),"tensor(0.5872, device='cuda:0')",tensor(82.3400) 12 | 10,"tensor(0.9187, device='cuda:0')",0.0,tensor(74.3704),"tensor(0.6324, device='cuda:0')",tensor(80.2100) 13 | 11,"tensor(0.9036, device='cuda:0')",0.0,tensor(74.8706),"tensor(0.6270, device='cuda:0')",tensor(80.6800) 14 | 12,"tensor(0.9332, device='cuda:0')",0.0,tensor(74.1035),"tensor(0.6375, device='cuda:0')",tensor(82.4800) 15 | 13,"tensor(0.8700, device='cuda:0')",0.0,tensor(75.8445),"tensor(0.6200, device='cuda:0')",tensor(81.8900) 16 | 14,"tensor(0.8629, device='cuda:0')",0.0,tensor(76.1854),"tensor(0.6110, device='cuda:0')",tensor(82.0400) 17 | 15,"tensor(0.8222, device='cuda:0')",0.0,tensor(77.5554),"tensor(0.5758, device='cuda:0')",tensor(82.9600) 18 | 16,"tensor(0.8364, device='cuda:0')",0.0,tensor(77.2764),"tensor(0.6242, device='cuda:0')",tensor(81.4800) 19 | 17,"tensor(0.8431, device='cuda:0')",0.0,tensor(77.0262),"tensor(0.6181, device='cuda:0')",tensor(81.6900) 20 | 18,"tensor(0.8176, device='cuda:0')",0.0,tensor(77.7661),"tensor(0.6617, device='cuda:0')",tensor(79.7000) 21 | 19,"tensor(0.7869, device='cuda:0')",0.0,tensor(78.5704),"tensor(0.5973, device='cuda:0')",tensor(83.1200) 22 | 20,"tensor(0.8909, device='cuda:0')",0.0,tensor(75.0715),"tensor(0.6385, device='cuda:0')",tensor(81.9800) 23 | 21,"tensor(0.8334, device='cuda:0')",0.0,tensor(77.2383),"tensor(0.5729, device='cuda:0')",tensor(83.1800) 24 | 22,"tensor(0.8060, device='cuda:0')",0.0,tensor(78.0292),"tensor(0.5911, device='cuda:0')",tensor(83.3900) 25 | 23,"tensor(0.8087, device='cuda:0')",0.0,tensor(78.1002),"tensor(0.6059, device='cuda:0')",tensor(82.7800) 26 | 24,"tensor(0.7846, device='cuda:0')",0.0,tensor(78.7637),"tensor(0.5727, device='cuda:0')",tensor(83.8000) 27 | 25,"tensor(0.7515, device='cuda:0')",0.0,tensor(79.7625),"tensor(0.5971, device='cuda:0')",tensor(83.2400) 28 | 
26,"tensor(0.7517, device='cuda:0')",0.0,tensor(79.7170),"tensor(0.5974, device='cuda:0')",tensor(82.0700) 29 | 27,"tensor(0.7605, device='cuda:0')",0.0,tensor(79.5556),"tensor(0.5745, device='cuda:0')",tensor(83.7200) 30 | 28,"tensor(0.7874, device='cuda:0')",0.0,tensor(78.7155),"tensor(0.6164, device='cuda:0')",tensor(83.2200) 31 | 29,"tensor(0.7087, device='cuda:0')",0.0,tensor(81.2600),"tensor(0.5618, device='cuda:0')",tensor(83.8500) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/log_EfficientNet_mixup_4.csv: -------------------------------------------------------------------------------- 1 | epoch,train loss,reg loss,train acc,test loss,test acc 2 | 0,"tensor(1.7396, device='cuda:0')",0.0,tensor(42.0760),"tensor(1.3003, device='cuda:0')",tensor(59.5700) 3 | 1,"tensor(1.4385, device='cuda:0')",0.0,tensor(56.0643),"tensor(1.0127, device='cuda:0')",tensor(67.5900) 4 | 2,"tensor(1.3099, device='cuda:0')",0.0,tensor(60.8499),"tensor(0.8299, device='cuda:0')",tensor(72.1900) 5 | 3,"tensor(1.2170, device='cuda:0')",0.0,tensor(63.8931),"tensor(0.7399, device='cuda:0')",tensor(77.3000) 6 | 4,"tensor(1.1810, device='cuda:0')",0.0,tensor(64.8712),"tensor(0.7139, device='cuda:0')",tensor(78.7800) 7 | 5,"tensor(1.1764, device='cuda:0')",0.0,tensor(65.1266),"tensor(0.6911, device='cuda:0')",tensor(79.9600) 8 | 6,"tensor(1.1081, device='cuda:0')",0.0,tensor(67.2872),"tensor(0.7118, device='cuda:0')",tensor(77.5100) 9 | 7,"tensor(1.1507, device='cuda:0')",0.0,tensor(65.9127),"tensor(0.7807, device='cuda:0')",tensor(75.6400) 10 | 8,"tensor(1.1161, device='cuda:0')",0.0,tensor(66.8880),"tensor(0.6945, device='cuda:0')",tensor(78.5200) 11 | 9,"tensor(1.0964, device='cuda:0')",0.0,tensor(67.6024),"tensor(0.6623, device='cuda:0')",tensor(80.1500) 12 | 10,"tensor(1.1009, device='cuda:0')",0.0,tensor(67.1994),"tensor(0.6494, device='cuda:0')",tensor(80.6700) 13 | 11,"tensor(1.1054, 
device='cuda:0')",0.0,tensor(67.0964),"tensor(0.7119, device='cuda:0')",tensor(78.2400) 14 | 12,"tensor(1.0592, device='cuda:0')",0.0,tensor(68.9080),"tensor(0.6337, device='cuda:0')",tensor(80.5600) 15 | 13,"tensor(1.0479, device='cuda:0')",0.0,tensor(69.0810),"tensor(0.6223, device='cuda:0')",tensor(81.3000) 16 | 14,"tensor(1.0443, device='cuda:0')",0.0,tensor(68.9712),"tensor(0.6930, device='cuda:0')",tensor(78.9500) 17 | 15,"tensor(1.0767, device='cuda:0')",0.0,tensor(68.1365),"tensor(0.6540, device='cuda:0')",tensor(80.5200) 18 | 16,"tensor(1.0659, device='cuda:0')",0.0,tensor(68.1313),"tensor(0.6185, device='cuda:0')",tensor(81.2800) 19 | 17,"tensor(1.0341, device='cuda:0')",0.0,tensor(69.6075),"tensor(0.6211, device='cuda:0')",tensor(80.8700) 20 | 18,"tensor(1.0334, device='cuda:0')",0.0,tensor(69.8162),"tensor(0.5778, device='cuda:0')",tensor(82.3300) 21 | 19,"tensor(1.0137, device='cuda:0')",0.0,tensor(69.9117),"tensor(0.6499, device='cuda:0')",tensor(81.0500) 22 | 20,"tensor(1.0188, device='cuda:0')",0.0,tensor(69.6947),"tensor(0.5940, device='cuda:0')",tensor(82.2600) 23 | 21,"tensor(0.9738, device='cuda:0')",0.0,tensor(71.7237),"tensor(0.6454, device='cuda:0')",tensor(81.3700) 24 | 22,"tensor(1.0088, device='cuda:0')",0.0,tensor(70.4746),"tensor(0.6181, device='cuda:0')",tensor(81.5900) 25 | 23,"tensor(1.0384, device='cuda:0')",0.0,tensor(69.2434),"tensor(0.6176, device='cuda:0')",tensor(82.4300) 26 | 24,"tensor(0.9576, device='cuda:0')",0.0,tensor(71.7219),"tensor(0.5998, device='cuda:0')",tensor(82.6100) 27 | 25,"tensor(0.9787, device='cuda:0')",0.0,tensor(71.2283),"tensor(0.5762, device='cuda:0')",tensor(82.7300) 28 | 26,"tensor(0.9771, device='cuda:0')",0.0,tensor(71.1901),"tensor(0.5692, device='cuda:0')",tensor(83.8800) 29 | 27,"tensor(0.9588, device='cuda:0')",0.0,tensor(71.6358),"tensor(0.6037, device='cuda:0')",tensor(82.) 
30 | 28,"tensor(0.9769, device='cuda:0')",0.0,tensor(71.1616),"tensor(0.6231, device='cuda:0')",tensor(81.2100) 31 | 29,"tensor(0.9710, device='cuda:0')",0.0,tensor(71.7734),"tensor(0.5814, device='cuda:0')",tensor(83.0300) 32 | -------------------------------------------------------------------------------- /results/decay=1e-5/loss-test-with-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/loss-test-with-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-5/loss-test-without-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/loss-test-without-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-5/test-accuracy-with-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/test-accuracy-with-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-5/test-accuracy-without-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/test-accuracy-without-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-5/train-accuracy-with-augment-.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/train-accuracy-with-augment-.pdf -------------------------------------------------------------------------------- /results/decay=1e-5/train-accuracy-without-augment-.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maciejczyzewski/batchboost/93f01d9d9fbe86ca9ef0bf1540dc55e2dfd1ef2a/results/decay=1e-5/train-accuracy-without-augment-.pdf -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 -u 2 | # Copyright (c) 2017-present, Facebook, Inc. (mixup) 3 | # Copyright (c) 2020-present, Maciej A. Czyzewski (batchboost) 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the license found in the LICENSE file in 7 | # the root directory of this source tree. 8 | from __future__ import print_function 9 | 10 | import argparse 11 | import csv 12 | import os 13 | 14 | import numpy as np 15 | import torch 16 | from torch.autograd import Variable 17 | import torch.backends.cudnn as cudnn 18 | import torch.nn as nn 19 | import torch.optim as optim 20 | import torchvision.transforms as transforms 21 | import torchvision.datasets as datasets 22 | 23 | """ 24 | !pip install efficientnet_pytorch 25 | from google.colab import drive 26 | drive.mount('/content/gdrive', force_remount=True) 27 | !cp gdrive/My\ Drive//utils.py . 28 | !cp gdrive/My\ Drive//train.py . 
29 | !nvcc --version 30 | !pip3 install --upgrade --force-reinstall torch torchvision 31 | import torch 32 | print('Torch', torch.__version__, 'CUDA', torch.version.cuda) 33 | print('Device:', torch.device('cuda:0'), torch.cuda.is_available()) 34 | # --- START --- 35 | !python3 train.py --decay=1e-5 --no-augment --seed=1 \ 36 | --name=batchboost --model=efficientnet-b0 --epoch=30 37 | """ 38 | 39 | # FIXME: rewrite it clean 40 | import debug 41 | from utils import progress_bar 42 | 43 | try: 44 | import models 45 | 46 | COLAB = False 47 | except: 48 | # FIXME: detect environment? 49 | print("=== GOOGLE COLAB ENVIRONMENT ===") 50 | COLAB = True 51 | 52 | parser = argparse.ArgumentParser(description="PyTorch CIFAR10 Training") 53 | parser.add_argument("--lr", default=0.1, type=float, help="learning rate") 54 | parser.add_argument( 55 | "--resume", "-r", action="store_true", help="resume from checkpoint" 56 | ) 57 | parser.add_argument( 58 | "--model", 59 | default="ResNet18", 60 | type=str, 61 | help="model type (default: ResNet18)", 62 | ) 63 | parser.add_argument("--name", default="0", type=str, help="name of run") 64 | parser.add_argument("--seed", default=0, type=int, help="random seed") 65 | parser.add_argument("--batch-size", default=128, type=int, help="batch size") 66 | parser.add_argument( 67 | "--epoch", default=200, type=int, help="total epochs to run" 68 | ) 69 | parser.add_argument( 70 | "--no-augment", 71 | dest="augment", 72 | action="store_false", 73 | help="use standard augmentation (default: True)", 74 | ) 75 | parser.add_argument( 76 | "--optimizer", 77 | type=str, 78 | default="lamb", 79 | choices=["lamb", "adam"], 80 | help="which optimizer to use", 81 | ) 82 | parser.add_argument("--decay", default=1e-5, type=float, help="weight decay") 83 | parser.add_argument( 84 | "--alpha", 85 | default=1.0, 86 | type=float, 87 | help="mixup interpolation coefficient (default: 1)", 88 | ) 89 | parser.add_argument( 90 | "--debug", 91 | "-d", 92 | 
action="store_true", 93 | help="debug on FashionMNIST and ResNet100k network", 94 | ) 95 | args = parser.parse_args() 96 | 97 | use_cuda = torch.cuda.is_available() 98 | 99 | best_acc = 0 # best test accuracy 100 | start_epoch = 0 # start from epoch 0 or last checkpoint epoch 101 | 102 | if args.seed != 0: 103 | torch.manual_seed(args.seed) 104 | 105 | # Data 106 | print("==> Preparing data..") 107 | num_classes = 10 108 | 109 | if args.debug: 110 | trainloader, testloader = debug.FashionMNIST_loaders(args) 111 | else: 112 | if args.augment: 113 | transform_train = transforms.Compose( 114 | [ 115 | transforms.RandomCrop(32, padding=4), 116 | transforms.RandomHorizontalFlip(), 117 | transforms.ToTensor(), 118 | transforms.Normalize( 119 | (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) 120 | ), 121 | ] 122 | ) 123 | else: 124 | transform_train = transforms.Compose( 125 | [ 126 | transforms.ToTensor(), 127 | transforms.Normalize( 128 | (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) 129 | ), 130 | ] 131 | ) 132 | 133 | transform_test = transforms.Compose( 134 | [ 135 | transforms.ToTensor(), 136 | transforms.Normalize( 137 | (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) 138 | ), 139 | ] 140 | ) 141 | 142 | trainset = datasets.CIFAR10( 143 | root="./data", train=True, download=True, transform=transform_train 144 | ) 145 | trainloader = torch.utils.data.DataLoader( 146 | trainset, batch_size=args.batch_size, shuffle=True, num_workers=8 147 | ) 148 | 149 | testset = datasets.CIFAR10( 150 | root="./data", train=False, download=True, transform=transform_test 151 | ) 152 | testloader = torch.utils.data.DataLoader( 153 | testset, batch_size=100, shuffle=False, num_workers=8 154 | ) 155 | 156 | # Model 157 | if args.resume: 158 | # Load checkpoint. 159 | print("==> Resuming from checkpoint..") 160 | assert os.path.isdir("checkpoint"), "Error: no checkpoint directory found!" 
161 | checkpoint = torch.load( 162 | "./checkpoint/ckpt.t7" + args.name + "_" + str(args.seed) 163 | ) 164 | net = checkpoint["net"] 165 | best_acc = checkpoint["acc"] 166 | start_epoch = checkpoint["epoch"] + 1 167 | rng_state = checkpoint["rng_state"] 168 | torch.set_rng_state(rng_state) 169 | else: 170 | print("==> Building model..") 171 | if args.model.startswith("efficientnet"): 172 | from efficientnet_pytorch import EfficientNet 173 | 174 | net = EfficientNet.from_pretrained(args.model, num_classes=num_classes) 175 | elif args.debug: 176 | net = debug.ResNet100k() 177 | else: 178 | net = models.__dict__[args.model]() 179 | 180 | if not os.path.isdir("results"): 181 | os.mkdir("results") 182 | logname = ( 183 | "results/log_" 184 | + net.__class__.__name__ 185 | + "_" 186 | + args.name 187 | + "_" 188 | + str(args.seed) 189 | + ".csv" 190 | ) 191 | 192 | if use_cuda: 193 | net.cuda() 194 | net = torch.nn.DataParallel(net) 195 | print("device_count =", torch.cuda.device_count()) 196 | cudnn.benchmark = True 197 | print("Using CUDA...") 198 | 199 | criterion = nn.CrossEntropyLoss() 200 | 201 | optimizer = optim.SGD( 202 | net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay 203 | ) 204 | 205 | ### MIXUP ###################################################################### 206 | 207 | 208 | def mixup_data(x, y, index_left, index_right, alpha=1.0, use_cuda=True): 209 | """Returns mixed inputs, pairs of targets, and lambda""" 210 | if alpha > 0: 211 | lam = np.random.beta(alpha, alpha) 212 | else: 213 | lam = 1 214 | 215 | mixed_x = lam * x[index_left, :] + (1 - lam) * x[index_right, :] 216 | y_a, y_b = y[index_left], y[index_right] 217 | return mixed_x, y_a, y_b, lam 218 | 219 | 220 | def mixup_criterion(criterion, pred, y_a, y_b, lam): 221 | return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) 222 | 223 | 224 | def train_mixup(epoch): 225 | print("MIXUP") 226 | print("\nEpoch: %d" % epoch) 227 | net.train() 228 | train_loss = 0 229 
| reg_loss = 0 230 | correct = 0 231 | total = 0 232 | for batch_idx, (inputs, targets) in enumerate(trainloader): 233 | if use_cuda: 234 | inputs, targets = inputs.cuda(), targets.cuda() 235 | 236 | batch_size = inputs.shape[0] 237 | if use_cuda: 238 | index = torch.randperm(batch_size).cuda() 239 | else: 240 | index = torch.randperm(batch_size) 241 | 242 | inputs, targets_a, targets_b, lam = mixup_data( 243 | inputs, targets, range(batch_size), index, args.alpha, use_cuda 244 | ) 245 | inputs, targets_a, targets_b = map( 246 | Variable, (inputs, targets_a, targets_b) 247 | ) 248 | outputs = net(inputs) 249 | loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam) 250 | train_loss += loss.data 251 | _, predicted = torch.max(outputs.data, 1) 252 | total += inputs.size(0) 253 | correct += ( 254 | lam * predicted.eq(targets_a.data).cpu().sum().float() 255 | + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float() 256 | ) 257 | 258 | optimizer.zero_grad() 259 | loss.backward() 260 | torch.nn.utils.clip_grad_norm_(net.parameters(), 1) 261 | optimizer.step() 262 | 263 | progress_bar( 264 | batch_idx, 265 | len(trainloader), 266 | "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)" 267 | % ( 268 | train_loss / (batch_idx + 1), 269 | reg_loss / (batch_idx + 1), 270 | 100.0 * correct / total, 271 | correct, 272 | total, 273 | ), 274 | ) 275 | return ( 276 | train_loss / batch_idx, 277 | reg_loss / batch_idx, 278 | 100.0 * correct / total, 279 | ) 280 | 281 | 282 | ### BATCHBOOST ################################################################# 283 | 284 | from batchboost import BatchBoost 285 | 286 | 287 | def fn_error(outputs, targets): 288 | logsoftmax = nn.LogSoftmax(dim=1) 289 | return torch.sum(-outputs * logsoftmax(targets), dim=1) 290 | 291 | 292 | def fn_linearize(x, num_classes=10): 293 | _x = torch.zeros(x.size(0), num_classes) 294 | _x[range(x.size(0)), x] = 1 295 | return _x 296 | 297 | 298 | def fn_unlinearize(x): 299 | _, _x = torch.max(x, 1) 
300 | return _x 301 | 302 | 303 | BatchBoost.fn_error = fn_error 304 | BatchBoost.fn_linearize = fn_linearize 305 | BatchBoost.fn_unlinearize = fn_unlinearize 306 | 307 | # FIXME: add arguments to command-line 308 | BB = BatchBoost( 309 | alpha=args.alpha, 310 | window_normal=0, 311 | window_boost=10, 312 | factor=1 / 2, 313 | use_cuda=use_cuda, 314 | ) 315 | 316 | 317 | def train_batchboost(epoch): 318 | global inputs, targets_a, targets_b, lam 319 | print("BATCHBOOST") 320 | print("\nEpoch: %d" % epoch) 321 | net.train() 322 | train_loss = 0 323 | reg_loss = 0 324 | correct = 0 325 | total = 0 326 | 327 | BB.clear() 328 | for batch_idx, (new_inputs, new_targets) in enumerate(trainloader): 329 | if use_cuda: 330 | new_inputs, new_targets = new_inputs.cuda(), new_targets.cuda() 331 | 332 | # -----> (a) feed with new information 333 | if not BB.feed(new_inputs, new_targets): 334 | continue 335 | 336 | # -----> (b) apply concat: BB.inputs, BB.targets 337 | outputs = net(BB.inputs) 338 | 339 | # -----> (c) calculate: loss (mixup like style \lambda) 340 | loss = BB.criterion(criterion, outputs) 341 | 342 | train_loss += loss.data 343 | _, predicted = torch.max(outputs.data, 1) 344 | total += BB.inputs.size(0) # -----> remember to use concat 345 | 346 | # -----> (d) calculate: accuracy 347 | correct += BB.correct(predicted) 348 | 349 | # -----> (e) pairing & mixing 350 | BB.mixing(criterion, outputs) 351 | 352 | optimizer.zero_grad() 353 | loss.backward() 354 | torch.nn.utils.clip_grad_norm_(net.parameters(), 1) 355 | optimizer.step() 356 | 357 | progress_bar( 358 | batch_idx, 359 | len(trainloader), 360 | "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)" 361 | % ( 362 | train_loss / (batch_idx + 1), 363 | reg_loss / (batch_idx + 1), 364 | 100.0 * correct / total, 365 | correct, 366 | total, 367 | ), 368 | ) 369 | if total == 0: 370 | total = len(batch_size) 371 | return ( 372 | train_loss / batch_idx, 373 | reg_loss / batch_idx, 374 | 100.0 * correct / (total + 0.000001), 
375 | ) 376 | 377 | 378 | ### BASELINE ################################################################### 379 | 380 | 381 | def train_baseline(epoch): 382 | print("BASELINE") 383 | print("\nEpoch: %d" % epoch) 384 | net.train() 385 | train_loss = 0 386 | reg_loss = 0 387 | correct = 0 388 | total = 0 389 | for batch_idx, (inputs, targets) in enumerate(trainloader): 390 | if use_cuda: 391 | inputs, targets = inputs.cuda(), targets.cuda() 392 | 393 | outputs = net(inputs) 394 | loss = criterion(outputs, targets) 395 | train_loss += loss.data 396 | _, predicted = torch.max(outputs.data, 1) 397 | total += inputs.size(0) 398 | correct += predicted.eq(targets.data).cpu().sum().float() 399 | 400 | optimizer.zero_grad() 401 | loss.backward() 402 | torch.nn.utils.clip_grad_norm_(net.parameters(), 1) 403 | optimizer.step() 404 | 405 | progress_bar( 406 | batch_idx, 407 | len(trainloader), 408 | "Loss: %.3f | Reg: %.5f | Acc: %.3f%% (%d/%d)" 409 | % ( 410 | train_loss / (batch_idx + 1), 411 | reg_loss / (batch_idx + 1), 412 | 100.0 * correct / total, 413 | correct, 414 | total, 415 | ), 416 | ) 417 | return ( 418 | train_loss / batch_idx, 419 | reg_loss / batch_idx, 420 | 100.0 * correct / total, 421 | ) 422 | 423 | 424 | def test(epoch): 425 | global best_acc 426 | net.eval() 427 | test_loss = 0 428 | correct = 0 429 | total = 0 430 | with torch.no_grad(): 431 | for batch_idx, (inputs, targets) in enumerate(testloader): 432 | if use_cuda: 433 | inputs, targets = inputs.cuda(), targets.cuda() 434 | inputs, targets = Variable(inputs), Variable(targets) 435 | outputs = net(inputs) 436 | loss = criterion(outputs, targets) 437 | 438 | test_loss += loss.data 439 | _, predicted = torch.max(outputs.data, 1) 440 | total += targets.size(0) 441 | correct += predicted.eq(targets.data).cpu().sum() 442 | 443 | progress_bar( 444 | batch_idx, 445 | len(testloader), 446 | "Loss: %.3f | Acc: %.3f%% (%d/%d)" 447 | % ( 448 | test_loss / (batch_idx + 1), 449 | 100.0 * correct / total, 450 | 
correct, 451 | total, 452 | ), 453 | ) 454 | acc = 100.0 * correct / total 455 | if epoch == start_epoch + args.epoch - 1 or acc > best_acc: 456 | checkpoint(acc, epoch) 457 | if acc > best_acc: 458 | best_acc = acc 459 | return (test_loss / batch_idx, 100.0 * correct / total) 460 | 461 | 462 | def checkpoint(acc, epoch): 463 | # Save checkpoint. 464 | print("Saving..") 465 | state = { 466 | "net": net, 467 | "acc": acc, 468 | "epoch": epoch, 469 | "rng_state": torch.get_rng_state(), 470 | } 471 | if not os.path.isdir("checkpoint"): 472 | os.mkdir("checkpoint") 473 | torch.save(state, "./checkpoint/ckpt.t7" + args.name + "_" + str(args.seed)) 474 | 475 | 476 | def adjust_learning_rate(optimizer, epoch): 477 | """decrease the learning rate at 100 and 150 epoch""" 478 | lr = args.lr 479 | if epoch >= 100: 480 | lr /= 10 481 | if epoch >= 150: 482 | lr /= 10 483 | for param_group in optimizer.param_groups: 484 | param_group["lr"] = lr 485 | 486 | 487 | if not os.path.exists(logname): 488 | with open(logname, "w") as logfile: 489 | logwriter = csv.writer(logfile, delimiter=",") 490 | logwriter.writerow( 491 | [ 492 | "epoch", 493 | "train loss", 494 | "reg loss", 495 | "train acc", 496 | "test loss", 497 | "test acc", 498 | ] 499 | ) 500 | 501 | if args.name == "batchboost": 502 | train_func = train_batchboost 503 | elif args.name == "mixup": 504 | train_func = train_mixup 505 | else: 506 | train_func = train_baseline 507 | 508 | for epoch in range(start_epoch, args.epoch): 509 | train_loss, reg_loss, train_acc = train_func(epoch) 510 | test_loss, test_acc = test(epoch) 511 | adjust_learning_rate(optimizer, epoch) 512 | with open(logname, "a") as logfile: 513 | logwriter = csv.writer(logfile, delimiter=",") 514 | logwriter.writerow( 515 | [epoch, train_loss, reg_loss, train_acc, test_loss, test_acc] 516 | ) 517 | -------------------------------------------------------------------------------- /utils.py: 
"""Some helper functions for PyTorch, including:
    - progress_bar: progress bar mimic xlua.progress.
"""
import math
import os
import shutil
import sys
import time

import torch
import torch.nn as nn

# Terminal width in columns, used to pad and re-position every progress-bar
# frame. get_terminal_size() returns the fallback of 80 columns when the size
# cannot be queried (for example when no terminal is attached), so importing
# this module never fails.
term_width = shutil.get_terminal_size(fallback=(80, 24)).columns

TOTAL_BAR_LENGTH = 86.0  # character cells drawn between "[" and "]"
last_time = time.time()  # time of the previous progress_bar() call
begin_time = last_time  # time at which the current bar was started


def progress_bar(current, total, msg=None):
    """Write one frame of a single-line text progress bar to stdout.

    The frame shows the bar, the time since the previous call ("Step"), the
    time since the bar was started ("Tot"), the optional message and a
    ``current + 1``/``total`` counter. Every frame except the last ends with
    a carriage return so the next one overwrites it; the last ends with a
    newline.

    Args:
        current: Zero-based index of the step being reported. A value of 0
            restarts the total-time clock.
        total: Total number of steps; must be non-zero.
        msg: Optional text appended after the timing information.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    bar_cells = int(TOTAL_BAR_LENGTH)
    cur_len = int(TOTAL_BAR_LENGTH * current / total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
    frame = [" [", "=" * cur_len, ">", "." * rest_len, "]"]

    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time

    status = " Step: %s" % format_time(step_time)
    status += " | Tot: %s" % format_time(tot_time)
    if msg:
        status += " | " + msg
    frame.append(status)

    # Pad to the terminal width. A negative count repeats zero times, which
    # matches an empty range() in a character-by-character loop.
    frame.append(" " * (term_width - bar_cells - len(status) - 3))

    # Go back to the center of the bar.
    frame.append("\b" * (term_width - int(TOTAL_BAR_LENGTH / 2)))
    frame.append(" %d/%d " % (current + 1, total))

    frame.append("\r" if current < total - 1 else "\n")
    sys.stdout.write("".join(frame))
    sys.stdout.flush()


def format_time(seconds):
    """Render a duration as a short string using its two largest units.

    The duration is split into days, hours, minutes, whole seconds and
    milliseconds; the first two non-zero units are concatenated, for example
    ``"1h1m"`` or ``"1s500ms"``.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted string, or ``"0ms"`` when every unit is zero.
    """
    remaining = seconds
    days = int(remaining / 3600 / 24)
    remaining = remaining - days * 3600 * 24
    hours = int(remaining / 3600)
    remaining = remaining - hours * 3600
    minutes = int(remaining / 60)
    remaining = remaining - minutes * 60
    whole_seconds = int(remaining)
    remaining = remaining - whole_seconds
    millis = int(remaining * 1000)

    units = (
        (days, "D"),
        (hours, "h"),
        (minutes, "m"),
        (whole_seconds, "s"),
        (millis, "ms"),
    )
    # Keep only the two most significant non-zero units.
    shown = [str(value) + suffix for value, suffix in units if value > 0][:2]
    return "".join(shown) or "0ms"