├── .deepsource.toml
├── .gitignore
├── LICENSE
├── README.md
├── all_test.csv
├── all_train.csv
├── common_blocks
│   ├── __init__.py
│   ├── datasets.py
│   ├── logger.py
│   ├── losses.py
│   ├── optimizers.py
│   ├── scheduler.py
│   ├── transforms.py
│   └── utils.py
├── config
│   └── config_classification.yml
├── fit_predict_graph.py
├── generate_images.py
├── inference.py
├── models
│   ├── __init__.py
│   ├── lightningclassifier.py
│   ├── pretrained_models.py
│   └── seresnext.py
├── readme_images
│   ├── cv_test.png
│   └── sample_graph.png
├── requirements.txt
└── train.py
/.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | [[analyzers]] 4 | name = "python" 5 | enabled = true 6 | 7 | [analyzers.meta] 8 | runtime_version = "3.x.x" 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | data/ 9 | lightning_logs/ 10 | test/ 11 | train/ 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | .idea/.gitignore 134 | .idea/graph analysis.iml 135 | .idea/inspectionProfiles/ 136 | .idea/misc.xml 137 | .idea/modules.xml 138 | .idea/vcs.xml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Results 2 | ### Computer vision 3 | 4 | **Test** 5 | * ROC AUC 0.697 6 | * MAP 0.183 7 | 8 | ![cv_test.png](./readme_images/cv_test.png) 9 | 10 | ### Graph method 11 | 12 | **Test** 13 | * ROC AUC 0.702 14 | * MAP 0.199 15 | 16 | 17 | ## Training models 18 | ### Computer vision 19 | 20 | 0. Generate graph images ```python generate_images.py``` 21 | 1. 
Prepare the data: ```python prepare_data.py``` 22 | 23 | 2. Adjust the config in `config/config_classification.yml` 24 | 25 | 3. To train models, run ```python train.py``` 26 | 27 | 4. Watch TensorBoard logs: `tensorboard --logdir ./lightning_logs/` 28 | 29 | 5. To collect an up-to-date requirements.txt, call `pipreqs --force` 30 | 31 | 32 | ### Graph method 33 | 34 | 1. Run ```python fit_predict_graph.py``` 35 | 36 | 37 | ## Data 38 | 39 | We predict the activity (presumably against COVID) of different molecules. 40 | 41 | Dataset sample: 42 | ``` 43 | smiles,activity 44 | OC=1C=CC=CC1CNC2=NC=3C=CC=CC3N2,1 45 | CC(=O)NCCC1=CNC=2C=CC(F)=CC12,1 46 | O=C([C@@H]1[C@H](C2=CSC=C2)CCC1)N,1 47 | ``` 48 | 49 | ![sample_graph.png](./readme_images/sample_graph.png) -------------------------------------------------------------------------------- /all_test.csv: -------------------------------------------------------------------------------- 1 | image_filename;label 2 | ./test/id_331_y_0.jpg;0 3 | ./test/id_247_y_0.jpg;0 4 | ./test/id_790_y_0.jpg;0 5 | ./test/id_316_y_0.jpg;0 6 | ./test/id_215_y_0.jpg;0 7 | ./test/id_680_y_0.jpg;0 8 | ./test/id_585_y_0.jpg;0 9 | ./test/id_462_y_0.jpg;0 10 | ./test/id_815_y_0.jpg;0 11 | ./test/id_39_y_1.jpg;1 12 | ./test/id_430_y_0.jpg;0 13 | ./test/id_696_y_0.jpg;0 14 | ./test/id_875_y_0.jpg;0 15 | ./test/id_328_y_0.jpg;0 16 | ./test/id_611_y_0.jpg;0 17 | ./test/id_23_y_1.jpg;1 18 | ./test/id_501_y_0.jpg;0 19 | ./test/id_198_y_0.jpg;0 20 | ./test/id_110_y_0.jpg;0 21 | ./test/id_728_y_0.jpg;0 22 | ./test/id_673_y_0.jpg;0 23 | ./test/id_426_y_0.jpg;0 24 | ./test/id_786_y_0.jpg;0 25 | ./test/id_66_y_1.jpg;1 26 | ./test/id_44_y_1.jpg;1 27 | ./test/id_684_y_0.jpg;0 28 | ./test/id_808_y_0.jpg;0 29 | ./test/id_76_y_1.jpg;1 30 | ./test/id_678_y_0.jpg;0 31 | ./test/id_67_y_1.jpg;1 32 | ./test/id_30_y_1.jpg;1 33 | ./test/id_792_y_0.jpg;0 34 | ./test/id_86_y_0.jpg;0 35 | ./test/id_432_y_0.jpg;0 36 | ./test/id_623_y_0.jpg;0 37 | ./test/id_286_y_0.jpg;0 38 | ./test/id_299_y_0.jpg;0 39 | ./test/id_70_y_1.jpg;1 40 | ./test/id_809_y_0.jpg;0 41 | ./test/id_523_y_0.jpg;0 42 | ./test/id_530_y_0.jpg;0 43 | ./test/id_120_y_0.jpg;0 44 | ./test/id_312_y_0.jpg;0 45 | ./test/id_174_y_0.jpg;0 46 | ./test/id_137_y_0.jpg;0 47 | ./test/id_139_y_0.jpg;0 48 | ./test/id_72_y_1.jpg;1 49 | ./test/id_570_y_0.jpg;0 50 | ./test/id_298_y_0.jpg;0 51 | ./test/id_513_y_0.jpg;0 52 | ./test/id_529_y_0.jpg;0 53 | ./test/id_507_y_0.jpg;0 54 | ./test/id_825_y_0.jpg;0 55 | ./test/id_615_y_0.jpg;0 56 | ./test/id_866_y_0.jpg;0 57 | ./test/id_499_y_0.jpg;0 58 | ./test/id_448_y_0.jpg;0 59 | ./test/id_589_y_0.jpg;0 60 | ./test/id_638_y_0.jpg;0 61 | ./test/id_323_y_0.jpg;0 62 | ./test/id_811_y_0.jpg;0 63 | ./test/id_643_y_0.jpg;0 64 | ./test/id_306_y_0.jpg;0 65 | ./test/id_833_y_0.jpg;0 66 | ./test/id_650_y_0.jpg;0 67 | ./test/id_96_y_0.jpg;0 68 | ./test/id_572_y_0.jpg;0 69 | ./test/id_467_y_0.jpg;0 70 | ./test/id_856_y_0.jpg;0 71 | ./test/id_762_y_0.jpg;0 72 | ./test/id_812_y_0.jpg;0 73 | ./test/id_595_y_0.jpg;0 74 | ./test/id_213_y_0.jpg;0 75 | ./test/id_487_y_0.jpg;0 76 | ./test/id_532_y_0.jpg;0 77 | ./test/id_859_y_0.jpg;0 78 | ./test/id_431_y_0.jpg;0 79 | ./test/id_244_y_0.jpg;0 80 | ./test/id_708_y_0.jpg;0 81 | ./test/id_716_y_0.jpg;0 82 | ./test/id_551_y_0.jpg;0 83 | ./test/id_380_y_0.jpg;0 84 | ./test/id_453_y_0.jpg;0 85 | ./test/id_196_y_0.jpg;0 86 | ./test/id_660_y_0.jpg;0 87 | ./test/id_582_y_0.jpg;0 88 | ./test/id_575_y_0.jpg;0 89 | ./test/id_396_y_0.jpg;0 90 | ./test/id_865_y_0.jpg;0 91 | ./test/id_422_y_0.jpg;0 92 | 
./test/id_869_y_0.jpg;0 93 | ./test/id_332_y_0.jpg;0 94 | ./test/id_843_y_0.jpg;0 95 | ./test/id_314_y_0.jpg;0 96 | ./test/id_168_y_0.jpg;0 97 | ./test/id_327_y_0.jpg;0 98 | ./test/id_768_y_0.jpg;0 99 | ./test/id_390_y_0.jpg;0 100 | ./test/id_657_y_0.jpg;0 101 | ./test/id_644_y_0.jpg;0 102 | ./test/id_63_y_1.jpg;1 103 | ./test/id_569_y_0.jpg;0 104 | ./test/id_136_y_0.jpg;0 105 | ./test/id_346_y_0.jpg;0 106 | ./test/id_65_y_1.jpg;1 107 | ./test/id_292_y_0.jpg;0 108 | ./test/id_481_y_0.jpg;0 109 | ./test/id_710_y_0.jpg;0 110 | ./test/id_78_y_0.jpg;0 111 | ./test/id_794_y_0.jpg;0 112 | ./test/id_281_y_0.jpg;0 113 | ./test/id_590_y_0.jpg;0 114 | ./test/id_552_y_0.jpg;0 115 | ./test/id_302_y_0.jpg;0 116 | ./test/id_536_y_0.jpg;0 117 | ./test/id_411_y_0.jpg;0 118 | ./test/id_383_y_0.jpg;0 119 | ./test/id_518_y_0.jpg;0 120 | ./test/id_773_y_0.jpg;0 121 | ./test/id_192_y_0.jpg;0 122 | ./test/id_409_y_0.jpg;0 123 | ./test/id_49_y_1.jpg;1 124 | ./test/id_367_y_0.jpg;0 125 | ./test/id_823_y_0.jpg;0 126 | ./test/id_209_y_0.jpg;0 127 | ./test/id_291_y_0.jpg;0 128 | ./test/id_576_y_0.jpg;0 129 | ./test/id_694_y_0.jpg;0 130 | ./test/id_588_y_0.jpg;0 131 | ./test/id_874_y_0.jpg;0 132 | ./test/id_628_y_0.jpg;0 133 | ./test/id_344_y_0.jpg;0 134 | ./test/id_33_y_1.jpg;1 135 | ./test/id_31_y_1.jpg;1 136 | ./test/id_723_y_0.jpg;0 137 | ./test/id_713_y_0.jpg;0 138 | ./test/id_497_y_0.jpg;0 139 | ./test/id_211_y_0.jpg;0 140 | ./test/id_109_y_0.jpg;0 141 | ./test/id_77_y_1.jpg;1 142 | ./test/id_266_y_0.jpg;0 143 | ./test/id_82_y_0.jpg;0 144 | ./test/id_305_y_0.jpg;0 145 | ./test/id_408_y_0.jpg;0 146 | ./test/id_602_y_0.jpg;0 147 | ./test/id_634_y_0.jpg;0 148 | ./test/id_294_y_0.jpg;0 149 | ./test/id_362_y_0.jpg;0 150 | ./test/id_5_y_1.jpg;1 151 | ./test/id_54_y_1.jpg;1 152 | ./test/id_781_y_0.jpg;0 153 | ./test/id_669_y_0.jpg;0 154 | ./test/id_464_y_0.jpg;0 155 | ./test/id_767_y_0.jpg;0 156 | ./test/id_97_y_0.jpg;0 157 | ./test/id_668_y_0.jpg;0 158 | ./test/id_842_y_0.jpg;0 159 | ./test/id_417_y_0.jpg;0 160 | ./test/id_494_y_0.jpg;0 161 | ./test/id_25_y_1.jpg;1 162 | ./test/id_84_y_0.jpg;0 163 | ./test/id_10_y_1.jpg;1 164 | ./test/id_545_y_0.jpg;0 165 | ./test/id_736_y_0.jpg;0 166 | ./test/id_208_y_0.jpg;0 167 | ./test/id_662_y_0.jpg;0 168 | ./test/id_118_y_0.jpg;0 169 | ./test/id_275_y_0.jpg;0 170 | ./test/id_300_y_0.jpg;0 171 | ./test/id_29_y_1.jpg;1 172 | ./test/id_821_y_0.jpg;0 173 | ./test/id_581_y_0.jpg;0 174 | ./test/id_239_y_0.jpg;0 175 | ./test/id_483_y_0.jpg;0 176 | ./test/id_351_y_0.jpg;0 177 | ./test/id_81_y_0.jpg;0 178 | ./test/id_290_y_0.jpg;0 179 | ./test/id_685_y_0.jpg;0 180 | ./test/id_547_y_0.jpg;0 181 | ./test/id_835_y_0.jpg;0 182 | ./test/id_798_y_0.jpg;0 183 | ./test/id_741_y_0.jpg;0 184 | ./test/id_810_y_0.jpg;0 185 | ./test/id_526_y_0.jpg;0 186 | ./test/id_752_y_0.jpg;0 187 | ./test/id_429_y_0.jpg;0 188 | ./test/id_703_y_0.jpg;0 189 | ./test/id_740_y_0.jpg;0 190 | ./test/id_477_y_0.jpg;0 191 | ./test/id_617_y_0.jpg;0 192 | ./test/id_664_y_0.jpg;0 193 | ./test/id_352_y_0.jpg;0 194 | ./test/id_795_y_0.jpg;0 195 | ./test/id_832_y_0.jpg;0 196 | ./test/id_465_y_0.jpg;0 197 | ./test/id_7_y_1.jpg;1 198 | ./test/id_210_y_0.jpg;0 199 | ./test/id_155_y_0.jpg;0 200 | ./test/id_382_y_0.jpg;0 201 | ./test/id_101_y_0.jpg;0 202 | ./test/id_838_y_0.jpg;0 203 | ./test/id_428_y_0.jpg;0 204 | ./test/id_720_y_0.jpg;0 205 | ./test/id_363_y_0.jpg;0 206 | ./test/id_296_y_0.jpg;0 207 | ./test/id_218_y_0.jpg;0 208 | ./test/id_616_y_0.jpg;0 209 | ./test/id_2_y_1.jpg;1 210 | ./test/id_717_y_0.jpg;0 211 | 
./test/id_482_y_0.jpg;0 212 | ./test/id_235_y_0.jpg;0 213 | ./test/id_381_y_0.jpg;0 214 | ./test/id_158_y_0.jpg;0 215 | ./test/id_516_y_0.jpg;0 216 | ./test/id_819_y_0.jpg;0 217 | ./test/id_554_y_0.jpg;0 218 | ./test/id_227_y_0.jpg;0 219 | ./test/id_393_y_0.jpg;0 220 | ./test/id_416_y_0.jpg;0 221 | ./test/id_514_y_0.jpg;0 222 | ./test/id_801_y_0.jpg;0 223 | ./test/id_342_y_0.jpg;0 224 | ./test/id_704_y_0.jpg;0 225 | ./test/id_212_y_0.jpg;0 226 | ./test/id_433_y_0.jpg;0 227 | ./test/id_79_y_0.jpg;0 228 | ./test/id_148_y_0.jpg;0 229 | ./test/id_412_y_0.jpg;0 230 | ./test/id_250_y_0.jpg;0 231 | ./test/id_231_y_0.jpg;0 232 | ./test/id_774_y_0.jpg;0 233 | ./test/id_259_y_0.jpg;0 234 | ./test/id_365_y_0.jpg;0 235 | ./test/id_254_y_0.jpg;0 236 | ./test/id_133_y_0.jpg;0 237 | ./test/id_447_y_0.jpg;0 238 | ./test/id_165_y_0.jpg;0 239 | ./test/id_533_y_0.jpg;0 240 | ./test/id_55_y_1.jpg;1 241 | ./test/id_715_y_0.jpg;0 242 | ./test/id_659_y_0.jpg;0 243 | ./test/id_535_y_0.jpg;0 244 | ./test/id_333_y_0.jpg;0 245 | ./test/id_558_y_0.jpg;0 246 | ./test/id_204_y_0.jpg;0 247 | ./test/id_601_y_0.jpg;0 248 | ./test/id_450_y_0.jpg;0 249 | ./test/id_265_y_0.jpg;0 250 | ./test/id_350_y_0.jpg;0 251 | ./test/id_444_y_0.jpg;0 252 | ./test/id_60_y_1.jpg;1 253 | ./test/id_199_y_0.jpg;0 254 | ./test/id_803_y_0.jpg;0 255 | ./test/id_877_y_0.jpg;0 256 | ./test/id_440_y_0.jpg;0 257 | ./test/id_260_y_0.jpg;0 258 | ./test/id_618_y_0.jpg;0 259 | ./test/id_359_y_0.jpg;0 260 | ./test/id_280_y_0.jpg;0 261 | ./test/id_172_y_0.jpg;0 262 | ./test/id_434_y_0.jpg;0 263 | ./test/id_361_y_0.jpg;0 264 | ./test/id_319_y_0.jpg;0 265 | ./test/id_791_y_0.jpg;0 266 | -------------------------------------------------------------------------------- /all_train.csv: -------------------------------------------------------------------------------- 1 | image_filename;label 2 | ./train/id_90_y_0.jpg;0 3 | ./train/id_824_y_0.jpg;0 4 | ./train/id_181_y_0.jpg;0 5 | ./train/id_785_y_0.jpg;0 6 | ./train/id_69_y_1.jpg;1 7 | ./train/id_131_y_0.jpg;0 8 | ./train/id_732_y_0.jpg;0 9 | ./train/id_456_y_0.jpg;0 10 | ./train/id_135_y_0.jpg;0 11 | ./train/id_164_y_0.jpg;0 12 | ./train/id_28_y_1.jpg;1 13 | ./train/id_839_y_0.jpg;0 14 | ./train/id_193_y_0.jpg;0 15 | ./train/id_783_y_0.jpg;0 16 | ./train/id_485_y_0.jpg;0 17 | ./train/id_368_y_0.jpg;0 18 | ./train/id_140_y_0.jpg;0 19 | ./train/id_173_y_0.jpg;0 20 | ./train/id_6_y_1.jpg;1 21 | ./train/id_568_y_0.jpg;0 22 | ./train/id_360_y_0.jpg;0 23 | ./train/id_73_y_1.jpg;1 24 | ./train/id_605_y_0.jpg;0 25 | ./train/id_820_y_0.jpg;0 26 | ./train/id_655_y_0.jpg;0 27 | ./train/id_145_y_0.jpg;0 28 | ./train/id_234_y_0.jpg;0 29 | ./train/id_220_y_0.jpg;0 30 | ./train/id_357_y_0.jpg;0 31 | ./train/id_375_y_0.jpg;0 32 | ./train/id_132_y_0.jpg;0 33 | ./train/id_439_y_0.jpg;0 34 | ./train/id_311_y_0.jpg;0 35 | ./train/id_495_y_0.jpg;0 36 | ./train/id_41_y_1.jpg;1 37 | ./train/id_538_y_0.jpg;0 38 | ./train/id_108_y_0.jpg;0 39 | ./train/id_405_y_0.jpg;0 40 | ./train/id_56_y_1.jpg;1 41 | ./train/id_309_y_0.jpg;0 42 | ./train/id_334_y_0.jpg;0 43 | ./train/id_478_y_0.jpg;0 44 | ./train/id_24_y_1.jpg;1 45 | ./train/id_338_y_0.jpg;0 46 | ./train/id_855_y_0.jpg;0 47 | ./train/id_757_y_0.jpg;0 48 | ./train/id_51_y_1.jpg;1 49 | ./train/id_578_y_0.jpg;0 50 | ./train/id_806_y_0.jpg;0 51 | ./train/id_844_y_0.jpg;0 52 | ./train/id_264_y_0.jpg;0 53 | ./train/id_420_y_0.jpg;0 54 | ./train/id_604_y_0.jpg;0 55 | ./train/id_394_y_0.jpg;0 56 | ./train/id_755_y_0.jpg;0 57 | ./train/id_18_y_1.jpg;1 58 | ./train/id_688_y_0.jpg;0 59 | 
./train/id_326_y_0.jpg;0 60 | ./train/id_83_y_0.jpg;0 61 | ./train/id_61_y_1.jpg;1 62 | ./train/id_272_y_0.jpg;0 63 | ./train/id_285_y_0.jpg;0 64 | ./train/id_449_y_0.jpg;0 65 | ./train/id_445_y_0.jpg;0 66 | ./train/id_12_y_1.jpg;1 67 | ./train/id_182_y_0.jpg;0 68 | ./train/id_506_y_0.jpg;0 69 | ./train/id_543_y_0.jpg;0 70 | ./train/id_223_y_0.jpg;0 71 | ./train/id_512_y_0.jpg;0 72 | ./train/id_796_y_0.jpg;0 73 | ./train/id_377_y_0.jpg;0 74 | ./train/id_388_y_0.jpg;0 75 | ./train/id_557_y_0.jpg;0 76 | ./train/id_537_y_0.jpg;0 77 | ./train/id_176_y_0.jpg;0 78 | ./train/id_591_y_0.jpg;0 79 | ./train/id_425_y_0.jpg;0 80 | ./train/id_163_y_0.jpg;0 81 | ./train/id_248_y_0.jpg;0 82 | ./train/id_804_y_0.jpg;0 83 | ./train/id_777_y_0.jpg;0 84 | ./train/id_74_y_1.jpg;1 85 | ./train/id_113_y_0.jpg;0 86 | ./train/id_490_y_0.jpg;0 87 | ./train/id_473_y_0.jpg;0 88 | ./train/id_104_y_0.jpg;0 89 | ./train/id_114_y_0.jpg;0 90 | ./train/id_355_y_0.jpg;0 91 | ./train/id_92_y_0.jpg;0 92 | ./train/id_479_y_0.jpg;0 93 | ./train/id_89_y_0.jpg;0 94 | ./train/id_336_y_0.jpg;0 95 | ./train/id_500_y_0.jpg;0 96 | ./train/id_807_y_0.jpg;0 97 | ./train/id_827_y_0.jpg;0 98 | ./train/id_534_y_0.jpg;0 99 | ./train/id_539_y_0.jpg;0 100 | ./train/id_94_y_0.jpg;0 101 | ./train/id_11_y_1.jpg;1 102 | ./train/id_682_y_0.jpg;0 103 | ./train/id_43_y_1.jpg;1 104 | ./train/id_42_y_1.jpg;1 105 | ./train/id_329_y_0.jpg;0 106 | ./train/id_167_y_0.jpg;0 107 | ./train/id_858_y_0.jpg;0 108 | ./train/id_528_y_0.jpg;0 109 | ./train/id_667_y_0.jpg;0 110 | ./train/id_559_y_0.jpg;0 111 | ./train/id_178_y_0.jpg;0 112 | ./train/id_837_y_0.jpg;0 113 | ./train/id_584_y_0.jpg;0 114 | ./train/id_177_y_0.jpg;0 115 | ./train/id_599_y_0.jpg;0 116 | ./train/id_873_y_0.jpg;0 117 | ./train/id_606_y_0.jpg;0 118 | ./train/id_257_y_0.jpg;0 119 | ./train/id_594_y_0.jpg;0 120 | ./train/id_335_y_0.jpg;0 121 | ./train/id_542_y_0.jpg;0 122 | ./train/id_15_y_1.jpg;1 123 | ./train/id_3_y_1.jpg;1 124 | ./train/id_442_y_0.jpg;0 125 | ./train/id_256_y_0.jpg;0 126 | ./train/id_878_y_0.jpg;0 127 | ./train/id_457_y_0.jpg;0 128 | ./train/id_398_y_0.jpg;0 129 | ./train/id_222_y_0.jpg;0 130 | ./train/id_486_y_0.jpg;0 131 | ./train/id_436_y_0.jpg;0 132 | ./train/id_404_y_0.jpg;0 133 | ./train/id_324_y_0.jpg;0 134 | ./train/id_521_y_0.jpg;0 135 | ./train/id_9_y_1.jpg;1 136 | ./train/id_249_y_0.jpg;0 137 | ./train/id_22_y_1.jpg;1 138 | ./train/id_356_y_0.jpg;0 139 | ./train/id_221_y_0.jpg;0 140 | ./train/id_760_y_0.jpg;0 141 | ./train/id_756_y_0.jpg;0 142 | ./train/id_863_y_0.jpg;0 143 | ./train/id_519_y_0.jpg;0 144 | ./train/id_340_y_0.jpg;0 145 | ./train/id_733_y_0.jpg;0 146 | ./train/id_626_y_0.jpg;0 147 | ./train/id_203_y_0.jpg;0 148 | ./train/id_237_y_0.jpg;0 149 | ./train/id_93_y_0.jpg;0 150 | ./train/id_468_y_0.jpg;0 151 | ./train/id_424_y_0.jpg;0 152 | ./train/id_622_y_0.jpg;0 153 | ./train/id_284_y_0.jpg;0 154 | ./train/id_184_y_0.jpg;0 155 | ./train/id_709_y_0.jpg;0 156 | ./train/id_630_y_0.jpg;0 157 | ./train/id_153_y_0.jpg;0 158 | ./train/id_75_y_1.jpg;1 159 | ./train/id_765_y_0.jpg;0 160 | ./train/id_541_y_0.jpg;0 161 | ./train/id_277_y_0.jpg;0 162 | ./train/id_68_y_1.jpg;1 163 | ./train/id_784_y_0.jpg;0 164 | ./train/id_188_y_0.jpg;0 165 | ./train/id_271_y_0.jpg;0 166 | ./train/id_653_y_0.jpg;0 167 | ./train/id_706_y_0.jpg;0 168 | ./train/id_236_y_0.jpg;0 169 | ./train/id_88_y_0.jpg;0 170 | ./train/id_845_y_0.jpg;0 171 | ./train/id_117_y_0.jpg;0 172 | ./train/id_125_y_0.jpg;0 173 | ./train/id_721_y_0.jpg;0 174 | ./train/id_289_y_0.jpg;0 175 | 
./train/id_238_y_0.jpg;0 176 | ./train/id_0_y_1.jpg;1 177 | ./train/id_870_y_0.jpg;0 178 | ./train/id_451_y_0.jpg;0 179 | ./train/id_766_y_0.jpg;0 180 | ./train/id_395_y_0.jpg;0 181 | ./train/id_522_y_0.jpg;0 182 | ./train/id_126_y_0.jpg;0 183 | ./train/id_278_y_0.jpg;0 184 | ./train/id_707_y_0.jpg;0 185 | ./train/id_116_y_0.jpg;0 186 | ./train/id_228_y_0.jpg;0 187 | ./train/id_613_y_0.jpg;0 188 | ./train/id_826_y_0.jpg;0 189 | ./train/id_525_y_0.jpg;0 190 | ./train/id_830_y_0.jpg;0 191 | ./train/id_274_y_0.jpg;0 192 | ./train/id_318_y_0.jpg;0 193 | ./train/id_597_y_0.jpg;0 194 | ./train/id_144_y_0.jpg;0 195 | ./train/id_625_y_0.jpg;0 196 | ./train/id_549_y_0.jpg;0 197 | ./train/id_799_y_0.jpg;0 198 | ./train/id_633_y_0.jpg;0 199 | ./train/id_369_y_0.jpg;0 200 | ./train/id_268_y_0.jpg;0 201 | ./train/id_871_y_0.jpg;0 202 | ./train/id_307_y_0.jpg;0 203 | ./train/id_423_y_0.jpg;0 204 | ./train/id_310_y_0.jpg;0 205 | ./train/id_354_y_0.jpg;0 206 | ./train/id_46_y_1.jpg;1 207 | ./train/id_349_y_0.jpg;0 208 | ./train/id_371_y_0.jpg;0 209 | ./train/id_567_y_0.jpg;0 210 | ./train/id_261_y_0.jpg;0 211 | ./train/id_195_y_0.jpg;0 212 | ./train/id_879_y_0.jpg;0 213 | ./train/id_734_y_0.jpg;0 214 | ./train/id_107_y_0.jpg;0 215 | ./train/id_59_y_1.jpg;1 216 | ./train/id_649_y_0.jpg;0 217 | ./train/id_263_y_0.jpg;0 218 | ./train/id_100_y_0.jpg;0 219 | ./train/id_443_y_0.jpg;0 220 | ./train/id_598_y_0.jpg;0 221 | ./train/id_789_y_0.jpg;0 222 | ./train/id_631_y_0.jpg;0 223 | ./train/id_169_y_0.jpg;0 224 | ./train/id_179_y_0.jpg;0 225 | ./train/id_304_y_0.jpg;0 226 | ./train/id_761_y_0.jpg;0 227 | ./train/id_57_y_1.jpg;1 228 | ./train/id_848_y_0.jpg;0 229 | ./train/id_149_y_0.jpg;0 230 | ./train/id_124_y_0.jpg;0 231 | ./train/id_693_y_0.jpg;0 232 | ./train/id_817_y_0.jpg;0 233 | ./train/id_185_y_0.jpg;0 234 | ./train/id_596_y_0.jpg;0 235 | ./train/id_50_y_1.jpg;1 236 | ./train/id_341_y_0.jpg;0 237 | ./train/id_446_y_0.jpg;0 238 | ./train/id_868_y_0.jpg;0 239 | ./train/id_527_y_0.jpg;0 240 | ./train/id_321_y_0.jpg;0 241 | ./train/id_353_y_0.jpg;0 242 | ./train/id_813_y_0.jpg;0 243 | ./train/id_142_y_0.jpg;0 244 | ./train/id_470_y_0.jpg;0 245 | ./train/id_370_y_0.jpg;0 246 | ./train/id_705_y_0.jpg;0 247 | ./train/id_141_y_0.jpg;0 248 | ./train/id_399_y_0.jpg;0 249 | ./train/id_493_y_0.jpg;0 250 | ./train/id_743_y_0.jpg;0 251 | ./train/id_320_y_0.jpg;0 252 | ./train/id_19_y_1.jpg;1 253 | ./train/id_714_y_0.jpg;0 254 | ./train/id_697_y_0.jpg;0 255 | ./train/id_407_y_0.jpg;0 256 | ./train/id_548_y_0.jpg;0 257 | ./train/id_38_y_1.jpg;1 258 | ./train/id_175_y_0.jpg;0 259 | ./train/id_245_y_0.jpg;0 260 | ./train/id_730_y_0.jpg;0 261 | ./train/id_593_y_0.jpg;0 262 | ./train/id_665_y_0.jpg;0 263 | ./train/id_754_y_0.jpg;0 264 | ./train/id_154_y_0.jpg;0 265 | ./train/id_287_y_0.jpg;0 266 | ./train/id_531_y_0.jpg;0 267 | ./train/id_17_y_1.jpg;1 268 | ./train/id_127_y_0.jpg;0 269 | ./train/id_322_y_0.jpg;0 270 | ./train/id_255_y_0.jpg;0 271 | ./train/id_583_y_0.jpg;0 272 | ./train/id_846_y_0.jpg;0 273 | ./train/id_190_y_0.jpg;0 274 | ./train/id_115_y_0.jpg;0 275 | ./train/id_544_y_0.jpg;0 276 | ./train/id_180_y_0.jpg;0 277 | ./train/id_301_y_0.jpg;0 278 | ./train/id_670_y_0.jpg;0 279 | ./train/id_629_y_0.jpg;0 280 | ./train/id_640_y_0.jpg;0 281 | ./train/id_758_y_0.jpg;0 282 | ./train/id_587_y_0.jpg;0 283 | ./train/id_517_y_0.jpg;0 284 | ./train/id_864_y_0.jpg;0 285 | ./train/id_45_y_1.jpg;1 286 | ./train/id_797_y_0.jpg;0 287 | ./train/id_157_y_0.jpg;0 288 | ./train/id_656_y_0.jpg;0 289 | ./train/id_171_y_0.jpg;0 290 | 
./train/id_16_y_1.jpg;1 291 | ./train/id_511_y_0.jpg;0 292 | ./train/id_48_y_1.jpg;1 293 | ./train/id_852_y_0.jpg;0 294 | ./train/id_739_y_0.jpg;0 295 | ./train/id_515_y_0.jpg;0 296 | ./train/id_635_y_0.jpg;0 297 | ./train/id_480_y_0.jpg;0 298 | ./train/id_283_y_0.jpg;0 299 | ./train/id_580_y_0.jpg;0 300 | ./train/id_225_y_0.jpg;0 301 | ./train/id_26_y_1.jpg;1 302 | ./train/id_689_y_0.jpg;0 303 | ./train/id_437_y_0.jpg;0 304 | ./train/id_834_y_0.jpg;0 305 | ./train/id_364_y_0.jpg;0 306 | ./train/id_229_y_0.jpg;0 307 | ./train/id_37_y_1.jpg;1 308 | ./train/id_847_y_0.jpg;0 309 | ./train/id_374_y_0.jpg;0 310 | ./train/id_469_y_0.jpg;0 311 | ./train/id_849_y_0.jpg;0 312 | ./train/id_780_y_0.jpg;0 313 | ./train/id_735_y_0.jpg;0 314 | ./train/id_194_y_0.jpg;0 315 | ./train/id_750_y_0.jpg;0 316 | ./train/id_759_y_0.jpg;0 317 | ./train/id_503_y_0.jpg;0 318 | ./train/id_851_y_0.jpg;0 319 | ./train/id_731_y_0.jpg;0 320 | ./train/id_579_y_0.jpg;0 321 | ./train/id_850_y_0.jpg;0 322 | ./train/id_162_y_0.jpg;0 323 | ./train/id_621_y_0.jpg;0 324 | ./train/id_152_y_0.jpg;0 325 | ./train/id_603_y_0.jpg;0 326 | ./train/id_620_y_0.jpg;0 327 | ./train/id_666_y_0.jpg;0 328 | ./train/id_111_y_0.jpg;0 329 | ./train/id_226_y_0.jpg;0 330 | ./train/id_711_y_0.jpg;0 331 | ./train/id_103_y_0.jpg;0 332 | ./train/id_421_y_0.jpg;0 333 | ./train/id_419_y_0.jpg;0 334 | ./train/id_586_y_0.jpg;0 335 | ./train/id_119_y_0.jpg;0 336 | ./train/id_53_y_1.jpg;1 337 | ./train/id_151_y_0.jpg;0 338 | ./train/id_403_y_0.jpg;0 339 | ./train/id_829_y_0.jpg;0 340 | ./train/id_207_y_0.jpg;0 341 | ./train/id_742_y_0.jpg;0 342 | ./train/id_672_y_0.jpg;0 343 | ./train/id_8_y_1.jpg;1 344 | ./train/id_712_y_0.jpg;0 345 | ./train/id_36_y_1.jpg;1 346 | ./train/id_452_y_0.jpg;0 347 | ./train/id_253_y_0.jpg;0 348 | ./train/id_303_y_0.jpg;0 349 | ./train/id_658_y_0.jpg;0 350 | ./train/id_571_y_0.jpg;0 351 | ./train/id_687_y_0.jpg;0 352 | ./train/id_636_y_0.jpg;0 353 | ./train/id_262_y_0.jpg;0 354 | ./train/id_610_y_0.jpg;0 355 | ./train/id_297_y_0.jpg;0 356 | ./train/id_414_y_0.jpg;0 357 | ./train/id_150_y_0.jpg;0 358 | ./train/id_695_y_0.jpg;0 359 | ./train/id_779_y_0.jpg;0 360 | ./train/id_550_y_0.jpg;0 361 | ./train/id_488_y_0.jpg;0 362 | ./train/id_147_y_0.jpg;0 363 | ./train/id_146_y_0.jpg;0 364 | ./train/id_651_y_0.jpg;0 365 | ./train/id_816_y_0.jpg;0 366 | ./train/id_652_y_0.jpg;0 367 | ./train/id_348_y_0.jpg;0 368 | ./train/id_463_y_0.jpg;0 369 | ./train/id_325_y_0.jpg;0 370 | ./train/id_186_y_0.jpg;0 371 | ./train/id_123_y_0.jpg;0 372 | ./train/id_749_y_0.jpg;0 373 | ./train/id_608_y_0.jpg;0 374 | ./train/id_143_y_0.jpg;0 375 | ./train/id_841_y_0.jpg;0 376 | ./train/id_197_y_0.jpg;0 377 | ./train/id_609_y_0.jpg;0 378 | ./train/id_279_y_0.jpg;0 379 | ./train/id_293_y_0.jpg;0 380 | ./train/id_400_y_0.jpg;0 381 | ./train/id_122_y_0.jpg;0 382 | ./train/id_183_y_0.jpg;0 383 | ./train/id_202_y_0.jpg;0 384 | ./train/id_438_y_0.jpg;0 385 | ./train/id_246_y_0.jpg;0 386 | ./train/id_415_y_0.jpg;0 387 | ./train/id_778_y_0.jpg;0 388 | ./train/id_129_y_0.jpg;0 389 | ./train/id_637_y_0.jpg;0 390 | ./train/id_402_y_0.jpg;0 391 | ./train/id_691_y_0.jpg;0 392 | ./train/id_679_y_0.jpg;0 393 | ./train/id_800_y_0.jpg;0 394 | ./train/id_219_y_0.jpg;0 395 | ./train/id_641_y_0.jpg;0 396 | ./train/id_802_y_0.jpg;0 397 | ./train/id_677_y_0.jpg;0 398 | ./train/id_854_y_0.jpg;0 399 | ./train/id_624_y_0.jpg;0 400 | ./train/id_737_y_0.jpg;0 401 | ./train/id_675_y_0.jpg;0 402 | ./train/id_386_y_0.jpg;0 403 | ./train/id_853_y_0.jpg;0 404 | ./train/id_509_y_0.jpg;0 405 
| ./train/id_267_y_0.jpg;0 406 | ./train/id_722_y_0.jpg;0 407 | ./train/id_441_y_0.jpg;0 408 | ./train/id_496_y_0.jpg;0 409 | ./train/id_112_y_0.jpg;0 410 | ./train/id_232_y_0.jpg;0 411 | ./train/id_764_y_0.jpg;0 412 | ./train/id_607_y_0.jpg;0 413 | ./train/id_671_y_0.jpg;0 414 | ./train/id_373_y_0.jpg;0 415 | ./train/id_862_y_0.jpg;0 416 | ./train/id_233_y_0.jpg;0 417 | ./train/id_692_y_0.jpg;0 418 | ./train/id_676_y_0.jpg;0 419 | ./train/id_317_y_0.jpg;0 420 | ./train/id_648_y_0.jpg;0 421 | ./train/id_410_y_0.jpg;0 422 | ./train/id_788_y_0.jpg;0 423 | ./train/id_358_y_0.jpg;0 424 | ./train/id_258_y_0.jpg;0 425 | ./train/id_627_y_0.jpg;0 426 | ./train/id_632_y_0.jpg;0 427 | ./train/id_282_y_0.jpg;0 428 | ./train/id_376_y_0.jpg;0 429 | ./train/id_384_y_0.jpg;0 430 | ./train/id_224_y_0.jpg;0 431 | ./train/id_836_y_0.jpg;0 432 | ./train/id_718_y_0.jpg;0 433 | ./train/id_472_y_0.jpg;0 434 | ./train/id_347_y_0.jpg;0 435 | ./train/id_505_y_0.jpg;0 436 | ./train/id_639_y_0.jpg;0 437 | ./train/id_867_y_0.jpg;0 438 | ./train/id_814_y_0.jpg;0 439 | ./train/id_793_y_0.jpg;0 440 | ./train/id_619_y_0.jpg;0 441 | ./train/id_751_y_0.jpg;0 442 | ./train/id_645_y_0.jpg;0 443 | ./train/id_744_y_0.jpg;0 444 | ./train/id_556_y_0.jpg;0 445 | ./train/id_840_y_0.jpg;0 446 | ./train/id_577_y_0.jpg;0 447 | ./train/id_85_y_0.jpg;0 448 | ./train/id_242_y_0.jpg;0 449 | ./train/id_698_y_0.jpg;0 450 | ./train/id_159_y_0.jpg;0 451 | ./train/id_524_y_0.jpg;0 452 | ./train/id_35_y_1.jpg;1 453 | ./train/id_540_y_0.jpg;0 454 | ./train/id_170_y_0.jpg;0 455 | ./train/id_654_y_0.jpg;0 456 | ./train/id_753_y_0.jpg;0 457 | ./train/id_745_y_0.jpg;0 458 | ./train/id_828_y_0.jpg;0 459 | ./train/id_95_y_0.jpg;0 460 | ./train/id_563_y_0.jpg;0 461 | ./train/id_240_y_0.jpg;0 462 | ./train/id_574_y_0.jpg;0 463 | ./train/id_690_y_0.jpg;0 464 | ./train/id_460_y_0.jpg;0 465 | ./train/id_553_y_0.jpg;0 466 | ./train/id_770_y_0.jpg;0 467 | ./train/id_206_y_0.jpg;0 468 | ./train/id_392_y_0.jpg;0 469 | ./train/id_397_y_0.jpg;0 470 | ./train/id_746_y_0.jpg;0 471 | ./train/id_217_y_0.jpg;0 472 | ./train/id_4_y_1.jpg;1 473 | ./train/id_642_y_0.jpg;0 474 | ./train/id_787_y_0.jpg;0 475 | ./train/id_612_y_0.jpg;0 476 | ./train/id_738_y_0.jpg;0 477 | ./train/id_546_y_0.jpg;0 478 | ./train/id_725_y_0.jpg;0 479 | ./train/id_683_y_0.jpg;0 480 | ./train/id_98_y_0.jpg;0 481 | ./train/id_727_y_0.jpg;0 482 | ./train/id_573_y_0.jpg;0 483 | ./train/id_406_y_0.jpg;0 484 | ./train/id_502_y_0.jpg;0 485 | ./train/id_47_y_1.jpg;1 486 | ./train/id_32_y_1.jpg;1 487 | ./train/id_200_y_0.jpg;0 488 | ./train/id_134_y_0.jpg;0 489 | ./train/id_27_y_1.jpg;1 490 | ./train/id_772_y_0.jpg;0 491 | ./train/id_230_y_0.jpg;0 492 | ./train/id_489_y_0.jpg;0 493 | ./train/id_378_y_0.jpg;0 494 | ./train/id_288_y_0.jpg;0 495 | ./train/id_418_y_0.jpg;0 496 | ./train/id_674_y_0.jpg;0 497 | ./train/id_391_y_0.jpg;0 498 | ./train/id_592_y_0.jpg;0 499 | ./train/id_498_y_0.jpg;0 500 | ./train/id_138_y_0.jpg;0 501 | ./train/id_62_y_1.jpg;1 502 | ./train/id_471_y_0.jpg;0 503 | ./train/id_647_y_0.jpg;0 504 | ./train/id_128_y_0.jpg;0 505 | ./train/id_857_y_0.jpg;0 506 | ./train/id_520_y_0.jpg;0 507 | ./train/id_64_y_1.jpg;1 508 | ./train/id_14_y_1.jpg;1 509 | ./train/id_156_y_0.jpg;0 510 | ./train/id_40_y_1.jpg;1 511 | ./train/id_492_y_0.jpg;0 512 | ./train/id_379_y_0.jpg;0 513 | ./train/id_187_y_0.jpg;0 514 | ./train/id_763_y_0.jpg;0 515 | ./train/id_216_y_0.jpg;0 516 | ./train/id_52_y_1.jpg;1 517 | ./train/id_337_y_0.jpg;0 518 | ./train/id_748_y_0.jpg;0 519 | ./train/id_719_y_0.jpg;0 520 | 
./train/id_724_y_0.jpg;0 521 | ./train/id_295_y_0.jpg;0 522 | ./train/id_701_y_0.jpg;0 523 | ./train/id_251_y_0.jpg;0 524 | ./train/id_726_y_0.jpg;0 525 | ./train/id_461_y_0.jpg;0 526 | ./train/id_455_y_0.jpg;0 527 | ./train/id_876_y_0.jpg;0 528 | ./train/id_269_y_0.jpg;0 529 | ./train/id_201_y_0.jpg;0 530 | ./train/id_161_y_0.jpg;0 531 | ./train/id_555_y_0.jpg;0 532 | ./train/id_729_y_0.jpg;0 533 | ./train/id_401_y_0.jpg;0 534 | ./train/id_702_y_0.jpg;0 535 | ./train/id_476_y_0.jpg;0 536 | ./train/id_771_y_0.jpg;0 537 | ./train/id_105_y_0.jpg;0 538 | ./train/id_565_y_0.jpg;0 539 | ./train/id_389_y_0.jpg;0 540 | ./train/id_1_y_1.jpg;1 541 | ./train/id_822_y_0.jpg;0 542 | ./train/id_561_y_0.jpg;0 543 | ./train/id_80_y_0.jpg;0 544 | ./train/id_205_y_0.jpg;0 545 | ./train/id_34_y_1.jpg;1 546 | ./train/id_775_y_0.jpg;0 547 | ./train/id_508_y_0.jpg;0 548 | ./train/id_427_y_0.jpg;0 549 | ./train/id_454_y_0.jpg;0 550 | ./train/id_366_y_0.jpg;0 551 | ./train/id_91_y_0.jpg;0 552 | ./train/id_339_y_0.jpg;0 553 | ./train/id_564_y_0.jpg;0 554 | ./train/id_345_y_0.jpg;0 555 | ./train/id_776_y_0.jpg;0 556 | ./train/id_241_y_0.jpg;0 557 | ./train/id_13_y_1.jpg;1 558 | ./train/id_315_y_0.jpg;0 559 | ./train/id_600_y_0.jpg;0 560 | ./train/id_387_y_0.jpg;0 561 | ./train/id_273_y_0.jpg;0 562 | ./train/id_166_y_0.jpg;0 563 | ./train/id_872_y_0.jpg;0 564 | ./train/id_646_y_0.jpg;0 565 | ./train/id_818_y_0.jpg;0 566 | ./train/id_484_y_0.jpg;0 567 | ./train/id_861_y_0.jpg;0 568 | ./train/id_504_y_0.jpg;0 569 | ./train/id_831_y_0.jpg;0 570 | ./train/id_243_y_0.jpg;0 571 | ./train/id_566_y_0.jpg;0 572 | ./train/id_562_y_0.jpg;0 573 | ./train/id_686_y_0.jpg;0 574 | ./train/id_189_y_0.jpg;0 575 | ./train/id_782_y_0.jpg;0 576 | ./train/id_699_y_0.jpg;0 577 | ./train/id_475_y_0.jpg;0 578 | ./train/id_681_y_0.jpg;0 579 | ./train/id_510_y_0.jpg;0 580 | ./train/id_58_y_1.jpg;1 581 | ./train/id_474_y_0.jpg;0 582 | ./train/id_560_y_0.jpg;0 583 | ./train/id_747_y_0.jpg;0 584 | ./train/id_252_y_0.jpg;0 585 | ./train/id_21_y_1.jpg;1 586 | ./train/id_313_y_0.jpg;0 587 | ./train/id_459_y_0.jpg;0 588 | ./train/id_160_y_0.jpg;0 589 | ./train/id_276_y_0.jpg;0 590 | ./train/id_191_y_0.jpg;0 591 | ./train/id_385_y_0.jpg;0 592 | ./train/id_805_y_0.jpg;0 593 | ./train/id_413_y_0.jpg;0 594 | ./train/id_491_y_0.jpg;0 595 | ./train/id_343_y_0.jpg;0 596 | ./train/id_769_y_0.jpg;0 597 | ./train/id_308_y_0.jpg;0 598 | ./train/id_661_y_0.jpg;0 599 | ./train/id_130_y_0.jpg;0 600 | ./train/id_663_y_0.jpg;0 601 | ./train/id_99_y_0.jpg;0 602 | ./train/id_372_y_0.jpg;0 603 | ./train/id_87_y_0.jpg;0 604 | ./train/id_458_y_0.jpg;0 605 | ./train/id_330_y_0.jpg;0 606 | ./train/id_214_y_0.jpg;0 607 | ./train/id_466_y_0.jpg;0 608 | ./train/id_121_y_0.jpg;0 609 | ./train/id_614_y_0.jpg;0 610 | ./train/id_20_y_1.jpg;1 611 | ./train/id_700_y_0.jpg;0 612 | ./train/id_71_y_1.jpg;1 613 | ./train/id_106_y_0.jpg;0 614 | ./train/id_270_y_0.jpg;0 615 | ./train/id_860_y_0.jpg;0 616 | ./train/id_435_y_0.jpg;0 617 | ./train/id_102_y_0.jpg;0 618 | -------------------------------------------------------------------------------- /common_blocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Diyago/Graph-clasification-by-computer-vision/703c44b98f9875d7a7b6db1c2b96372e11e256d6/common_blocks/__init__.py -------------------------------------------------------------------------------- /common_blocks/datasets.py: -------------------------------------------------------------------------------- 1 
| import skimage.io 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class TrainDataset(Dataset): 6 | def __init__(self, df, config, transform=None): 7 | self.df = df 8 | self.labels = df[config["target_col"]] 9 | self.transform = transform 10 | self.config = config 11 | 12 | def __len__(self): 13 | return len(self.df) 14 | 15 | def __getitem__(self, idx): 16 | file_name = self.df["image_filename"].values[idx] 17 | file_path = "{}/{}".format(self.config["images_path"], file_name) 18 | image = skimage.io.MultiImage(file_path)[-1] 19 | 20 | if self.transform: 21 | augmented = self.transform(image=image) 22 | image = augmented["image"] 23 | return image, float(self.labels[idx]) 24 | 25 | 26 | class TestDataset(Dataset): 27 | 28 | def __init__(self, df, config, transform=None): 29 | self.df = df 30 | self.transform = transform 31 | self.config = config 32 | 33 | def __len__(self): 34 | return len(self.df) 35 | 36 | def __getitem__(self, idx): 37 | file_name = self.df["image_filename"].values[idx] 38 | file_path = "{}/{}".format(self.config["images_path"], file_name) 39 | image = skimage.io.MultiImage(file_path)[-1] 40 | 41 | if self.transform: 42 | augmented = self.transform(image=image) 43 | image = augmented["image"] 44 | 45 | return file_path, image 46 | -------------------------------------------------------------------------------- /common_blocks/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from contextlib import contextmanager 4 | from logging import getLogger, DEBUG, FileHandler, Formatter, StreamHandler 5 | LOGGER = getLogger("PANDA")  # module-level logger used by timer(); init_logger() attaches its handlers 6 | 7 | @contextmanager 8 | def timer(name): 9 | t0 = time.time() 10 | LOGGER.info(f"[{name}] start") 11 | yield 12 | LOGGER.info(f"[{name}] done in {time.time() - t0:.0f} s.") 13 | 14 | 15 | def init_logger(log_file="train.log"): 16 | if not os.path.exists(log_file): 17 | with open(log_file, "w"): 18 | pass 19 | log_format = "%(asctime)s %(levelname)s %(message)s" 20 | 21 | stream_handler = StreamHandler() 22 | stream_handler.setLevel(DEBUG) 23 | stream_handler.setFormatter(Formatter(log_format)) 24 | 25 | file_handler = FileHandler(log_file) 26 | file_handler.setFormatter(Formatter(log_format)) 27 | 28 | logger = getLogger("PANDA") 29 | logger.setLevel(DEBUG) 30 | logger.addHandler(stream_handler) 31 | logger.addHandler(file_handler) 32 | 33 | return logger 34 | -------------------------------------------------------------------------------- /common_blocks/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class LabelSmoothingCrossEntropy(nn.Module): 7 | """ 8 | NLL loss with label smoothing. 9 | """ 10 | 11 | def __init__(self, smoothing=0.1): 12 | """ 13 | Constructor for the LabelSmoothing module. 
:param smoothing: label smoothing factor 15 | """ 16 | super(LabelSmoothingCrossEntropy, self).__init__() 17 | assert smoothing < 1.0 18 | self.smoothing = smoothing 19 | self.confidence = 1.0 - smoothing 20 | 21 | def forward(self, x, target): 22 | logprobs = F.log_softmax(x, dim=-1) 23 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 24 | nll_loss = nll_loss.squeeze(1) 25 | smooth_loss = -logprobs.mean(dim=-1) 26 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 27 | return loss.mean() 28 | 29 | 30 | class LabelSmoothingCrossEntropyBCE(nn.Module): 31 | def __init__(self, smoothing=0.1): 32 | super(LabelSmoothingCrossEntropyBCE, self).__init__() 33 | assert smoothing < 1.0 34 | self.smoothing = smoothing 35 | self.confidence = 1. - smoothing 36 | 37 | def forward(self, x, target): 38 | target = target.float() * (self.confidence) + 0.5 * self.smoothing 39 | return F.binary_cross_entropy_with_logits(x, target.type_as(x)) 40 | 41 | 42 | class SoftTargetCrossEntropy(nn.Module): 43 | def __init__(self): 44 | super(SoftTargetCrossEntropy, self).__init__() 45 | 46 | def forward(self, x, target): 47 | loss = torch.sum(-target * F.log_softmax(x, dim=-1), dim=-1) 48 | return loss.mean()  # reduce the per-sample losses to a scalar 49 | 50 | class FocalLoss(nn.Module): 51 | def __init__(self, alpha=1, gamma=2, logits=False, reduce=True): 52 | super(FocalLoss, self).__init__() 53 | self.alpha = alpha 54 | self.gamma = gamma 55 | self.logits = logits 56 | self.reduce = reduce 57 | 58 | def forward(self, inputs, targets): 59 | if self.logits: 60 | BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduce=False) 61 | else: 62 | BCE_loss = F.binary_cross_entropy(inputs, targets, reduce=False) 63 | pt = torch.exp(-BCE_loss) 64 | F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss 65 | 66 | if self.reduce: 67 | return torch.mean(F_loss) 68 | else: 69 | return F_loss 70 | 71 | 72 | class F1_Loss(nn.Module): 73 | '''Calculate F1 score. Can work with GPU tensors. 74 | 75 | The original implementation is written by Michal Haltuf on Kaggle. 76 | 77 | Returns 78 | ------- 79 | torch.Tensor 80 | `ndim` == 1. 
epsilon <= val <= 1 81 | 82 | Reference 83 | --------- 84 | - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric 85 | - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score 86 | - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6 87 | - http://www.ryanzhang.info/python/writing-your-own-loss-function-module-for-pytorch/ 88 | ''' 89 | 90 | def __init__(self, epsilon=1e-7): 91 | super().__init__() 92 | self.epsilon = epsilon 93 | 94 | def forward(self, y_pred, y_true, ): 95 | assert y_pred.ndim == 2 96 | assert y_true.ndim == 1 97 | y_true = F.one_hot(y_true, 2).to(torch.float32) 98 | y_pred = F.softmax(y_pred, dim=1) 99 | 100 | tp = (y_true * y_pred).sum(dim=0).to(torch.float32) 101 | tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32) 102 | fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32) 103 | fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32) 104 | 105 | precision = tp / (tp + fp + self.epsilon) 106 | recall = tp / (tp + fn + self.epsilon) 107 | 108 | f1 = 2 * (precision * recall) / (precision + recall + self.epsilon) 109 | f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon) 110 | return 1 - f1.mean() 111 | 112 | -------------------------------------------------------------------------------- /common_blocks/optimizers.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer, required 4 | 5 | 6 | class RAdam(Optimizer): 7 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): 8 | defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) 9 | self.buffer = [[None, None, None] for ind in range(10)] 10 | super(RAdam, self).__init__(params, defaults) 11 | 12 | def __setstate__(self, state): 13 | super(RAdam, self).__setstate__(state) 14 | 15 | def step(self, closure=None): 16 | 17 | loss = None 18 | if closure is not None: 19 | loss = closure() 20 | 21 | for group in self.param_groups: 22 | 23 | for p in group["params"]: 24 | if p.grad is None: 25 | continue 26 | grad = p.grad.data.float() 27 | if grad.is_sparse: 28 | raise RuntimeError("RAdam does not support sparse gradients") 29 | 30 | p_data_fp32 = p.data.float() 31 | 32 | state = self.state[p] 33 | 34 | if len(state) == 0: 35 | state["step"] = 0 36 | state["exp_avg"] = torch.zeros_like(p_data_fp32) 37 | state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) 38 | else: 39 | state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) 40 | state["exp_avg_sq"] = state["exp_avg_sq"].type_as(p_data_fp32) 41 | 42 | exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] 43 | beta1, beta2 = group["betas"] 44 | 45 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 46 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 47 | 48 | state["step"] += 1 49 | buffered = self.buffer[int(state["step"] % 10)] 50 | if state["step"] == buffered[0]: 51 | N_sma, step_size = buffered[1], buffered[2] 52 | else: 53 | buffered[0] = state["step"] 54 | beta2_t = beta2 ** state["step"] 55 | N_sma_max = 2 / (1 - beta2) - 1 56 | N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1 - beta2_t) 57 | buffered[1] = N_sma 58 | 59 | # more conservative since it's an approximated value 60 | if N_sma >= 5: 61 | step_size = ( 62 | group["lr"] 63 | * math.sqrt( 64 | (1 - beta2_t) 65 | * (N_sma - 4) 66 | / (N_sma_max - 4) 67 | * (N_sma - 2) 68 | / N_sma 69 | * N_sma_max 
70 | / (N_sma_max - 2) 71 | ) 72 | / (1 - beta1 ** state["step"]) 73 | ) 74 | else: 75 | step_size = group["lr"] / (1 - beta1 ** state["step"]) 76 | buffered[2] = step_size 77 | 78 | if group["weight_decay"] != 0: 79 | p_data_fp32.add_(-group["weight_decay"] * group["lr"], p_data_fp32) 80 | 81 | # more conservative since it's an approximated value 82 | if N_sma >= 5: 83 | denom = exp_avg_sq.sqrt().add_(group["eps"]) 84 | p_data_fp32.addcdiv_(-step_size, exp_avg, denom) 85 | else: 86 | p_data_fp32.add_(-step_size, exp_avg) 87 | 88 | p.data.copy_(p_data_fp32) 89 | 90 | return loss 91 | 92 | 93 | class PlainRAdam(Optimizer): 94 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): 95 | defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) 96 | 97 | super(PlainRAdam, self).__init__(params, defaults) 98 | 99 | def __setstate__(self, state): 100 | super(PlainRAdam, self).__setstate__(state) 101 | 102 | def step(self, closure=None): 103 | 104 | loss = None 105 | if closure is not None: 106 | loss = closure() 107 | 108 | for group in self.param_groups: 109 | 110 | for p in group["params"]: 111 | if p.grad is None: 112 | continue 113 | grad = p.grad.data.float() 114 | if grad.is_sparse: 115 | raise RuntimeError("RAdam does not support sparse gradients") 116 | 117 | p_data_fp32 = p.data.float() 118 | 119 | state = self.state[p] 120 | 121 | if len(state) == 0: 122 | state["step"] = 0 123 | state["exp_avg"] = torch.zeros_like(p_data_fp32) 124 | state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) 125 | else: 126 | state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32) 127 | state["exp_avg_sq"] = state["exp_avg_sq"].type_as(p_data_fp32) 128 | 129 | exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] 130 | beta1, beta2 = group["betas"] 131 | 132 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 133 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 134 | 135 | state["step"] += 1 136 | beta2_t = beta2 ** state["step"] 137 | N_sma_max = 2 / (1 - beta2) - 1 138 | N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1 - beta2_t) 139 | 140 | if group["weight_decay"] != 0: 141 | p_data_fp32.add_(-group["weight_decay"] * group["lr"], p_data_fp32) 142 | 143 | # more conservative since it's an approximated value 144 | if N_sma >= 5: 145 | step_size = ( 146 | group["lr"] 147 | * math.sqrt( 148 | (1 - beta2_t) 149 | * (N_sma - 4) 150 | / (N_sma_max - 4) 151 | * (N_sma - 2) 152 | / N_sma 153 | * N_sma_max 154 | / (N_sma_max - 2) 155 | ) 156 | / (1 - beta1 ** state["step"]) 157 | ) 158 | denom = exp_avg_sq.sqrt().add_(group["eps"]) 159 | p_data_fp32.addcdiv_(-step_size, exp_avg, denom) 160 | else: 161 | step_size = group["lr"] / (1 - beta1 ** state["step"]) 162 | p_data_fp32.add_(-step_size, exp_avg) 163 | 164 | p.data.copy_(p_data_fp32) 165 | 166 | return loss 167 | -------------------------------------------------------------------------------- /common_blocks/scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | from bisect import bisect_right 3 | from collections.abc import Iterable 4 | from math import log, cos, pi, floor 5 | 6 | from torch.optim.lr_scheduler import _LRScheduler 7 | from torch.optim.optimizer import Optimizer 8 | 9 | 10 | class CyclicCosineDecayLR(_LRScheduler): 11 | def __init__(self, 12 | optimizer, 13 | init_interval, 14 | min_lr, 15 | restart_multiplier=None, 16 | restart_interval=None, 17 | restart_lr=None, 18 | last_epoch=-1): 19 | """ 20 | Initialize new CyclicCosineDecayLR 
object 21 | :param optimizer: (Optimizer) - Wrapped optimizer. 22 | :param init_interval: (int) - Initial decay cycle interval. 23 | :param min_lr: (float or iterable of floats) - Minimal learning rate. 24 | :param restart_multiplier: (float) - Multiplication coefficient for increasing cycle intervals, 25 | if this parameter is set, restart_interval must be None. 26 | :param restart_interval: (int) - Restart interval for fixed cycle intervals, 27 | if this parameter is set, restart_multiplier must be None. 28 | :param restart_lr: (float or iterable of floats) - Optional, the learning rate at cycle restarts, 29 | if not provided, initial learning rate will be used. 30 | :param last_epoch: (int) - Last epoch. 31 | """ 32 | 33 | if restart_interval is not None and restart_multiplier is not None: 34 | raise ValueError("You can either set restart_interval or restart_multiplier but not both") 35 | 36 | if isinstance(min_lr, Iterable) and len(min_lr) != len(optimizer.param_groups): 37 | raise ValueError("Expected len(min_lr) to be equal to len(optimizer.param_groups), " 38 | "got {} and {} instead".format(len(min_lr), len(optimizer.param_groups))) 39 | 40 | if isinstance(restart_lr, Iterable) and len(restart_lr) != len(optimizer.param_groups): 41 | raise ValueError("Expected len(restart_lr) to be equal to len(optimizer.param_groups), " 42 | "got {} and {} instead".format(len(restart_lr), len(optimizer.param_groups))) 43 | 44 | if init_interval <= 0: 45 | raise ValueError("init_interval must be a positive number, got {} instead".format(init_interval)) 46 | 47 | group_num = len(optimizer.param_groups) 48 | self._init_interval = init_interval 49 | self._min_lr = [min_lr] * group_num if isinstance(min_lr, float) else min_lr 50 | self._restart_lr = [restart_lr] * group_num if isinstance(restart_lr, float) else restart_lr 51 | self._restart_interval = restart_interval 52 | self._restart_multiplier = restart_multiplier 53 | super(CyclicCosineDecayLR, self).__init__(optimizer, last_epoch) 54 | 55 | def get_lr(self): 56 | if self.last_epoch < self._init_interval: 57 | return self._calc(self.last_epoch, 58 | self._init_interval, 59 | self.base_lrs) 60 | 61 | elif self._restart_interval is not None: 62 | cycle_epoch = (self.last_epoch - self._init_interval) % self._restart_interval 63 | lrs = self.base_lrs if self._restart_lr is None else self._restart_lr 64 | return self._calc(cycle_epoch, 65 | self._restart_interval, 66 | lrs) 67 | 68 | elif self._restart_multiplier is not None: 69 | n = self._get_n(self.last_epoch) 70 | sn_prev = self._partial_sum(n) 71 | cycle_epoch = self.last_epoch - sn_prev 72 | interval = self._init_interval * self._restart_multiplier ** n 73 | lrs = self.base_lrs if self._restart_lr is None else self._restart_lr 74 | return self._calc(cycle_epoch, 75 | interval, 76 | lrs) 77 | else: 78 | return self._min_lr 79 | 80 | def _calc(self, t, T, lrs): 81 | return [min_lr + (lr - min_lr) * (1 + cos(pi * t / T)) / 2 82 | for lr, min_lr in zip(lrs, self._min_lr)] 83 | 84 | def _get_n(self, epoch): 85 | a = self._init_interval 86 | r = self._restart_multiplier 87 | _t = 1 - (1 - r) * epoch / a 88 | return floor(log(_t, r)) 89 | 90 | def _partial_sum(self, n): 91 | a = self._init_interval 92 | r = self._restart_multiplier 93 | return a * (1 - r ** n) / (1 - r) 94 | 95 | 96 | class CyclicCosAnnealingLR(_LRScheduler): 97 | r""" 98 | Implements reset on milestones inspired from CosineAnnealingLR pytorch 99 | 100 | Set the learning rate of each parameter group using a cosine annealing 101 | 
schedule, where :math:`\eta_{max}` is set to the initial lr and
102 |     :math:`T_{cur}` is the number of epochs since the last restart in SGDR:
103 |     .. math::
104 |         \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 +
105 |         \cos(\frac{T_{cur}}{T_{max}}\pi))
106 |     When last_epoch > the last milestone, lr is automatically set to \eta_{min}.
107 |     It has been proposed in
108 |     `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Unlike the stock
109 |     CosineAnnealingLR, this variant does restart the schedule at every milestone.
110 |     Args:
111 |         optimizer (Optimizer): Wrapped optimizer.
112 |         milestones (list of ints): List of epoch indices. Must be increasing.
113 |         eta_min (float): Minimum learning rate. Default: 0.
114 |         last_epoch (int): The index of last epoch. Default: -1.
115 |     .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
116 |         https://arxiv.org/abs/1608.03983
117 |     """
118 | 
119 |     def __init__(self, optimizer, milestones, eta_min=0, last_epoch=-1):
120 |         if not list(milestones) == sorted(milestones):
121 |             raise ValueError('Milestones should be a list of'
122 |                              ' increasing integers. Got {}'.format(milestones))
123 |         self.eta_min = eta_min
124 |         self.milestones = milestones
125 |         super(CyclicCosAnnealingLR, self).__init__(optimizer, last_epoch)
126 | 
127 |     def get_lr(self):
128 | 
129 |         if self.last_epoch >= self.milestones[-1]:
130 |             return [self.eta_min for base_lr in self.base_lrs]
131 | 
132 |         idx = bisect_right(self.milestones, self.last_epoch)
133 | 
134 |         left_barrier = 0 if idx == 0 else self.milestones[idx - 1]
135 |         right_barrier = self.milestones[idx]
136 | 
137 |         width = right_barrier - left_barrier
138 |         curr_pos = self.last_epoch - left_barrier
139 | 
140 |         return [self.eta_min + (base_lr - self.eta_min) *
141 |                 (1 + math.cos(math.pi * curr_pos / width)) / 2
142 |                 for base_lr in self.base_lrs]
143 | 
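144 | # Usage sketch (not part of the original file): step once per epoch; the lr decays
145 | # cosine-style inside each milestone interval and restarts at epochs 30/60/90 here.
146 | #     scheduler = CyclicCosAnnealingLR(optimizer, milestones=[30, 60, 90], eta_min=1e-5)
147 | #     for epoch in range(90): train_one_epoch(); scheduler.step()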
148 | 
149 | 
150 | class CyclicLinearLR(_LRScheduler):
151 |     r"""
152 |     Linear learning rate decay with resets at the given milestones.
153 | 
154 |     Set the learning rate of each parameter group using a linear decay
155 |     schedule, where :math:`\eta_{max}` is set to the initial lr and
156 |     :math:`T_{cur}` is the number of epochs since the last restart:
157 |     .. math::
158 |         \eta_t = \eta_{min} + (\eta_{max} - \eta_{min})(1 - \frac{T_{cur}}{T_{max}})
159 |     When last_epoch > the last milestone, lr is automatically set to \eta_{min}.
160 | 
161 |     Args:
162 |         optimizer (Optimizer): Wrapped optimizer.
163 |         milestones (list of ints): List of epoch indices. Must be increasing.
164 |         eta_min (float): Minimum learning rate. Default: 0.
165 |         last_epoch (int): The index of last epoch. Default: -1.
166 |     .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
167 |         https://arxiv.org/abs/1608.03983
168 |     """
169 | 
170 |     def __init__(self, optimizer, milestones, eta_min=0, last_epoch=-1):
171 |         if not list(milestones) == sorted(milestones):
172 |             raise ValueError('Milestones should be a list of'
173 |                              ' increasing integers. Got {}'.format(milestones))
174 |         self.eta_min = eta_min
175 |         self.milestones = milestones
176 |         super(CyclicLinearLR, self).__init__(optimizer, last_epoch)
177 | 
178 |     def get_lr(self):
179 | 
180 |         if self.last_epoch >= self.milestones[-1]:
181 |             return [self.eta_min for base_lr in self.base_lrs]
182 | 
183 |         idx = bisect_right(self.milestones, self.last_epoch)
184 | 
185 |         left_barrier = 0 if idx == 0 else self.milestones[idx - 1]
186 |         right_barrier = self.milestones[idx]
187 | 
188 |         width = right_barrier - left_barrier
189 |         curr_pos = self.last_epoch - left_barrier
190 | 
191 |         return [self.eta_min + (base_lr - self.eta_min) *
192 |                 (1. - 1.0 * curr_pos / width)
193 |                 for base_lr in self.base_lrs]
194 | 
--------------------------------------------------------------------------------
/common_blocks/transforms.py:
--------------------------------------------------------------------------------
1 | from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip
2 | from albumentations.augmentations import transforms
3 | from albumentations.pytorch import ToTensorV2
4 | 
5 | 
6 | def get_transforms(*, data, width, height):
7 |     assert data in ("train", "valid")
8 |     assert width % 32 == 0
9 |     assert height % 32 == 0
10 | 
11 |     if data == "train":
12 |         return Compose(
13 |             [
14 |                 HorizontalFlip(p=0.5),
15 |                 VerticalFlip(p=0.5),
16 |                 transforms.ShiftScaleRotate(
17 |                     scale_limit=0.1, shift_limit=0.0625, rotate_limit=10, p=0.2
18 |                 ),
19 |                 transforms.RandomBrightnessContrast(
20 |                     brightness_limit=0.1, contrast_limit=0.1, p=0.25
21 |                 ),
22 |                 transforms.Resize(height, width, always_apply=True),  # albumentations.Resize expects (height, width)
23 |                 Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
24 |                 ToTensorV2(),
25 |             ]
26 |         )
27 | 
28 |     elif data == "valid":
29 |         return Compose(
30 |             [transforms.Resize(height, width, always_apply=True),  # (height, width) order
31 |              Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
32 |              ToTensorV2(),
33 |              ]
34 |         )
35 | 
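36 | # Usage sketch (sizes mirror config_classification.yml, which trains at 320x320):
37 | #     tfms = get_transforms(data="train", width=320, height=320)
38 | #     image_tensor = tfms(image=image_array)["image"]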
--------------------------------------------------------------------------------
/common_blocks/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import ntpath
3 | import os
4 | import os.path
5 | import random
6 | from functools import partial
7 | from typing import Any
8 | import scipy.optimize  # used by OptimizedRounder.fit; this import was missing
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | import pandas as pd
12 | import torch
13 | from sklearn.metrics import cohen_kappa_score
14 | from sklearn.model_selection import KFold
15 | 
16 | 
17 | def seed_torch(seed=42):
18 |     random.seed(seed)
19 |     os.environ["PYTHONHASHSEED"] = str(seed)
20 |     np.random.seed(seed)
21 |     torch.manual_seed(seed)
22 |     torch.cuda.manual_seed(seed)
23 |     torch.backends.cudnn.deterministic = True
24 | 
25 | 
26 | def qwk_metric(y_pred, y, detach=True):
27 |     y_pred = torch.round(y_pred).to("cpu").numpy().argmax(1)
28 |     y = y.to("cpu").numpy()
29 |     # k = torch.tensor(cohen_kappa_score(torch.round(y_pred), y, weights='quadratic'), device='cuda:0')
30 |     # k[k != k] = 0
31 |     # k[torch.isinf(k)] = 0
32 |     # return k
33 |     return cohen_kappa_score(y_pred, y, weights="quadratic")  # quadratic weighting, as the function name implies
34 | 
35 | 
36 | def create_folds(configs):
37 |     folds = pd.read_csv(configs["train_csv"], sep=';')
38 |     train_labels = folds[configs["target_col"]].values
39 |     kf = KFold(n_splits=configs["nfolds"])
40 | 
41 |     for fold, (train_index, val_index) in enumerate(kf.split(folds.values, train_labels)):
42 |         folds.loc[val_index, "fold"] = int(fold)
43 |     folds["fold"] = folds["fold"].astype(int)
44 |     folds.to_csv(configs["folds_path"], index=None)
45 |     return folds
46 | 
47 | 
48 | class OptimizedRounder(object):
49 |     """
50 |     Optimizes rounding thresholds for ordinal predictions (maximizes quadratic kappa).
51 | 
52 |     Usage sketch:
53 |         opt = OptimizedRounder()
54 |         opt.fit(valid_preds, valid_targets)  # search thresholds on validation data
55 |         test_pred = opt.predict(test_preds, opt.coefficients()).astype(int)
56 |         test_df['diagnosis'] = test_pred
57 |         test_df.to_csv('submission.csv', index=False)
58 | 
59 | 
60 |     """
61 | 
62 |     def __init__(self):
63 |         self.coef_ = 0
64 | 
65 |     def _kappa_loss(self, coef, X, y):
66 |         X_p = np.copy(X)
67 |         for i, pred in enumerate(X_p):
68 |             if pred < coef[0]:
69 |                 X_p[i] = 0
70 |             elif coef[0] <= pred < coef[1]:
71 |                 X_p[i] = 1
72 |             elif coef[1] <= pred < coef[2]:
73 |                 X_p[i] = 2
74 |             elif coef[2] <= pred < coef[3]:
75 |                 X_p[i] = 3
76 |             else:
77 |                 X_p[i] = 4
78 | 
79 |         ll = cohen_kappa_score(y, X_p, weights="quadratic")
80 |         return -ll
81 | 
82 |     def fit(self, X, y):
83 |         loss_partial = partial(self._kappa_loss, X=X, y=y)
84 |         initial_coef = [0.5, 1.5, 2.5, 3.5]
85 |         self.coef_ = scipy.optimize.minimize(
86 |             loss_partial, initial_coef, method="nelder-mead"
87 |         )
88 |         print(-loss_partial(self.coef_["x"]))
89 | 
90 |     def predict(self, X, coef):
91 |         X_p = np.copy(X)
92 |         for i, pred in enumerate(X_p):
93 |             if pred < coef[0]:
94 |                 X_p[i] = 0
95 |             elif coef[0] <= pred < coef[1]:
96 |                 X_p[i] = 1
97 |             elif coef[1] <= pred < coef[2]:
98 |                 X_p[i] = 2
99 |             elif coef[2] <= pred < coef[3]:
100 |                 X_p[i] = 3
101 |             else:
102 |                 X_p[i] = 4
103 |         return X_p
104 | 
105 |     def coefficients(self):
106 |         return self.coef_["x"]
107 | 
108 | 
109 | def convert_model(model, full_checkpoint_path, output_path):
110 |     checkpoint = torch.load(
111 |         full_checkpoint_path, map_location=lambda storage, loc: storage
112 |     )
113 | 
114 |     sanitized_dict = {}
115 |     for k, v in checkpoint["state_dict"].items():
116 |         sanitized_dict[k.replace("model.model", "model")] = v
117 |         # sanitized_dict[k.replace("model.", "")] = v
118 | 
119 |     sample = torch.rand(1, 3, 256, 256, dtype=torch.float32)
120 |     model.load_state_dict(sanitized_dict)
121 |     scripted_model = torch.jit.trace(model, sample)
122 |     filename = ntpath.basename(full_checkpoint_path).replace("=", "")
123 |     os.makedirs(output_path, exist_ok=True)
124 |     scripted_model.save(f"{output_path}/{filename}.pth")
125 | 
126 | 
127 | def load_obj(obj_path: str, default_obj_path: str = "") -> Any:
128 |     """Extract an object from a given path.
129 |     Args:
130 |         obj_path: Path to an object to be extracted, including the object name.
131 |         default_obj_path: Default object path.
132 |     Returns:
133 |         Extracted object, e.g. load_obj("torch.optim.Adam") returns the Adam class.
134 |     Raises:
135 |         AttributeError: When the object does not have the given named attribute.
136 | """ 137 | obj_path_list = obj_path.rsplit(".", 1) 138 | obj_path = obj_path_list.pop(0) if len(obj_path_list) > 1 else default_obj_path 139 | obj_name = obj_path_list[0] 140 | module_obj = importlib.import_module(obj_path) 141 | if not hasattr(module_obj, obj_name): 142 | raise AttributeError(f"Object `{obj_name}` cannot be loaded from `{obj_path}`.") 143 | return getattr(module_obj, obj_name) 144 | 145 | 146 | def plot_prec_recall_vs_tresh(precisions, recalls, thresholds): 147 | plt.plot(thresholds, precisions[:-1], 'b--', label='precision') 148 | plt.plot(thresholds, recalls[:-1], 'g--', label='recall') 149 | plt.xlabel('Threshold') 150 | plt.legend(loc='upper left') 151 | plt.ylim([0, 1]) 152 | -------------------------------------------------------------------------------- /config/config_classification.yml: -------------------------------------------------------------------------------- 1 | Train: 2 | Dataset: 3 | images_path: ./ 4 | target_height: 320 5 | target_width: 320 6 | target_col: label 7 | loader: 8 | batch_size: 16 9 | shuffle: True 10 | num_workers: 4 11 | Val: 12 | Dataset: 13 | images_path: ./ 14 | target_height: 320 15 | target_width: 320 16 | target_col: label 17 | loader: 18 | batch_size: 20 19 | shuffle: False 20 | num_workers: 4 21 | 22 | logger_path: 23 | main_logger: ./lightning_logs/main_logs.txt 24 | lightning_logger: ./lightning_logs 25 | 26 | model_params: 27 | num_outputs: 1 28 | model_name: efficientnet_b2b 29 | pretrained: True 30 | freeze_bn: False 31 | dropout_p: 0.5 32 | show_model_summary: False 33 | 34 | training: 35 | batch_size: 16 36 | loss: FocalLoss #LabelSmoothingCrossEntropyBCE #BCELoss FocalLoss 37 | warmup_steps: 300 38 | optimizer: 39 | name: Adam 40 | kwargs: 41 | lr: 1e-4 42 | early_stop_callback: 43 | monitor: acc_metric 44 | mode: max 45 | patience: 4 46 | verbose: True 47 | ModelCheckpoint: 48 | path: /{epoch:02d}-{avg_val_metric:.4f} 49 | kwargs: 50 | monitor: avg_val_metric 51 | mode: max 52 | scheduler: 53 | ReduceLROnPlateau: 54 | factor: 0.5 55 | patience: 2 56 | verbose: True 57 | mode: max 58 | kwargs: 59 | monitor: avg_val_metric # Default: val_loss 60 | interval: epoch 61 | frequency: 1 62 | Trainer: 63 | show_progress_bar: False 64 | max_epochs: 20 65 | min_epochs: 3 66 | precision: 32 67 | fast_dev_run: False 68 | accumulate_grad_batches: 1 69 | gpus: 1 70 | train_percent_check: 1 71 | val_percent_check: 1 72 | num_nodes: 1 73 | auto_lr_find: False # bugs 74 | 75 | 76 | validation: 77 | train_csv: all_train.csv 78 | test_csv: None 79 | folds_path: ./lightning_logs/folds.csv 80 | target_col: label 81 | nfolds: 4 82 | location_col: 83 | seed: 42 84 | batch_size: 16 85 | 86 | total_seed: 42 87 | 88 | test_inference: 89 | train_csv: all_test.csv 90 | target_col: label 91 | nfolds: 2 92 | folds_path: ./lightning_logs/all_train.csv 93 | models_path: './lightning_logs/models' 94 | TTA: 95 | - null 96 | - flip_lr 97 | - flip_ud 98 | Dataset: 99 | images_path: ./ 100 | target_height: 320 101 | target_width: 320 102 | loader: 103 | batch_size: 16 104 | shuffle: False 105 | num_workers: 8 106 | threshold: 0.485 107 | -------------------------------------------------------------------------------- /fit_predict_graph.py: -------------------------------------------------------------------------------- 1 | from torch_geometric.nn import GCNConv, JumpingKnowledge, global_add_pool 2 | from torch.nn import functional as F 3 | from torch_geometric import transforms 4 | from skorch import NeuralNetClassifier 5 | import torch 6 | from 
torch_geometric.data import Batch
7 | from sklearn.model_selection import train_test_split
8 | from sklearn.metrics import roc_auc_score, average_precision_score
9 | from torch_geometric.data import Data, InMemoryDataset, download_url  # Data was missing from this import
10 | from rdkit import Chem
11 | import pandas as pd
12 | 
13 | 
14 | 
15 | class COVID(InMemoryDataset):
16 |     url = 'https://github.com/yangkevin2/coronavirus_data/raw/master/data/mpro_xchem.csv'
17 | 
18 |     def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
19 |         super(COVID, self).__init__(root, transform, pre_transform, pre_filter)
20 |         # Load processed data
21 |         self.data, self.slices = torch.load(self.processed_paths[0])
22 | 
23 |     @property
24 |     def raw_file_names(self):
25 |         return ['mpro_xchem.csv']
26 | 
27 |     @property
28 |     def processed_file_names(self):
29 |         return ['data.pt']
30 | 
31 |     def download(self):
32 |         download_url(self.url, self.raw_dir)
33 | 
34 |     def process(self):
35 |         df = pd.read_csv(self.raw_paths[0])
36 |         data_list = []
37 |         for smiles, label in df.itertuples(False, None):
38 |             mol = Chem.MolFromSmiles(smiles)  # Read the molecule info
39 |             adj = Chem.GetAdjacencyMatrix(mol)  # Get molecule structure
40 |             # You should extract other features here!
41 |             data = Data(num_nodes=adj.shape[0],
42 |                         edge_index=torch.Tensor(adj).nonzero().T, y=label)
43 |             data_list.append(data)
44 |         self.data, self.slices = self.collate(data_list)
45 |         torch.save((self.data, self.slices), self.processed_paths[0])
46 | 
47 | 
48 | class SimpleGNN(torch.nn.Module):
49 |     def __init__(self, dataset, hidden=64, layers=6):
50 |         super(SimpleGNN, self).__init__()
51 |         self.dataset = dataset
52 |         self.convs = torch.nn.ModuleList()
53 |         self.convs.append(GCNConv(in_channels=dataset.num_node_features,
54 |                                   out_channels=hidden))
55 | 
56 |         for _ in range(1, layers):
57 |             self.convs.append(GCNConv(in_channels=hidden, out_channels=hidden))
58 | 
59 |         self.jk = JumpingKnowledge(mode="cat")
60 |         self.jk_lin = torch.nn.Linear(
61 |             in_features=hidden*layers, out_features=hidden)
62 |         self.lin_1 = torch.nn.Linear(in_features=hidden, out_features=hidden)
63 |         self.lin_2 = torch.nn.Linear(
64 |             in_features=hidden, out_features=dataset.num_classes)
65 | 
66 |     def forward(self, index):
67 |         data = Batch.from_data_list(self.dataset[index])
68 |         x = data.x
69 |         xs = []
70 |         for conv in self.convs:
71 |             x = F.relu(conv(x=x, edge_index=data.edge_index))
72 |             xs.append(x)
73 | 
74 |         x = self.jk(xs)
75 |         x = F.relu(self.jk_lin(x))
76 |         x = global_add_pool(x, batch=data.batch)
77 |         x = F.relu(self.lin_1(x))
78 |         x = F.softmax(self.lin_2(x), dim=-1)
79 |         return x
80 | 
81 | 
82 | if __name__ == "__main__":
83 |     print("Preprocessing data")
84 |     ohd = transforms.OneHotDegree(max_degree=4)
85 |     covid = COVID(root='./data/COVID/', transform=ohd)
86 | 
87 |     X_train, X_test, y_train, y_test = train_test_split(
88 |         torch.arange(len(covid)).long(), covid.data.y, test_size=0.3, random_state=42)
89 | 
90 | 
91 |     net = NeuralNetClassifier(
92 |         module=SimpleGNN,
93 |         module__dataset=covid,
94 |         max_epochs=20,
95 |         batch_size=-1,  # -1 trains on the full dataset as a single batch
96 |         lr=0.001
97 |     )
98 |     print("Starting training")
99 |     fit = net.fit(X_train, y_train)
100 |     # predict_proba column 1 is the positive-class probability
101 |     print('AUC TRAIN', roc_auc_score(
102 |         y_train, fit.predict_proba(X_train)[:, 1]))
103 |     print('AUC TEST', roc_auc_score(y_test, fit.predict_proba(X_test)[:, 1]))
104 |     print('MAP TEST', average_precision_score(y_test, fit.predict_proba(X_test)[:, 1]))
105 | 
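106 |     # Optional persistence sketch (paths are illustrative): skorch can checkpoint
107 |     # the trained weights for later reuse.
108 |     net.save_params(f_params="./data/COVID/simple_gnn_params.pt")
109 |     # restored = NeuralNetClassifier(module=SimpleGNN, module__dataset=covid,
110 |     #                                batch_size=-1, lr=0.001)
111 |     # restored.initialize()  # build the module before loading weights
112 |     # restored.load_params(f_params="./data/COVID/simple_gnn_params.pt")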
--------------------------------------------------------------------------------
/generate_images.py:
--------------------------------------------------------------------------------
1 | from torch_geometric.nn import GCNConv, JumpingKnowledge, global_add_pool
2 | from torch.nn import functional as F
3 | from torch_geometric import transforms
4 | from skorch import NeuralNetClassifier
5 | import torch
6 | from torch_geometric.data import Batch
7 | from sklearn.model_selection import train_test_split
8 | from sklearn.metrics import roc_auc_score
9 | from torch_geometric.data import Data, InMemoryDataset, download_url  # Data was missing from this import
10 | from rdkit import Chem
11 | import pandas as pd
12 | 
13 | from tqdm import tqdm
14 | from torch_geometric import utils
15 | import matplotlib.pyplot as plt
16 | import networkx as nx
17 | 
18 | 
19 | class COVID(InMemoryDataset):
20 |     url = 'https://github.com/yangkevin2/coronavirus_data/raw/master/data/mpro_xchem.csv'
21 | 
22 |     def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
23 |         super(COVID, self).__init__(root, transform, pre_transform, pre_filter)
24 |         # Load processed data
25 |         self.data, self.slices = torch.load(self.processed_paths[0])
26 | 
27 |     @property
28 |     def raw_file_names(self):
29 |         return ['mpro_xchem.csv']
30 | 
31 |     @property
32 |     def processed_file_names(self):
33 |         return ['data.pt']
34 | 
35 |     def download(self):
36 |         download_url(self.url, self.raw_dir)
37 | 
38 |     def process(self):
39 |         df = pd.read_csv(self.raw_paths[0])
40 |         data_list = []
41 |         for smiles, label in df.itertuples(False, None):
42 |             mol = Chem.MolFromSmiles(smiles)  # Read the molecule info
43 |             adj = Chem.GetAdjacencyMatrix(mol)  # Get molecule structure
44 |             # You should extract other features here!
45 |             data = Data(num_nodes=adj.shape[0],
46 |                         edge_index=torch.Tensor(adj).nonzero().T, y=label)
47 |             data_list.append(data)
48 |         self.data, self.slices = self.collate(data_list)
49 |         torch.save((self.data, self.slices), self.processed_paths[0])
50 | 
51 | 
52 | class SimpleGNN(torch.nn.Module):  # unused in this script; kept in sync with fit_predict_graph.py
53 |     def __init__(self, dataset, hidden=64, layers=6):
54 |         super(SimpleGNN, self).__init__()
55 |         self.dataset = dataset
56 |         self.convs = torch.nn.ModuleList()
57 |         self.convs.append(GCNConv(in_channels=dataset.num_node_features,
58 |                                   out_channels=hidden))
59 | 
60 |         for _ in range(1, layers):
61 |             self.convs.append(GCNConv(in_channels=hidden, out_channels=hidden))
62 | 
63 |         self.jk = JumpingKnowledge(mode="cat")
64 |         self.jk_lin = torch.nn.Linear(
65 |             in_features=hidden*layers, out_features=hidden)
66 |         self.lin_1 = torch.nn.Linear(in_features=hidden, out_features=hidden)
67 |         self.lin_2 = torch.nn.Linear(
68 |             in_features=hidden, out_features=dataset.num_classes)
69 | 
70 |     def forward(self, index):
71 |         data = Batch.from_data_list(self.dataset[index])
72 |         x = data.x
73 |         xs = []
74 |         for conv in self.convs:
75 |             x = F.relu(conv(x=x, edge_index=data.edge_index))
76 |             xs.append(x)
77 | 
78 |         x = self.jk(xs)
79 |         x = F.relu(self.jk_lin(x))
80 |         x = global_add_pool(x, batch=data.batch)
81 |         x = F.relu(self.lin_1(x))
82 |         x = F.softmax(self.lin_2(x), dim=-1)
83 |         return x
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     print("Preprocessing data")
88 |     ohd = transforms.OneHotDegree(max_degree=4)
89 |     covid = COVID(root='./data/COVID/', transform=ohd)
90 | 
91 |     X_train, X_test, y_train, y_test = train_test_split(
92 |         torch.arange(len(covid)).long(), covid.data.y, test_size=0.3, random_state=42)
93 | 
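94 |     import os  # local import; os is not imported at the top of this script
95 |     # The loops below write into ./train and ./test (both git-ignored), so make
96 |     # sure the directories exist before calling plt.savefig on a fresh clone.
97 |     os.makedirs("./train", exist_ok=True)
98 |     os.makedirs("./test", exist_ok=True)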
99 |     print("Generating for train images")
100 |     for graph in tqdm(X_train):
101 |         fig = plt.figure(figsize=(6, 6))
102 |         G = utils.to_networkx(covid[int(graph)])
103 |         nx.draw_kamada_kawai(G)
104 |         plt.savefig("./train/id_{}_y_{}.jpg".format(int(graph),
105 |                                                     covid.data.y[int(graph)]), format="jpg")
106 |         plt.close(fig)  # free the figure; matplotlib keeps every figure alive otherwise
107 | 
108 |     print("Generating for test images")
109 |     for graph in tqdm(X_test):
110 |         fig = plt.figure(figsize=(6, 6))
111 |         G = utils.to_networkx(covid[int(graph)])
112 |         nx.draw_kamada_kawai(G)
113 |         plt.savefig("./test/id_{}_y_{}.jpg".format(int(graph),
114 |                                                    covid.data.y[int(graph)]), format="jpg")
115 |         plt.close(fig)
116 | 
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
1 | import codecs
2 | import os
3 | import warnings
4 | 
5 | import numpy as np
6 | import pandas as pd
7 | import torch
8 | import matplotlib.pyplot as plt
9 | from poyo import parse_string
10 | from sklearn import metrics
11 | from torch.utils.data import DataLoader
12 | from tqdm import tqdm
13 | from common_blocks.datasets import TestDataset
14 | from common_blocks.transforms import get_transforms
15 | from common_blocks.utils import create_folds
16 | from common_blocks.utils import plot_prec_recall_vs_tresh
17 | from models.lightningclassifier import LightningClassifier
18 | 
19 | with codecs.open("config/config_classification.yml", encoding="utf-8") as ymlfile:
20 |     config_yaml = ymlfile.read()
21 |     config = parse_string(config_yaml)
22 | 
23 | warnings.filterwarnings("ignore", category=RuntimeWarning)
24 | 
25 | 
26 | def get_tta_preds(net, images, augment=["null"]):
27 |     with torch.no_grad():
28 |         net.eval()
29 |         # the base ("null") pass always runs; flipped views are added on top when requested
30 |         logit = net(images)
31 |         probability = torch.sigmoid(logit)
32 |         if "flip_lr" in augment:
33 |             logit = net(torch.flip(images, dims=[3]))
34 |             probability += torch.sigmoid(logit)
35 |         if "flip_ud" in augment:
36 |             logit = net(torch.flip(images, dims=[2]))
37 |             probability += torch.sigmoid(logit)
38 |         probability = probability / len(augment)  # assumes the null entry is in the augment list, as in the config's TTA
39 |     return probability.data.cpu().numpy()
40 | 
41 | 
42 | def get_all_models(path):
43 |     all_models = []
44 | 
45 |     for model_path in os.listdir(path):
46 |         model = LightningClassifier(config)
47 |         checkpoint = torch.load(
48 |             os.path.join(path, model_path), map_location=lambda storage, loc: storage
49 |         )
50 |         model.load_state_dict(checkpoint["state_dict"])
51 |         model.to(device)
52 |         model.freeze()
53 |         all_models.append(model)
54 |     return all_models
55 | 
56 | 
57 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
58 | 
59 | if __name__ == "__main__":
60 |     folds = create_folds(config["test_inference"])
61 | 
62 |     dataset = TestDataset(
63 |         folds,
64 |         config["test_inference"]["Dataset"],
65 |         transform=get_transforms(data="valid", width=config["test_inference"]["Dataset"]["target_width"],
66 |                                  height=config["test_inference"]["Dataset"]["target_height"]))
67 |     loader = DataLoader(dataset, **config["test_inference"]["loader"])
68 |     all_models = get_all_models(config["test_inference"]["models_path"])
69 |     model_results = {"preds": [], "image_names": [], "image_label": []}
70 |     for fnames, images in tqdm(loader):
71 |         images = images.to(device)
72 |         batch_preds = None
73 |         for model in all_models:
74 |             if batch_preds is None:
75 |                 batch_preds = get_tta_preds(model, images, augment=config["test_inference"]["TTA"])
76 |             else:
77 |                 batch_preds += get_tta_preds(model, images, augment=config["test_inference"]["TTA"])
78 |         model_results["image_names"].extend(list(fnames))
79 |         model_results["preds"].append(batch_preds)
80 | 
81 |     model_results['preds'] = np.concatenate(model_results["preds"]).ravel() / len(all_models)
82 |     model_results["image_label"] = list((model_results["preds"] > config["test_inference"]["threshold"]
83 |                                          ).astype(int))
84 |     model_results = pd.DataFrame(model_results)
85 |     model_results['gt_label'] = folds.label.reset_index(drop=True)
86 | 
87 |     model_results.to_excel('./lightning_logs/model_preds_train.xlsx', index=True)
88 |     print('ROC AUC', round(metrics.roc_auc_score(model_results['gt_label'], model_results['preds']), 3))
89 |     print('Precision', round(model_results[model_results['image_label'] == 1].gt_label.mean(), 3))  # mean gt among predicted positives == precision
90 |     print('Recall', round(metrics.recall_score(model_results['gt_label'], model_results['image_label']), 3))
91 |     print('F1_score', round(metrics.f1_score(model_results['gt_label'], model_results['image_label']), 3))
92 |     print('MAP', round(metrics.average_precision_score(model_results['gt_label'], model_results['preds']), 3))
93 | 
94 |     prec, rec, tre = metrics.precision_recall_curve(model_results['gt_label'], model_results['preds'])
95 | 
96 |     plot_prec_recall_vs_tresh(prec, rec, tre)
97 |     plt.show()
98 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Diyago/Graph-clasification-by-computer-vision/703c44b98f9875d7a7b6db1c2b96372e11e256d6/models/__init__.py
--------------------------------------------------------------------------------
/models/lightningclassifier.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytorch_lightning as pl
3 | import torch
4 | from pytorch_lightning import _logger as log
5 | from pytorch_lightning.core.memory import ModelSummary
6 | from sklearn.metrics import average_precision_score
7 | from torch import nn
8 | from torch.autograd import Variable
9 | from torch.optim.lr_scheduler import ReduceLROnPlateau
10 | 
11 | from common_blocks.losses import LabelSmoothingCrossEntropyBCE, FocalLoss
12 | from models.pretrained_models import get_model_output
13 | 
14 | 
15 | class LightningClassifier(pl.LightningModule):
16 |     def __init__(self, config):
17 |         super(LightningClassifier, self).__init__()
18 |         self.hparams = config
19 |         self.model = get_model_output(**config['model_params'])  # CustomSEResNeXt(config['model_params'])
20 |         self.val_metrics = []
21 | 
22 |     def forward(self, x):
23 |         batch_size, channels, width, height = x.size()
24 |         return self.model.forward(x)
25 | 
26 |     def get_loss(self, y_preds, labels):  # dispatched by config['training']['loss'] (the shipped config uses FocalLoss)
27 |         if self.hparams['training']['loss'] == 'FocalLoss':
28 |             loss_func = FocalLoss()
29 |             return loss_func(y_preds, labels.float())
30 |         elif self.hparams['training']['loss'] == 'LabelSmoothingCrossEntropyBCE':
31 |             loss_func = LabelSmoothingCrossEntropyBCE()
32 |             return loss_func(y_preds, labels.float())
33 |         elif self.hparams['training']['loss'] == 'BCELoss':
34 |             loss_func = nn.BCELoss()
35 |             return loss_func(y_preds, labels.float())
36 |         else:
37 |             raise NotImplementedError("This loss {} isn't implemented".format(self.hparams['training']['loss']))
38 | 
39 |     def training_step(self, train_batch, batch_idx):
40 |         x, y = train_batch
41 |         y_preds = self.forward(x)
42 |         loss = self.get_loss(torch.flatten(torch.sigmoid(y_preds)), y)
43 |         with torch.no_grad():
44 |             preds = torch.flatten((torch.sigmoid(y_preds) > 0.5).int().cpu())  # threshold probabilities, not raw logits
45 |             metric = (preds == y.int().cpu()).sum().item() / len(y)
46 | 
47 |         logs = {'train_loss': loss, 'train_metric': metric}
48 |         progress_bar = {'train_metric': metric}
49 |         return {'loss': loss, 'metric': metric, 'log': logs, "progress_bar": progress_bar}
50 | 
51 |     def training_step_mixup(self, train_batch, batch_idx):
52 |         x, y = train_batch
53 |         x, targets_a, targets_b, lam = mixup_data(x, y, alpha=1)
54 |         x, targets_a, targets_b = map(Variable, (x, targets_a, targets_b))
55 |         y_preds = torch.flatten(torch.sigmoid(self.model(x)))
56 | 
57 |         loss = self.mixup_criterion(y_preds, targets_a.float(), targets_b.float(), lam)
58 | 
59 |         total = x.size(0)
60 |         correct = (lam * y_preds.round().eq(targets_a.data).cpu().sum().float()  # round the probabilities before comparing with 0/1 targets
61 |                    + (1 - lam) * y_preds.round().eq(targets_b.data).cpu().sum().float())
62 |         logs = {'train_loss': loss, 'train_metric': correct / total}
63 |         progress_bar = {'train_metric': correct / total}
64 |         return {'loss': loss, 'metric': correct / total, 'log': logs, "progress_bar": progress_bar}
65 | 
66 |     def training_epoch_end(self, outputs):
67 | 
68 |         avg_loss_train = torch.stack([x['loss'] for x in outputs]).mean()
69 |         avg_metric_train = np.stack([x['metric'] for x in outputs]).mean()
70 | 
71 |         tensorboard_logs = {'avg_train_loss': avg_loss_train, 'acc_train_metric': avg_metric_train}
72 |         print('\ntrain', 'avg_train_metric', avg_metric_train)
73 |         return {'avg_train_loss': avg_loss_train, 'avg_train_metric': avg_metric_train, 'log': tensorboard_logs}
74 | 
75 |     def validation_step(self, val_batch, batch_idx):
76 |         x, y = val_batch
77 |         with torch.no_grad():
78 |             y_preds = self.forward(x)
79 | 
80 |         loss = self.get_loss(torch.flatten(torch.sigmoid(y_preds)), y)
81 |         preds = torch.flatten((torch.sigmoid(y_preds) > 0.5).int().cpu())  # threshold probabilities, not raw logits
82 |         metric = (preds == y.int().cpu()).sum().item() / len(y)
83 |         return {'val_loss': loss,
84 |                 'pred_label': (y_preds).cpu(),  # raw logits are fine for average_precision_score (rank-based)
85 |                 'val_metric': metric,
86 |                 'label': y.int().cpu()}
87 | 
88 |     def validation_epoch_end(self, outputs):
89 |         avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
90 |         avg_metric = np.stack([x['val_metric'] for x in outputs]).mean()
91 | 
92 |         print('\nval', 'avg_val_metric', avg_metric)
93 |         all_pred_label = torch.cat([x['pred_label'] for x in outputs])
94 |         all_label = torch.cat([x['label'] for x in outputs])
95 |         try:
96 |             avg_metric = average_precision_score(y_score=all_pred_label, y_true=all_label)
97 |         except ValueError:
98 |             avg_metric = 0.5
99 |         print('validation_epoch_end', avg_metric)
100 |         self.val_metrics.append(avg_metric)
101 |         tensorboard_logs = {'val_loss': avg_loss, 'acc_metric': avg_metric,
102 |                             'average_precision_score': avg_metric}
103 |         return {'avg_val_loss': avg_loss, 'avg_val_metric': avg_metric, 'log': tensorboard_logs,
104 |                 "progress_bar": tensorboard_logs}
105 | 
106 |     def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, second_order_closure):
107 |         if self.trainer.global_step < self.hparams['training']['warmup_steps']:
108 |             lr_scale = min(1., float(self.trainer.global_step + 1) / self.hparams['training']['warmup_steps'])
109 |             for pg in optimizer.param_groups:
110 |                 pg['lr'] = lr_scale * self.hparams['training']['optimizer']['kwargs']['lr']
111 |         optimizer.step()
112 |         optimizer.zero_grad()
113 | 
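114 |     # Worked example: with warmup_steps=300 and lr=1e-4 (the shipped config),
115 |     # global_step 0 trains at 1e-4/300 ≈ 3.3e-7, step 149 at ~5e-5, and step 299
116 |     # reaches the full 1e-4, after which ReduceLROnPlateau takes over.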
117 |     def prepare_data(self):
118 |         pass
119 | 
120 |     def summarize(self, mode: str) -> None:
121 |         if self.hparams['model_params']['show_model_summary']:
122 |             model_summary = ModelSummary(self, mode=mode)
123 |             log.info('\n' + model_summary.__str__())
124 | 
125 |     def configure_optimizers(self):
126 |         if self.hparams['training']['optimizer']['name'] == 'Adam':
127 |             optimizer = torch.optim.Adam(self.parameters(), **self.hparams['training']['optimizer']['kwargs'])
128 |         else:
129 |             raise NotImplementedError(
130 |                 "This optimizer {} isn't implemented".format(self.hparams['training']['optimizer']['name']))
131 | 
132 |         scheduler = {
133 |             'scheduler': ReduceLROnPlateau(optimizer, **self.hparams['training']['scheduler']['ReduceLROnPlateau']),
134 |             **self.hparams['training']['scheduler']['kwargs']
135 |         }
136 |         return [optimizer], [scheduler]
137 | 
138 |     def test_step(self, batch, batch_idx):
139 |         x, y = batch
140 |         with torch.no_grad():
141 |             y_preds = self.forward(x)
142 |             y_preds = torch.flatten(torch.sigmoid(y_preds))
143 | 
144 |         return {'y_preds': (y_preds).cpu(),
145 |                 'y_labels': torch.flatten((y_preds > 0.5).int().cpu())
146 |                 }
147 | 
148 |     def test_epoch_end(self, outputs):
149 |         all_pred_label = torch.cat([x['y_labels'] for x in outputs])
150 |         all_preds = torch.cat([x['y_preds'] for x in outputs])
151 |         return {'all_pred_label': all_pred_label,
152 |                 'all_preds': all_preds}
153 | 
154 |     def mixup_criterion(self, pred, y_a, y_b, lam):
155 |         return lam * self.get_loss(pred, y_a) + (1 - lam) * self.get_loss(pred, y_b)
156 | 
157 | 
158 | def mixup_data(x, y, alpha=1.0, use_cuda=True):
159 |     '''Returns mixed inputs, pairs of targets, and lambda'''
160 |     if alpha > 0:
161 |         lam = np.random.beta(alpha, alpha)
162 |     else:
163 |         lam = 1
164 | 
165 |     batch_size = x.size()[0]
166 |     if use_cuda:
167 |         index = torch.randperm(batch_size).cuda()
168 |     else:
169 |         index = torch.randperm(batch_size)
170 | 
171 |     mixed_x = lam * x + (1 - lam) * x[index, :]
172 |     y_a, y_b = y, y[index]
173 |     return mixed_x, y_a, y_b, lam
--------------------------------------------------------------------------------
/models/pretrained_models.py:
--------------------------------------------------------------------------------
1 | import pretrainedmodels
2 | import torch.nn as nn
3 | from pytorchcv.model_provider import get_model
4 | from torchvision import models
5 | 
6 | 
7 | def get_model_output(model_name='resnet18', num_outputs=None, pretrained=True,
8 |                      freeze_bn=False, dropout_p=0, **kwargs):
9 |     if 'efficientnet' in model_name:
10 |         model = get_model(model_name, pretrained=pretrained)  # honour the pretrained flag instead of hardcoding True
11 |         if dropout_p == 0:
12 |             model.output = nn.Linear(model.output.fc.in_features, num_outputs)
13 |         else:
14 |             model.output = nn.Sequential(
15 |                 nn.Dropout(p=dropout_p),
16 |                 nn.Linear(model.output.fc.in_features, num_outputs),
17 |             )
18 |     elif 'densenet' in model_name:
19 |         model = models.__dict__[model_name](num_classes=1000,
20 |                                             pretrained=pretrained)
21 |         in_features = model.classifier.in_features
22 |         model.classifier = nn.Linear(in_features, num_outputs)
23 | 
24 |     else:
25 |         pretrained = 'imagenet' if pretrained else None
26 |         model = pretrainedmodels.__dict__[model_name](num_classes=1000,
27 |                                                       pretrained=pretrained)
28 | 
29 |         if 'dpn' in model_name:
30 |             in_channels = model.last_linear.in_channels
31 |             model.last_linear = nn.Conv2d(in_channels, num_outputs,
32 |                                           kernel_size=1, bias=True)
33 |         else:
34 |             if 'resnet' in model_name:
35 |                 model.avgpool = nn.AdaptiveAvgPool2d(1)
36 |             else:
37 |                 model.avg_pool = nn.AdaptiveAvgPool2d(1)
38 |             in_features = model.last_linear.in_features
39 |             if dropout_p == 0:
40 |                 model.last_linear = nn.Linear(in_features, num_outputs)
41 |             else:
42 |                 model.last_linear = nn.Sequential(
43 |                     nn.Dropout(p=dropout_p),
44 |                     nn.Linear(in_features, num_outputs),
45 |                 )
46 | 
47 |     if freeze_bn:
48 |         for m in 
model.modules(): 49 | if isinstance(m, nn.BatchNorm2d): 50 | m.weight.requires_grad = False 51 | m.bias.requires_grad = False 52 | 53 | return model 54 | -------------------------------------------------------------------------------- /models/seresnext.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import load as load_model 3 | 4 | # https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/senet.py 5 | 6 | import math 7 | from collections import OrderedDict 8 | from torch.utils import model_zoo 9 | 10 | class SEModule(nn.Module): 11 | 12 | def __init__(self, channels, reduction): 13 | super(SEModule, self).__init__() 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, 16 | padding=0) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, 19 | padding=0) 20 | self.sigmoid = nn.Sigmoid() 21 | 22 | def forward(self, x): 23 | module_input = x 24 | x = self.avg_pool(x) 25 | x = self.fc1(x) 26 | x = self.relu(x) 27 | x = self.fc2(x) 28 | x = self.sigmoid(x) 29 | return module_input * x 30 | 31 | 32 | class Bottleneck(nn.Module): 33 | """ 34 | Base class for bottlenecks that implements `forward()` method. 35 | """ 36 | 37 | def forward(self, x): 38 | residual = x 39 | 40 | out = self.conv1(x) 41 | out = self.bn1(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv2(out) 45 | out = self.bn2(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv3(out) 49 | out = self.bn3(out) 50 | 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | 54 | out = self.se_module(out) + residual 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class SEBottleneck(Bottleneck): 61 | """ 62 | Bottleneck for SENet154. 63 | """ 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 67 | downsample=None): 68 | super(SEBottleneck, self).__init__() 69 | self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) 70 | self.bn1 = nn.BatchNorm2d(planes * 2) 71 | self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3, 72 | stride=stride, padding=1, groups=groups, 73 | bias=False) 74 | self.bn2 = nn.BatchNorm2d(planes * 4) 75 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, 76 | bias=False) 77 | self.bn3 = nn.BatchNorm2d(planes * 4) 78 | self.relu = nn.ReLU(inplace=True) 79 | self.se_module = SEModule(planes * 4, reduction=reduction) 80 | self.downsample = downsample 81 | self.stride = stride 82 | 83 | 84 | class SEResNetBottleneck(Bottleneck): 85 | """ 86 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe 87 | implementation and uses `stride=stride` in `conv1` and not in `conv2` 88 | (the latter is used in the torchvision implementation of ResNet). 
89 | """ 90 | expansion = 4 91 | 92 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 93 | downsample=None): 94 | super(SEResNetBottleneck, self).__init__() 95 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, 96 | stride=stride) 97 | self.bn1 = nn.BatchNorm2d(planes) 98 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, 99 | groups=groups, bias=False) 100 | self.bn2 = nn.BatchNorm2d(planes) 101 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 102 | self.bn3 = nn.BatchNorm2d(planes * 4) 103 | self.relu = nn.ReLU(inplace=True) 104 | self.se_module = SEModule(planes * 4, reduction=reduction) 105 | self.downsample = downsample 106 | self.stride = stride 107 | 108 | 109 | class SEResNeXtBottleneck(Bottleneck): 110 | """ 111 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module. 112 | """ 113 | expansion = 4 114 | 115 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 116 | downsample=None, base_width=4): 117 | super(SEResNeXtBottleneck, self).__init__() 118 | width = math.floor(planes * (base_width / 64)) * groups 119 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, 120 | stride=1) 121 | self.bn1 = nn.BatchNorm2d(width) 122 | self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, 123 | padding=1, groups=groups, bias=False) 124 | self.bn2 = nn.BatchNorm2d(width) 125 | self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) 126 | self.bn3 = nn.BatchNorm2d(planes * 4) 127 | self.relu = nn.ReLU(inplace=True) 128 | self.se_module = SEModule(planes * 4, reduction=reduction) 129 | self.downsample = downsample 130 | self.stride = stride 131 | 132 | 133 | class SENet(nn.Module): 134 | 135 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2, 136 | inplanes=128, input_3x3=True, downsample_kernel_size=3, 137 | downsample_padding=1, num_classes=1000): 138 | super(SENet, self).__init__() 139 | self.inplanes = inplanes 140 | if input_3x3: 141 | layer0_modules = [ 142 | ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, 143 | bias=False)), 144 | ('bn1', nn.BatchNorm2d(64)), 145 | ('relu1', nn.ReLU(inplace=True)), 146 | ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, 147 | bias=False)), 148 | ('bn2', nn.BatchNorm2d(64)), 149 | ('relu2', nn.ReLU(inplace=True)), 150 | ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, 151 | bias=False)), 152 | ('bn3', nn.BatchNorm2d(inplanes)), 153 | ('relu3', nn.ReLU(inplace=True)), 154 | ] 155 | else: 156 | layer0_modules = [ 157 | ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, 158 | padding=3, bias=False)), 159 | ('bn1', nn.BatchNorm2d(inplanes)), 160 | ('relu1', nn.ReLU(inplace=True)), 161 | ] 162 | # To preserve compatibility with Caffe weights `ceil_mode=True` 163 | # is used instead of `padding=1`. 
164 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, 165 | ceil_mode=True))) 166 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) 167 | self.layer1 = self._make_layer( 168 | block, 169 | planes=64, 170 | blocks=layers[0], 171 | groups=groups, 172 | reduction=reduction, 173 | downsample_kernel_size=1, 174 | downsample_padding=0 175 | ) 176 | self.layer2 = self._make_layer( 177 | block, 178 | planes=128, 179 | blocks=layers[1], 180 | stride=2, 181 | groups=groups, 182 | reduction=reduction, 183 | downsample_kernel_size=downsample_kernel_size, 184 | downsample_padding=downsample_padding 185 | ) 186 | self.layer3 = self._make_layer( 187 | block, 188 | planes=256, 189 | blocks=layers[2], 190 | stride=2, 191 | groups=groups, 192 | reduction=reduction, 193 | downsample_kernel_size=downsample_kernel_size, 194 | downsample_padding=downsample_padding 195 | ) 196 | self.layer4 = self._make_layer( 197 | block, 198 | planes=512, 199 | blocks=layers[3], 200 | stride=2, 201 | groups=groups, 202 | reduction=reduction, 203 | downsample_kernel_size=downsample_kernel_size, 204 | downsample_padding=downsample_padding 205 | ) 206 | self.avg_pool = nn.AvgPool2d(7, stride=1) 207 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None 208 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 209 | 210 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, 211 | downsample_kernel_size=1, downsample_padding=0): 212 | downsample = None 213 | if stride != 1 or self.inplanes != planes * block.expansion: 214 | downsample = nn.Sequential( 215 | nn.Conv2d(self.inplanes, planes * block.expansion, 216 | kernel_size=downsample_kernel_size, stride=stride, 217 | padding=downsample_padding, bias=False), 218 | nn.BatchNorm2d(planes * block.expansion), 219 | ) 220 | 221 | layers = [] 222 | layers.append(block(self.inplanes, planes, groups, reduction, stride, 223 | downsample)) 224 | self.inplanes = planes * block.expansion 225 | for i in range(1, blocks): 226 | layers.append(block(self.inplanes, planes, groups, reduction)) 227 | 228 | return nn.Sequential(*layers) 229 | 230 | def features(self, x): 231 | x = self.layer0(x) 232 | x = self.layer1(x) 233 | x = self.layer2(x) 234 | x = self.layer3(x) 235 | x = self.layer4(x) 236 | return x 237 | 238 | def logits(self, x): 239 | x = self.avg_pool(x) 240 | if self.dropout is not None: 241 | x = self.dropout(x) 242 | x = x.view(x.size(0), -1) 243 | x = self.last_linear(x) 244 | return x 245 | 246 | def forward(self, x): 247 | x = self.features(x) 248 | x = self.logits(x) 249 | return x 250 | 251 | 252 | def initialize_pretrained_model(model, num_classes, settings): 253 | assert num_classes == settings['num_classes'], \ 254 | 'num_classes should be {}, but is {}'.format( 255 | settings['num_classes'], num_classes) 256 | model.load_state_dict(model_zoo.load_url(settings['url'])) 257 | model.input_space = settings['input_space'] 258 | model.input_size = settings['input_size'] 259 | model.input_range = settings['input_range'] 260 | model.mean = settings['mean'] 261 | model.std = settings['std'] 262 | 263 | 264 | def se_resnext50_32x4d(num_classes=1000): 265 | model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, 266 | dropout_p=None, inplanes=64, input_3x3=False, 267 | downsample_kernel_size=1, downsample_padding=0, 268 | num_classes=num_classes) 269 | return model 270 | 271 | 272 | def se_resnext101_32x4d(num_classes=1000): 273 | model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, 
reduction=16,
274 |                   dropout_p=None, inplanes=64, input_3x3=False,
275 |                   downsample_kernel_size=1, downsample_padding=0,
276 |                   num_classes=num_classes)
277 |     return model
278 | 
279 | 
280 | class CustomSEResNeXt(nn.Module):
281 | 
282 |     def __init__(self, configs):
283 |         assert configs['model']['name'] in ('se_resnext50_32x4d', 'se_resnext101_32x4d')
284 |         super().__init__()
285 | 
286 |         # build the backbone that matches the asserted name (previously se_resnext50 was always used)
287 |         self.model = se_resnext50_32x4d() if configs['model']['name'] == 'se_resnext50_32x4d' else se_resnext101_32x4d()
288 |         weights_path = configs['model']['path']
289 |         self.model.load_state_dict(load_model(weights_path))
290 |         self.model.avg_pool = nn.AdaptiveAvgPool2d(1)
291 |         self.model.last_linear = nn.Linear(self.model.last_linear.in_features, configs['target_size'])
292 | 
293 |     def forward(self, x):
294 |         x = self.model(x)
295 |         return x
--------------------------------------------------------------------------------
/readme_images/cv_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Diyago/Graph-clasification-by-computer-vision/703c44b98f9875d7a7b6db1c2b96372e11e256d6/readme_images/cv_test.png
--------------------------------------------------------------------------------
/readme_images/sample_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Diyago/Graph-clasification-by-computer-vision/703c44b98f9875d7a7b6db1c2b96372e11e256d6/readme_images/sample_graph.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | networkx==2.4
2 | pretrainedmodels==0.7.4
3 | torch==1.6.0
4 | albumentations==0.3.3
5 | scikit_image==0.16.2
6 | pytorch_lightning==0.7.7.dev0
7 | torchvision==0.7.0
8 | pytorchcv==0.0.58
9 | skorch==0.9.0
10 | torch_geometric==1.6.1
11 | numpy==1.18.1
12 | matplotlib==3.1.3
13 | pandas==1.0.2
14 | poyo==0.5.0
15 | tqdm==4.43.0
16 | rdkit==2009.Q1-1  # not installable from PyPI at this pin; rdkit is typically installed via conda
17 | scikit_learn==0.23.2
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import codecs
2 | import os
3 | import warnings
4 | 
5 | import numpy as np
6 | import pytorch_lightning as pl
7 | from poyo import parse_string
8 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping
9 | from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
10 | from pytorch_lightning.loggers import TensorBoardLogger
11 | from torch.utils.data import DataLoader
12 | from pytorch_lightning.callbacks import LearningRateLogger
13 | from common_blocks.datasets import TrainDataset
14 | from common_blocks.logger import init_logger
15 | from common_blocks.transforms import get_transforms
16 | from common_blocks.utils import seed_torch, create_folds
17 | from models.lightningclassifier import LightningClassifier
18 | 
19 | with codecs.open("config/config_classification.yml", encoding="utf-8") as ymlfile:
20 |     config_yaml = ymlfile.read()
21 |     config = parse_string(config_yaml)
22 | 
23 | LOGGER = init_logger(config['logger_path']['main_logger'])
24 | warnings.filterwarnings("ignore", category=RuntimeWarning)
25 | 
26 | if __name__ == '__main__':
27 |     seed_torch(seed=config['total_seed'])
28 |     folds = create_folds(config['validation'])
29 |     fold_best_metrics = []
30 |     for fold in range(config['validation']['nfolds']):
31 |         trn_idx = folds[folds['fold'] != fold].index
32 |         val_idx = folds[folds['fold'] == 
fold].index
33 | 
34 |         # TODO: move dataset/loader construction into a helper
35 |         train_dataset = TrainDataset(folds.loc[trn_idx].reset_index(drop=True),
36 |                                      config['Train']['Dataset'],
37 |                                      transform=get_transforms(data='train',
38 |                                                               width=config["Train"]["Dataset"]["target_width"],
39 |                                                               height=config["Train"]["Dataset"]["target_height"]))
40 |         valid_dataset = TrainDataset(folds.loc[val_idx].reset_index(drop=True),
41 |                                      config['Val']['Dataset'],
42 |                                      transform=get_transforms(data='valid',
43 |                                                               width=config["Val"]["Dataset"]["target_width"],
44 |                                                               height=config["Val"]["Dataset"]["target_height"]))
45 | 
46 |         train_loader = DataLoader(train_dataset, **config['Train']['loader'])
47 |         valid_loader = DataLoader(valid_dataset, **config['Val']['loader'])
48 | 
49 |         tb_logger = TensorBoardLogger(save_dir=config['logger_path']['lightning_logger'],
50 |                                       name=config['model_params']['model_name'], version=f'fold_{fold + 1}')
51 |         os.makedirs('{}/{}'.format(config['logger_path']['lightning_logger'], config['model_params']['model_name']),
52 |                     exist_ok=True)
53 | 
54 |         checkpoint_callback = ModelCheckpoint(
55 |             filepath=tb_logger.log_dir + config['training']['ModelCheckpoint']['path'],
56 |             **config['training']['ModelCheckpoint']['kwargs'])
57 |         early_stop_callback = EarlyStopping(**config['training']['early_stop_callback'])
58 | 
59 |         model = LightningClassifier(config)
60 |         lr_logger = LearningRateLogger()
61 |         trainer = pl.Trainer(logger=tb_logger,
62 |                              callbacks=[lr_logger],
63 |                              early_stop_callback=early_stop_callback,
64 |                              checkpoint_callback=checkpoint_callback,
65 |                              **config['training']['Trainer'])
66 |         trainer.fit(model, train_dataloader=train_loader, val_dataloaders=valid_loader)
67 |         fold_best_metrics.append(np.max(model.val_metrics))
68 | 
69 |     print('MEAN METRIC:', round(np.mean(fold_best_metrics), 3), 'std', round(np.std(fold_best_metrics), 3))
70 |     print('ALL METRICS:', [round(x, 3) for x in fold_best_metrics])
71 | 
--------------------------------------------------------------------------------