├── .gitignore ├── LICENSE ├── README.md ├── asteroid_detection ├── README.md ├── dashboards │ └── asteroid.json ├── data │ ├── database.py │ └── nasa.csv ├── demo │ ├── inference.ipynb │ └── inference.py ├── get_data.py ├── global_config.py ├── images │ ├── alert.png │ ├── dashboard.png │ ├── diagram.png │ └── pipeline.png ├── model_training.py ├── pipeline.py ├── preprocess_data.py ├── preprocess_serving.py └── requirements.txt ├── how_much_data_do_you_really_need ├── README.md ├── category_prevalence.py ├── create_subsets.py ├── predict_diminishing_returns.ipynb └── utils.py ├── once_upon_a_repository ├── README.md ├── engines.py ├── inference_with_model.py ├── requirements.txt ├── torchvision_references │ ├── coco_eval.py │ ├── coco_utils.py │ └── utils.py ├── train_model.py ├── transforms.py └── utilities.py ├── setting_up_allegroai_platform ├── pytorch.mnist_trains.py └── requirements.txt ├── the_hero_rises ├── README.md ├── SSD │ ├── __init__.py │ ├── box_coder.py │ ├── multibox_loss.py │ └── ssd_model.py ├── engines.py ├── inference_with_model.py ├── requirements.txt ├── torchvision_references │ ├── __init__.py │ ├── coco_eval.py │ ├── coco_utils.py │ └── utils.py ├── train_model.py ├── transforms.py └── utilities.py └── urbansounds8k ├── .gitignore ├── README.md ├── assets └── diagram.png ├── get_data.py ├── preprocessing.py ├── requirements.txt └── training.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | 10 | dataset 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pycharm 135 | .idea/ 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # trains-blogs 2 | 3 | This repository contains the codebase mentioned and used in trains' blogs. 
4 | 
5 | The list of blogs includes:
6 | 
7 | * [How Much Data Do You Really Need?: Quantifying Diminishing Returns of Annotated Data](how_much_data_do_you_really_need/)
8 | * The Hero's Journey to Deep Learning CodeBase
9 | * [Blog I: Once Upon a Repository: How to Write Readable, Maintainable Code with PyTorch](once_upon_a_repository/)
10 | * Blog IIA: The Battle between Speed & Accuracy: Single-Shot vs Two-Shot Detection
11 | * [Blog IIB: The Hero Rises: Build Your Own SSD](the_hero_rises/)
12 | * Blog III: Flying with Anchors: Optimize SSD to Your Data
13 | * Blog IV: Happily Ever Deployed: ...
14 | 
15 | 
16 | 
-------------------------------------------------------------------------------- /asteroid_detection/README.md: --------------------------------------------------------------------------------
1 | # Asteroid Hazard Detection Example
2 | 
3 | ## Repository overview
4 | ![repo diagram](images/diagram.png)
5 | This is the big picture overview of the whole setup and the different files that are part of it.
6 | 
7 | `global_config.py` holds the project name so it can be easily changed project-wide.
8 | 
9 | ### Setup ClearML (will only take about 2 minutes)
10 | https://clear.ml/docs/latest/docs/getting_started/ds/ds_first_steps
11 | 
12 | You should now have some application credentials you can use to connect to the experiment manager and the model serving engine. Everything is open source, so you can also set up your own server!
13 | 
14 | ### Experimentation Phase
15 | `get_data.py` will take the CSV file in `data/nasa.csv`, query it as if it were a database (for demo purposes) and upload the resulting file to a versioned ClearML dataset.
16 | 
17 | `preprocess_data.py` will take that new dataset, preprocess the data into X and y dataframes and add them as CSV files to a new version of the dataset.
18 | 
19 | `model_training.py` will ingest that preprocessed version of the dataset, get the X and y data and train an XGBoost model on it.
20 | 
21 | All three of these scripts are tracked with the ClearML experiment manager, and the two datasets are tracked with ClearML Data.
22 | 
23 | ### Productionizing Phase
24 | `pipeline.py` is a ClearML PipelineController that will take the above 3 tracked scripts and chain them together in a pipeline.
25 | ![pipeline UI example](images/pipeline.png)
26 | 
27 | NOTE: running the pipeline requires at least 1 active ClearML agent running on the same or a remote machine, so it can execute the enqueued tasks.
28 | 
29 | ### Deployment Phase
30 | ClearML Serving works primarily through a CLI interface, so there is no code that sets it up.
31 | 
32 | `preprocess_serving.py`, however, is used by the CLI to tell ClearML Serving what pre- and postprocessing should be done when data is sent to and from the model serving engine.
33 | 
34 | To learn how to set up serving in detail, check out the [clearml-serving repository](https://github.com/allegroai/clearml-serving). But for this repo, these steps should get you started:
35 | 
36 | 
37 | 
38 | NOTE: Make sure clearml-serving is installed in the virtual environment you're using.
39 | 
40 | 1. Create a serving task and note down its ID; we'll need it in several of the following steps.
41 | ```
42 | clearml-serving create --name "asteroid serving"
43 | ```
44 | 
45 | 2. Set up the serving stack, in this case using docker-compose (could also be k8s).
46 | 
47 | Clone the clearml-serving repository
48 | ```
49 | git clone https://github.com/allegroai/clearml-serving.git
50 | ```
51 | 
52 | Edit the environment variables in `docker/example.env` to include your ClearML credentials and the serving task ID we copied in step 1.
53 | 
54 | 3. If you need any extra Python packages installed, set them as a comma-separated list in the environment variable `CLEARML_EXTRA_PYTHON_PACKAGES` or overwrite the variable in the `docker-compose.yml` file.
55 | 
56 | 4. Fire it up!
57 | ```
58 | cd docker && docker-compose --env-file example.env -f docker-compose.yml up
59 | ```
60 | 
61 | 5. To deploy the model, go back to this repository and run
62 | ```
63 | clearml-serving --id __YOUR_SERVING_ID__ model add --engine xgboost --endpoint "asteroid" --preprocess "preprocess_serving.py" --name "model training - best_model" --project "Asteroid Hazard Classification"
64 | ```
65 | Change the model name or project accordingly if you're using your own instead of just following along here.
66 | 
67 | 6. Before you can start monitoring the deployed model, you need to tell ClearML which metrics it should log. For that, also use the CLI:
68 | ```
69 | clearml-serving --id __YOUR_SERVING_ID__ metrics add --endpoint "asteroid" --variable-scalar "Absolute Magnitude=11.139,18.14,25.12,32.1"
70 | ```
71 | ```
72 | clearml-serving --id __YOUR_SERVING_ID__ metrics add --endpoint "asteroid" --variable-scalar "Minimum Orbit Intersection=-0.000476,0.159,0.319,0.478"
73 | ```
74 | 7. Go to `http://localhost:3000/` and log in to Grafana with the default admin:admin combo, then change it immediately.
75 | Now you can go to dashboards and import the dashboard from this repository called `asteroid.json`.
76 | Feel free to add alerts and change settings as much as you like :)
77 | 
78 | 8. The Grafana alert used in the demo (very simple and NOT very good) can be made like this:
79 | ```
80 | ((100 * increase(asteroid:Minimum_Orbit_Intersection_bucket{}[1m]) / increase(asteroid:Minimum_Orbit_Intersection_sum{}[1m])) - (100 * increase(asteroid:Minimum_Orbit_Intersection_bucket{}[10m]) / increase(asteroid:Minimum_Orbit_Intersection_sum{}[10m]))) ^ 2
81 | ```
82 | You can change the feature name as well if you want to.
83 | ![Grafana screenshot](images/dashboard.png)
84 | ![Alert screenshot](images/alert.png)
85 | 
86 | ### Testing everything
87 | You can use either the `inference.py` script or the `inference.ipynb` notebook to send some mock data. The notebook should be run at least once to create the mock data.
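
For reference, a single request and response look roughly like this. This is a minimal sketch, not a replacement for `inference.py`: it assumes the serving stack from the steps above is running locally and that the mock data file `data/synthetic.csv` was already generated by the notebook, and the exact response values will differ.

```
import pandas as pd
import requests

# Grab one row of the same mock data that inference.py sends
row = pd.read_csv('data/synthetic.csv').iloc[0].to_dict()

response = requests.post(
    url='http://127.0.0.1:8080/serve/asteroid',
    headers={'accept': 'application/json', 'Content-Type': 'application/json'},
    json=row,
)
# preprocess_serving.py post-processes the model output into something like {"y": 0, "y_raw": 0.03}
print(response.status_code, response.json())
```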
88 | 89 | -------------------------------------------------------------------------------- /asteroid_detection/dashboards/asteroid.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "target": { 12 | "limit": 100, 13 | "matchAny": false, 14 | "tags": [], 15 | "type": "dashboard" 16 | }, 17 | "type": "dashboard" 18 | } 19 | ] 20 | }, 21 | "editable": true, 22 | "fiscalYearStartMonth": 0, 23 | "graphTooltip": 0, 24 | "id": 1, 25 | "links": [], 26 | "liveNow": false, 27 | "panels": [ 28 | { 29 | "cards": {}, 30 | "color": { 31 | "cardColor": "#b4ff00", 32 | "colorScale": "sqrt", 33 | "colorScheme": "interpolateSpectral", 34 | "exponent": 0.5, 35 | "mode": "spectrum" 36 | }, 37 | "dataFormat": "tsbuckets", 38 | "datasource": { 39 | "type": "prometheus", 40 | "uid": "PBFA97CFB590B2093" 41 | }, 42 | "description": "", 43 | "gridPos": { 44 | "h": 12, 45 | "w": 12, 46 | "x": 0, 47 | "y": 0 48 | }, 49 | "heatmap": {}, 50 | "hideZeroBuckets": false, 51 | "highlightCards": true, 52 | "id": 2, 53 | "legend": { 54 | "show": false 55 | }, 56 | "reverseYBuckets": false, 57 | "targets": [ 58 | { 59 | "datasource": { 60 | "type": "prometheus", 61 | "uid": "PBFA97CFB590B2093" 62 | }, 63 | "exemplar": true, 64 | "expr": "100 * increase(asteroid:Minimum_Orbit_Intersection_bucket{}[1m]) / increase(asteroid:Minimum_Orbit_Intersection_sum{}[1m])", 65 | "interval": "", 66 | "legendFormat": "{{le}}", 67 | "refId": "A" 68 | } 69 | ], 70 | "title": "Minimum Orbit Intersection Distribution", 71 | "tooltip": { 72 | "show": true, 73 | "showHistogram": false 74 | }, 75 | "type": "heatmap", 76 | "xAxis": { 77 | "show": true 78 | }, 79 | "yAxis": { 80 | "format": "short", 81 | "logBase": 1, 82 | "show": true 83 | }, 84 | "yBucketBound": "auto" 85 | }, 86 | { 87 | "cards": {}, 88 | "color": { 89 | "cardColor": "#b4ff00", 90 | "colorScale": "sqrt", 91 | "colorScheme": "interpolateSpectral", 92 | "exponent": 0.5, 93 | "mode": "spectrum" 94 | }, 95 | "dataFormat": "tsbuckets", 96 | "datasource": { 97 | "type": "prometheus", 98 | "uid": "PBFA97CFB590B2093" 99 | }, 100 | "gridPos": { 101 | "h": 12, 102 | "w": 12, 103 | "x": 12, 104 | "y": 0 105 | }, 106 | "heatmap": {}, 107 | "hideZeroBuckets": false, 108 | "highlightCards": true, 109 | "id": 4, 110 | "legend": { 111 | "show": false 112 | }, 113 | "reverseYBuckets": false, 114 | "targets": [ 115 | { 116 | "datasource": { 117 | "type": "prometheus", 118 | "uid": "PBFA97CFB590B2093" 119 | }, 120 | "exemplar": true, 121 | "expr": "100 * increase(asteroid:Absolute_Magnitude_bucket{}[1m]) / increase(asteroid:Absolute_Magnitude_sum{}[1m])", 122 | "format": "time_series", 123 | "interval": "", 124 | "legendFormat": "{{le}}", 125 | "refId": "A" 126 | } 127 | ], 128 | "title": "Absolute Magnitude Distribution", 129 | "tooltip": { 130 | "show": true, 131 | "showHistogram": false 132 | }, 133 | "type": "heatmap", 134 | "xAxis": { 135 | "show": true 136 | }, 137 | "yAxis": { 138 | "format": "short", 139 | "logBase": 1, 140 | "show": true 141 | }, 142 | "yBucketBound": "auto" 143 | } 144 | ], 145 | "refresh": "5s", 146 | "schemaVersion": 35, 147 | "style": "dark", 148 | "tags": [], 149 | "templating": { 150 | "list": [] 151 | }, 152 | "time": { 153 | "from": "now-3h", 154 | "to": "now" 155 | }, 156 | "timepicker": {}, 157 | "timezone": "", 
158 | "title": "Asteroid Dashboard", 159 | "uid": "axuPWb9nz", 160 | "version": 4, 161 | "weekStart": "" 162 | } -------------------------------------------------------------------------------- /asteroid_detection/data/database.py: -------------------------------------------------------------------------------- 1 | """This is a mock module and should be replaced with your actual database connector.""" 2 | from pathlib import Path 3 | import pandas as pd 4 | from pandasql import sqldf 5 | from datetime import datetime, timedelta 6 | 7 | 8 | def query_database_to_df(query='SELECT * FROM asteroids'): 9 | # Get the data as CSV 10 | data_path = Path('data/nasa.csv') 11 | out_path = Path('/tmp/nasa.csv') 12 | 13 | # Create a dataframe as mock for the database 14 | asteroids = pd.read_csv(data_path) 15 | 16 | # Add some mock dates 17 | asteroids['date'] = [datetime.now() - i*timedelta(days=1) for i in range(len(asteroids))] 18 | 19 | # Query the df base on the argument 20 | asteroids = sqldf(query, locals()) 21 | 22 | # Save resulting DF to disk so it can be added to a clearml dataset as a file 23 | asteroids.to_csv(out_path) 24 | 25 | return asteroids, out_path -------------------------------------------------------------------------------- /asteroid_detection/demo/inference.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | import argparse 5 | import pandas as pd 6 | from pathlib import Path 7 | 8 | parser = argparse.ArgumentParser(description='Run inference data to serving.') 9 | parser.add_argument('--drift', action='store_true', help='Send drifted data instead of normal data.') 10 | args = parser.parse_args() 11 | 12 | data_path = Path('data/') 13 | 14 | synthetic_df = pd.read_csv(data_path / 'synthetic.csv') 15 | synthetic_df_drift = pd.read_csv(data_path / 'synthetic_drift.csv') 16 | 17 | if args.drift: 18 | print('Sending drifted data') 19 | df = synthetic_df_drift 20 | else: 21 | print("Sending normal data") 22 | df = synthetic_df 23 | 24 | while True: 25 | response = requests.post( 26 | url='http://127.0.0.1:8080/serve/asteroid', 27 | headers={'accept': 'application/json', 'Content-Type': 'application/json'}, 28 | json=df.loc[random.randint(0, len(df) - 1), :].to_dict() 29 | ) 30 | if response.status_code != 200: 31 | print(f"Bad request! 
{response.content}") 32 | time.sleep(random.randrange(0, 1)) -------------------------------------------------------------------------------- /asteroid_detection/get_data.py: -------------------------------------------------------------------------------- 1 | from clearml import Task, Dataset 2 | 3 | import global_config 4 | from data import database 5 | 6 | 7 | task = Task.init( 8 | project_name=global_config.PROJECT_NAME, 9 | task_name='get data', 10 | task_type='data_processing', 11 | reuse_last_task_id=False 12 | ) 13 | 14 | config = { 15 | 'query_date': '2022-01-01' 16 | } 17 | task.connect(config) 18 | 19 | 20 | # Get the data and a path to the file 21 | query = 'SELECT * FROM asteroids WHERE strftime("%Y-%m-%d", `date`) <= strftime("%Y-%m-%d", "{}")'.format(config['query_date']) 22 | df, data_path = database.query_database_to_df(query=query) 23 | print(f"Dataset downloaded to: {data_path}") 24 | print(df.head()) 25 | 26 | # Create a ClearML dataset 27 | dataset = Dataset.create( 28 | dataset_name='raw_asteroid_dataset', 29 | dataset_project=global_config.PROJECT_NAME 30 | ) 31 | # Add the local files we downloaded earlier 32 | dataset.add_files(data_path) 33 | # Let's add some cool graphs as statistics in the plots section! 34 | dataset.get_logger().report_table(title='Asteroid Data', series='head', table_plot=df.head()) 35 | # Finalize and upload the data and labels of the dataset 36 | dataset.finalize(auto_upload=True) 37 | 38 | print(f"Created dataset with ID: {dataset.id}") 39 | print(f"Data size: {len(df)}") 40 | -------------------------------------------------------------------------------- /asteroid_detection/global_config.py: -------------------------------------------------------------------------------- 1 | PROJECT_NAME = 'Project Team NASA' 2 | PIPELINE_NAME = 'NASA Pipeline' -------------------------------------------------------------------------------- /asteroid_detection/images/alert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/alert.png -------------------------------------------------------------------------------- /asteroid_detection/images/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/dashboard.png -------------------------------------------------------------------------------- /asteroid_detection/images/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/diagram.png -------------------------------------------------------------------------------- /asteroid_detection/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/pipeline.png -------------------------------------------------------------------------------- /asteroid_detection/model_training.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import xgboost as xgb 6 | from clearml import Dataset, Task 7 | from 
sklearn.metrics import accuracy_score, recall_score 8 | from sklearn.model_selection import train_test_split 9 | from xgboost import plot_importance 10 | 11 | # Connecting ClearML with the current process, 12 | # from here on everything is logged automatically 13 | import global_config 14 | 15 | task = Task.init( 16 | project_name=global_config.PROJECT_NAME, 17 | task_name='model training', 18 | output_uri=True 19 | ) 20 | 21 | # Set default docker 22 | task.set_base_docker(docker_image="python:3.7") 23 | 24 | # Training args 25 | training_args = { 26 | 'eval_metric': "rmse", 27 | 'objective': 'reg:squarederror', 28 | 'test_size': 0.2, 29 | 'random_state': 42, 30 | 'num_boost_round': 100 31 | } 32 | task.connect(training_args) 33 | 34 | # Load our Dataset 35 | local_path = Dataset.get( 36 | dataset_name='preprocessed_asteroid_dataset', 37 | dataset_project=global_config.PROJECT_NAME 38 | ).get_local_copy() 39 | local_path = Path(local_path) 40 | # local_path = Path('data/preprocessed_data') 41 | X = pd.read_csv(local_path / 'X.csv', index_col=0) 42 | y = pd.read_csv(local_path / 'y.csv', index_col=0) 43 | 44 | # Split data 45 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=training_args['test_size'], random_state=training_args['random_state']) 46 | dtrain = xgb.DMatrix(X_train, label=y_train) 47 | dtest = xgb.DMatrix(X_test, label=y_test) 48 | 49 | # Train 50 | bst = xgb.train( 51 | training_args, 52 | dtrain, 53 | num_boost_round=training_args['num_boost_round'], 54 | evals=[(dtrain, "train"), (dtest, "test")], 55 | verbose_eval=0 56 | ) 57 | 58 | bst.save_model("best_model") 59 | plot_importance(bst) 60 | plt.show() 61 | 62 | preds = bst.predict(dtest) 63 | predictions = [round(value) for value in preds] 64 | accuracy = accuracy_score(y_test['Hazardous'].to_list(), predictions) 65 | recall = recall_score(y_test['Hazardous'].to_list(), predictions) 66 | print(f"Model trained with accuracy: {accuracy} and recall: {recall}") 67 | # Save the actual accuracy as an artifact so we can get it as part of the pipeline 68 | task.get_logger().report_scalar( 69 | title='Performance', 70 | series='Accuracy', 71 | value=accuracy, 72 | iteration=0 73 | ) 74 | task.get_logger().report_scalar( 75 | title='Performance', 76 | series='Recall', 77 | value=recall, 78 | iteration=0 79 | ) 80 | print("Done") 81 | -------------------------------------------------------------------------------- /asteroid_detection/pipeline.py: -------------------------------------------------------------------------------- 1 | from platform import node 2 | from clearml import Task 3 | from clearml.automation import PipelineController 4 | 5 | import global_config 6 | 7 | 8 | def pre_execute_callback_example(a_pipeline, a_node, current_param_override): 9 | # type (PipelineController, PipelineController.Node, dict) -> bool 10 | print('Cloning Task id={} with parameters: {}'.format(a_node.base_task_id, current_param_override)) 11 | # if we want to skip this node (and subtree of this node) we return False 12 | # return True to continue DAG execution 13 | return True 14 | 15 | 16 | def post_execute_callback_example(a_pipeline, a_node): 17 | # type (PipelineController, PipelineController.Node) -> None 18 | print('Completed Task id={}'.format(a_node.executed)) 19 | # if we need the actual executed Task: Task.get_task(task_id=a_node.executed) 20 | return 21 | 22 | 23 | def compare_metrics_and_publish_best(**kwargs): 24 | from clearml import OutputModel 25 | # Keep track of best node details 26 | current_best = dict() 
27 | 28 | # For each incoming node, compare against current best 29 | for node_name, training_task_id in kwargs.items(): 30 | # Get the original task based on the ID we got from the pipeline 31 | task = Task.get_task(task_id=training_task_id) 32 | accuracy = task.get_reported_scalars()['Performance']['Accuracy']['y'][0] 33 | model_id = task.get_models()['output'][0].id 34 | # Check if accuracy is better than current best, if so, overwrite current best 35 | if accuracy > current_best.get('accuracy', 0): 36 | current_best['accuracy'] = accuracy 37 | current_best['node_name'] = node_name 38 | current_best['model_id'] = model_id 39 | print(f"New current best model: {node_name}") 40 | 41 | # Print the final best model details and log it as an output model on this step 42 | print(f"Final best model: {current_best}") 43 | OutputModel(name="best_pipeline_model", base_model_id=current_best.get('model_id'), tags=['pipeline_winner']) 44 | 45 | 46 | # Connecting ClearML with the current pipeline, 47 | # from here on everything is logged automatically 48 | pipe = PipelineController( 49 | name=global_config.PIPELINE_NAME, 50 | project=global_config.PROJECT_NAME, 51 | version='0.0.1' 52 | ) 53 | 54 | pipe.set_default_execution_queue('CPU Queue') 55 | pipe.add_parameter('training_seeds', [42, 420, 500]) 56 | pipe.add_parameter('query_date', '2022-01-01') 57 | 58 | pipe.add_step( 59 | name='get_data', 60 | base_task_project=global_config.PROJECT_NAME, 61 | base_task_name='get data', 62 | parameter_override={'General/query_date': '${pipeline.query_date}'} 63 | ) 64 | pipe.add_step( 65 | name='preprocess_data', 66 | parents=['get_data'], 67 | base_task_project=global_config.PROJECT_NAME, 68 | base_task_name='preprocess data', 69 | pre_execute_callback=pre_execute_callback_example, 70 | post_execute_callback=post_execute_callback_example 71 | ) 72 | training_nodes = [] 73 | # Seeds should be pipeline arguments 74 | # Don't change these when doing new run 75 | for i, random_state in enumerate(pipe.get_parameters()['training_seeds']): 76 | node_name = f'model_training_{i}' 77 | training_nodes.append(node_name) 78 | pipe.add_step( 79 | name=node_name, 80 | parents=['preprocess_data'], 81 | base_task_project=global_config.PROJECT_NAME, 82 | base_task_name='model training', 83 | parameter_override={'General/num_boost_round': 250, 84 | 'General/test_size': 0.5, 85 | 'General/random_state': random_state} 86 | ) 87 | 88 | pipe.add_function_step( 89 | name='select_best_model', 90 | parents=training_nodes, 91 | function=compare_metrics_and_publish_best, 92 | function_kwargs={node_name: '${%s.id}' % node_name for node_name in training_nodes}, 93 | monitor_models=["best_pipeline_model"] 94 | ) 95 | 96 | 97 | # for debugging purposes use local jobs 98 | # pipe.start_locally(run_pipeline_steps_locally=True) 99 | # Starting the pipeline (in the background) 100 | pipe.start() 101 | 102 | print('Done!') 103 | -------------------------------------------------------------------------------- /asteroid_detection/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | from clearml import Dataset, Task 6 | 7 | import global_config 8 | 9 | task = Task.init( 10 | project_name=global_config.PROJECT_NAME, 11 | task_name='preprocess data', 12 | task_type='data_processing', 13 | reuse_last_task_id=False 14 | ) 15 | 16 | # Create the folder we'll output the preprocessed data into 17 | preprocessed_data_folder = 
Path('/tmp') 18 | if not os.path.exists(preprocessed_data_folder): 19 | os.makedirs(preprocessed_data_folder) 20 | 21 | # Get the dataset 22 | dataset = Dataset.get( 23 | dataset_project=global_config.PROJECT_NAME, 24 | dataset_name='raw_asteroid_dataset', 25 | ) 26 | local_folder = dataset.get_local_copy() 27 | print(f"Using dataset ID: {dataset.id}") 28 | 29 | # Clean up the data a little bit 30 | df = pd.read_csv((Path(local_folder) / 'nasa.csv')) 31 | df['avg_dia'] = df[['Est Dia in KM(min)', 'Est Dia in KM(max)']].mean(axis=1) 32 | X = df[['Absolute Magnitude', 'avg_dia', 'Relative Velocity km per hr', 'Miss Dist.(kilometers)', 'Orbit Uncertainity', 33 | 'Minimum Orbit Intersection', 'Jupiter Tisserand Invariant', 'Epoch Osculation', 'Eccentricity', 'Semi Major Axis', 34 | 'Inclination', 'Asc Node Longitude', 'Orbital Period', 'Perihelion Distance', 'Perihelion Arg', 35 | 'Aphelion Dist', 'Perihelion Time', 'Mean Anomaly', 'Mean Motion']] 36 | X.to_csv(path_or_buf=preprocessed_data_folder / 'X.csv') 37 | print(f"Preprocessed data X") 38 | print(X.head()) 39 | 40 | y = pd.DataFrame(df['Hazardous'].astype(int)) 41 | y.to_csv(path_or_buf=preprocessed_data_folder / 'y.csv') 42 | print(f"Preprocessed data y") 43 | print(y.head()) 44 | 45 | # Create a new version of the dataset, which is cleaned up 46 | new_dataset = Dataset.create( 47 | dataset_project=dataset.project, 48 | dataset_name='preprocessed_asteroid_dataset', 49 | parent_datasets=[dataset] 50 | ) 51 | new_dataset.add_files(preprocessed_data_folder / 'X.csv') 52 | new_dataset.add_files(preprocessed_data_folder / 'y.csv') 53 | new_dataset.get_logger().report_table(title='X data', series='head', table_plot=X.head()) 54 | new_dataset.get_logger().report_table(title='y data', series='head', table_plot=y.head()) 55 | new_dataset.finalize(auto_upload=True) 56 | 57 | # Log to console which dataset ID was created 58 | print(f"Created preprocessed dataset with ID: {new_dataset.id}") 59 | -------------------------------------------------------------------------------- /asteroid_detection/preprocess_serving.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import xgboost as xgb 6 | 7 | 8 | # Notice Preprocess class Must be named "Preprocess" 9 | class Preprocess(object): 10 | def __init__(self): 11 | # set internal state, this will be called only once. (i.e. 
not per request) 12 | pass 13 | 14 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 15 | df = pd.DataFrame(columns=body.keys()) 16 | df.loc[0] = body.values() 17 | df['avg_dia'] = df[['Est Dia in KM(min)', 'Est Dia in KM(max)']].mean(axis=1) 18 | X = df[['Absolute Magnitude', 'avg_dia', 'Relative Velocity km per hr', 'Miss Dist.(kilometers)', 'Orbit Uncertainity', 19 | 'Minimum Orbit Intersection', 'Jupiter Tisserand Invariant', 'Epoch Osculation', 'Eccentricity', 'Semi Major Axis', 20 | 'Inclination', 'Asc Node Longitude', 'Orbital Period', 'Perihelion Distance', 'Perihelion Arg', 21 | 'Aphelion Dist', 'Perihelion Time', 'Mean Anomaly', 'Mean Motion']] 22 | # we expect to get four valid numbers on the dict: x0, x1, x2, x3 23 | return xgb.DMatrix(X) 24 | 25 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 26 | # post process the data returned from the model inference engine 27 | # data is the return value from model.predict we will put is inside a return value as Y 28 | return dict(y=round(data[0]), y_raw=float(data[0])) 29 | -------------------------------------------------------------------------------- /asteroid_detection/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy~=1.24.1 2 | clearml~=1.9.1 3 | scikit-learn~=1.2.1 4 | xgboost~=1.7.3 5 | pandas~=1.5.3 6 | matplotlib~=3.6.3 7 | requests~=2.28.2 8 | pandasql~=0.7.3 -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/README.md: -------------------------------------------------------------------------------- 1 | # How Much Data Do You Really Need? 2 | ## [Quantifying Diminishing Returns of Annotated Data](https://towardsdatascience.com/how-much-data-do-you-really-need-8c02a59394b) 3 | 4 | Deep learning models are notorious for their endless appetite for training data. The process of acquiring high quality annotated data consumes many types of resources — mostly cash. The growing amounts of data as the machine learning projects progress, lead to other undesired consequences, such as slowing down all of R&D. Therefore, veteran project leaders always look at the overall performance gains brought upon by additional increments of their dataset. More often than not, especially if the new data is relatively similar to the existing one, one will encounter the phenomena of Diminishing Returns. 5 | 6 | The law of diminishing returns states that when you continuously add more and more input in a productive process, it will actually yield progressively smaller increases in output. This phenomena was mentioned by 18th century economist such as Turgot and Adam Smith and articulated in 1815 by the British economist David Ricardo. When addressing the influence of training data volume on model performance, the law of diminishing returns suggests that each increment in train set size will tend to contribute less to the predetermined success metrics. 7 | 8 | When a project leader is able to monitor, and even quantify, the diminishing returns effect in their machine learning project, they are able to attain finer degrees of control throughout its lifetime. For example: estimating how much data is required to reach the project goal; avoiding redundant training sessions; or even predicting whether the current model architecture will be able to achieve the target metric. 
This knowledge effectively provides a tool for optimal management of time, manpower, and computing resources. -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/category_prevalence.py: -------------------------------------------------------------------------------- 1 | """ 2 | PYTHON VERSION: python3.6 3 | 4 | Calculating and visualizing categories_prevalence in a dataset. 5 | The dataset metadeta should be given as a json file in either COCO of BDD format. 6 | 7 | Usage: 8 | 1. JSON_PATH=path/to/bdd100k_labels_images_train.json 9 | category_prevalence.py --dataset-format BDD --json-path JSON_PATH 10 | 2. JSON_PATH=path/to/coco/annotations/instances_train2017.json 11 | category_prevalence.py --dataset-format COCO --json-path JSON_PATH --wanted-categories person,bike,bird,dog 12 | 13 | Requirements: 14 | - trains 15 | - numpy 16 | - seaborn 17 | """ 18 | import argparse 19 | import json 20 | from pathlib import Path 21 | 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | import seaborn as sns 25 | from utils import voco_categories, bdd_things_categories 26 | from trains import Task 27 | 28 | 29 | Task.init( 30 | project_name="Quantify diminishing Returns", 31 | task_name="Class Distribution without axis labels", 32 | ) 33 | 34 | 35 | def bdd_class_distribution(json_path: Path) -> dict: 36 | """ 37 | Create and save a dictionary, with the key being the image name and the value the metadata. 38 | :param json_path: path to the BDD labels json file. 39 | :return: A dictionary. Key: Category name. Value: number of appearances. 40 | """ 41 | category_dict = {} 42 | with open(json_path, "r") as f: 43 | entries_list = np.asarray(json.load(f)) 44 | for entry in entries_list: 45 | for label in entry["labels"]: 46 | category = label["category"] 47 | 48 | category_dict[category] = category_dict.get(category, 0) + 1 49 | return category_dict 50 | 51 | 52 | def coco_id_to_category_name(categories: dict) -> dict: 53 | """ 54 | Creates a dictionary that gives the category name given its COCO id. 55 | :param categories: the categories dictionary from COCO's JSON file. 56 | :return: Dictionary: id -> category_name. 57 | """ 58 | id_to_name_dict = {entry["id"]: entry["name"] for entry in categories} 59 | return id_to_name_dict 60 | 61 | 62 | def coco_class_distribution(json_path: Path) -> dict: 63 | """ 64 | Counts number of accurences for each class in a dataset metadeta coded 65 | in COCO-style JSON (COCO's instance JSON file). 66 | :param json_path: Path to the JSON file contains the metadata. 67 | :return: A dictionary. Key: Category name. Value: number of appearances. 68 | """ 69 | category_dict = {} 70 | with open(json_path, "r") as f: 71 | data_dict = json.load(f) 72 | annotation_list = data_dict["annotations"] 73 | categories = data_dict["categories"] 74 | id_to_category_dict = coco_id_to_category_name(categories) 75 | for entry in annotation_list: 76 | category = id_to_category_dict[entry["category_id"]] 77 | category_dict[category] = category_dict.get(category, 0) + 1 78 | return category_dict 79 | 80 | 81 | def plot_doughnut(category_dict: dict): 82 | """ 83 | Plots a doughnut chart of the categories prevalence. 84 | :param category_dict: category name -> number of occurrence. 
85 | """ 86 | fig, ax = plt.subplots(figsize=(8, 4), subplot_kw=dict(aspect="equal")) 87 | fractions = list(category_dict.values()) 88 | wedges, texts = ax.pie(fractions, wedgeprops=dict(width=0.5), startangle=-40) 89 | legend_labels = [ 90 | f"{label}: {fraction}" for label, fraction in category_dict.items() 91 | ] 92 | ax.legend( 93 | wedges, 94 | legend_labels, 95 | title="Categories", 96 | loc="center left", 97 | bbox_to_anchor=(1, 0, 0.5, 1), 98 | ) 99 | ax.set_title("Class prevalence - BDD") 100 | plt.savefig(fname="class prevalence", dpi=200) 101 | plt.show() 102 | 103 | 104 | ########### 105 | ## Plots ## 106 | ########### 107 | 108 | 109 | def plot_hist(category_dict: dict): 110 | """ 111 | Plot labels histogram using seaborn. 112 | """ 113 | labels = list(category_dict) 114 | fractions = list(category_dict.values()) 115 | sns.barplot(x=labels, y=fractions) 116 | plt.show() 117 | 118 | 119 | def plot_bars_matplotlib(category_dict: dict): 120 | """ 121 | Plot labels histogram using matplotlib. 122 | """ 123 | labels = list(category_dict) 124 | fractions = list(category_dict.values()) 125 | index = np.arange(len(labels)) 126 | plt.bar(index, fractions) 127 | plt.xlabel("Class", fontsize=5) 128 | plt.ylabel("Number of Appearances", fontsize=5) 129 | plt.xticks(index, labels, fontsize=5, rotation=80) 130 | plt.title("Class Prevalence") 131 | plt.show() 132 | 133 | 134 | def plot_bars(category_dict: dict): 135 | """ 136 | Plot barplots which appears nicely on trains server as plotly object. 137 | :param category_dict: 138 | :return: 139 | """ 140 | labels = list(category_dict) 141 | fractions = list(category_dict.values()) 142 | plt.bar(labels, fractions) 143 | plt.xlabel("Class") 144 | plt.ylabel("Number of Appearances") 145 | plt.title("Class Prevalence") 146 | plt.show() 147 | 148 | 149 | def class_sieve(category_dict: dict, to_keep: set): 150 | """ 151 | In-place function that leave in 'category_dict' 152 | only the counting of the classes appearing in 'to_keep' 153 | :param category_dict: Key: Category name. Value: number of appearances. 154 | :param to_keep: set of labels to keep in 'category dict' 155 | """ 156 | return {key: value for key, value in category_dict.items() if key in to_keep} 157 | 158 | 159 | def parse_args(): 160 | parser = argparse.ArgumentParser(description=__doc__) 161 | parser.add_argument( 162 | "--dataset-format", 163 | choices=["COCO", "BDD"], 164 | help="the format of the dataset metadeta", 165 | ) 166 | parser.add_argument( 167 | "--json-path", 168 | type=Path, 169 | help="Path to the metadata, saved in json format. " 170 | "For example, in the BDD dataset, bdd100k_labels_images_train.json " 171 | "or bdd100k_labels_images_validation.json" 172 | "files are possible inputs.", 173 | ) 174 | parser.add_argument( 175 | "--wanted-categories", 176 | help="The categories on which you wish to calculate the statistics," 177 | "separated by a comma." 
178 | "If None, all the categories of the dataset will be considered.", 179 | ) 180 | return parser.parse_args() 181 | 182 | 183 | def main(): 184 | args = parse_args() 185 | if args.dataset_format == "COCO": 186 | category_count = coco_class_distribution(args.json_path) 187 | if not args.wanted_categories: 188 | wanted_categoris = voco_categories 189 | elif args.dataset_format == "BDD": 190 | category_count = bdd_class_distribution(args.json_path) 191 | if not args.wanted_categories: 192 | wanted_categoris = bdd_things_categories 193 | if args.wanted_categories: 194 | wanted_categoris = args.wanted_categories.split(",") 195 | category_count = class_sieve(category_count, wanted_categoris) 196 | # plot_bars(category_count) 197 | plot_doughnut(category_count) 198 | 199 | 200 | if __name__ == "__main__": 201 | main() 202 | -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/create_subsets.py: -------------------------------------------------------------------------------- 1 | """ 2 | PYTHON VERSION: python3.6 3 | 4 | Create subsets of a dataset's metadeta or data. Each subsets contains the smaller predecessor subsets. 5 | For example, all the data within a subset of 1% of the data, presented in the 10%-subset as well. 6 | All the in the data within the 10%-subset is contained within the 20%-subset and so on. 7 | 8 | Usage: 9 | category_prevalence.py --original-json-path path/to/bdd100k_labels_images_train.json --output-directory path/to/output/directory 10 | 11 | Requirements: 12 | - trains 13 | - numpy 14 | """ 15 | import json 16 | from argparse import ArgumentParser 17 | from typing import Sequence 18 | 19 | import numpy as np 20 | from pathlib import Path 21 | 22 | 23 | def get_datafile_and_number_of_entries(json_file, dataset_fomat: str): 24 | """ 25 | Given a read json file and a dataset format, this function 26 | return the metadata in the usable format and counts how many entries are there in the metadata. 27 | :param json_file: Metadata content in a python dictionary. 28 | :param dataset_fomat: format of the dataset metadata 29 | :return: tuple: (datafile, number of entries in this datafile) 30 | """ 31 | if dataset_fomat == "BDD": 32 | datafile = np.asarray(json_file) 33 | return datafile, len(datafile) 34 | elif dataset_fomat == "COCO": 35 | return json_file, len(json_file["images"]) 36 | 37 | 38 | def get_sub_dataset( 39 | image_array, 40 | entries_array: np.ndarray, 41 | fraction: float, 42 | number_of_entries: int, 43 | dataset_format: str, 44 | annotations_array=None, 45 | data_dict=None, 46 | ): 47 | if dataset_format == "BDD": 48 | return list(image_array[entries_array[: int(fraction * number_of_entries)]]) 49 | elif dataset_format == "COCO": 50 | image_entry_list = image_array[ 51 | entries_array[: int(fraction * number_of_entries)] 52 | ] 53 | annotation_entry_list = annotations_array[ 54 | entries_array[: int(fraction * number_of_entries)] 55 | ] 56 | sub_dataset_dict = { 57 | "info": data_dict["info"], 58 | "licenses": data_dict["licenses"], 59 | "images": list(image_entry_list), 60 | "annotations": list(annotation_entry_list), 61 | "categories": data_dict["categories"], 62 | } 63 | return sub_dataset_dict 64 | 65 | 66 | def create_subsets( 67 | input_json_path: Path, 68 | output_directory: Path, 69 | fraction_array: Sequence[float], 70 | dataset_format: str, 71 | ): 72 | """ 73 | Creates sub sets of BDD metadata. 74 | :param input_json_path: BDD labels JSON file. 
75 | :param output_directory: Folder to save the BDD metadata sub-sets. 76 | :param fraction_array: Array contains the sizes of the sub datasets. 77 | The sizes are brought as fractions of the original dataset. 78 | """ 79 | with open(input_json_path, "r") as f: 80 | datafile, number_of_entries = get_datafile_and_number_of_entries( 81 | json.load(f), dataset_format 82 | ) 83 | entries_array = np.random.permutation(number_of_entries) 84 | data_dict = datafile if dataset_format == "COCO" else None 85 | image_array = ( 86 | np.asarray(data_dict["images"]) if dataset_format == "COCO" else datafile 87 | ) 88 | annotations_array = ( 89 | np.asarray(data_dict["annotations"]) if dataset_format == "COCO" else None 90 | ) 91 | for fraction in fraction_array: 92 | with open(output_directory / f"fraction_of_{fraction}", "w") as outfile: 93 | sub_dataset = get_sub_dataset( 94 | image_array=image_array, 95 | entries_array=entries_array, 96 | fraction=fraction, 97 | number_of_entries=number_of_entries, 98 | dataset_format=dataset_format, 99 | annotations_array=annotations_array, 100 | data_dict=data_dict, 101 | ) 102 | json.dump(obj=sub_dataset, fp=outfile) 103 | 104 | 105 | def parse_args(): 106 | parser = ArgumentParser(description=__doc__) 107 | parser.add_argument( 108 | "--dataset-format", 109 | choices=["COCO", "BDD"], 110 | help="the format of the dataset metadeta", 111 | ) 112 | parser.add_argument( 113 | "--original-json-path", 114 | help="Path to json file. This file should hold all metadata (or data) instances" 115 | " as entries in a single Python list", 116 | type=Path, 117 | ) 118 | parser.add_argument( 119 | "--output-directory", type=Path, help="Folder to save the metadata sub-sets." 120 | ) 121 | parser.add_argument( 122 | "--fraction-array", 123 | type=list, 124 | default=[i / 10 for i in range(1, 11)], 125 | help="Array contains the sizes of the sub datasets." 
126 | " The sizes are brought as fractions of the original dataset.", 127 | ) 128 | return parser.parse_args() 129 | 130 | 131 | def main(): 132 | args = parse_args() 133 | create_subsets( 134 | input_json_path=args.original_json_path, 135 | output_directory=args.output_directory, 136 | fraction_array=args.fraction_array, 137 | dataset_format=args.dataset_format, 138 | ) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/utils.py: -------------------------------------------------------------------------------- 1 | voco_categories = [ 2 | "airplane", 3 | "bicycle", 4 | "bird", 5 | "boat", 6 | "bottle", 7 | "bus", 8 | "car", 9 | "cat", 10 | "chair", 11 | "cow", 12 | "dining table", 13 | "dog", 14 | "horse", 15 | "motorcycle", 16 | "person", 17 | "potted plant", 18 | "sheep", 19 | "couch", 20 | "train", 21 | "tv", 22 | ] 23 | 24 | bdd_things_categories = [ 25 | "bike", 26 | "bus", 27 | "car", 28 | "motor", 29 | "person", 30 | "traffic light", 31 | "traffic sign", 32 | "train", 33 | "truck", 34 | "rider" 35 | ] -------------------------------------------------------------------------------- /once_upon_a_repository/README.md: -------------------------------------------------------------------------------- 1 | # The Hero’s Journey to Deep Learning CodeBase 2 | ## [Blog I: Once Upon a Repository: How to Write Readable, Maintainable Code with PyTorch](https://medium.com/p/once-upon-a-repository-how-to-write-readable-maintainable-code-with-pytorch-951f03f6a829?source=email-679430f47f06--writer.postDistributed&sk=3a6953df05559b11fbbc35a258e75ec0) 3 | 4 | We all aim to write a maintainable and modular codebase that supports the R&D process from research to production. Key to an efficient and successful deep learning project, this is not an easy feat. That is why we decided to write this blog series -- to share our experience from numerous deep learning projects and demonstrate the way to achieve this goal using open source tools. 5 | 6 | Our first post in this series is a tutorial on how to leverage the PyTorch ecosystem and Allegro Trains experiments manager to easily write a readable and maintainable computer vision code tailored for your needs. We focus on two packages from the PyTorch ecosystem, Torchvision and Ignite. Torchvision is a popular package consisting of popular datasets wrappers, model architectures, and common image transformations for computer vision. Ignite is a new library that enables simple and clean adding of metrics reports, early-stopping, model checkpointing and other features to your training loop. In this post, we write a codebase that trains and evaluates a Mask-RCNN model on the COCO dataset. We then register the training data (loss, accuracy, etc) to a Pytorch native Tensorboard and use Allegro Trains experiment & autoML manager to manage and track our training experiments. Through these steps, we achieve a seamless, organized, and productive model training flow. 
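
To make the moving parts more concrete before diving into the code, here is a minimal, self-contained sketch of how an Ignite engine, TensorBoard and Trains fit together. It is not the repository's actual training code (that lives in `train_model.py` and `engines.py`); the project name, task name and the fake loss below are placeholders.

```
from ignite.engine import Engine, Events
from torch.utils.tensorboard import SummaryWriter
from trains import Task

# Initializing a Task is enough for Trains to automatically capture the TensorBoard scalars
task = Task.init(project_name='Toy Project', task_name='toy ignite example')
writer = SummaryWriter(log_dir='/tmp/tensorboard_logs')


def update_model(engine, batch):
    # A real update step would run the forward/backward pass and step the optimizer;
    # here we just return a fake, decreasing loss value
    return {'loss': 1.0 / (1 + engine.state.iteration)}


trainer = Engine(update_model)


@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    # Everything written to TensorBoard also shows up in the Trains web UI
    writer.add_scalar('training/loss', engine.state.output['loss'], engine.state.iteration)


trainer.run(data=[0] * 10, max_epochs=2)
```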
7 | -------------------------------------------------------------------------------- /once_upon_a_repository/engines.py: -------------------------------------------------------------------------------- 1 | import math 2 | import copy 3 | import torch 4 | 5 | from ignite.engine import Engine 6 | from torchvision_references import utils 7 | 8 | 9 | def create_trainer(model, device): 10 | def update_model(engine, batch): 11 | images, targets = copy.deepcopy(batch) 12 | images_model, targets_model = prepare_batch(batch, device=device) 13 | 14 | loss_dict = model(images_model, targets_model) 15 | losses = sum(loss for loss in loss_dict.values()) 16 | 17 | # reduce losses over all GPUs for logging purposes 18 | loss_dict_reduced = utils.reduce_dict(loss_dict) 19 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 20 | 21 | loss_value = losses_reduced.item() 22 | 23 | engine.state.optimizer.zero_grad() 24 | if not math.isfinite(loss_value): 25 | print("Loss is {}, resetting loss and skipping training iteration".format(loss_value)) 26 | print('Loss values were: ', loss_dict_reduced) 27 | print('Input labels were: ', [target['labels'] for target in targets]) 28 | print('Input boxes were: ', [target['boxes'] for target in targets]) 29 | loss_dict_reduced = {k: torch.tensor(0) for k, v in loss_dict_reduced.items()} 30 | else: 31 | losses.backward() 32 | engine.state.optimizer.step() 33 | 34 | if engine.state.warmup_scheduler is not None: 35 | engine.state.warmup_scheduler.step() 36 | 37 | images_model = targets_model = None 38 | 39 | return images, targets, loss_dict_reduced 40 | return Engine(update_model) 41 | 42 | 43 | def create_evaluator(model, device): 44 | def update_model(engine, batch): 45 | images, targets = prepare_batch(batch, device=device) 46 | images_model = copy.deepcopy(images) 47 | 48 | torch.cuda.synchronize() 49 | with torch.no_grad(): 50 | outputs = model(images_model) 51 | 52 | outputs = [{k: v.to(device) for k, v in t.items()} for t in outputs] 53 | 54 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 55 | engine.state.coco_evaluator.update(res) 56 | 57 | images_model = outputs = None 58 | 59 | return images, targets, res 60 | return Engine(update_model) 61 | 62 | 63 | def prepare_batch(batch, device=None): 64 | images, targets = batch 65 | images = list(image.to(device, non_blocking=True) for image in images) 66 | targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets] 67 | return images, targets 68 | -------------------------------------------------------------------------------- /once_upon_a_repository/inference_with_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.tensorboard import SummaryWriter 7 | 8 | from argparse import ArgumentParser 9 | from pathlib2 import Path 10 | 11 | from utilities import get_iou_types, draw_boxes, get_model_instance_segmentation, CocoLikeAnnotations 12 | from torchvision_references import utils 13 | from torchvision.transforms import functional as F 14 | 15 | from PIL import Image 16 | from transforms import get_transform 17 | 18 | from trains import Task 19 | task = Task.init(project_name='Object Detection with TRAINS, Ignite and TensorBoard', 20 | task_name='Inference with trained model') 21 | 22 | 23 | def rescale_box(box, image_size, orig_height, orig_width): 24 | rescale_height = float(orig_height) / image_size 25 | 
rescale_width = float(orig_width) / image_size 26 | box[0::2] *= rescale_width 27 | box[1::2] *= rescale_height 28 | return box 29 | 30 | 31 | def run(task_args): 32 | writer = SummaryWriter(log_dir=task_args.log_dir) 33 | input_checkpoint = torch.load(task_args.input_checkpoint) 34 | labels_enum = input_checkpoint.get('labels_enumeration') 35 | model_configuration = input_checkpoint.get('configuration') 36 | model_weights = input_checkpoint.get('model') 37 | image_size = model_configuration.get('image_size') 38 | 39 | # Set the training device to GPU if available - if not set it to CPU 40 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 41 | torch.backends.cudnn.benchmark = True if torch.cuda.is_available() else False # optimization for fixed input size 42 | 43 | model = get_model_instance_segmentation(model_configuration.get('num_classes'), 44 | model_configuration.get('mask_predictor_hidden_layer')) 45 | 46 | # if there is more than one GPU, parallelize the model 47 | if torch.cuda.device_count() > 1: 48 | print("{} GPUs were detected - we will use all of them".format(torch.cuda.device_count())) 49 | model = torch.nn.DataParallel(model) 50 | 51 | # copy the model to each device 52 | model.to(device) 53 | 54 | # Determine which IoU types the model supports 55 | iou_types = get_iou_types(model) 56 | use_mask = True if "segm" in iou_types else False 57 | 58 | # Load pretrained model weights 59 | model.load_state_dict(model_weights) 60 | 61 | # set the model to inference mode 62 | model.eval() 63 | 64 | images_paths = [] 65 | for file_type in ('*.png', '*.jpg', '*.jpeg'): 66 | images_paths.extend(glob.glob(os.path.join(task_args.input_dataset_root, file_type))) 67 | 68 | transforms = get_transform(train=False, image_size=image_size) 69 | 70 | path_to_json = os.path.join(task_args.output_dir, "inference_results.json") 71 | coco_like_anns = CocoLikeAnnotations() 72 | batch_images = [] 73 | batch_paths = [] 74 | batch_shapes = [] 75 | 76 | for i, image_path in enumerate(images_paths): 77 | img = Image.open(image_path).convert('RGB') 78 | batch_shapes.append({'height': img.height, 'width': img.width}) 79 | img, __ = transforms(img) 80 | batch_images.append(img) 81 | batch_paths.append(image_path) 82 | if len(batch_images) < task_args.batch_size: 83 | continue 84 | 85 | input_images = torch.stack(batch_images) 86 | 87 | with torch.no_grad(): 88 | torch_out = model(input_images.to(device)) 89 | 90 | for img_num, image in enumerate(input_images): 91 | valid_detections = torch_out[img_num].get('scores') >= task_args.detection_thresh 92 | img_boxes = torch_out[img_num].get('boxes')[valid_detections].cpu().numpy() 93 | img_labels_ids = torch_out[img_num].get('labels')[valid_detections].cpu().numpy() 94 | img_labels = [labels_enum[label]['name'] for label in img_labels_ids] 95 | image_id = (i + 1 - task_args.batch_size + img_num) 96 | orig_height = batch_shapes[img_num].get('height') 97 | orig_width = batch_shapes[img_num].get('width') 98 | 99 | coco_like_anns.update_images(file_name=Path(batch_paths[img_num]).name, 100 | height=orig_height, width=orig_width, 101 | id=image_id) 102 | 103 | for box, label, label_id in zip(img_boxes, img_labels, img_labels_ids): 104 | orig_box = rescale_box(image_size=image_size, orig_height=orig_height, orig_width=orig_width, box=box.copy()) 105 | coco_like_anns.update_annotations(box=orig_box, label_id=label_id, 106 | image_id=image_id) 107 | 108 | if ((i+1)/task_args.batch_size) % task_args.log_interval == 0: 109 | print('Batch {}: Saving 
detections of file {} to {}'.format(int((i+1)/task_args.batch_size), 110 | Path(batch_paths[img_num]).name, 111 | path_to_json)) 112 | 113 | if ((i+1)/task_args.batch_size) % task_args.debug_images_interval == 0: 114 | debug_image = draw_boxes(np.array(F.to_pil_image(image.cpu())), img_boxes, img_labels, color=(0, 150, 0)) 115 | writer.add_image("inference/image_{}".format(img_num), debug_image, ((i+1)/task_args.batch_size), 116 | dataformats='HWC') 117 | 118 | batch_images = [] 119 | batch_paths = [] 120 | 121 | coco_like_anns.dump_to_json(path_to_json=path_to_json) 122 | 123 | 124 | if __name__ == "__main__": 125 | parser = ArgumentParser() 126 | parser.add_argument('--batch_size', type=int, default=4, 127 | help='input batch size for training and validation (default: 4)') 128 | parser.add_argument('--detection_thresh', type=float, default=0.4, 129 | help='Inference confidence threshold') 130 | parser.add_argument('--log_interval', type=int, default=100, 131 | help='how many batches to wait before logging training status') 132 | parser.add_argument('--debug_images_interval', type=int, default=500, 133 | help='how many batches to wait before logging debug images') 134 | parser.add_argument('--input_dataset_root', type=str, 135 | default='/media/dan/bigdata/datasets/coco/2017/val2017', 136 | help='annotation file of test dataset') 137 | parser.add_argument('--input_checkpoint', type=str, default='/tmp/checkpoints/model_epoch_10.pth', 138 | help='Checkpoint to use for inference') 139 | parser.add_argument("--output_dir", type=str, default="/tmp/inference_results", 140 | help="output directory for saving models checkpoints") 141 | parser.add_argument("--log_dir", type=str, default="/tmp/tensorboard_logs", 142 | help="log directory for Tensorboard log output") 143 | args = parser.parse_args() 144 | 145 | if not os.path.exists(args.output_dir): 146 | utils.mkdir(args.output_dir) 147 | if not os.path.exists(args.log_dir): 148 | utils.mkdir(args.log_dir) 149 | 150 | run(args) 151 | -------------------------------------------------------------------------------- /once_upon_a_repository/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow == 10.2.0 2 | attrs == 19.3.0 3 | numpy == 1.22.0 4 | opencv_python == 4.2.0.32 5 | pathlib2 == 2.3.5 6 | pycocotools == 2.0.0 7 | pytorch_ignite == 0.2.1 8 | torch == 1.4.0 9 | torchvision == 0.5.0 10 | trains == 0.13.2 11 | tensorboard==2.1.0 12 | -------------------------------------------------------------------------------- /once_upon_a_repository/torchvision_references/coco_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import copy 5 | import torch 6 | import torch._six 7 | 8 | from pycocotools.cocoeval import COCOeval 9 | from pycocotools.coco import COCO 10 | import pycocotools.mask as mask_util 11 | 12 | from collections import defaultdict 13 | 14 | from torchvision_references import utils 15 | 16 | 17 | class CocoEvaluator(object): 18 | def __init__(self, coco_gt, iou_types): 19 | assert isinstance(iou_types, (list, tuple)) 20 | coco_gt = copy.deepcopy(coco_gt) 21 | self.coco_gt = coco_gt 22 | 23 | self.iou_types = iou_types 24 | self.coco_eval = {} 25 | for iou_type in iou_types: 26 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 27 | 28 | self.img_ids = [] 29 | self.eval_imgs = {k: [] for k in iou_types} 30 | 31 | def update(self, predictions): 32 | img_ids = 
list(np.unique(list(predictions.keys()))) 33 | self.img_ids.extend(img_ids) 34 | 35 | for iou_type in self.iou_types: 36 | results = self.prepare(predictions, iou_type) 37 | coco_dt = loadRes(self.coco_gt, results) if results else COCO() 38 | coco_eval = self.coco_eval[iou_type] 39 | 40 | coco_eval.cocoDt = coco_dt 41 | coco_eval.params.imgIds = list(img_ids) 42 | img_ids, eval_imgs = evaluate(coco_eval) 43 | 44 | self.eval_imgs[iou_type].append(eval_imgs) 45 | 46 | def synchronize_between_processes(self): 47 | for iou_type in self.iou_types: 48 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 49 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 50 | 51 | def accumulate(self): 52 | for coco_eval in self.coco_eval.values(): 53 | coco_eval.accumulate() 54 | 55 | def summarize(self): 56 | for iou_type, coco_eval in self.coco_eval.items(): 57 | print("IoU metric: {}".format(iou_type)) 58 | coco_eval.summarize() 59 | 60 | def prepare(self, predictions, iou_type): 61 | if iou_type == "bbox": 62 | return self.prepare_for_coco_detection(predictions) 63 | elif iou_type == "segm": 64 | return self.prepare_for_coco_segmentation(predictions) 65 | elif iou_type == "keypoints": 66 | return self.prepare_for_coco_keypoint(predictions) 67 | else: 68 | raise ValueError("Unknown iou type {}".format(iou_type)) 69 | 70 | def prepare_for_coco_detection(self, predictions): 71 | coco_results = [] 72 | for original_id, prediction in predictions.items(): 73 | if len(prediction) == 0: 74 | continue 75 | 76 | boxes = prediction["boxes"] 77 | boxes = convert_to_xywh(boxes).tolist() 78 | scores = prediction["scores"].tolist() 79 | labels = prediction["labels"].tolist() 80 | 81 | coco_results.extend( 82 | [ 83 | { 84 | "image_id": original_id, 85 | "category_id": labels[k], 86 | "bbox": box, 87 | "score": scores[k], 88 | } 89 | for k, box in enumerate(boxes) 90 | ] 91 | ) 92 | return coco_results 93 | 94 | def prepare_for_coco_segmentation(self, predictions): 95 | coco_results = [] 96 | for original_id, prediction in predictions.items(): 97 | if len(prediction) == 0: 98 | continue 99 | 100 | masks = prediction["masks"] 101 | masks = (masks > 0.5).type(torch.uint8) 102 | 103 | scores = prediction["scores"].tolist() 104 | labels = prediction["labels"].tolist() 105 | 106 | rles = [ 107 | mask_util.encode(np.array(mask.cpu()[0, :, :, np.newaxis], order="F"))[0] 108 | for mask in masks 109 | ] 110 | for rle in rles: 111 | rle["counts"] = rle["counts"].decode("utf-8") 112 | 113 | coco_results.extend( 114 | [ 115 | { 116 | "image_id": original_id, 117 | "category_id": labels[k], 118 | "segmentation": rle, 119 | "score": scores[k], 120 | } 121 | for k, rle in enumerate(rles) 122 | ] 123 | ) 124 | return coco_results 125 | 126 | def prepare_for_coco_keypoint(self, predictions): 127 | coco_results = [] 128 | for original_id, prediction in predictions.items(): 129 | if len(prediction) == 0: 130 | continue 131 | 132 | boxes = prediction["boxes"] 133 | boxes = convert_to_xywh(boxes).tolist() 134 | scores = prediction["scores"].tolist() 135 | labels = prediction["labels"].tolist() 136 | keypoints = prediction["keypoints"] 137 | keypoints = keypoints.flatten(start_dim=1).tolist() 138 | 139 | coco_results.extend( 140 | [ 141 | { 142 | "image_id": original_id, 143 | "category_id": labels[k], 144 | 'keypoints': keypoint, 145 | "score": scores[k], 146 | } 147 | for k, keypoint in enumerate(keypoints) 148 | ] 149 | ) 150 | return coco_results 151 | 152 | 153 | def 
convert_to_xywh(boxes): 154 | xmin, ymin, xmax, ymax = boxes.unbind(1) 155 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 156 | 157 | 158 | def merge(img_ids, eval_imgs): 159 | all_img_ids = utils.all_gather(img_ids) 160 | all_eval_imgs = utils.all_gather(eval_imgs) 161 | 162 | merged_img_ids = [] 163 | for p in all_img_ids: 164 | merged_img_ids.extend(p) 165 | 166 | merged_eval_imgs = [] 167 | for p in all_eval_imgs: 168 | merged_eval_imgs.append(p) 169 | 170 | merged_img_ids = np.array(merged_img_ids) 171 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 172 | 173 | # keep only unique (and in sorted order) images 174 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 175 | merged_eval_imgs = merged_eval_imgs[..., idx] 176 | 177 | return merged_img_ids, merged_eval_imgs 178 | 179 | 180 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 181 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 182 | img_ids = list(img_ids) 183 | eval_imgs = list(eval_imgs.flatten()) 184 | 185 | coco_eval.evalImgs = eval_imgs 186 | coco_eval.params.imgIds = img_ids 187 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 188 | 189 | 190 | ################################################################# 191 | # From pycocotools, just removed the prints and fixed 192 | # a Python3 bug about unicode not defined 193 | ################################################################# 194 | 195 | # Ideally, pycocotools wouldn't have hard-coded prints 196 | # so that we could avoid copy-pasting those two functions 197 | 198 | def createIndex(self): 199 | # create index 200 | # print('creating index...') 201 | anns, cats, imgs = {}, {}, {} 202 | imgToAnns, catToImgs = defaultdict(list), defaultdict(list) 203 | if 'annotations' in self.dataset: 204 | for ann in self.dataset['annotations']: 205 | imgToAnns[ann['image_id']].append(ann) 206 | anns[ann['id']] = ann 207 | 208 | if 'images' in self.dataset: 209 | for img in self.dataset['images']: 210 | imgs[img['id']] = img 211 | 212 | if 'categories' in self.dataset: 213 | for cat in self.dataset['categories']: 214 | cats[cat['id']] = cat 215 | 216 | if 'annotations' in self.dataset and 'categories' in self.dataset: 217 | for ann in self.dataset['annotations']: 218 | catToImgs[ann['category_id']].append(ann['image_id']) 219 | 220 | # print('index created!') 221 | 222 | # create class members 223 | self.anns = anns 224 | self.imgToAnns = imgToAnns 225 | self.catToImgs = catToImgs 226 | self.imgs = imgs 227 | self.cats = cats 228 | 229 | 230 | maskUtils = mask_util 231 | 232 | 233 | def loadRes(self, resFile): 234 | """ 235 | Load result file and return a result api object. 
236 | :param resFile (str) : file name of result file 237 | :return: res (obj) : result api object 238 | """ 239 | res = COCO() 240 | res.dataset['images'] = [img for img in self.dataset['images']] 241 | 242 | # print('Loading and preparing results...') 243 | # tic = time.time() 244 | if isinstance(resFile, torch._six.string_classes): 245 | anns = json.load(open(resFile)) 246 | elif type(resFile) == np.ndarray: 247 | anns = self.loadNumpyAnnotations(resFile) 248 | else: 249 | anns = resFile 250 | assert type(anns) == list, 'results in not an array of objects' 251 | annsImgIds = [ann['image_id'] for ann in anns] 252 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 253 | 'Results do not correspond to current coco set' 254 | if 'caption' in anns[0]: 255 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 256 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 257 | for id, ann in enumerate(anns): 258 | ann['id'] = id + 1 259 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 260 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 261 | for id, ann in enumerate(anns): 262 | bb = ann['bbox'] 263 | x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 264 | if 'segmentation' not in ann: 265 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 266 | ann['area'] = bb[2] * bb[3] 267 | ann['id'] = id + 1 268 | ann['iscrowd'] = 0 269 | elif 'segmentation' in anns[0]: 270 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 271 | for id, ann in enumerate(anns): 272 | # now only support compressed RLE format as segmentation results 273 | ann['area'] = maskUtils.area(ann['segmentation']) 274 | if 'bbox' not in ann: 275 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 276 | ann['id'] = id + 1 277 | ann['iscrowd'] = 0 278 | elif 'keypoints' in anns[0]: 279 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 280 | for id, ann in enumerate(anns): 281 | s = ann['keypoints'] 282 | x = s[0::3] 283 | y = s[1::3] 284 | x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) 285 | ann['area'] = (x2 - x1) * (y2 - y1) 286 | ann['id'] = id + 1 287 | ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] 288 | # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 289 | 290 | res.dataset['annotations'] = anns 291 | createIndex(res) 292 | return res 293 | 294 | 295 | def evaluate(self): 296 | ''' 297 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 298 | :return: None 299 | ''' 300 | # tic = time.time() 301 | # print('Running per image evaluation...') 302 | p = self.params 303 | # add backward compatibility if useSegm is specified in params 304 | if p.useSegm is not None: 305 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 306 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) 307 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 308 | p.imgIds = list(np.unique(p.imgIds)) 309 | if p.useCats: 310 | p.catIds = list(np.unique(p.catIds)) 311 | p.maxDets = sorted(p.maxDets) 312 | self.params = p 313 | 314 | self._prepare() 315 | # loop through images, area range, max detection number 316 | catIds = p.catIds if p.useCats else [-1] 317 | 318 | if p.iouType == 'segm' or p.iouType == 'bbox': 319 | computeIoU = self.computeIoU 320 | elif p.iouType == 'keypoints': 321 | computeIoU = self.computeOks 322 | self.ious = { 323 | (imgId, catId): computeIoU(imgId, catId) 324 | for imgId in p.imgIds 325 | for catId in catIds} 326 | 327 | evaluateImg = self.evaluateImg 328 | maxDet = p.maxDets[-1] 329 | evalImgs = [ 330 | evaluateImg(imgId, catId, areaRng, maxDet) 331 | for catId in catIds 332 | for areaRng in p.areaRng 333 | for imgId in p.imgIds 334 | ] 335 | # this is NOT in the pycocotools code, but could be done outside 336 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 337 | self._paramsEval = copy.deepcopy(self.params) 338 | # toc = time.time() 339 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 340 | return p.imgIds, evalImgs 341 | 342 | ################################################################# 343 | # end of straight copy from pycocotools, just removing the prints 344 | ################################################################# 345 | -------------------------------------------------------------------------------- /once_upon_a_repository/torchvision_references/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | import torchvision 7 | 8 | from pycocotools import mask as coco_mask 9 | from pycocotools.coco import COCO 10 | 11 | import transforms as T 12 | 13 | 14 | class FilterAndRemapCocoCategories(object): 15 | def __init__(self, categories, remap=True): 16 | self.categories = categories 17 | self.remap = remap 18 | 19 | def __call__(self, image, target): 20 | anno = target["annotations"] 21 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 22 | if not self.remap: 23 | target["annotations"] = anno 24 | return image, target 25 | anno = copy.deepcopy(anno) 26 | for obj in anno: 27 | obj["category_id"] = self.categories.index(obj["category_id"]) 28 | target["annotations"] = anno 29 | return image, target 30 | 31 | 32 | def convert_coco_poly_to_mask(segmentations, height, width): 33 | masks = [] 34 | for polygons in segmentations: 35 | rles = coco_mask.frPyObjects(polygons, height, width) 36 | mask = coco_mask.decode(rles) 37 | if len(mask.shape) < 3: 38 | mask = mask[..., None] 39 | mask = torch.as_tensor(mask, dtype=torch.uint8) 40 | mask = mask.any(dim=2) 41 | masks.append(mask) 42 | if masks: 43 | masks = torch.stack(masks, dim=0) 44 | else: 45 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 46 | return masks 47 | 48 | 49 | class ConvertCocoPolysToMask(object): 50 | def __call__(self, image, target): 51 | w, h = image.size 52 | 53 | image_id = target["image_id"] 54 | image_id = torch.tensor([image_id]) 55 | 56 | anno = target["annotations"] 57 | 58 | anno = [obj for obj in anno if obj['iscrowd'] == 0] 59 | 60 | boxes = [obj["bbox"] for obj in anno] 61 | # guard against no boxes via resizing 62 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 63 | boxes[:, 2:] += boxes[:, :2] 64 | boxes[:, 
0::2].clamp_(min=0, max=w) 65 | boxes[:, 1::2].clamp_(min=0, max=h) 66 | 67 | classes = [obj["category_id"] for obj in anno] 68 | classes = torch.tensor(classes, dtype=torch.int64) 69 | 70 | segmentations = [obj["segmentation"] for obj in anno] 71 | masks = convert_coco_poly_to_mask(segmentations, h, w) 72 | 73 | keypoints = None 74 | if anno and "keypoints" in anno[0]: 75 | keypoints = [obj["keypoints"] for obj in anno] 76 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 77 | num_keypoints = keypoints.shape[0] 78 | if num_keypoints: 79 | keypoints = keypoints.view(num_keypoints, -1, 3) 80 | 81 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 82 | boxes = boxes[keep] 83 | classes = classes[keep] 84 | masks = masks[keep] 85 | if keypoints is not None: 86 | keypoints = keypoints[keep] 87 | 88 | target = {} 89 | target["boxes"] = boxes 90 | target["labels"] = classes 91 | target["masks"] = masks 92 | target["image_id"] = image_id 93 | if keypoints is not None: 94 | target["keypoints"] = keypoints 95 | 96 | # for conversion to coco api 97 | area = torch.tensor([obj["area"] for obj in anno]) 98 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 99 | target["area"] = area 100 | target["iscrowd"] = iscrowd 101 | 102 | return image, target 103 | 104 | 105 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 106 | def _has_only_empty_bbox(anno): 107 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 108 | 109 | def _count_visible_keypoints(anno): 110 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 111 | 112 | min_keypoints_per_image = 10 113 | 114 | def _has_valid_annotation(anno): 115 | # if it's empty, there is no annotation 116 | if len(anno) == 0: 117 | return False 118 | # if all boxes have close to zero area, there is no annotation 119 | if _has_only_empty_bbox(anno): 120 | return False 121 | # keypoints task have a slight different critera for considering 122 | # if an annotation is valid 123 | if "keypoints" not in anno[0]: 124 | return True 125 | # for keypoint detection tasks, only consider valid images those 126 | # containing at least min_keypoints_per_image 127 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 128 | return True 129 | return False 130 | 131 | assert isinstance(dataset, torchvision.datasets.CocoDetection) 132 | ids = [] 133 | for ds_idx, img_id in enumerate(dataset.ids): 134 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 135 | anno = dataset.coco.loadAnns(ann_ids) 136 | if cat_list: 137 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 138 | if _has_valid_annotation(anno): 139 | ids.append(ds_idx) 140 | 141 | dataset = torch.utils.data.Subset(dataset, ids) 142 | return dataset 143 | 144 | 145 | def convert_to_coco_api(ds): 146 | coco_ds = COCO() 147 | ann_id = 0 148 | dataset = {'images': [], 'categories': [], 'annotations': []} 149 | categories = set() 150 | for img_idx in range(len(ds)): 151 | # find better way to get target 152 | # targets = ds.get_annotations(img_idx) 153 | img, targets = ds[img_idx] 154 | image_id = targets["image_id"].item() 155 | img_dict = {} 156 | img_dict['id'] = image_id 157 | img_dict['height'] = img.shape[-2] 158 | img_dict['width'] = img.shape[-1] 159 | dataset['images'].append(img_dict) 160 | bboxes = targets["boxes"] 161 | bboxes[:, 2:] -= bboxes[:, :2] 162 | bboxes = bboxes.tolist() 163 | labels = targets['labels'].tolist() 164 | areas = targets['area'].tolist() 165 | iscrowd = 
targets['iscrowd'].tolist() 166 | if 'masks' in targets: 167 | masks = targets['masks'] 168 | # make masks Fortran contiguous for coco_mask 169 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 170 | if 'keypoints' in targets: 171 | keypoints = targets['keypoints'] 172 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 173 | num_objs = len(bboxes) 174 | for i in range(num_objs): 175 | ann = {} 176 | ann['image_id'] = image_id 177 | ann['bbox'] = bboxes[i] 178 | ann['category_id'] = labels[i] 179 | categories.add(labels[i]) 180 | ann['area'] = areas[i] 181 | ann['iscrowd'] = iscrowd[i] 182 | ann['id'] = ann_id 183 | if 'masks' in targets: 184 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 185 | if 'keypoints' in targets: 186 | ann['keypoints'] = keypoints[i] 187 | ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) 188 | dataset['annotations'].append(ann) 189 | ann_id += 1 190 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 191 | coco_ds.dataset = dataset 192 | coco_ds.createIndex() 193 | return coco_ds 194 | 195 | 196 | def get_coco_api_from_dataset(dataset): 197 | for _ in range(10): 198 | if isinstance(dataset, torchvision.datasets.CocoDetection): 199 | break 200 | if isinstance(dataset, torch.utils.data.Subset): 201 | dataset = dataset.dataset 202 | if isinstance(dataset, torchvision.datasets.CocoDetection): 203 | return dataset.coco 204 | return convert_to_coco_api(dataset) 205 | 206 | 207 | class CocoDetection(torchvision.datasets.CocoDetection): 208 | def __init__(self, img_folder, ann_file, transforms): 209 | super(CocoDetection, self).__init__(img_folder, ann_file) 210 | self._transforms = transforms 211 | 212 | def __getitem__(self, idx): 213 | img, target = super(CocoDetection, self).__getitem__(idx) 214 | image_id = self.ids[idx] 215 | target = dict(image_id=image_id, annotations=target) 216 | if self._transforms is not None: 217 | img, target = self._transforms(img, target) 218 | return img, target 219 | 220 | 221 | def get_coco(root, image_set, transforms, mode='instances'): 222 | anno_file_template = "{}_{}2017.json" 223 | PATHS = { 224 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 225 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 226 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 227 | } 228 | 229 | t = [ConvertCocoPolysToMask()] 230 | 231 | if transforms is not None: 232 | t.append(transforms) 233 | transforms = T.Compose(t) 234 | 235 | img_folder, ann_file = PATHS[image_set] 236 | img_folder = os.path.join(root, img_folder) 237 | ann_file = os.path.join(root, ann_file) 238 | 239 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 240 | 241 | if image_set == "train": 242 | dataset = _coco_remove_images_without_annotations(dataset) 243 | 244 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 245 | 246 | return dataset 247 | 248 | 249 | def get_coco_kp(root, image_set, transforms): 250 | return get_coco(root, image_set, transforms, mode="person_keypoints") 251 | -------------------------------------------------------------------------------- /once_upon_a_repository/torchvision_references/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pickle 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | import errno 9 | import 
os 10 | 11 | 12 | def all_gather(data): 13 | """ 14 | Run all_gather on arbitrary picklable data (not necessarily tensors) 15 | Args: 16 | data: any picklable object 17 | Returns: 18 | list[data]: list of data gathered from each rank 19 | """ 20 | world_size = get_world_size() 21 | if world_size == 1: 22 | return [data] 23 | 24 | # serialized to a Tensor 25 | buffer = pickle.dumps(data) 26 | storage = torch.ByteStorage.from_buffer(buffer) 27 | tensor = torch.ByteTensor(storage).to("cuda") 28 | 29 | # obtain Tensor size of each rank 30 | local_size = torch.tensor([tensor.numel()], device="cuda") 31 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 32 | dist.all_gather(size_list, local_size) 33 | size_list = [int(size.item()) for size in size_list] 34 | max_size = max(size_list) 35 | 36 | # receiving Tensor from all ranks 37 | # we pad the tensor because torch all_gather does not support 38 | # gathering tensors of different shapes 39 | tensor_list = [] 40 | for _ in size_list: 41 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 42 | if local_size != max_size: 43 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 44 | tensor = torch.cat((tensor, padding), dim=0) 45 | dist.all_gather(tensor_list, tensor) 46 | 47 | data_list = [] 48 | for size, tensor in zip(size_list, tensor_list): 49 | buffer = tensor.cpu().numpy().tobytes()[:size] 50 | data_list.append(pickle.loads(buffer)) 51 | 52 | return data_list 53 | 54 | 55 | def reduce_dict(input_dict, average=True): 56 | """ 57 | Args: 58 | input_dict (dict): all the values will be reduced 59 | average (bool): whether to do average or sum 60 | Reduce the values in the dictionary from all processes so that all processes 61 | have the averaged results. Returns a dict with the same fields as 62 | input_dict, after reduction. 
63 | """ 64 | world_size = get_world_size() 65 | if world_size < 2: 66 | return input_dict 67 | with torch.no_grad(): 68 | names = [] 69 | values = [] 70 | # sort the keys so that they are consistent across processes 71 | for k in sorted(input_dict.keys()): 72 | names.append(k) 73 | values.append(input_dict[k]) 74 | values = torch.stack(values, dim=0) 75 | dist.all_reduce(values) 76 | if average: 77 | values /= world_size 78 | reduced_dict = {k: v for k, v in zip(names, values)} 79 | return reduced_dict 80 | 81 | 82 | def collate_fn(batch): 83 | return tuple(zip(*batch)) 84 | 85 | 86 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 87 | 88 | def f(x): 89 | if x >= warmup_iters: 90 | return 1 91 | alpha = float(x) / warmup_iters 92 | return warmup_factor * (1 - alpha) + alpha 93 | 94 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 95 | 96 | 97 | def mkdir(path): 98 | try: 99 | os.makedirs(path) 100 | except OSError as e: 101 | if e.errno != errno.EEXIST: 102 | raise 103 | 104 | 105 | def setup_for_distributed(is_master): 106 | """ 107 | This function disables printing when not in master process 108 | """ 109 | import builtins as __builtin__ 110 | builtin_print = __builtin__.print 111 | 112 | def print(*args, **kwargs): 113 | force = kwargs.pop('force', False) 114 | if is_master or force: 115 | builtin_print(*args, **kwargs) 116 | 117 | __builtin__.print = print 118 | 119 | 120 | def is_dist_avail_and_initialized(): 121 | if not dist.is_available(): 122 | return False 123 | if not dist.is_initialized(): 124 | return False 125 | return True 126 | 127 | 128 | def get_world_size(): 129 | if not is_dist_avail_and_initialized(): 130 | return 1 131 | return dist.get_world_size() 132 | 133 | 134 | def get_rank(): 135 | if not is_dist_avail_and_initialized(): 136 | return 0 137 | return dist.get_rank() 138 | 139 | 140 | def is_main_process(): 141 | return get_rank() == 0 142 | 143 | 144 | def save_on_master(*args, **kwargs): 145 | if is_main_process(): 146 | torch.save(*args, **kwargs) 147 | 148 | 149 | def init_distributed_mode(args): 150 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 151 | args.rank = int(os.environ["RANK"]) 152 | args.world_size = int(os.environ['WORLD_SIZE']) 153 | args.gpu = int(os.environ['LOCAL_RANK']) 154 | elif 'SLURM_PROCID' in os.environ: 155 | args.rank = int(os.environ['SLURM_PROCID']) 156 | args.gpu = args.rank % torch.cuda.device_count() 157 | else: 158 | print('Not using distributed mode') 159 | args.distributed = False 160 | return 161 | 162 | args.distributed = True 163 | 164 | torch.cuda.set_device(args.gpu) 165 | args.dist_backend = 'nccl' 166 | print('| distributed init (rank {}): {}'.format( 167 | args.rank, args.dist_url), flush=True) 168 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 169 | world_size=args.world_size, rank=args.rank) 170 | torch.distributed.barrier() 171 | setup_for_distributed(args.rank == 0) 172 | -------------------------------------------------------------------------------- /once_upon_a_repository/train_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 3 | from itertools import chain 4 | from operator import add 5 | 6 | import numpy as np 7 | import torch 8 | from PIL import Image 9 | from ignite.engine import Events 10 | from pathlib2 import Path 11 | from torch.utils.data import DataLoader 12 | from torch.utils.tensorboard import 
SummaryWriter 13 | from torchvision.datasets.coco import CocoDetection 14 | from trains import Task 15 | 16 | from engines import create_trainer, create_evaluator 17 | from torchvision_references import utils 18 | from torchvision_references.coco_eval import CocoEvaluator 19 | from torchvision_references.coco_utils import convert_to_coco_api 20 | from transforms import get_transform 21 | from utilities import draw_debug_images, draw_mask, get_model_instance_segmentation, safe_collate, get_iou_types 22 | 23 | task = Task.init(project_name='Object Detection with TRAINS, Ignite and TensorBoard', 24 | task_name='Train MaskRCNN with torchvision') 25 | 26 | configuration_data = {'image_size': 512, 'mask_predictor_hidden_layer': 256} 27 | configuration_data = task.connect_configuration(configuration_data) 28 | 29 | 30 | class CocoMask(CocoDetection): 31 | def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None, use_mask=True): 32 | super(CocoMask, self).__init__(root, annFile, transforms, target_transform, transform) 33 | self.transforms = transforms 34 | self.use_mask = use_mask 35 | 36 | def __getitem__(self, index): 37 | coco = self.coco 38 | img_id = self.ids[index] 39 | ann_ids = coco.getAnnIds(imgIds=img_id) 40 | target = coco.loadAnns(ann_ids) 41 | if len(ann_ids) == 0: 42 | return None 43 | 44 | path = coco.loadImgs(img_id)[0]['file_name'] 45 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 46 | 47 | # From boxes [x, y, w, h] to [x1, y1, x2, y2] 48 | new_target = {"image_id": torch.as_tensor(target[0]['image_id'], dtype=torch.int64), 49 | "area": torch.as_tensor([obj['area'] for obj in target], dtype=torch.float32), 50 | "iscrowd": torch.as_tensor([obj['iscrowd'] for obj in target], dtype=torch.int64), 51 | "boxes": torch.as_tensor([obj['bbox'][:2] + list(map(add, obj['bbox'][:2], obj['bbox'][2:])) 52 | for obj in target], dtype=torch.float32), 53 | "labels": torch.as_tensor([obj['category_id'] for obj in target], dtype=torch.int64)} 54 | if self.use_mask: 55 | mask = [coco.annToMask(ann) for ann in target] 56 | if len(mask) > 1: 57 | mask = np.stack(tuple(mask), axis=0) 58 | new_target["masks"] = torch.as_tensor(mask, dtype=torch.uint8) 59 | 60 | if self.transforms is not None: 61 | img, new_target = self.transforms(img, new_target) 62 | 63 | return img, new_target 64 | 65 | 66 | def get_data_loaders(train_ann_file, test_ann_file, batch_size, test_size, image_size, use_mask): 67 | # first, crate PyTorch dataset objects, for the train and validation data. 
68 | dataset = CocoMask( 69 | root=Path.joinpath(Path(train_ann_file).parent.parent, train_ann_file.split('_')[1].split('.')[0]), 70 | annFile=train_ann_file, 71 | transforms=get_transform(train=True, image_size=image_size), 72 | use_mask=use_mask) 73 | dataset_test = CocoMask( 74 | root=Path.joinpath(Path(test_ann_file).parent.parent, test_ann_file.split('_')[1].split('.')[0]), 75 | annFile=test_ann_file, 76 | transforms=get_transform(train=False, image_size=image_size), 77 | use_mask=use_mask) 78 | 79 | labels_enumeration = dataset.coco.cats 80 | 81 | indices_val = torch.randperm(len(dataset_test)).tolist() 82 | dataset_val = torch.utils.data.Subset(dataset_test, indices_val[:test_size]) 83 | 84 | # set train and validation data-loaders 85 | train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=6, 86 | collate_fn=safe_collate, pin_memory=True) 87 | val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False, num_workers=6, 88 | collate_fn=safe_collate, pin_memory=True) 89 | 90 | return train_loader, val_loader, labels_enumeration 91 | 92 | 93 | def run(task_args): 94 | # Define train and test datasets 95 | train_loader, val_loader, labels_enum = get_data_loaders(task_args.train_dataset_ann_file, 96 | task_args.val_dataset_ann_file, 97 | task_args.batch_size, 98 | task_args.test_size, 99 | configuration_data.get('image_size'), 100 | use_mask=True) 101 | val_dataset = list(chain.from_iterable(zip(*batch) for batch in iter(val_loader))) 102 | coco_api_val_dataset = convert_to_coco_api(val_dataset) 103 | num_classes = max(labels_enum.keys()) + 1 # number of classes plus one for background class 104 | configuration_data['num_classes'] = num_classes 105 | 106 | # Set the training device to GPU if available - if not set it to CPU 107 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 108 | torch.backends.cudnn.benchmark = True if torch.cuda.is_available() else False # optimization for fixed input size 109 | 110 | model = get_model_instance_segmentation(num_classes, configuration_data.get('mask_predictor_hidden_layer')) 111 | iou_types = get_iou_types(model) 112 | 113 | # if there is more than one GPU, parallelize the model 114 | if torch.cuda.device_count() > 1: 115 | print("{} GPUs were detected - we will use all of them".format(torch.cuda.device_count())) 116 | model = torch.nn.DataParallel(model) 117 | 118 | # copy the model to each device 119 | model.to(device) 120 | 121 | if task_args.input_checkpoint: 122 | print('Loading model checkpoint from '.format(task_args.input_checkpoint)) 123 | input_checkpoint = torch.load(task_args.input_checkpoint, map_location=torch.device(device)) 124 | model.load_state_dict(input_checkpoint['model']) 125 | 126 | writer = SummaryWriter(log_dir=task_args.log_dir) 127 | 128 | # define Ignite's train and evaluation engine 129 | trainer = create_trainer(model, device) 130 | evaluator = create_evaluator(model, device) 131 | 132 | @trainer.on(Events.STARTED) 133 | def on_training_started(engine): 134 | # construct an optimizer 135 | params = [p for p in model.parameters() if p.requires_grad] 136 | engine.state.optimizer = torch.optim.SGD(params, 137 | lr=task_args.lr, 138 | momentum=task_args.momentum, 139 | weight_decay=task_args.weight_decay) 140 | engine.state.scheduler = torch.optim.lr_scheduler.StepLR(engine.state.optimizer, step_size=3, gamma=0.1) 141 | if task_args.input_checkpoint and task_args.load_optimizer: 142 | 
engine.state.optimizer.load_state_dict(input_checkpoint['optimizer']) 143 | engine.state.scheduler.load_state_dict(input_checkpoint['lr_scheduler']) 144 | 145 | @trainer.on(Events.EPOCH_STARTED) 146 | def on_epoch_started(engine): 147 | model.train() 148 | engine.state.warmup_scheduler = None 149 | if engine.state.epoch == 1: 150 | warmup_iters = min(task_args.warmup_iterations, len(train_loader) - 1) 151 | print('Warm up period was set to {} iterations'.format(warmup_iters)) 152 | warmup_factor = 1. / warmup_iters 153 | engine.state.warmup_scheduler = utils.warmup_lr_scheduler(engine.state.optimizer, warmup_iters, warmup_factor) 154 | 155 | @trainer.on(Events.ITERATION_COMPLETED) 156 | def on_iteration_completed(engine): 157 | images, targets, loss_dict_reduced = engine.state.output 158 | if engine.state.iteration % task_args.log_interval == 0: 159 | loss = sum(loss for loss in loss_dict_reduced.values()).item() 160 | print("Epoch: {}, Iteration: {}, Loss: {}".format(engine.state.epoch, engine.state.iteration, loss)) 161 | for k, v in loss_dict_reduced.items(): 162 | writer.add_scalar("loss/{}".format(k), v.item(), engine.state.iteration) 163 | writer.add_scalar("loss/total_loss", sum(loss for loss in loss_dict_reduced.values()).item(), engine.state.iteration) 164 | writer.add_scalar("learning rate/lr", engine.state.optimizer.param_groups[0]['lr'], engine.state.iteration) 165 | 166 | if engine.state.iteration % task_args.debug_images_interval == 0: 167 | for n, debug_image in enumerate(draw_debug_images(images, targets)): 168 | writer.add_image("training/image_{}".format(n), debug_image, engine.state.iteration, dataformats='HWC') 169 | if 'masks' in targets[n]: 170 | writer.add_image("training/image_{}_mask".format(n), 171 | draw_mask(targets[n]), engine.state.iteration, dataformats='HW') 172 | images = targets = loss_dict_reduced = engine.state.output = None 173 | 174 | @trainer.on(Events.EPOCH_COMPLETED) 175 | def on_epoch_completed(engine): 176 | engine.state.scheduler.step() 177 | evaluator.run(val_loader) 178 | for res_type in evaluator.state.coco_evaluator.iou_types: 179 | average_precision_05 = evaluator.state.coco_evaluator.coco_eval[res_type].stats[1] 180 | writer.add_scalar("validation-{}/average precision 0_5".format(res_type), average_precision_05, 181 | engine.state.iteration) 182 | checkpoint_path = os.path.join(task_args.output_dir, 'model_epoch_{}.pth'.format(engine.state.epoch)) 183 | print('Saving model checkpoint') 184 | checkpoint = { 185 | 'model': model.state_dict(), 186 | 'optimizer': engine.state.optimizer.state_dict(), 187 | 'lr_scheduler': engine.state.scheduler.state_dict(), 188 | 'epoch': engine.state.epoch, 189 | 'configuration': configuration_data, 190 | 'labels_enumeration': labels_enum} 191 | utils.save_on_master(checkpoint, checkpoint_path) 192 | print('Model checkpoint from epoch {} was saved at {}'.format(engine.state.epoch, checkpoint_path)) 193 | evaluator.state = checkpoint = None 194 | 195 | @evaluator.on(Events.STARTED) 196 | def on_evaluation_started(engine): 197 | model.eval() 198 | engine.state.coco_evaluator = CocoEvaluator(coco_api_val_dataset, iou_types) 199 | 200 | @evaluator.on(Events.ITERATION_COMPLETED) 201 | def on_eval_iteration_completed(engine): 202 | images, targets, results = engine.state.output 203 | if engine.state.iteration % task_args.log_interval == 0: 204 | print("Evaluation: Iteration: {}".format(engine.state.iteration)) 205 | 206 | if engine.state.iteration % task_args.debug_images_interval == 0: 207 | for n, debug_image in 
enumerate(draw_debug_images(images, targets, results)): 208 | writer.add_image("evaluation/image_{}_{}".format(engine.state.iteration, n), 209 | debug_image, trainer.state.iteration, dataformats='HWC') 210 | if 'masks' in targets[n]: 211 | writer.add_image("evaluation/image_{}_{}_mask".format(engine.state.iteration, n), 212 | draw_mask(targets[n]), trainer.state.iteration, dataformats='HW') 213 | curr_image_id = int(targets[n]['image_id']) 214 | writer.add_image("evaluation/image_{}_{}_predicted_mask".format(engine.state.iteration, n), 215 | draw_mask(results[curr_image_id]).squeeze(), trainer.state.iteration, dataformats='HW') 216 | images = targets = results = engine.state.output = None 217 | 218 | @evaluator.on(Events.COMPLETED) 219 | def on_evaluation_completed(engine): 220 | # gather the stats from all processes 221 | engine.state.coco_evaluator.synchronize_between_processes() 222 | 223 | # accumulate predictions from all images 224 | engine.state.coco_evaluator.accumulate() 225 | engine.state.coco_evaluator.summarize() 226 | 227 | trainer.run(train_loader, max_epochs=task_args.epochs) 228 | writer.close() 229 | 230 | 231 | if __name__ == "__main__": 232 | parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) 233 | parser.add_argument('--warmup_iterations', type=int, default=5000, 234 | help='Number of iteration for warmup period (until reaching base learning rate)') 235 | parser.add_argument('--batch_size', type=int, default=4, 236 | help='input batch size for training and validation') 237 | parser.add_argument('--test_size', type=int, default=2000, 238 | help='number of frames from the test dataset to use for validation') 239 | parser.add_argument('--epochs', type=int, default=10, 240 | help='number of epochs to train') 241 | parser.add_argument('--log_interval', type=int, default=100, 242 | help='how many batches to wait before logging training status') 243 | parser.add_argument('--debug_images_interval', type=int, default=500, 244 | help='how many batches to wait before logging debug images') 245 | parser.add_argument('--train_dataset_ann_file', type=str, 246 | default='~/bigdata/coco/annotations/instances_train2017.json', 247 | help='annotation file of train dataset') 248 | parser.add_argument('--val_dataset_ann_file', type=str, default='~/bigdata/coco/annotations/instances_val2017.json', 249 | help='annotation file of test dataset') 250 | parser.add_argument('--input_checkpoint', type=str, default='', 251 | help='Loading model weights from this checkpoint.') 252 | parser.add_argument('--load_optimizer', default=False, type=bool, 253 | help='Use optimizer and lr_scheduler saved in the input checkpoint to resume training') 254 | parser.add_argument("--output_dir", type=str, default="/tmp/checkpoints", 255 | help="output directory for saving models checkpoints") 256 | parser.add_argument("--log_dir", type=str, default="/tmp/tensorboard_logs", 257 | help="log directory for Tensorboard log output") 258 | parser.add_argument("--lr", type=float, default=0.005, 259 | help="learning rate for optimizer") 260 | parser.add_argument("--momentum", type=float, default=0.9, 261 | help="momentum for optimizer") 262 | parser.add_argument("--weight_decay", type=float, default=0.0005, 263 | help="weight decay for optimizer") 264 | args = parser.parse_args() 265 | 266 | if not os.path.exists(args.output_dir): 267 | utils.mkdir(args.output_dir) 268 | if not os.path.exists(args.log_dir): 269 | utils.mkdir(args.log_dir) 270 | 271 | run(args) 272 | 
-------------------------------------------------------------------------------- /once_upon_a_repository/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from PIL import Image 4 | 5 | from torchvision.transforms import functional as F 6 | 7 | 8 | def get_transform(train, image_size): 9 | transforms = [Resize(size=(image_size, image_size)), ToTensor()] 10 | if train: 11 | transforms.append(RandomHorizontalFlip(0.5)) 12 | return Compose(transforms) 13 | 14 | 15 | def _flip_coco_person_keypoints(kps, width): 16 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 17 | flipped_data = kps[:, flip_inds] 18 | flipped_data[..., 0] = width - flipped_data[..., 0] 19 | # Maintain COCO convention that if visibility == 0, then x, y = 0 20 | inds = flipped_data[..., 2] == 0 21 | flipped_data[inds] = 0 22 | return flipped_data 23 | 24 | 25 | class ToTensor(object): 26 | def __call__(self, image, target): 27 | image = F.to_tensor(image) 28 | return image, target 29 | 30 | 31 | class Compose(object): 32 | def __init__(self, transforms): 33 | self.transforms = transforms 34 | 35 | def __call__(self, image, target=None): 36 | for t in self.transforms: 37 | image, target = t(image, target) 38 | return image, target 39 | 40 | 41 | class RandomHorizontalFlip(object): 42 | def __init__(self, prob): 43 | self.prob = prob 44 | 45 | def __call__(self, image, target): 46 | if random.random() < self.prob: 47 | height, width = image.shape[-2:] 48 | image = image.flip(-1) 49 | bbox = target["boxes"] 50 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 51 | target["boxes"] = bbox 52 | if "masks" in target: 53 | target["masks"] = target["masks"].flip(-1) 54 | if "keypoints" in target: 55 | keypoints = target["keypoints"] 56 | keypoints = _flip_coco_person_keypoints(keypoints, width) 57 | target["keypoints"] = keypoints 58 | return image, target 59 | 60 | 61 | class Resize(object): 62 | """Resize the input PIL image to given size. 63 | If boxes is not None, resize boxes accordingly. 64 | Args: 65 | size: (tuple or int) 66 | - if is tuple, resize image to the size. 67 | - if is int, resize the shorter side to the size while maintaining the aspect ratio. 68 | max_size: (int) when size is int, limit the image longer size to max_size. 69 | This is essential to limit the usage of GPU memory. 70 | random_interpolation: (bool) randomly choose a resize interpolation method. 71 | Returns: 72 | img: (PIL.Image) resized image. 73 | boxes: (tensor) resized boxes. 74 | Example: 75 | >> img, boxes = resize(img, boxes, 600) # resize shorter side to 600 76 | >> img, boxes = resize(img, boxes, (500,600)) # resize image size to (500,600) 77 | >> img, _ = resize(img, None, (500,600)) # resize image only 78 | """ 79 | def __init__(self, size, max_size=1000, random_interpolation=False): 80 | self.size = size 81 | self.max_size = max_size 82 | self.random_interpolation = random_interpolation 83 | 84 | def __call__(self, image, target): 85 | """Resize the input PIL image to given size. 86 | If boxes is not None, resize boxes accordingly. 87 | Args: 88 | image: (PIL.Image) image to be resized. 89 | target: (tensor) object boxes, sized [#obj,4]. 
90 | """ 91 | w, h = image.size 92 | if isinstance(self.size, int): 93 | size_min = min(w, h) 94 | size_max = max(w, h) 95 | sw = sh = float(self.size) / size_min 96 | if sw * size_max > self.max_size: 97 | sw = sh = float(self.max_size) / size_max 98 | ow = int(w * sw + 0.5) 99 | oh = int(h * sh + 0.5) 100 | else: 101 | ow, oh = self.size 102 | sw = float(ow) / w 103 | sh = float(oh) / h 104 | 105 | method = random.choice([ 106 | Image.BOX, 107 | Image.NEAREST, 108 | Image.HAMMING, 109 | Image.BICUBIC, 110 | Image.LANCZOS, 111 | Image.BILINEAR]) if self.random_interpolation else Image.BILINEAR 112 | image = image.resize((ow, oh), method) 113 | if target is not None and "masks" in target: 114 | resized_masks = torch.nn.functional.interpolate( 115 | input=target["masks"][None].float(), 116 | size=(512, 512), 117 | mode="nearest", 118 | )[0].type_as(target["masks"]) 119 | target["masks"] = resized_masks 120 | if target is not None and "boxes" in target: 121 | resized_boxes = target["boxes"] * torch.tensor([sw, sh, sw, sh]) 122 | target["boxes"] = resized_boxes 123 | return image, target 124 | -------------------------------------------------------------------------------- /once_upon_a_repository/utilities.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import attr 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import torchvision 8 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 9 | from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 10 | from torchvision.transforms import functional as F 11 | 12 | from torchvision_references import utils 13 | 14 | 15 | def safe_collate(batch): 16 | batch = list(filter(lambda x: x is not None, batch)) 17 | return utils.collate_fn(batch) 18 | 19 | 20 | def draw_boxes(im, boxes, labels, color=(150, 0, 0)): 21 | for box, draw_label in zip(boxes, labels): 22 | draw_box = box.astype('int') 23 | im = cv2.rectangle(im, tuple(draw_box[:2]), tuple(draw_box[2:]), color, 2) 24 | im = cv2.putText(im, str(draw_label), (draw_box[0], max(0, draw_box[1]-5)), 25 | cv2.FONT_HERSHEY_COMPLEX, 0.8, color, 2) 26 | return im 27 | 28 | 29 | def draw_debug_images(images, targets, predictions=None, score_thr=0.3): 30 | debug_images = [] 31 | for image, target in zip(images, targets): 32 | img = draw_boxes(np.array(F.to_pil_image(image.cpu())), 33 | [box.cpu().numpy() for box in target['boxes']], 34 | [label.item() for label in target['labels']]) 35 | if predictions: 36 | img = draw_boxes(img, 37 | [box.cpu().numpy() for box, score in 38 | zip(predictions[target['image_id'].item()]['boxes'], 39 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 40 | [label.item() for label, score in 41 | zip(predictions[target['image_id'].item()]['labels'], 42 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 43 | color=(0, 150, 0)) 44 | debug_images.append(img) 45 | return debug_images 46 | 47 | 48 | def draw_mask(target): 49 | masks = [channel*label for channel, label in zip(target['masks'].cpu().numpy(), target['labels'].cpu().numpy())] 50 | masks_sum = sum(masks) 51 | masks_out = masks_sum + 25*(masks_sum > 0) 52 | return (masks_out*int(255/masks_out.max())).astype('uint8') 53 | 54 | 55 | def get_model_instance_segmentation(num_classes, hidden_layer): 56 | # load an instance segmentation model pre-trained on COCO 57 | model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True) 58 | 59 | # get number of input features 
for the classifier 60 | in_features = model.roi_heads.box_predictor.cls_score.in_features 61 | # replace the pre-trained head with a new one 62 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 63 | 64 | # now get the number of input features for the mask classifier 65 | in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels 66 | 67 | # and replace the mask predictor with a new one 68 | model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes) 69 | return model 70 | 71 | 72 | def get_iou_types(model): 73 | model_without_ddp = model 74 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 75 | model_without_ddp = model.module 76 | iou_types = ["bbox"] 77 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 78 | iou_types.append("segm") 79 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 80 | iou_types.append("keypoints") 81 | return iou_types 82 | 83 | 84 | @attr.s(auto_attribs=True) 85 | class CocoLikeAnnotations(): 86 | def __attrs_post_init__(self): 87 | self.coco_like_json: dict = {'images': [], 'annotations': []} 88 | self._ann_id: int = 0 89 | 90 | def update_images(self, file_name, height, width, id): 91 | self.coco_like_json['images'].append({'file_name': file_name, 92 | 'height': height, 'width': width, 93 | 'id': id}) 94 | 95 | def update_annotations(self, box, label_id, image_id, is_crowd=0): 96 | segmentation, bbox, area = self.extract_coco_info(box) 97 | self.coco_like_json['annotations'].append({'segmentation': segmentation, 'bbox': bbox, 'area': area, 98 | 'category_id': int(label_id), 'id': self._ann_id, 'iscrowd': is_crowd, 99 | 'image_id': image_id}) 100 | self._ann_id += 1 101 | 102 | @staticmethod 103 | def extract_coco_info(box): 104 | segmentation = list(map(int, [box[0], box[1], box[0], box[3], box[2], box[3], box[2], box[1]])) 105 | bbox = list(map(int, np.append(box[:2], (box[2:] - box[:2])))) 106 | area = int(bbox[2] * bbox[3]) 107 | return segmentation, bbox, area 108 | 109 | def dump_to_json(self, path_to_json='/tmp/inference_results/inference_results.json'): 110 | with open(path_to_json, "w") as write_file: 111 | json.dump(self.coco_like_json, write_file) 112 | -------------------------------------------------------------------------------- /setting_up_allegroai_platform/pytorch.mnist_trains.py: -------------------------------------------------------------------------------- 1 | # TRAINS - Example of Pytorch mnist training integration 2 | # 3 | from __future__ import print_function 4 | import argparse 5 | import os 6 | from tempfile import gettempdir 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | from torchvision import datasets, transforms 13 | 14 | from trains import Task 15 | task = Task.init(project_name='pytorch mnist', task_name='train SGD 0.1') 16 | logger = task.get_logger() 17 | 18 | class Net(nn.Module): 19 | def __init__(self): 20 | super(Net, self).__init__() 21 | self.conv1 = nn.Conv2d(1, 20, 5, 1) 22 | self.conv2 = nn.Conv2d(20, 50, 5, 1) 23 | self.fc1 = nn.Linear(4 * 4 * 50, 500) 24 | self.fc2 = nn.Linear(500, 10) 25 | 26 | def forward(self, x): 27 | x = F.relu(self.conv1(x)) 28 | x = F.max_pool2d(x, 2, 2) 29 | x = F.relu(self.conv2(x)) 30 | x = F.max_pool2d(x, 2, 2) 31 | x = x.view(-1, 4 * 4 * 50) 32 | x = F.relu(self.fc1(x)) 33 | x = self.fc2(x) 34 | return F.log_softmax(x, dim=1) 35 | 36 | 37 | def train(args, model, device, 
train_loader, optimizer, epoch): 38 | model.train() 39 | for batch_idx, (data, target) in enumerate(train_loader): 40 | data, target = data.to(device), target.to(device) 41 | optimizer.zero_grad() 42 | output = model(data) 43 | loss = F.nll_loss(output, target) 44 | loss.backward() 45 | optimizer.step() 46 | if batch_idx % args.log_interval == 0: 47 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 48 | epoch, batch_idx * len(data), len(train_loader.dataset), 49 | 100. * batch_idx / len(train_loader), loss.item())) 50 | logger.report_scalar(title='Training',series='loss',value=loss.item(),iteration=int(100. * batch_idx / len(train_loader))) 51 | 52 | 53 | def test(args, model, device, test_loader, epoch): 54 | model.eval() 55 | test_loss = 0 56 | correct = 0 57 | with torch.no_grad(): 58 | for data, target in test_loader: 59 | data, target = data.to(device), target.to(device) 60 | output = model(data) 61 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 62 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 63 | correct += pred.eq(target.view_as(pred)).sum().item() 64 | 65 | test_loss /= len(test_loader.dataset) 66 | 67 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 68 | test_loss, correct, len(test_loader.dataset), 69 | 100. * correct / len(test_loader.dataset))) 70 | logger.report_scalar(title='Test',series='loss',value=test_loss,iteration=epoch) 71 | logger.report_scalar(title='Test', series='accuracy', value=correct / len(test_loader.dataset), iteration=epoch) 72 | 73 | def main(): 74 | # Training settings 75 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 76 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 77 | help='input batch size for training (default: 64)') 78 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 79 | help='input batch size for testing (default: 1000)') 80 | parser.add_argument('--epochs', type=int, default=5, metavar='N', 81 | help='number of epochs to train (default: 5)') 82 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 83 | help='learning rate (default: 0.01)') 84 | parser.add_argument('--momentum', type=float, default=0.1, metavar='M', 85 | help='SGD momentum (default: 0.1)') 86 | parser.add_argument('--no-cuda', action='store_true', default=False, 87 | help='disables CUDA training') 88 | parser.add_argument('--seed', type=int, default=1, metavar='S', 89 | help='random seed (default: 1)') 90 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 91 | help='how many batches to wait before logging training status') 92 | 93 | parser.add_argument('--save-model', action='store_true', default=True, 94 | help='For Saving the current Model') 95 | parser.add_argument('--task-name', type=str, default='train') 96 | args = parser.parse_args() 97 | use_cuda = not args.no_cuda and torch.cuda.is_available() 98 | 99 | task.set_name(args.task_name) 100 | torch.manual_seed(args.seed) 101 | 102 | device = torch.device("cuda" if use_cuda else "cpu") 103 | 104 | kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {} 105 | train_loader = torch.utils.data.DataLoader( 106 | datasets.MNIST('../data', train=True, download=True, 107 | transform=transforms.Compose([ 108 | transforms.ToTensor(), 109 | transforms.Normalize((0.1307,), (0.3081,)) 110 | ])), 111 | batch_size=args.batch_size, shuffle=True, **kwargs) 112 | test_loader = 
torch.utils.data.DataLoader( 113 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 114 | transforms.ToTensor(), 115 | transforms.Normalize((0.1307,), (0.3081,)) 116 | ])), 117 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 118 | 119 | model = Net().to(device) 120 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 121 | 122 | for epoch in range(1, args.epochs + 1): 123 | train(args, model, device, train_loader, optimizer, epoch) 124 | test(args, model, device, test_loader, epoch) 125 | 126 | if (args.save_model): 127 | torch.save(model.state_dict(), os.path.join(gettempdir(), "mnist_cnn.pt")) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /setting_up_allegroai_platform/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.2.0 2 | trains==0.14.1 3 | torchvision==0.2.1 4 | -------------------------------------------------------------------------------- /the_hero_rises/README.md: -------------------------------------------------------------------------------- 1 | # The Hero’s Journey to Deep Learning CodeBase 2 | ## [Blog IIB: The Hero Rises: Build Your Own SSD](https://allegro.ai/blog/the-hero-rises-build-your-own-ssd/) 3 | 4 | As state-of-the-art models keep changing, you need a modular machine learning codebase that can support and sustain R&D machine learning and deep learning efforts for years. In the first blog of this series, we demonstrated how to write readable and maintainable code that trains a Torchvision MaskRCNN model, harnessing Ignite’s framework. In our second post (part IIA), we detailed the fundamental differences between single-shot and two-shot detectors and why the single-shot approach is in the sweet spot of the speed/accuracy trade-off. So it’s only natural that in this post we show how to leverage the modular nature of the MaskRCNN codebase and extend it to train both MaskRCNN and SSD models. Thanks to that modularity, only minimal changes to the code are needed. 5 | 6 | Torchvision is a package that consists of popular datasets, model architectures, and common image transformations for computer vision. It contains, among other things, a model zoo of pre-trained models for image classification, object detection, person keypoint detection, semantic segmentation and instance segmentation, ready for out-of-the-box use. This makes a PyTorch user’s life significantly easier, as it shortens the time between an idea and a product. Or a research paper. Or a blog post. 7 | 8 | Torchvision does not contain implementations of single-shot object detection models, such as the popular SSD. So, we added one: an SSD implementation that uses a Torchvision model as a backbone for feature extraction. Since its release, many improvements have been proposed on top of the original SSD. However, we have focused on the original SSD meta-architecture for clarity and simplicity.
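
For a quick sense of how the pieces fit together, here is a minimal sketch (not one of the scripts in this folder) showing how the SSD wrapper defined in `SSD/ssd_model.py` is composed from a Torchvision backbone and the hard-negative-mining loss in `SSD/multibox_loss.py`. The class count and the VGG16 backbone below are illustrative assumptions; see `train_model.py` and `inference_with_model.py` for the full training and inference flows.

```python
import torch
import torchvision

from SSD.ssd_model import SSD
from SSD.multibox_loss import SSDLoss

num_classes = 21  # assumption for illustration: 20 object classes + background
backbone = torchvision.models.vgg16(pretrained=True)  # VGG and ResNet backbones are supported

# The SSD module wires the backbone into multi-scale prediction heads and keeps the
# matching loss as a member, so the training loop only ever calls model(images, targets).
model = SSD(backbone=backbone, num_classes=num_classes, loss_function=SSDLoss(num_classes))

# The anchor constants in ssd_model.py assume 512x512 inputs; for a different
# resolution, change_input_size() re-derives the feature-map sizes and steps
# from a dry forward pass through the feature extractor.
model.change_input_size(torch.rand(1, 3, 512, 512) * 255)
```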
9 | -------------------------------------------------------------------------------- /the_hero_rises/SSD/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/the_hero_rises/SSD/__init__.py -------------------------------------------------------------------------------- /the_hero_rises/SSD/box_coder.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import itertools 4 | 5 | from torchvision.ops.boxes import nms 6 | 7 | 8 | class SSDBoxCoder: 9 | def __init__(self, steps, box_sizes, aspect_ratios, fm_sizes): 10 | self.prior_boxes = self._get_default_boxes(steps, box_sizes, aspect_ratios, fm_sizes) 11 | 12 | @staticmethod 13 | def _get_default_boxes(steps, box_sizes, aspect_ratios, fm_sizes): 14 | boxes = [] 15 | for i, fm_size in enumerate(fm_sizes): 16 | for h, w in itertools.product(range(fm_size), repeat=2): 17 | cx = (w + 0.5) * steps[i] 18 | cy = (h + 0.5) * steps[i] 19 | 20 | s = box_sizes[i] 21 | boxes.append((cx, cy, s, s)) 22 | 23 | s = math.sqrt(box_sizes[i] * box_sizes[i + 1]) 24 | boxes.append((cx, cy, s, s)) 25 | 26 | s = box_sizes[i] 27 | for ar in aspect_ratios[i]: 28 | boxes.append((cx, cy, s * math.sqrt(ar), s / math.sqrt(ar))) 29 | boxes.append((cx, cy, s / math.sqrt(ar), s * math.sqrt(ar))) 30 | return torch.Tensor(boxes) 31 | 32 | def encode(self, boxes, labels): 33 | '''Encode target bounding boxes and class labels. 34 | SSD coding rules: 35 | tx = (x - anchor_x) / (variance[0]*anchor_w) 36 | ty = (y - anchor_y) / (variance[0]*anchor_h) 37 | tw = log(w / anchor_w) / variance[1] 38 | th = log(h / anchor_h) / variance[1] 39 | Args: 40 | boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4]. 41 | labels: (tensor) object class labels, sized [#obj,]. 42 | Returns: 43 | loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4]. 44 | cls_targets: (tensor) encoded class labels, sized [#anchors,]. 45 | Reference: 46 | https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py 47 | ''' 48 | def argmax(x): 49 | v, i = x.max(0) 50 | j = v.max(0)[1] 51 | return (i[j], j) 52 | 53 | device = labels.get_device() 54 | prior_boxes = self.prior_boxes.to(device) # xywh 55 | prior_boxes = change_box_order(prior_boxes, 'xywh2xyxy') 56 | 57 | ious = box_iou(prior_boxes, boxes) # [#anchors, #obj] 58 | # index = torch.LongTensor(len(prior_boxes)).fill_(-1).to(device) 59 | index = torch.full(size=torch.Size([prior_boxes.size()[0]]), fill_value=-1, dtype=torch.long, device=device) 60 | masked_ious = ious.clone() 61 | while True: 62 | i, j = argmax(masked_ious) 63 | if masked_ious[i, j] < 1e-6: 64 | break 65 | index[i] = j 66 | masked_ious[i, :] = 0 67 | masked_ious[:, j] = 0 68 | 69 | mask = (index < 0) & (ious.max(1)[0] >= 0.5) 70 | if mask.any(): 71 | index[mask] = ious[mask.nonzero().squeeze(dim=1)].max(1)[1] 72 | 73 | boxes = boxes[index.clamp(min=0)] # negative index not supported 74 | boxes = change_box_order(boxes, 'xyxy2xywh') 75 | prior_boxes = change_box_order(prior_boxes, 'xyxy2xywh') 76 | 77 | variances = (0.1, 0.2) 78 | loc_xy = (boxes[:,:2]-prior_boxes[:,:2]) / prior_boxes[:,2:] / variances[0] 79 | loc_wh = torch.log(boxes[:,2:]/prior_boxes[:,2:]) / variances[1] 80 | loc_targets = torch.cat([loc_xy,loc_wh], 1) 81 | # cls_targets = 1 + labels[index.clamp(min=0)] # TODO: why +1 ??? 
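# Note: the commented-out line above (kept from the reference implementation this file was
# adapted from) shifts the labels by +1 so that class 0 can be reserved for the background.
# Here the dataset labels are assumed to be 1-based already (0 = background), so they are
# used as-is, matching decode() below, which emits labels starting from 1.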
82 | cls_targets = labels[index.clamp(min=0)] 83 | cls_targets[index<0] = 0 84 | return loc_targets, cls_targets 85 | 86 | def decode(self, loc_preds, cls_preds, score_thresh=0.05, nms_thresh=0.45): 87 | """Decode predicted loc/cls back to real box locations and class labels. 88 | Args: 89 | loc_preds: (tensor) predicted loc, sized [8732,4]. 90 | cls_preds: (tensor) predicted conf, sized [8732,21]. 91 | score_thresh: (float) threshold for object confidence score. 92 | nms_thresh: (float) threshold for box nms. 93 | Returns: 94 | boxes: (tensor) bbox locations, sized [#obj,4]. 95 | labels: (tensor) class labels, sized [#obj,]. 96 | """ 97 | device = cls_preds.get_device() if cls_preds.get_device() >= 0 else torch.device('cpu') 98 | prior_boxes = self.prior_boxes.to(device) 99 | variances = (0.1, 0.2) 100 | xy = loc_preds[:, :2] * variances[0] * prior_boxes[:, 2:] + prior_boxes[:, :2] 101 | wh = torch.exp(loc_preds[:, 2:] * variances[1]) * prior_boxes[:, 2:] 102 | box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1) 103 | 104 | boxes = [] 105 | labels = [] 106 | scores = [] 107 | # num_classes = cls_preds.size(1) 108 | # for i in range(1, num_classes): 109 | # score = cls_preds[:, i] 110 | for i, cls_pred in enumerate(cls_preds.split(1, dim=1)[1:]): 111 | score = cls_pred.squeeze(dim=1) 112 | mask = (score > score_thresh).nonzero().squeeze(dim=1) 113 | if mask.sum() == torch.tensor(data=0, device=device): 114 | continue 115 | box = box_preds[mask] 116 | score = score[mask] 117 | 118 | # keep = box_nms(box, score, nms_thresh) 119 | keep = nms(box, score, nms_thresh) 120 | boxes.append(box[keep]) 121 | # labels.append(torch.LongTensor(len(box[keep])).fill_(i+1)) 122 | labels.append(torch.full_like(score[keep], fill_value=i+1, dtype=torch.long, device=device)) 123 | # labels.append(torch.full(size=torch.Size([score[keep].size()[0]]), fill_value=i+1, dtype=torch.long, 124 | # device=device)) 125 | 126 | scores.append(score[keep]) 127 | 128 | if not boxes: 129 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 130 | 131 | boxes = torch.cat(boxes, 0) 132 | labels = torch.cat(labels, 0) 133 | scores = torch.cat(scores, 0) 134 | return boxes, labels, scores 135 | 136 | 137 | def change_box_order(boxes, order): 138 | """Change box order between (xmin,ymin,xmax,ymax) and (xcenter,ycenter,width,height). 139 | 140 | Args: 141 | boxes: (tensor) bounding boxes, sized [N,4]. 142 | order: (str) either 'xyxy2xywh' or 'xywh2xyxy'. 143 | 144 | Returns: 145 | (tensor) converted bounding boxes, sized [N,4]. 146 | """ 147 | assert order in ['xyxy2xywh','xywh2xyxy'] 148 | a = boxes[:,:2] 149 | b = boxes[:,2:] 150 | if order == 'xyxy2xywh': 151 | return torch.cat([(a+b)/2,b-a], 1) 152 | return torch.cat([a-b/2,a+b/2], 1) 153 | 154 | 155 | def box_clamp(boxes, xmin, ymin, xmax, ymax): 156 | """Clamp boxes. 157 | 158 | Args: 159 | boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [N,4]. 160 | xmin: (number) min value of x. 161 | ymin: (number) min value of y. 162 | xmax: (number) max value of x. 163 | ymax: (number) max value of y. 164 | 165 | Returns: 166 | (tensor) clamped boxes. 167 | """ 168 | boxes[:,0].clamp_(min=xmin, max=xmax) 169 | boxes[:,1].clamp_(min=ymin, max=ymax) 170 | boxes[:,2].clamp_(min=xmin, max=xmax) 171 | boxes[:,3].clamp_(min=ymin, max=ymax) 172 | return boxes 173 | 174 | 175 | def box_iou(box1, box2): 176 | """Compute the intersection over union of two set of boxes. 177 | 178 | The box order must be (xmin, ymin, xmax, ymax). 
179 | 180 | Args: 181 | box1: (tensor) bounding boxes, sized [N,4]. 182 | box2: (tensor) bounding boxes, sized [M,4]. 183 | 184 | Return: 185 | (tensor) iou, sized [N,M]. 186 | 187 | Reference: 188 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 189 | """ 190 | # N = box1.size(0) 191 | # M = box2.size(0) 192 | 193 | lt = torch.max(box1[:,None,:2], box2[:,:2]) # [N,M,2] 194 | rb = torch.min(box1[:,None,2:], box2[:,2:]) # [N,M,2] 195 | 196 | wh = (rb-lt).clamp(min=0) # [N,M,2] 197 | inter = wh[:,:,0] * wh[:,:,1] # [N,M] 198 | 199 | area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1]) # [N,] 200 | area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1]) # [M,] 201 | iou = inter / (area1[:,None] + area2 - inter) 202 | return iou 203 | -------------------------------------------------------------------------------- /the_hero_rises/SSD/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class SSDLoss(nn.Module): 6 | def __init__(self, num_classes): 7 | super(SSDLoss, self).__init__() 8 | self.num_classes = num_classes 9 | 10 | def _hard_negative_mining(self, cls_loss, pos): 11 | """Return negative indices that is 3x the number as postive indices. 12 | Args: 13 | cls_loss: (tensor) cross entroy loss between cls_preds and cls_targets, sized [N,#anchors]. 14 | pos: (tensor) positive class mask, sized [N,#anchors]. 15 | Return: 16 | (tensor) negative indices, sized [N,#anchors]. 17 | """ 18 | cls_loss = cls_loss * (pos.float() - 1) 19 | 20 | _, idx = cls_loss.sort(1) # sort by negative losses 21 | _, rank = idx.sort(1) # [N,#anchors] 22 | 23 | num_neg = (3*pos.sum(1)).clamp(min=3) # [N,] 24 | neg = rank < num_neg[:, None] # [N,#anchors] 25 | return neg 26 | 27 | def forward(self, loc_preds, loc_targets, cls_preds, cls_targets): 28 | """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets). 29 | Args: 30 | loc_preds: (tensor) predicted locations, sized [N, #anchors, 4]. 31 | loc_targets: (tensor) encoded target locations, sized [N, #anchors, 4]. 32 | cls_preds: (tensor) predicted class confidences, sized [N, #anchors, #classes]. 33 | cls_targets: (tensor) encoded target labels, sized [N, #anchors]. 34 | loss: 35 | (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + CrossEntropyLoss(cls_preds, cls_targets). 36 | """ 37 | pos = cls_targets > 0 # [N,#anchors] 38 | batch_size = pos.size(0) 39 | num_pos = pos.sum().item() 40 | 41 | # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets) 42 | mask = pos.unsqueeze(2).expand_as(loc_preds) # [N,#anchors,4] 43 | loc_loss = F.smooth_l1_loss(loc_preds[mask], loc_targets[mask], reduction='sum') 44 | 45 | # cls_loss = CrossEntropyLoss(cls_preds, cls_targets) 46 | cls_loss = F.cross_entropy(cls_preds.view(-1, self.num_classes), 47 | cls_targets.view(-1), reduction='none') # [N*#anchors,] 48 | cls_loss = cls_loss.view(batch_size, -1) 49 | cls_loss[cls_targets < 0] = 0 # set ignored loss to 0 50 | neg = self._hard_negative_mining(cls_loss, pos) # [N,#anchors] 51 | cls_loss = cls_loss[pos | neg].sum() 52 | 53 | return {'loc_loss': loc_loss/num_pos, 'cls_loss': cls_loss/num_pos} 54 | -------------------------------------------------------------------------------- /the_hero_rises/SSD/ssd_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | SSD model on top of TorchVision feature extractor. 
3 | The constant values below are suitable for a 512x512 input image. Adapting them automatically to a different image size 4 | can be done by running the change_input_size method. 5 | 6 | requirements: PyTorch and TorchVision 7 | 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | import torchvision 14 | 15 | from SSD.box_coder import SSDBoxCoder 16 | 17 | # Ratio between the original image size and each feature map size. 18 | # I.e., how many pixel steps on the original image are equivalent to a single pixel step on the feature map. 19 | STEPS = (8, 16, 32, 64, 128, 256, 512) 20 | # Length of the shorter side of the anchor rectangles, for each feature map (plus one extra value used for interpolation). 21 | BOX_SIZES = (35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6) 22 | # Aspect ratios of the rectangular SSD anchors, besides 1:1 23 | ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2, 3), (2,), (2,)) 24 | # Feature map sizes. 25 | FM_SIZES = (64, 32, 16, 8, 4, 2, 1) 26 | # Number of anchors per spatial location, for each feature map 27 | NUM_ANCHORS = (4, 6, 6, 6, 6, 4, 4) 28 | # Number of channels in each feature map, i.e. the channel (third) dimension. 29 | IN_CHANNELS = (512, 1024, 512, 256, 256, 256, 256) 30 | 31 | 32 | class HeadsExtractor(nn.Module): 33 | def __init__(self, backbone): 34 | super(HeadsExtractor, self).__init__() 35 | 36 | def split_backbone(net): 37 | features_extraction = [x for x in net.children()][:-2] 38 | 39 | if type(net) == torchvision.models.vgg.VGG: 40 | features_extraction = [*features_extraction[0]] 41 | net_till_conv4_3 = features_extraction[:-8] 42 | rest_of_net = features_extraction[-7:-1] 43 | elif type(net) == torchvision.models.resnet.ResNet: 44 | net_till_conv4_3 = features_extraction[:-2] 45 | rest_of_net = features_extraction[-2] 46 | else: 47 | raise ValueError('We only support VGG and ResNet backbones') 48 | return nn.Sequential(*net_till_conv4_3), nn.Sequential(*rest_of_net) 49 | 50 | self.till_conv4_3, self.till_conv5_3 = split_backbone(backbone) 51 | self.norm4 = L2Norm(512, 20) 52 | 53 | self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1) 54 | self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1) 55 | self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1) 56 | 57 | self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 58 | self.conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 59 | 60 | self.conv8_1 = nn.Conv2d(1024, 256, kernel_size=1) 61 | self.conv8_2 = nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2) 62 | 63 | self.conv9_1 = nn.Conv2d(512, 128, kernel_size=1) 64 | self.conv9_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2) 65 | 66 | self.conv10_1 = nn.Conv2d(256, 128, kernel_size=1) 67 | self.conv10_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2) 68 | 69 | self.conv11_1 = nn.Conv2d(256, 128, kernel_size=1) 70 | self.conv11_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2) 71 | 72 | self.conv12_1 = nn.Conv2d(256, 128, kernel_size=1) 73 | self.conv12_2 = nn.Conv2d(128, 256, kernel_size=4, padding=1) 74 | 75 | def forward(self, x): 76 | hs = [] 77 | h = self.till_conv4_3(x) 78 | hs.append(self.norm4(h)) 79 | 80 | if type(self.till_conv5_3[-1]) != torchvision.models.resnet.Bottleneck: 81 | h = F.max_pool2d(h, kernel_size=2, stride=2, ceil_mode=True) 82 | h = self.till_conv5_3(h) 83 | h = F.max_pool2d(h, kernel_size=3, stride=1, padding=1, ceil_mode=True) 84 | 85 | h = F.relu(self.conv6(h)) 86 | h = F.relu(self.conv7(h)) 87 | else: 88 | h = self.till_conv5_3(h) 89 | hs.append(h) # conv7 90 | 91 | h = 
F.relu(self.conv8_1(h)) 92 | h = F.relu(self.conv8_2(h)) 93 | hs.append(h) # conv8_2 94 | 95 | h = F.relu(self.conv9_1(h)) 96 | h = F.relu(self.conv9_2(h)) 97 | hs.append(h) # conv9_2 98 | 99 | h = F.relu(self.conv10_1(h)) 100 | h = F.relu(self.conv10_2(h)) 101 | hs.append(h) # conv10_2 102 | 103 | h = F.relu(self.conv11_1(h)) 104 | h = F.relu(self.conv11_2(h)) 105 | hs.append(h) # conv11_2 106 | 107 | h = F.relu(self.conv12_1(h)) 108 | h = F.relu(self.conv12_2(h)) 109 | hs.append(h) # conv12_2 110 | return hs 111 | 112 | 113 | class SSD(nn.Module): 114 | def __init__(self, backbone, num_classes, loss_function, 115 | num_anchors=NUM_ANCHORS, 116 | in_channels=IN_CHANNELS, 117 | steps=STEPS, 118 | box_sizes=BOX_SIZES, 119 | aspect_ratios=ASPECT_RATIOS, 120 | fm_sizes=FM_SIZES, 121 | heads_extractor_class=HeadsExtractor): 122 | super(SSD, self).__init__() 123 | self.num_classes = num_classes 124 | self.num_anchors = num_anchors 125 | self.in_channels = in_channels 126 | self.steps = steps 127 | self.box_sizes = box_sizes 128 | self.aspect_ratios = aspect_ratios 129 | self.fm_sizes = fm_sizes 130 | 131 | self.extractor = heads_extractor_class(backbone) 132 | self.criterion = loss_function 133 | self.box_coder = SSDBoxCoder(self.steps, self.box_sizes, self.aspect_ratios, self.fm_sizes) 134 | 135 | self._create_heads() 136 | 137 | def _create_heads(self): 138 | self.loc_layers = nn.ModuleList() 139 | self.cls_layers = nn.ModuleList() 140 | for i in range(len(self.in_channels)): 141 | self.loc_layers += [nn.Conv2d(self.in_channels[i], self.num_anchors[i] * 4, kernel_size=3, padding=1)] 142 | self.cls_layers += [nn.Conv2d(self.in_channels[i], self.num_anchors[i] * self.num_classes, kernel_size=3, 143 | padding=1)] 144 | 145 | def change_input_size(self, x): 146 | heads = self.extractor(x) 147 | self.fm_sizes = tuple([head.shape[-1] for head in heads]) 148 | image_size = x.shape[-1] 149 | self.steps = tuple([image_size//fm for fm in self.fm_sizes]) 150 | self.box_coder = SSDBoxCoder(self.steps, self.box_sizes, self.aspect_ratios, self.fm_sizes) 151 | 152 | def forward(self, images, targets=None): 153 | if self.training and targets is None: 154 | raise ValueError("In training mode, targets should be passed") 155 | loc_preds = [] 156 | cls_preds = [] 157 | input_images = torch.stack(images) if isinstance(images, list) else images 158 | extracted_batch = self.extractor(input_images) 159 | for i, x in enumerate(extracted_batch): 160 | loc_pred = self.loc_layers[i](x) 161 | loc_pred = loc_pred.permute(0, 2, 3, 1).contiguous() 162 | loc_preds.append(loc_pred.view(loc_pred.size(0), -1, 4)) 163 | 164 | cls_pred = self.cls_layers[i](x) 165 | cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous() 166 | cls_preds.append(cls_pred.view(cls_pred.size(0), -1, self.num_classes)) 167 | 168 | loc_preds = torch.cat(loc_preds, 1) 169 | cls_preds = torch.cat(cls_preds, 1) 170 | 171 | if self.training: 172 | encoded_targets = [self.box_coder.encode(target['boxes'], target['labels']) for target in targets] 173 | loc_targets = torch.stack([encoded_target[0] for encoded_target in encoded_targets]) 174 | cls_targets = torch.stack([encoded_target[1] for encoded_target in encoded_targets]) 175 | losses = self.criterion(loc_preds, loc_targets, cls_preds, cls_targets) 176 | return losses 177 | 178 | detections = [] 179 | for batch, (loc, cls) in enumerate(zip(loc_preds.split(split_size=1, dim=0), 180 | cls_preds.split(split_size=1, dim=0))): 181 | boxes, labels, scores = self.box_coder.decode(loc.squeeze(), 
F.softmax(cls.squeeze(), dim=1)) 182 | detections.append({'boxes': boxes, 'labels': labels, 'scores': scores}) 183 | 184 | return detections 185 | 186 | 187 | class L2Norm(nn.Module): 188 | """L2Norm layer across all channels.""" 189 | 190 | def __init__(self, in_features, scale): 191 | super(L2Norm, self).__init__() 192 | self.weight = nn.Parameter(torch.Tensor(in_features)) 193 | self.reset_parameters(scale) 194 | 195 | def reset_parameters(self, scale): 196 | nn.init.constant_(self.weight, scale) 197 | 198 | def forward(self, x): 199 | x = F.normalize(x, dim=1) 200 | scale = self.weight[None, :, None, None] 201 | return scale * x 202 | 203 | 204 | 205 | # Based on https://github.com/kuangliu/torchcv/tree/master/examples/ssd 206 | -------------------------------------------------------------------------------- /the_hero_rises/engines.py: -------------------------------------------------------------------------------- 1 | import math 2 | import copy 3 | import torch 4 | 5 | from ignite.engine import Engine 6 | from torchvision_references import utils 7 | 8 | 9 | def create_trainer(model, device): 10 | def update_model(engine, batch): 11 | images, targets = copy.deepcopy(batch) 12 | images_model, targets_model = prepare_batch(batch, device=device) 13 | 14 | loss_dict = model(images_model, targets_model) 15 | losses = sum(loss for loss in loss_dict.values()) 16 | 17 | # reduce losses over all GPUs for logging purposes 18 | loss_dict_reduced = utils.reduce_dict(loss_dict) 19 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 20 | 21 | loss_value = losses_reduced.item() 22 | 23 | engine.state.optimizer.zero_grad() 24 | if not math.isfinite(loss_value): 25 | print("Loss is {}, resetting loss and skipping training iteration".format(loss_value)) 26 | print('Loss values were: ', loss_dict_reduced) 27 | print('Input labels were: ', [target['labels'] for target in targets]) 28 | print('Input boxes were: ', [target['boxes'] for target in targets]) 29 | loss_dict_reduced = {k: torch.tensor(0) for k, v in loss_dict_reduced.items()} 30 | else: 31 | losses.backward() 32 | engine.state.optimizer.step() 33 | 34 | if engine.state.warmup_scheduler is not None: 35 | engine.state.warmup_scheduler.step() 36 | 37 | images_model = targets_model = None 38 | 39 | return images, targets, loss_dict_reduced 40 | return Engine(update_model) 41 | 42 | 43 | def create_evaluator(model, device): 44 | def update_model(engine, batch): 45 | images, targets = prepare_batch(batch, device=device) 46 | images_model = copy.deepcopy(images) 47 | 48 | torch.cuda.synchronize() 49 | with torch.no_grad(): 50 | outputs = model(images_model) 51 | 52 | outputs = [{k: v.to(device) for k, v in t.items()} for t in outputs] 53 | 54 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 55 | engine.state.coco_evaluator.update(res) 56 | 57 | images_model = outputs = None 58 | 59 | return images, targets, res 60 | return Engine(update_model) 61 | 62 | 63 | def prepare_batch(batch, device=None): 64 | images, targets = batch 65 | images = list(image.to(device, non_blocking=True) for image in images) 66 | targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets] 67 | return images, targets 68 | -------------------------------------------------------------------------------- /the_hero_rises/inference_with_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | import torch 6 
| from torch.utils.tensorboard import SummaryWriter 7 | 8 | from argparse import ArgumentParser 9 | from pathlib2 import Path 10 | 11 | from utilities import get_iou_types, draw_boxes, get_model_instance_segmentation, CocoLikeAnnotations, get_backbone 12 | from torchvision_references import utils 13 | from torchvision.transforms import functional as F 14 | 15 | from PIL import Image 16 | from transforms import get_transform 17 | 18 | from SSD.ssd_model import SSD 19 | from SSD.multibox_loss import SSDLoss 20 | 21 | from trains import Task 22 | task = Task.init(project_name='Object Detection with TRAINS, Ignite and TensorBoard', 23 | task_name='Inference with trained SSD model') 24 | 25 | 26 | def rescale_box(box, image_size, orig_height, orig_width): 27 | rescale_height = float(orig_height) / image_size 28 | rescale_width = float(orig_width) / image_size 29 | box[:2] *= rescale_width 30 | box[2:] *= rescale_height 31 | return box 32 | 33 | 34 | def run(task_args): 35 | writer = SummaryWriter(log_dir=task_args.log_dir) 36 | input_checkpoint = torch.load(task_args.input_checkpoint) 37 | labels_enum = input_checkpoint.get('labels_enumeration') 38 | model_configuration = input_checkpoint.get('configuration') 39 | model_weights = input_checkpoint.get('model') 40 | image_size = model_configuration.get('image_size') 41 | 42 | # Set the device to GPU if available - if not, set it to CPU 43 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 44 | torch.backends.cudnn.benchmark = True if torch.cuda.is_available() else False # optimization for fixed input size 45 | 46 | # Get the relevant model based on the checkpoint configuration 47 | num_classes = model_configuration.get('num_classes') 48 | if model_configuration.get('model_type') == 'maskrcnn': 49 | model = get_model_instance_segmentation(num_classes, model_configuration.get('mask_predictor_hidden_layer')) 50 | elif model_configuration.get('model_type') == 'ssd': 51 | backbone = get_backbone(model_configuration.get('ssd_backbone')) 52 | model = SSD(backbone=backbone, num_classes=num_classes, loss_function=SSDLoss(num_classes)) 53 | model.change_input_size(torch.rand(size=(1, 3, model_configuration.get('image_size'), model_configuration.get('image_size')))*255) 54 | else: 55 | raise ValueError('Only "maskrcnn" and "ssd" are supported as model type') 56 | 57 | # if there is more than one GPU, parallelize the model 58 | if torch.cuda.device_count() > 1: 59 | print("{} GPUs were detected - we will use all of them".format(torch.cuda.device_count())) 60 | model = torch.nn.DataParallel(model) 61 | 62 | # copy the model to each device 63 | model.to(device) 64 | 65 | # Determine the IoU types supported by the model 66 | iou_types = get_iou_types(model) 67 | use_mask = True if "segm" in iou_types else False 68 | 69 | # Load pretrained model weights 70 | model.load_state_dict(model_weights) 71 | 72 | # set the model to inference mode 73 | model.eval() 74 | 75 | images_paths = [] 76 | for file_type in ('*.png', '*.jpg', '*.jpeg'): 77 | images_paths.extend(glob.glob(os.path.join(task_args.input_dataset_root, file_type))) 78 | 79 | transforms = get_transform(train=False, image_size=image_size) 80 | 81 | path_to_json = os.path.join(task_args.output_dir, "inference_results.json") 82 | coco_like_anns = CocoLikeAnnotations() 83 | batch_images = [] 84 | batch_paths = [] 85 | batch_shapes = [] 86 | 87 | for i, image_path in enumerate(images_paths): 88 | img = Image.open(image_path).convert('RGB') 89 | batch_shapes.append({'height': img.height, 'width': 
img.width}) 90 | img, __ = transforms(img) 91 | batch_images.append(img) 92 | batch_paths.append(image_path) 93 | if len(batch_images) < task_args.batch_size: 94 | continue 95 | 96 | input_images = torch.stack(batch_images) 97 | 98 | with torch.no_grad(): 99 | torch_out = model(input_images.to(device)) 100 | 101 | for img_num, image in enumerate(input_images): 102 | valid_detections = torch_out[img_num].get('scores') >= args.detection_thresh 103 | img_boxes = torch_out[img_num].get('boxes')[valid_detections].cpu().numpy() 104 | img_labels_ids = torch_out[img_num].get('labels')[valid_detections].cpu().numpy() 105 | img_labels = [labels_enum[label]['name'] for label in img_labels_ids] 106 | image_id = (i + 1 - task_args.batch_size + img_num) 107 | orig_height = batch_shapes[img_num].get('height') 108 | orig_width = batch_shapes[img_num].get('width') 109 | 110 | coco_like_anns.update_images(file_name=Path(batch_paths[img_num]).name, 111 | height=orig_height, width=orig_width, 112 | id=image_id) 113 | 114 | for box, label, label_id in zip(img_boxes, img_labels, img_labels_ids): 115 | orig_box = rescale_box(image_size=image_size, orig_height=orig_height, orig_width=orig_width, box=box.copy()) 116 | coco_like_anns.update_annotations(box=orig_box, label_id=label_id, 117 | image_id=image_id) 118 | 119 | if ((i+1)/task_args.batch_size) % task_args.log_interval == 0: 120 | print('Batch {}: Saving detections of file {} to {}'.format(int((i+1)/task_args.batch_size), 121 | Path(batch_paths[img_num]).name, 122 | path_to_json)) 123 | 124 | if ((i+1)/task_args.batch_size) % task_args.debug_images_interval == 0: 125 | debug_image = draw_boxes(np.array(F.to_pil_image(image.cpu())), img_boxes, img_labels, color=(0, 150, 0)) 126 | writer.add_image("inference/image_{}".format(img_num), debug_image, ((i+1)/task_args.batch_size), 127 | dataformats='HWC') 128 | 129 | batch_images = [] 130 | batch_paths = [] 131 | 132 | coco_like_anns.dump_to_json(path_to_json=path_to_json) 133 | 134 | 135 | if __name__ == "__main__": 136 | parser = ArgumentParser() 137 | parser.add_argument('--batch_size', type=int, default=4, 138 | help='input batch size for training and validation (default: 4)') 139 | parser.add_argument('--detection_thresh', type=float, default=0.4, 140 | help='Inference confidence threshold') 141 | parser.add_argument('--log_interval', type=int, default=100, 142 | help='how many batches to wait before logging training status') 143 | parser.add_argument('--debug_images_interval', type=int, default=500, 144 | help='how many batches to wait before logging debug images') 145 | parser.add_argument('--input_dataset_root', type=str, 146 | default='/media/dan/bigdata/datasets/coco/2017/val2017', 147 | help='annotation file of test dataset') 148 | parser.add_argument('--input_checkpoint', type=str, default='/tmp/checkpoints/model_epoch_10.pth', 149 | help='Checkpoint to use for inference') 150 | parser.add_argument("--output_dir", type=str, default="/tmp/inference_results", 151 | help="output directory for saving models checkpoints") 152 | parser.add_argument("--log_dir", type=str, default="/tmp/tensorboard_logs", 153 | help="log directory for Tensorboard log output") 154 | args = parser.parse_args() 155 | 156 | if not os.path.exists(args.output_dir): 157 | utils.mkdir(args.output_dir) 158 | if not os.path.exists(args.log_dir): 159 | utils.mkdir(args.log_dir) 160 | 161 | run(args) 162 | -------------------------------------------------------------------------------- /the_hero_rises/requirements.txt: 
-------------------------------------------------------------------------------- 1 | Pillow == 10.2.0 2 | attrs == 19.3.0 3 | numpy == 1.22.0 4 | opencv_python == 4.2.0.32 5 | pathlib2 == 2.3.5 6 | pycocotools == 2.0.0 7 | pytorch_ignite == 0.2.1 8 | torch == 1.3.1 9 | torchvision == 0.4.2 10 | trains == 0.13.1 11 | tensorboard==2.1.0 12 | -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/the_hero_rises/torchvision_references/__init__.py -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/coco_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import copy 5 | import torch 6 | import torch._six 7 | 8 | from pycocotools.cocoeval import COCOeval 9 | from pycocotools.coco import COCO 10 | import pycocotools.mask as mask_util 11 | 12 | from collections import defaultdict 13 | 14 | from torchvision_references import utils 15 | 16 | 17 | class CocoEvaluator(object): 18 | def __init__(self, coco_gt, iou_types): 19 | assert isinstance(iou_types, (list, tuple)) 20 | coco_gt = copy.deepcopy(coco_gt) 21 | self.coco_gt = coco_gt 22 | 23 | self.iou_types = iou_types 24 | self.coco_eval = {} 25 | for iou_type in iou_types: 26 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 27 | 28 | self.img_ids = [] 29 | self.eval_imgs = {k: [] for k in iou_types} 30 | 31 | def update(self, predictions): 32 | img_ids = list(np.unique(list(predictions.keys()))) 33 | self.img_ids.extend(img_ids) 34 | 35 | for iou_type in self.iou_types: 36 | results = self.prepare(predictions, iou_type) 37 | coco_dt = loadRes(self.coco_gt, results) if results else COCO() 38 | coco_eval = self.coco_eval[iou_type] 39 | 40 | coco_eval.cocoDt = coco_dt 41 | coco_eval.params.imgIds = list(img_ids) 42 | img_ids, eval_imgs = evaluate(coco_eval) 43 | 44 | self.eval_imgs[iou_type].append(eval_imgs) 45 | 46 | def synchronize_between_processes(self): 47 | for iou_type in self.iou_types: 48 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 49 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 50 | 51 | def accumulate(self): 52 | for coco_eval in self.coco_eval.values(): 53 | coco_eval.accumulate() 54 | 55 | def summarize(self): 56 | for iou_type, coco_eval in self.coco_eval.items(): 57 | print("IoU metric: {}".format(iou_type)) 58 | coco_eval.summarize() 59 | 60 | def prepare(self, predictions, iou_type): 61 | if iou_type == "bbox": 62 | return self.prepare_for_coco_detection(predictions) 63 | elif iou_type == "segm": 64 | return self.prepare_for_coco_segmentation(predictions) 65 | elif iou_type == "keypoints": 66 | return self.prepare_for_coco_keypoint(predictions) 67 | else: 68 | raise ValueError("Unknown iou type {}".format(iou_type)) 69 | 70 | def prepare_for_coco_detection(self, predictions): 71 | coco_results = [] 72 | for original_id, prediction in predictions.items(): 73 | if len(prediction) == 0: 74 | continue 75 | 76 | boxes = prediction["boxes"] 77 | boxes = convert_to_xywh(boxes).tolist() 78 | scores = prediction["scores"].tolist() 79 | labels = prediction["labels"].tolist() 80 | 81 | coco_results.extend( 82 | [ 83 | { 84 | "image_id": original_id, 85 
| "category_id": labels[k], 86 | "bbox": box, 87 | "score": scores[k], 88 | } 89 | for k, box in enumerate(boxes) 90 | ] 91 | ) 92 | return coco_results 93 | 94 | def prepare_for_coco_segmentation(self, predictions): 95 | coco_results = [] 96 | for original_id, prediction in predictions.items(): 97 | if len(prediction) == 0: 98 | continue 99 | 100 | masks = prediction["masks"] 101 | masks = (masks > 0.5).type(torch.uint8) 102 | 103 | scores = prediction["scores"].tolist() 104 | labels = prediction["labels"].tolist() 105 | 106 | rles = [ 107 | mask_util.encode(np.array(mask.cpu()[0, :, :, np.newaxis], order="F"))[0] 108 | for mask in masks 109 | ] 110 | for rle in rles: 111 | rle["counts"] = rle["counts"].decode("utf-8") 112 | 113 | coco_results.extend( 114 | [ 115 | { 116 | "image_id": original_id, 117 | "category_id": labels[k], 118 | "segmentation": rle, 119 | "score": scores[k], 120 | } 121 | for k, rle in enumerate(rles) 122 | ] 123 | ) 124 | return coco_results 125 | 126 | def prepare_for_coco_keypoint(self, predictions): 127 | coco_results = [] 128 | for original_id, prediction in predictions.items(): 129 | if len(prediction) == 0: 130 | continue 131 | 132 | boxes = prediction["boxes"] 133 | boxes = convert_to_xywh(boxes).tolist() 134 | scores = prediction["scores"].tolist() 135 | labels = prediction["labels"].tolist() 136 | keypoints = prediction["keypoints"] 137 | keypoints = keypoints.flatten(start_dim=1).tolist() 138 | 139 | coco_results.extend( 140 | [ 141 | { 142 | "image_id": original_id, 143 | "category_id": labels[k], 144 | 'keypoints': keypoint, 145 | "score": scores[k], 146 | } 147 | for k, keypoint in enumerate(keypoints) 148 | ] 149 | ) 150 | return coco_results 151 | 152 | 153 | def convert_to_xywh(boxes): 154 | xmin, ymin, xmax, ymax = boxes.unbind(1) 155 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 156 | 157 | 158 | def merge(img_ids, eval_imgs): 159 | all_img_ids = utils.all_gather(img_ids) 160 | all_eval_imgs = utils.all_gather(eval_imgs) 161 | 162 | merged_img_ids = [] 163 | for p in all_img_ids: 164 | merged_img_ids.extend(p) 165 | 166 | merged_eval_imgs = [] 167 | for p in all_eval_imgs: 168 | merged_eval_imgs.append(p) 169 | 170 | merged_img_ids = np.array(merged_img_ids) 171 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 172 | 173 | # keep only unique (and in sorted order) images 174 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 175 | merged_eval_imgs = merged_eval_imgs[..., idx] 176 | 177 | return merged_img_ids, merged_eval_imgs 178 | 179 | 180 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 181 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 182 | img_ids = list(img_ids) 183 | eval_imgs = list(eval_imgs.flatten()) 184 | 185 | coco_eval.evalImgs = eval_imgs 186 | coco_eval.params.imgIds = img_ids 187 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 188 | 189 | 190 | ################################################################# 191 | # From pycocotools, just removed the prints and fixed 192 | # a Python3 bug about unicode not defined 193 | ################################################################# 194 | 195 | # Ideally, pycocotools wouldn't have hard-coded prints 196 | # so that we could avoid copy-pasting those two functions 197 | 198 | def createIndex(self): 199 | # create index 200 | # print('creating index...') 201 | anns, cats, imgs = {}, {}, {} 202 | imgToAnns, catToImgs = defaultdict(list), defaultdict(list) 203 | if 'annotations' in self.dataset: 204 | for ann in 
self.dataset['annotations']: 205 | imgToAnns[ann['image_id']].append(ann) 206 | anns[ann['id']] = ann 207 | 208 | if 'images' in self.dataset: 209 | for img in self.dataset['images']: 210 | imgs[img['id']] = img 211 | 212 | if 'categories' in self.dataset: 213 | for cat in self.dataset['categories']: 214 | cats[cat['id']] = cat 215 | 216 | if 'annotations' in self.dataset and 'categories' in self.dataset: 217 | for ann in self.dataset['annotations']: 218 | catToImgs[ann['category_id']].append(ann['image_id']) 219 | 220 | # print('index created!') 221 | 222 | # create class members 223 | self.anns = anns 224 | self.imgToAnns = imgToAnns 225 | self.catToImgs = catToImgs 226 | self.imgs = imgs 227 | self.cats = cats 228 | 229 | 230 | maskUtils = mask_util 231 | 232 | 233 | def loadRes(self, resFile): 234 | """ 235 | Load result file and return a result api object. 236 | :param resFile (str) : file name of result file 237 | :return: res (obj) : result api object 238 | """ 239 | res = COCO() 240 | res.dataset['images'] = [img for img in self.dataset['images']] 241 | 242 | # print('Loading and preparing results...') 243 | # tic = time.time() 244 | if isinstance(resFile, torch._six.string_classes): 245 | anns = json.load(open(resFile)) 246 | elif type(resFile) == np.ndarray: 247 | anns = self.loadNumpyAnnotations(resFile) 248 | else: 249 | anns = resFile 250 | assert type(anns) == list, 'results in not an array of objects' 251 | annsImgIds = [ann['image_id'] for ann in anns] 252 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 253 | 'Results do not correspond to current coco set' 254 | if 'caption' in anns[0]: 255 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 256 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 257 | for id, ann in enumerate(anns): 258 | ann['id'] = id + 1 259 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 260 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 261 | for id, ann in enumerate(anns): 262 | bb = ann['bbox'] 263 | x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 264 | if 'segmentation' not in ann: 265 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 266 | ann['area'] = bb[2] * bb[3] 267 | ann['id'] = id + 1 268 | ann['iscrowd'] = 0 269 | elif 'segmentation' in anns[0]: 270 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 271 | for id, ann in enumerate(anns): 272 | # now only support compressed RLE format as segmentation results 273 | ann['area'] = maskUtils.area(ann['segmentation']) 274 | if 'bbox' not in ann: 275 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 276 | ann['id'] = id + 1 277 | ann['iscrowd'] = 0 278 | elif 'keypoints' in anns[0]: 279 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 280 | for id, ann in enumerate(anns): 281 | s = ann['keypoints'] 282 | x = s[0::3] 283 | y = s[1::3] 284 | x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) 285 | ann['area'] = (x2 - x1) * (y2 - y1) 286 | ann['id'] = id + 1 287 | ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] 288 | # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 289 | 290 | res.dataset['annotations'] = anns 291 | createIndex(res) 292 | return res 293 | 294 | 295 | def evaluate(self): 296 | ''' 297 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 298 | :return: None 299 | ''' 300 | # tic = time.time() 301 | # print('Running per image 
evaluation...') 302 | p = self.params 303 | # add backward compatibility if useSegm is specified in params 304 | if p.useSegm is not None: 305 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 306 | print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) 307 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 308 | p.imgIds = list(np.unique(p.imgIds)) 309 | if p.useCats: 310 | p.catIds = list(np.unique(p.catIds)) 311 | p.maxDets = sorted(p.maxDets) 312 | self.params = p 313 | 314 | self._prepare() 315 | # loop through images, area range, max detection number 316 | catIds = p.catIds if p.useCats else [-1] 317 | 318 | if p.iouType == 'segm' or p.iouType == 'bbox': 319 | computeIoU = self.computeIoU 320 | elif p.iouType == 'keypoints': 321 | computeIoU = self.computeOks 322 | self.ious = { 323 | (imgId, catId): computeIoU(imgId, catId) 324 | for imgId in p.imgIds 325 | for catId in catIds} 326 | 327 | evaluateImg = self.evaluateImg 328 | maxDet = p.maxDets[-1] 329 | evalImgs = [ 330 | evaluateImg(imgId, catId, areaRng, maxDet) 331 | for catId in catIds 332 | for areaRng in p.areaRng 333 | for imgId in p.imgIds 334 | ] 335 | # this is NOT in the pycocotools code, but could be done outside 336 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 337 | self._paramsEval = copy.deepcopy(self.params) 338 | # toc = time.time() 339 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 340 | return p.imgIds, evalImgs 341 | 342 | ################################################################# 343 | # end of straight copy from pycocotools, just removing the prints 344 | ################################################################# 345 | -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | import torchvision 7 | 8 | from pycocotools import mask as coco_mask 9 | from pycocotools.coco import COCO 10 | 11 | import transforms as T 12 | 13 | 14 | class FilterAndRemapCocoCategories(object): 15 | def __init__(self, categories, remap=True): 16 | self.categories = categories 17 | self.remap = remap 18 | 19 | def __call__(self, image, target): 20 | anno = target["annotations"] 21 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 22 | if not self.remap: 23 | target["annotations"] = anno 24 | return image, target 25 | anno = copy.deepcopy(anno) 26 | for obj in anno: 27 | obj["category_id"] = self.categories.index(obj["category_id"]) 28 | target["annotations"] = anno 29 | return image, target 30 | 31 | 32 | def convert_coco_poly_to_mask(segmentations, height, width): 33 | masks = [] 34 | for polygons in segmentations: 35 | rles = coco_mask.frPyObjects(polygons, height, width) 36 | mask = coco_mask.decode(rles) 37 | if len(mask.shape) < 3: 38 | mask = mask[..., None] 39 | mask = torch.as_tensor(mask, dtype=torch.uint8) 40 | mask = mask.any(dim=2) 41 | masks.append(mask) 42 | if masks: 43 | masks = torch.stack(masks, dim=0) 44 | else: 45 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 46 | return masks 47 | 48 | 49 | class ConvertCocoPolysToMask(object): 50 | def __call__(self, image, target): 51 | w, h = image.size 52 | 53 | image_id = target["image_id"] 54 | image_id = torch.tensor([image_id]) 55 | 56 | anno = target["annotations"] 57 | 58 | anno = [obj for obj in anno if 
obj['iscrowd'] == 0] 59 | 60 | boxes = [obj["bbox"] for obj in anno] 61 | # guard against no boxes via resizing 62 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 63 | boxes[:, 2:] += boxes[:, :2] 64 | boxes[:, 0::2].clamp_(min=0, max=w) 65 | boxes[:, 1::2].clamp_(min=0, max=h) 66 | 67 | classes = [obj["category_id"] for obj in anno] 68 | classes = torch.tensor(classes, dtype=torch.int64) 69 | 70 | segmentations = [obj["segmentation"] for obj in anno] 71 | masks = convert_coco_poly_to_mask(segmentations, h, w) 72 | 73 | keypoints = None 74 | if anno and "keypoints" in anno[0]: 75 | keypoints = [obj["keypoints"] for obj in anno] 76 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 77 | num_keypoints = keypoints.shape[0] 78 | if num_keypoints: 79 | keypoints = keypoints.view(num_keypoints, -1, 3) 80 | 81 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 82 | boxes = boxes[keep] 83 | classes = classes[keep] 84 | masks = masks[keep] 85 | if keypoints is not None: 86 | keypoints = keypoints[keep] 87 | 88 | target = {} 89 | target["boxes"] = boxes 90 | target["labels"] = classes 91 | target["masks"] = masks 92 | target["image_id"] = image_id 93 | if keypoints is not None: 94 | target["keypoints"] = keypoints 95 | 96 | # for conversion to coco api 97 | area = torch.tensor([obj["area"] for obj in anno]) 98 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 99 | target["area"] = area 100 | target["iscrowd"] = iscrowd 101 | 102 | return image, target 103 | 104 | 105 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 106 | def _has_only_empty_bbox(anno): 107 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 108 | 109 | def _count_visible_keypoints(anno): 110 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 111 | 112 | min_keypoints_per_image = 10 113 | 114 | def _has_valid_annotation(anno): 115 | # if it's empty, there is no annotation 116 | if len(anno) == 0: 117 | return False 118 | # if all boxes have close to zero area, there is no annotation 119 | if _has_only_empty_bbox(anno): 120 | return False 121 | # keypoints task have a slight different critera for considering 122 | # if an annotation is valid 123 | if "keypoints" not in anno[0]: 124 | return True 125 | # for keypoint detection tasks, only consider valid images those 126 | # containing at least min_keypoints_per_image 127 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 128 | return True 129 | return False 130 | 131 | assert isinstance(dataset, torchvision.datasets.CocoDetection) 132 | ids = [] 133 | for ds_idx, img_id in enumerate(dataset.ids): 134 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 135 | anno = dataset.coco.loadAnns(ann_ids) 136 | if cat_list: 137 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 138 | if _has_valid_annotation(anno): 139 | ids.append(ds_idx) 140 | 141 | dataset = torch.utils.data.Subset(dataset, ids) 142 | return dataset 143 | 144 | 145 | def convert_to_coco_api(ds): 146 | coco_ds = COCO() 147 | ann_id = 0 148 | dataset = {'images': [], 'categories': [], 'annotations': []} 149 | categories = set() 150 | for img_idx in range(len(ds)): 151 | # find better way to get target 152 | # targets = ds.get_annotations(img_idx) 153 | img, targets = ds[img_idx] 154 | image_id = targets["image_id"].item() 155 | img_dict = {} 156 | img_dict['id'] = image_id 157 | img_dict['height'] = img.shape[-2] 158 | img_dict['width'] = img.shape[-1] 159 | 
dataset['images'].append(img_dict) 160 | bboxes = targets["boxes"] 161 | bboxes[:, 2:] -= bboxes[:, :2] 162 | bboxes = bboxes.tolist() 163 | labels = targets['labels'].tolist() 164 | areas = targets['area'].tolist() 165 | iscrowd = targets['iscrowd'].tolist() 166 | if 'masks' in targets: 167 | masks = targets['masks'] 168 | # make masks Fortran contiguous for coco_mask 169 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 170 | if 'keypoints' in targets: 171 | keypoints = targets['keypoints'] 172 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 173 | num_objs = len(bboxes) 174 | for i in range(num_objs): 175 | ann = {} 176 | ann['image_id'] = image_id 177 | ann['bbox'] = bboxes[i] 178 | ann['category_id'] = labels[i] 179 | categories.add(labels[i]) 180 | ann['area'] = areas[i] 181 | ann['iscrowd'] = iscrowd[i] 182 | ann['id'] = ann_id 183 | if 'masks' in targets: 184 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 185 | if 'keypoints' in targets: 186 | ann['keypoints'] = keypoints[i] 187 | ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) 188 | dataset['annotations'].append(ann) 189 | ann_id += 1 190 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 191 | coco_ds.dataset = dataset 192 | coco_ds.createIndex() 193 | return coco_ds 194 | 195 | 196 | def get_coco_api_from_dataset(dataset): 197 | for _ in range(10): 198 | if isinstance(dataset, torchvision.datasets.CocoDetection): 199 | break 200 | if isinstance(dataset, torch.utils.data.Subset): 201 | dataset = dataset.dataset 202 | if isinstance(dataset, torchvision.datasets.CocoDetection): 203 | return dataset.coco 204 | return convert_to_coco_api(dataset) 205 | 206 | 207 | class CocoDetection(torchvision.datasets.CocoDetection): 208 | def __init__(self, img_folder, ann_file, transforms): 209 | super(CocoDetection, self).__init__(img_folder, ann_file) 210 | self._transforms = transforms 211 | 212 | def __getitem__(self, idx): 213 | img, target = super(CocoDetection, self).__getitem__(idx) 214 | image_id = self.ids[idx] 215 | target = dict(image_id=image_id, annotations=target) 216 | if self._transforms is not None: 217 | img, target = self._transforms(img, target) 218 | return img, target 219 | 220 | 221 | def get_coco(root, image_set, transforms, mode='instances'): 222 | anno_file_template = "{}_{}2017.json" 223 | PATHS = { 224 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 225 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 226 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 227 | } 228 | 229 | t = [ConvertCocoPolysToMask()] 230 | 231 | if transforms is not None: 232 | t.append(transforms) 233 | transforms = T.Compose(t) 234 | 235 | img_folder, ann_file = PATHS[image_set] 236 | img_folder = os.path.join(root, img_folder) 237 | ann_file = os.path.join(root, ann_file) 238 | 239 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 240 | 241 | if image_set == "train": 242 | dataset = _coco_remove_images_without_annotations(dataset) 243 | 244 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 245 | 246 | return dataset 247 | 248 | 249 | def get_coco_kp(root, image_set, transforms): 250 | return get_coco(root, image_set, transforms, mode="person_keypoints") 251 | -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/utils.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pickle 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | import errno 9 | import os 10 | 11 | 12 | def all_gather(data): 13 | """ 14 | Run all_gather on arbitrary picklable data (not necessarily tensors) 15 | Args: 16 | data: any picklable object 17 | Returns: 18 | list[data]: list of data gathered from each rank 19 | """ 20 | world_size = get_world_size() 21 | if world_size == 1: 22 | return [data] 23 | 24 | # serialized to a Tensor 25 | buffer = pickle.dumps(data) 26 | storage = torch.ByteStorage.from_buffer(buffer) 27 | tensor = torch.ByteTensor(storage).to("cuda") 28 | 29 | # obtain Tensor size of each rank 30 | local_size = torch.tensor([tensor.numel()], device="cuda") 31 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 32 | dist.all_gather(size_list, local_size) 33 | size_list = [int(size.item()) for size in size_list] 34 | max_size = max(size_list) 35 | 36 | # receiving Tensor from all ranks 37 | # we pad the tensor because torch all_gather does not support 38 | # gathering tensors of different shapes 39 | tensor_list = [] 40 | for _ in size_list: 41 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 42 | if local_size != max_size: 43 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 44 | tensor = torch.cat((tensor, padding), dim=0) 45 | dist.all_gather(tensor_list, tensor) 46 | 47 | data_list = [] 48 | for size, tensor in zip(size_list, tensor_list): 49 | buffer = tensor.cpu().numpy().tobytes()[:size] 50 | data_list.append(pickle.loads(buffer)) 51 | 52 | return data_list 53 | 54 | 55 | def reduce_dict(input_dict, average=True): 56 | """ 57 | Args: 58 | input_dict (dict): all the values will be reduced 59 | average (bool): whether to do average or sum 60 | Reduce the values in the dictionary from all processes so that all processes 61 | have the averaged results. Returns a dict with the same fields as 62 | input_dict, after reduction. 
63 | """ 64 | world_size = get_world_size() 65 | if world_size < 2: 66 | return input_dict 67 | with torch.no_grad(): 68 | names = [] 69 | values = [] 70 | # sort the keys so that they are consistent across processes 71 | for k in sorted(input_dict.keys()): 72 | names.append(k) 73 | values.append(input_dict[k]) 74 | values = torch.stack(values, dim=0) 75 | dist.all_reduce(values) 76 | if average: 77 | values /= world_size 78 | reduced_dict = {k: v for k, v in zip(names, values)} 79 | return reduced_dict 80 | 81 | 82 | def collate_fn(batch): 83 | return tuple(zip(*batch)) 84 | 85 | 86 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 87 | 88 | def f(x): 89 | if x >= warmup_iters: 90 | return 1 91 | alpha = float(x) / warmup_iters 92 | return warmup_factor * (1 - alpha) + alpha 93 | 94 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 95 | 96 | 97 | def mkdir(path): 98 | try: 99 | os.makedirs(path) 100 | except OSError as e: 101 | if e.errno != errno.EEXIST: 102 | raise 103 | 104 | 105 | def setup_for_distributed(is_master): 106 | """ 107 | This function disables printing when not in master process 108 | """ 109 | import builtins as __builtin__ 110 | builtin_print = __builtin__.print 111 | 112 | def print(*args, **kwargs): 113 | force = kwargs.pop('force', False) 114 | if is_master or force: 115 | builtin_print(*args, **kwargs) 116 | 117 | __builtin__.print = print 118 | 119 | 120 | def is_dist_avail_and_initialized(): 121 | if not dist.is_available(): 122 | return False 123 | if not dist.is_initialized(): 124 | return False 125 | return True 126 | 127 | 128 | def get_world_size(): 129 | if not is_dist_avail_and_initialized(): 130 | return 1 131 | return dist.get_world_size() 132 | 133 | 134 | def get_rank(): 135 | if not is_dist_avail_and_initialized(): 136 | return 0 137 | return dist.get_rank() 138 | 139 | 140 | def is_main_process(): 141 | return get_rank() == 0 142 | 143 | 144 | def save_on_master(*args, **kwargs): 145 | if is_main_process(): 146 | torch.save(*args, **kwargs) 147 | 148 | 149 | def init_distributed_mode(args): 150 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 151 | args.rank = int(os.environ["RANK"]) 152 | args.world_size = int(os.environ['WORLD_SIZE']) 153 | args.gpu = int(os.environ['LOCAL_RANK']) 154 | elif 'SLURM_PROCID' in os.environ: 155 | args.rank = int(os.environ['SLURM_PROCID']) 156 | args.gpu = args.rank % torch.cuda.device_count() 157 | else: 158 | print('Not using distributed mode') 159 | args.distributed = False 160 | return 161 | 162 | args.distributed = True 163 | 164 | torch.cuda.set_device(args.gpu) 165 | args.dist_backend = 'nccl' 166 | print('| distributed init (rank {}): {}'.format( 167 | args.rank, args.dist_url), flush=True) 168 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 169 | world_size=args.world_size, rank=args.rank) 170 | torch.distributed.barrier() 171 | setup_for_distributed(args.rank == 0) 172 | -------------------------------------------------------------------------------- /the_hero_rises/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from PIL import Image 4 | 5 | from torchvision.transforms import functional as F 6 | 7 | 8 | def get_transform(train, image_size): 9 | transforms = [Resize(size=(image_size, image_size)), ToTensor()] 10 | if train: 11 | transforms.append(RandomHorizontalFlip(0.5)) 12 | return Compose(transforms) 13 | 14 | 15 | def 
_flip_coco_person_keypoints(kps, width): 16 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 17 | flipped_data = kps[:, flip_inds] 18 | flipped_data[..., 0] = width - flipped_data[..., 0] 19 | # Maintain COCO convention that if visibility == 0, then x, y = 0 20 | inds = flipped_data[..., 2] == 0 21 | flipped_data[inds] = 0 22 | return flipped_data 23 | 24 | 25 | class ToTensor(object): 26 | def __call__(self, image, target): 27 | image = F.to_tensor(image) 28 | return image, target 29 | 30 | 31 | class Compose(object): 32 | def __init__(self, transforms): 33 | self.transforms = transforms 34 | 35 | def __call__(self, image, target=None): 36 | for t in self.transforms: 37 | image, target = t(image, target) 38 | return image, target 39 | 40 | 41 | class RandomHorizontalFlip(object): 42 | def __init__(self, prob): 43 | self.prob = prob 44 | 45 | def __call__(self, image, target): 46 | if random.random() < self.prob: 47 | height, width = image.shape[-2:] 48 | image = image.flip(-1) 49 | bbox = target["boxes"] 50 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 51 | target["boxes"] = bbox 52 | if "masks" in target: 53 | target["masks"] = target["masks"].flip(-1) 54 | if "keypoints" in target: 55 | keypoints = target["keypoints"] 56 | keypoints = _flip_coco_person_keypoints(keypoints, width) 57 | target["keypoints"] = keypoints 58 | return image, target 59 | 60 | 61 | class Resize(object): 62 | """Resize the input PIL image to given size. 63 | If boxes is not None, resize boxes accordingly. 64 | Args: 65 | size: (tuple or int) 66 | - if is tuple, resize image to the size. 67 | - if is int, resize the shorter side to the size while maintaining the aspect ratio. 68 | max_size: (int) when size is int, limit the image longer size to max_size. 69 | This is essential to limit the usage of GPU memory. 70 | random_interpolation: (bool) randomly choose a resize interpolation method. 71 | Returns: 72 | img: (PIL.Image) resized image. 73 | boxes: (tensor) resized boxes. 74 | Example: 75 | >> img, boxes = resize(img, boxes, 600) # resize shorter side to 600 76 | >> img, boxes = resize(img, boxes, (500,600)) # resize image size to (500,600) 77 | >> img, _ = resize(img, None, (500,600)) # resize image only 78 | """ 79 | def __init__(self, size, max_size=1000, random_interpolation=False): 80 | self.size = size 81 | self.max_size = max_size 82 | self.random_interpolation = random_interpolation 83 | 84 | def __call__(self, image, target): 85 | """Resize the input PIL image to given size. 86 | If boxes is not None, resize boxes accordingly. 87 | Args: 88 | image: (PIL.Image) image to be resized. 89 | target: (tensor) object boxes, sized [#obj,4]. 
90 | """ 91 | w, h = image.size 92 | if isinstance(self.size, int): 93 | size_min = min(w, h) 94 | size_max = max(w, h) 95 | sw = sh = float(self.size) / size_min 96 | if sw * size_max > self.max_size: 97 | sw = sh = float(self.max_size) / size_max 98 | ow = int(w * sw + 0.5) 99 | oh = int(h * sh + 0.5) 100 | else: 101 | ow, oh = self.size 102 | sw = float(ow) / w 103 | sh = float(oh) / h 104 | 105 | method = random.choice([ 106 | Image.BOX, 107 | Image.NEAREST, 108 | Image.HAMMING, 109 | Image.BICUBIC, 110 | Image.LANCZOS, 111 | Image.BILINEAR]) if self.random_interpolation else Image.BILINEAR 112 | image = image.resize((ow, oh), method) 113 | if target is not None and "masks" in target: 114 | resized_masks = torch.nn.functional.interpolate( 115 | input=target["masks"][None].float(), 116 | size=(512, 512), 117 | mode="nearest", 118 | )[0].type_as(target["masks"]) 119 | target["masks"] = resized_masks 120 | if target is not None and "boxes" in target: 121 | resized_boxes = target["boxes"] * torch.tensor([sw, sh, sw, sh]) 122 | target["boxes"] = resized_boxes 123 | return image, target 124 | -------------------------------------------------------------------------------- /the_hero_rises/utilities.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import attr 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import torchvision 8 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 9 | from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 10 | from torchvision.models.resnet import resnet50, resnet101, resnet152 11 | from torchvision.models.vgg import vgg16 12 | from torchvision.transforms import functional as F 13 | 14 | from torchvision_references import utils 15 | 16 | 17 | def safe_collate(batch): 18 | batch = list(filter(lambda x: x is not None, batch)) 19 | return utils.collate_fn(batch) 20 | 21 | 22 | def draw_boxes(im, boxes, labels, color=(150, 0, 0)): 23 | for box, draw_label in zip(boxes, labels): 24 | draw_box = box.astype('int') 25 | im = cv2.rectangle(im, tuple(draw_box[:2]), tuple(draw_box[2:]), color, 2) 26 | im = cv2.putText(im, str(draw_label), (draw_box[0], max(0, draw_box[1]-5)), 27 | cv2.FONT_HERSHEY_COMPLEX, 0.8, color, 2) 28 | return im 29 | 30 | 31 | def draw_debug_images(images, targets, predictions=None, score_thr=0.3): 32 | debug_images = [] 33 | for image, target in zip(images, targets): 34 | img = draw_boxes(np.array(F.to_pil_image(image.cpu())), 35 | [box.cpu().numpy() for box in target['boxes']], 36 | [label.item() for label in target['labels']]) 37 | if predictions: 38 | img = draw_boxes(img, 39 | [box.cpu().numpy() for box, score in 40 | zip(predictions[target['image_id'].item()]['boxes'], 41 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 42 | [label.item() for label, score in 43 | zip(predictions[target['image_id'].item()]['labels'], 44 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 45 | color=(0, 150, 0)) 46 | debug_images.append(img) 47 | return debug_images 48 | 49 | 50 | def draw_mask(target): 51 | masks = [channel*label for channel, label in zip(target['masks'].cpu().numpy(), target['labels'].cpu().numpy())] 52 | masks_sum = sum(masks) 53 | masks_out = masks_sum + 25*(masks_sum > 0) 54 | return (masks_out*int(255/masks_out.max())).astype('uint8') 55 | 56 | 57 | def get_backbone(backbone_name): 58 | if backbone_name == 'vgg16': 59 | return vgg16(pretrained=True) 60 | elif backbone_name == 
'resnet50': 61 | return resnet50(pretrained=True) 62 | elif backbone_name == 'resnet101': 63 | return resnet101(pretrained=True) 64 | elif backbone_name == 'resnet152': 65 | return resnet152(pretrained=True) 66 | else: 67 | raise ValueError('Only "vgg16", "resnet50", "resnet101" and "resnet152" are supported backbone names') 68 | 69 | 70 | def get_model_instance_segmentation(num_classes, hidden_layer): 71 | # load an instance segmentation model pre-trained on COCO 72 | model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True) 73 | 74 | # get number of input features for the classifier 75 | in_features = model.roi_heads.box_predictor.cls_score.in_features 76 | # replace the pre-trained head with a new one 77 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 78 | 79 | # now get the number of input features for the mask classifier 80 | in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels 81 | 82 | # and replace the mask predictor with a new one 83 | model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes) 84 | return model 85 | 86 | 87 | def get_iou_types(model): 88 | model_without_ddp = model 89 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 90 | model_without_ddp = model.module 91 | iou_types = ["bbox"] 92 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 93 | iou_types.append("segm") 94 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 95 | iou_types.append("keypoints") 96 | return iou_types 97 | 98 | 99 | @attr.s(auto_attribs=True) 100 | class CocoLikeAnnotations(): 101 | def __attrs_post_init__(self): 102 | self.coco_like_json: dict = {'images': [], 'annotations': []} 103 | self._ann_id: int = 0 104 | 105 | def update_images(self, file_name, height, width, id): 106 | self.coco_like_json['images'].append({'file_name': file_name, 107 | 'height': height, 'width': width, 108 | 'id': id}) 109 | 110 | def update_annotations(self, box, label_id, image_id, is_crowd=0): 111 | segmentation, bbox, area = self.extract_coco_info(box) 112 | self.coco_like_json['annotations'].append({'segmentation': segmentation, 'bbox': bbox, 'area': area, 113 | 'category_id': int(label_id), 'id': self._ann_id, 'iscrowd': is_crowd, 114 | 'image_id': image_id}) 115 | self._ann_id += 1 116 | 117 | @staticmethod 118 | def extract_coco_info(box): 119 | segmentation = list(map(int, [box[0], box[1], box[0], box[3], box[2], box[3], box[2], box[1]])) 120 | bbox = list(map(int, np.append(box[:2], (box[2:] - box[:2])))) 121 | area = int(bbox[2] * bbox[3]) 122 | return segmentation, bbox, area 123 | 124 | def dump_to_json(self, path_to_json='/tmp/inference_results/inference_results.json'): 125 | with open(path_to_json, "w") as write_file: 126 | json.dump(self.coco_like_json, write_file) 127 | -------------------------------------------------------------------------------- /urbansounds8k/.gitignore: -------------------------------------------------------------------------------- 1 | tensorboard_logs 2 | .venv 3 | dataset 4 | .idea -------------------------------------------------------------------------------- /urbansounds8k/README.md: -------------------------------------------------------------------------------- 1 | # Audio Classification Example 2 | 3 | ## Setting the scene 4 | Audio signals are all around us. 
As such, there is an increasing interest in audio classification for various scenarios, 5 | from fire alarm detection for hearing-impaired people, through engine sound analysis for maintenance purposes, 6 | to baby monitoring. Though audio signals are temporal in nature, in many cases it is possible to leverage recent 7 | advancements in the field of image classification and use popular, high-performing convolutional neural networks for 8 | audio classification. In this blog post we will demonstrate such an example by using the popular method of converting 9 | the audio signal into the frequency domain. 10 | 11 | This example is based on a series of blogposts that show how to leverage PyTorch's ecosystem to easily jumpstart your 12 | ML/DL project. You can find the [Image Classification](https://clear.ml/blog/ml-dl-engineering-made-easy-with-pytorch-and-allegro-trains/), 13 | [Hyperparameter Optimization](https://clear.ml/blog/accelerate-hyperparameter-optimization-with-pytorchs-ecosystem-tools/) and 14 | the original [Audio Classification](https://clear.ml/blog/audio-classification-with-pytorchs-ecosystem-tools/) blogposts here. 15 | 16 | 17 | ## Urbansounds Structure 18 | The urbansounds dataset consists of the actual data and a CSV file containing the metadata. For each sample, the CSV file keeps track of the location and the label. 19 | The data itself is organized into multiple folds, or equal splits of the data. In testing, we use one fold to validate our model on, while all the other folds are used for training. 20 | 21 | 22 | ## ClearML Workflow 23 | 24 | 25 | ![ClearML flow diagram](assets/diagram.png) 26 | 27 | 28 | ### Getting the data 29 | The first script downloads the data from the official urbansounds sources, or in this case a subset that is hosted by ClearML just to play around with. The metadata is converted into a format that is easier for us to work with, and then the files as well as the metadata are uploaded as a ClearML Dataset. 30 | 31 | The Dataset is a special sort of task, so we can also generate some interesting logs and plots, such as a histogram, and attach them to the task just like we would do for any other task. 32 | 33 | ![ClearML Histogram Dataset](https://imgur.com/4TTovpG.png) 34 | 35 | ### Preprocessing the data 36 | In order to train a model on the data, we want to convert the audio samples (.wav files) to images by creating their mel spectrograms. For more information on how this works, read the section below. 37 | 38 | In the end we convert each .wav file into a spectrogram image and save the image with the same filename in the same folder. Then we create a new ClearML dataset from this data. We make it a new version (child) of the previous dataset, so the .wav files themselves won't actually be uploaded again; they simply refer to the previously uploaded dataset. ClearML Data will only upload the newly created image files. 39 | 40 | The metadata from the CSV file is again added to the dataset as an artifact. We can just get it as a pandas dataframe when we need it. 41 | 42 | ### Training 43 | Finally, we get this latest dataset version, download the data itself and get the pandas dataframe containing the metadata. Based on the fold number, we divide the data into train and test sets and train a machine learning model on it. We then log the output scalars and plot a confusion matrix so we can see the model's performance in the ClearML webUI and compare it easily to other experiment runs.
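A condensed sketch of that flow (the full implementation lives in `training.py`) might look roughly like this; it only uses the ClearML calls already present in the example scripts:

```Python
from clearml import Task, Dataset

# Get the latest preprocessed dataset version and download a local copy of the spectrograms
preprocessed_dataset = Dataset.get(
    dataset_project='examples/Urbansounds',
    dataset_name='UrbanSounds example',
    dataset_tags=['preprocessed']
)
local_data_path = preprocessed_dataset.get_local_copy()

# The metadata dataframe was attached to the dataset task as an artifact in the previous step
metadata = Task.get_task(task_id=preprocessed_dataset.id).artifacts['metadata'].get()

# Folds 1-9 are used for training, fold 10 is held out for testing
train_metadata = metadata[metadata['fold'].isin(range(1, 10))]
test_metadata = metadata[metadata['fold'] == 10]
```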
44 | 45 | 46 | 47 | ## Audio Classification with Convolutional Neural Networks 48 | 49 | In recent years, Convolutional Neural Networks (CNNs) have proven very effective in image classification tasks, which gave rise to the design of various architectures, such as Inception, ResNet, ResNext, Mobilenet and more. These CNNs achieve state-of-the-art results on image classification tasks and offer a variety of ready-to-use pre-trained backbones. As such, if we are able to transfer audio classification tasks into the image domain, we will be able to leverage this rich variety of backbones for our needs. 50 | 51 | As mentioned before, instead of directly using the sound file as an amplitude-vs-time signal, we wish to convert the audio signal into an image. The following preprocessing was done using [this script](https://github.com/allegroai/trains/blob/master/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb) on the [YesNo](https://pytorch.org/audio/datasets.html#yesno) dataset that is included in the [torchaudio built-in datasets](https://pytorch.org/audio/datasets.html). 52 | 53 | As a first stage of preprocessing we will: 54 | 55 | - **Read the audio file** – using torchaudio 56 | - **Resample the audio signal to a fixed sample rate** – This makes sure that all the signals we use have the same sample rate. Theoretically, the maximum frequency that can be represented by a sampled signal is a little less than half the sample rate (known as the [Nyquist frequency](https://en.wikipedia.org/wiki/Nyquist_frequency)). As 20 kHz is the highest frequency generally audible by humans, a sampling rate of 44100 Hz is considered the most popular choice. However, in many cases removing the higher frequencies is considered acceptable for the sake of reducing the amount of data per audio file. As such, a sampling rate of 22050 Hz has been reasonably popular for low-bitrate MP3 files. In our example we will use this sample rate. 57 | - **Create a mono audio signal** – For simplicity, we will make sure all signals we use have the same number of channels. 58 | 59 | **The code for such preprocessing looks like this:** 60 | 61 | ```Python 62 | yesno_data = torchaudio.datasets.YESNO('./data', download=True) 63 | number_of_samples = 3 64 | fixed_sample_rate = 22050 65 | for n in range(number_of_samples): 66 | audio, sample_rate, labels = yesno_data[n] 67 | resample_transform = torchaudio.transforms.Resample( 68 | orig_freq=sample_rate, new_freq=fixed_sample_rate) 69 | audio_mono = torch.mean(resample_transform(audio), 70 | dim=0, keepdim=True) 71 | 72 | plt.figure() 73 | plt.plot(audio_mono[0,:]) 74 | 75 | ``` 76 | 77 | **The resulting matplotlib plot looks like this:** 78 | 79 | Audio signal time series from the YESNO dataset 80 | 81 | ![https://clear.ml/wp-content/uploads/2020/10/Audio-signal-time-series-from-the-YESNO-dataset.png](https://clear.ml/wp-content/uploads/2020/10/Audio-signal-time-series-from-the-YESNO-dataset.png) 82 | 83 | Now it is time to transform this time-series signal into the image domain. We will do that by converting it into a spectrogram, which is a visual representation of the spectrum of frequencies of a signal as it varies with time. For that purpose we will use a log-scaled mel spectrogram. A mel spectrogram is a spectrogram where the frequencies are converted to the mel scale, which takes into account the fact that humans are better at detecting differences in lower frequencies than higher frequencies.
The mel scale converts the frequencies so that equal distances in pitch sound equally distant to a human listener. **So let's use torchaudio transforms and add the following lines to our snippet:** 84 | 85 | ```Python 86 | melspectogram_transform = \ 87 | torchaudio.transforms.MelSpectrogram( 88 | sample_rate=fixed_sample_rate, n_mels=128) 89 | melspectogram_db_transform = torchaudio.transforms.AmplitudeToDB() 90 | 91 | melspectogram = melspectogram_transform(audio_mono) 92 | plt.figure() 93 | plt.imshow(melspectogram.squeeze().numpy(), cmap='hot') 94 | 95 | melspectogram_db = melspectogram_db_transform(melspectogram) 96 | plt.figure() 97 | plt.imshow(melspectogram_db.squeeze().numpy(), cmap='hot') 98 | 99 | ``` 100 | 101 | **Now the audio file is represented as a two-dimensional spectrogram image:** 102 | 103 | Mel spectrogram (upper image) and its log-scale version (lower image) 104 | 105 | ![https://clear.ml/wp-content/uploads/2020/10/Mel-spectrogram-and-its-log-scale-version.png](https://clear.ml/wp-content/uploads/2020/10/Mel-spectrogram-and-its-log-scale-version.png) 106 | 107 | That's exactly what we wanted to achieve. The audio classification problem is now transformed into an image classification problem. -------------------------------------------------------------------------------- /urbansounds8k/assets/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/urbansounds8k/assets/diagram.png -------------------------------------------------------------------------------- /urbansounds8k/get_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pathlib import Path 3 | from clearml import Task, Dataset, StorageManager 4 | 5 | task = Task.init(project_name='examples/Urbansounds', 6 | task_name='download data') 7 | 8 | configuration = { 9 | 'selected_classes': ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 10 | 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music'] 11 | } 12 | task.connect(configuration) 13 | 14 | 15 | def get_urbansound8k(): 16 | # Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html) 17 | # For simplicity we will use here a subset of that dataset using clearml StorageManager 18 | path_to_urbansound8k = StorageManager.get_local_copy( 19 | "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip", 20 | extract_archive=True) 21 | path_to_urbansound8k_csv = Path(path_to_urbansound8k) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv' 22 | path_to_urbansound8k_audio = Path(path_to_urbansound8k) / 'UrbanSound8K' / 'audio' 23 | 24 | return path_to_urbansound8k_csv, path_to_urbansound8k_audio 25 | 26 | 27 | def log_dataset_statistics(dataset, metadata): 28 | histogram_data = metadata['class'].value_counts() 29 | dataset.get_logger().report_table( 30 | title='Raw Dataset Metadata', 31 | series='Raw Dataset Metadata', 32 | table_plot=metadata 33 | ) 34 | dataset.get_logger().report_histogram( 35 | title='Class distribution', 36 | series='Class distribution', 37 | values=histogram_data, 38 | iteration=0, 39 | xlabels=histogram_data.index.tolist(), 40 | yaxis='Amount of samples' 41 | ) 42 | 43 | 44 | def build_clearml_dataset(): 45 | # Get a local copy of both the data and the labels 46 | path_to_urbansound8k_csv, path_to_urbansound8k_audio = get_urbansound8k() 47 | urbansound8k_metadata =
pd.read_csv(path_to_urbansound8k_csv) 48 | # Subset the data to only include the classes we want 49 | urbansound8k_metadata = \ 50 | urbansound8k_metadata[urbansound8k_metadata['class'].isin(configuration['selected_classes'])] 51 | 52 | # Create a pandas dataframe containing labels and other info we need later (fold is for train test split) 53 | metadata = pd.DataFrame({ 54 | 'fold': urbansound8k_metadata.loc[:, 'fold'], 55 | 'filepath': ('fold' + urbansound8k_metadata.loc[:, 'fold'].astype(str) 56 | + '/' + urbansound8k_metadata.loc[:, 'slice_file_name'].astype(str)), 57 | 'label': urbansound8k_metadata.loc[:, 'classID'] 58 | }) 59 | 60 | # Now create a clearml dataset to start versioning our changes and make it much easier to get the right data 61 | # in other tasks as well as on different machines 62 | dataset = Dataset.create( 63 | dataset_name='UrbanSounds example', 64 | dataset_project='examples/Urbansounds', 65 | dataset_tags=['raw'] 66 | ) 67 | 68 | # Add the local files we downloaded earlier 69 | dataset.add_files(path_to_urbansound8k_audio) 70 | # Add the metadata in pandas format, we can now see it in the webUI and have it be easily accessible 71 | dataset._task.upload_artifact(name='metadata', artifact_object=metadata) 72 | # Let's add some cool graphs as statistics in the plots section! 73 | log_dataset_statistics(dataset, urbansound8k_metadata) 74 | # Finalize and upload the data and labels of the dataset 75 | dataset.finalize(auto_upload=True) 76 | 77 | 78 | if __name__ == '__main__': 79 | build_clearml_dataset() 80 | -------------------------------------------------------------------------------- /urbansounds8k/preprocessing.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from pathlib import Path 3 | 4 | import matplotlib as mpl 5 | import numpy as np 6 | from tqdm import tqdm 7 | import torchaudio 8 | import torch 9 | from clearml import Task, Dataset 10 | 11 | task = Task.init(project_name='examples/Urbansounds', 12 | task_name='preprocessing') 13 | 14 | # Let's preprocess the data and create a new ClearML dataset from it, so we can track it around 15 | # The cool thing is, we can easily debug, by using, you guessed it: debug samples! We can log both 16 | # the original sound and its processed mel spectrogram as debug samples, so we can manually check 17 | # if everything went as planned. 
18 | 19 | 20 | class PreProcessor: 21 | def __init__(self): 22 | self.configuration = { 23 | 'number_of_mel_filters': 64, 24 | 'resample_freq': 22050 25 | } 26 | task.connect(self.configuration) 27 | 28 | def preprocess_sample(self, sample, original_sample_freq): 29 | if self.configuration['resample_freq'] > 0: 30 | resample_transform = torchaudio.transforms.Resample(orig_freq=original_sample_freq, 31 | new_freq=self.configuration['resample_freq']) 32 | sample = resample_transform(sample) 33 | 34 | # This will convert audio files with two channels into one 35 | sample = torch.mean(sample, dim=0, keepdim=True) 36 | 37 | # Convert audio to log-scale Mel spectrogram 38 | melspectrogram_transform = torchaudio.transforms.MelSpectrogram( 39 | sample_rate=self.configuration['resample_freq'], 40 | n_mels=self.configuration['number_of_mel_filters'] 41 | ) 42 | melspectrogram = melspectrogram_transform(sample) 43 | melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectrogram) 44 | 45 | # Make sure all spectrograms are the same size 46 | fixed_length = 3 * (self.configuration['resample_freq'] // 200) 47 | if melspectogram_db.shape[2] < fixed_length: 48 | melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2])) 49 | else: 50 | melspectogram_db = melspectogram_db[:, :, :fixed_length] 51 | 52 | return melspectogram_db 53 | 54 | 55 | class DataSetBuilder: 56 | def __init__(self): 57 | self.configuration = { 58 | 'dataset_path': 'dataset' 59 | } 60 | task.connect(self.configuration) 61 | 62 | self.original_dataset = Dataset.get( 63 | dataset_project='examples/Urbansounds', 64 | dataset_name='UrbanSounds example', 65 | dataset_tags=['raw'], 66 | alias='Raw Dataset' 67 | ) 68 | # This will return the pandas dataframe we added in the previous task 69 | self.metadata = Task.get_task(task_id=self.original_dataset._task.id).artifacts['metadata'].get() 70 | # This will download the data and return a local path to the data 71 | self.original_dataset_path = \ 72 | Path(self.original_dataset.get_mutable_local_copy(self.configuration['dataset_path'], overwrite=True)) 73 | 74 | # Prepare a preprocessor that will handle each sample one by one 75 | self.preprocessor = PreProcessor() 76 | 77 | # Get ready for the new one 78 | self.preprocessed_dataset = None 79 | 80 | def log_dataset_statistics(self): 81 | histogram_data = self.metadata['label'].value_counts() 82 | self.preprocessed_dataset.get_logger().report_table( 83 | title='Raw Dataset Metadata', 84 | series='Raw Dataset Metadata', 85 | table_plot=self.metadata 86 | ) 87 | self.preprocessed_dataset.get_logger().report_histogram( 88 | title='Class distribution', 89 | series='Class distribution', 90 | values=histogram_data, 91 | iteration=0, 92 | xlabels=histogram_data.index.tolist(), 93 | yaxis='Amount of samples' 94 | ) 95 | 96 | def build_dataset(self): 97 | # Let's create a new dataset that is a child of the original one 98 | # We'll add the preprocessed samples to the original dataset, leading to a new version 99 | # Providing the parent dataset allows us to keep a clear lineage of our data 100 | self.preprocessed_dataset = Dataset.create( 101 | dataset_name='UrbanSounds example', 102 | dataset_project='examples/Urbansounds', 103 | dataset_tags=["preprocessed"], 104 | parent_datasets=[self.original_dataset.id] 105 | ) 106 | 107 | # loop through the metadata entries and preprocess each sample, then add some of them as debug samples to 108 | # manually double check in the UI that everything has worked 
(you can watch the spectrogram and listen to the 109 | # audio side by side in the debug sample UI) 110 | for i, (_, data) in tqdm(enumerate(self.metadata.iterrows())): 111 | _, audio_file_path, label = data.tolist() 112 | sample, sample_freq = torchaudio.load(self.original_dataset_path / audio_file_path, normalize=True) 113 | spectrogram = self.preprocessor.preprocess_sample(sample, sample_freq) 114 | # Get only the filename and replace the extension, we're saving an image here 115 | new_file_name = os.path.basename(audio_file_path).replace('.wav', '.npy') 116 | # Get the correct folder, basically the original dataset folder + the new filename 117 | spectrogram_path = self.original_dataset_path / os.path.dirname(audio_file_path) / new_file_name 118 | # Save the numpy array to disk 119 | np.save(spectrogram_path, spectrogram) 120 | 121 | # Log every 10th sample as a debug sample to the UI, so we can manually check it 122 | if i % 10 == 0: 123 | # Convert the numpy array to a viewable JPEG 124 | rgb_image = mpl.colormaps['viridis'](spectrogram[0, :, :].detach().numpy() * 255)[:, :, :3] 125 | title = os.path.splitext(os.path.basename(audio_file_path))[0] 126 | 127 | # Report the image and the original sound, so they can be viewed side by side 128 | self.preprocessed_dataset.get_logger().report_image( 129 | title=title, 130 | series='spectrogram', 131 | image=rgb_image 132 | ) 133 | self.preprocessed_dataset.get_logger().report_media( 134 | title=title, 135 | series='original_audio', 136 | local_path=self.original_dataset_path / audio_file_path 137 | ) 138 | # The original data path will now also have the spectrograms in its filetree. 139 | # So that's why we add it here to fill up the new dataset with. 140 | self.preprocessed_dataset.add_files(self.original_dataset_path) 141 | # Again add some visualizations to the task 142 | self.log_dataset_statistics() 143 | # We still want the metadata 144 | self.preprocessed_dataset._task.upload_artifact(name='metadata', artifact_object=self.metadata) 145 | self.preprocessed_dataset.finalize(auto_upload=True) 146 | 147 | 148 | if __name__ == '__main__': 149 | datasetbuilder = DataSetBuilder() 150 | datasetbuilder.build_dataset() 151 | -------------------------------------------------------------------------------- /urbansounds8k/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml>=1.3.0 2 | torchaudio>=0.10.2 3 | torch>=1.10.2 4 | pandas>=1.3 5 | torchvision 6 | matplotlib 7 | tqdm 8 | sklearn 9 | tensorboard -------------------------------------------------------------------------------- /urbansounds8k/training.py: -------------------------------------------------------------------------------- 1 | import PIL 2 | import io 3 | import os 4 | from tempfile import gettempdir 5 | import matplotlib.pyplot as plt 6 | from torchvision import models 7 | from sklearn.metrics import ConfusionMatrixDisplay, f1_score 8 | from torchvision.transforms import ToTensor 9 | import torchaudio 10 | import torch 11 | import torch.optim as optim 12 | from torch.utils.data import Dataset as TorchDataset 13 | import torch.nn as nn 14 | from torch.utils.tensorboard import SummaryWriter 15 | import numpy as np 16 | from argparse import ArgumentParser 17 | 18 | from clearml import Task, Dataset 19 | task = Task.init(project_name='examples/Urbansounds', 20 | task_name='training') 21 | 22 | 23 | parser = ArgumentParser() 24 | parser.add_argument('--dropout', type=float, default=0.30) 25 | parser.add_argument('--base_lr', 
type=float, default=0.002) 26 | parser.add_argument('--number_of_epochs', type=int, default=10) 27 | parser.add_argument('--batch_size', type=int, default=4) 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | configuration_dict = { 33 | 'dropout': args.dropout, 34 | 'base_lr': args.base_lr, 35 | 'number_of_epochs': args.number_of_epochs, 36 | 'batch_size': args.batch_size 37 | } 38 | 39 | 40 | class ClearMLDataLoader(TorchDataset): 41 | def __init__(self, dataset_name, project_name, folder_filter): 42 | clearml_dataset = Dataset.get( 43 | dataset_name=dataset_name, 44 | dataset_project=project_name, 45 | dataset_tags=["preprocessed"], 46 | alias='Preprocessed Dataset' 47 | ) 48 | self.img_dir = clearml_dataset.get_local_copy() 49 | self.img_metadata = Task.get_task(task_id=clearml_dataset.id).artifacts['metadata'].get() 50 | self.img_metadata = self.img_metadata[self.img_metadata['fold'].isin(folder_filter)] 51 | # We just removed some rows by filtering on class, this will make gaps in the dataframe index 52 | # (e.g. 57 won't exist anymore) so we reindex to make it a full range again, otherwise we'll get errors later 53 | # when selecting a row by index 54 | self.img_metadata = self.img_metadata.reset_index(drop=True) 55 | 56 | def __len__(self): 57 | return len(self.img_metadata) 58 | 59 | def __getitem__(self, idx): 60 | sound_path = os.path.join(self.img_dir, self.img_metadata.loc[idx, 'filepath']) 61 | img_path = sound_path.replace('.wav', '.npy') 62 | image = np.load(img_path) 63 | label = self.img_metadata.loc[idx, 'label'] 64 | return sound_path, image, label 65 | 66 | 67 | train_dataset = ClearMLDataLoader('UrbanSounds example', 'examples/Urbansounds', set(range(1, 10))) 68 | test_dataset = ClearMLDataLoader('UrbanSounds example', 'examples/Urbansounds', {10}) 69 | print(len(train_dataset), len(test_dataset)) 70 | train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=configuration_dict.get('batch_size', 4), 71 | shuffle=True, pin_memory=True, num_workers=1) 72 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=configuration_dict.get('batch_size', 4), 73 | shuffle=False, pin_memory=False, num_workers=1) 74 | 75 | classes = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 76 | 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music'] 77 | 78 | 79 | model = models.resnet18(pretrained=True) 80 | model.conv1 = nn.Conv2d(1, model.conv1.out_channels, kernel_size=model.conv1.kernel_size[0], 81 | stride=model.conv1.stride[0], padding=model.conv1.padding[0]) 82 | num_ftrs = model.fc.in_features 83 | model.fc = nn.Sequential(*[nn.Dropout(p=configuration_dict.get('dropout', 0.25)), nn.Linear(num_ftrs, len(classes))]) 84 | 85 | optimizer = optim.SGD(model.parameters(), lr=configuration_dict.get('base_lr', 0.001), momentum=0.9) 86 | scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=configuration_dict.get('number_of_epochs') // 3, gamma=0.1) 87 | criterion = nn.CrossEntropyLoss() 88 | 89 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 90 | print('Device to use: {}'.format(device)) 91 | model.to(device) 92 | 93 | tensorboard_writer = SummaryWriter('./tensorboard_logs') 94 | 95 | 96 | def plot_signal(signal, title, cmap=None): 97 | fig = plt.figure() 98 | if signal.ndim == 1: 99 | plt.plot(signal) 100 | else: 101 | plt.imshow(signal, cmap=cmap) 102 | plt.title(title) 103 | 104 | plot_buf = io.BytesIO() 105 | plt.savefig(plot_buf, format='jpeg') 106 | plot_buf.seek(0) 107 | 
plt.close(fig) 108 | return ToTensor()(PIL.Image.open(plot_buf)) 109 | 110 | 111 | def train(model, epoch): 112 | model.train() 113 | for batch_idx, (_, inputs, labels) in enumerate(train_loader): 114 | inputs = inputs.to(device) 115 | labels = labels.to(device) 116 | 117 | # zero the parameter gradients 118 | optimizer.zero_grad() 119 | 120 | # forward + backward + optimize 121 | outputs = model(inputs) 122 | _, predicted = torch.max(outputs, 1) 123 | loss = criterion(outputs, labels) 124 | loss.backward() 125 | optimizer.step() 126 | 127 | iteration = epoch * len(train_loader) + batch_idx 128 | if batch_idx % log_interval == 0: # print training stats 129 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}' 130 | .format(epoch, batch_idx * len(inputs), len(train_loader), 131 | 100. * batch_idx / len(train_loader), loss)) 132 | tensorboard_writer.add_scalar('training loss/loss', loss, iteration) 133 | tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration) 134 | 135 | if batch_idx % debug_interval == 0: # report debug image every "debug_interval" mini-batches 136 | for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)): 137 | series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()]) 138 | tensorboard_writer.add_image('Train MelSpectrogram samples/{}_{}_{}'.format(batch_idx, n, series), 139 | plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration) 140 | 141 | 142 | def test_model(model, epoch): 143 | model.eval() 144 | all_predictions = [] 145 | all_labels = [] 146 | with torch.no_grad(): 147 | for idx, (sound_paths, inputs, labels) in enumerate(test_loader): 148 | inputs = inputs.to(device) 149 | labels = labels.to(device) 150 | 151 | outputs = model(inputs) 152 | 153 | _, predicted = torch.max(outputs, 1) 154 | for pred, label in zip(predicted.cpu(), labels.cpu()): 155 | all_predictions.append(int(pred)) 156 | all_labels.append(int(label)) 157 | 158 | iteration = (epoch + 1) * len(train_loader) 159 | if idx % debug_interval == 0: # report debug image every "debug_interval" mini-batches 160 | 161 | for n, (sound_path, inp, pred, label) in enumerate(zip(sound_paths, inputs, predicted, labels)): 162 | sound, sample_rate = torchaudio.load(sound_path, normalize=True) 163 | series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()]) 164 | tensorboard_writer.add_audio('Test audio samples/{}_{}_{}'.format(idx, n, series), 165 | sound.reshape(1, -1), iteration, int(sample_rate)) 166 | tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}_{}'.format(idx, n, series), 167 | plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration) 168 | 169 | tensorboard_writer.add_scalar('f1_score/total', 170 | f1_score(all_labels, all_predictions, average='weighted'), iteration) 171 | ConfusionMatrixDisplay.from_predictions(all_labels, all_predictions) 172 | 173 | 174 | log_interval = 10 175 | debug_interval = 25 176 | for epoch in range(configuration_dict.get('number_of_epochs', 10)): 177 | train(model, epoch) 178 | test_model(model, epoch) 179 | scheduler.step() 180 | 181 | # save model 182 | torch.save(model, os.path.join(gettempdir(), "urbansounds_model.pt")) 183 | --------------------------------------------------------------------------------