├── .gitignore ├── LICENSE ├── README.md ├── asteroid_detection ├── README.md ├── dashboards │ └── asteroid.json ├── data │ ├── database.py │ └── nasa.csv ├── demo │ ├── inference.ipynb │ └── inference.py ├── get_data.py ├── global_config.py ├── images │ ├── alert.png │ ├── dashboard.png │ ├── diagram.png │ └── pipeline.png ├── model_training.py ├── pipeline.py ├── preprocess_data.py ├── preprocess_serving.py └── requirements.txt ├── how_much_data_do_you_really_need ├── README.md ├── category_prevalence.py ├── create_subsets.py ├── predict_diminishing_returns.ipynb └── utils.py ├── once_upon_a_repository ├── README.md ├── engines.py ├── inference_with_model.py ├── requirements.txt ├── torchvision_references │ ├── coco_eval.py │ ├── coco_utils.py │ └── utils.py ├── train_model.py ├── transforms.py └── utilities.py ├── setting_up_allegroai_platform ├── pytorch.mnist_trains.py └── requirements.txt ├── the_hero_rises ├── README.md ├── SSD │ ├── __init__.py │ ├── box_coder.py │ ├── multibox_loss.py │ └── ssd_model.py ├── engines.py ├── inference_with_model.py ├── requirements.txt ├── torchvision_references │ ├── __init__.py │ ├── coco_eval.py │ ├── coco_utils.py │ └── utils.py ├── train_model.py ├── transforms.py └── utilities.py └── urbansounds8k ├── .gitignore ├── README.md ├── assets └── diagram.png ├── get_data.py ├── preprocessing.py ├── requirements.txt └── training.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | 10 | dataset 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pycharm 135 | .idea/ 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # trains-blogs 2 | 3 | This repository contains the codebase mentioned and used in trains' blogs. 
4 | 
5 | The list of blogs includes:
6 | 
7 | * [How Much Data Do You Really Need?: Quantifying Diminishing Returns of Annotated Data](how_much_data_do_you_really_need/)
8 | * The Hero's Journey to Deep Learning CodeBase
9 | * [Blog I: Once Upon a Repository: How to Write Readable, Maintainable Code with PyTorch](once_upon_a_repository/)
10 | * Blog IIA: The Battle between Speed & Accuracy: Single-Shot vs Two-Shot Detection
11 | * [Blog IIB: The Hero Rises: Build Your Own SSD](the_hero_rises/)
12 | * Blog III: Flying with Anchors: Optimize SSD to Your Data
13 | * Blog IV: Happily Ever Deployed: ...
14 | 
15 | 
16 | 
-------------------------------------------------------------------------------- /asteroid_detection/README.md: --------------------------------------------------------------------------------
1 | # Asteroid Hazard Detection Example
2 | 
3 | ## Repository overview
4 | ![repo diagram](images/diagram.png)
5 | This is the big picture overview of the whole setup and the different files that are part of it.
6 | 
7 | `global_config.py` holds the project name so it can be easily changed project-wide.
8 | 
9 | ### Setup ClearML (will only take about 2 minutes)
10 | https://clear.ml/docs/latest/docs/getting_started/ds/ds_first_steps
11 | 
12 | You should now have some application credentials you can use to connect to the experiment manager and the model serving engine. Everything is open source, so you can also set up your own server!
13 | 
14 | ### Experimentation Phase
15 | `get_data.py` will take the CSV file in `data/nasa.csv`, query it as if it were a database (for demo purposes) and upload the resulting file to a versioned ClearML dataset.
16 | 
17 | `preprocess_data.py` will take that new dataset, preprocess the data into X and y dataframes and add them as CSV files to a new version of the dataset.
18 | 
19 | `model_training.py` will ingest that preprocessed version of the dataset, get the X and y data and train an XGBoost model on it.
20 | 
21 | All three of these scripts are tracked with the ClearML experiment manager, and the two datasets are tracked with ClearML Data.
22 | 
23 | ### Productionizing Phase
24 | `pipeline.py` is a ClearML PipelineController that will take the above 3 tracked scripts and chain them together in a pipeline.
25 | ![pipeline UI example](images/pipeline.png)
26 | 
27 | NOTE: running the pipeline requires at least 1 active ClearML agent running on the same or a remote machine, so it can execute the enqueued tasks.
28 | 
29 | ### Deployment Phase
30 | ClearML Serving works primarily through a CLI interface, so there is no code that sets it up.
31 | 
32 | `preprocess_serving.py`, however, is used by the CLI to tell ClearML Serving what pre- and postprocessing should be done when data is sent to and from the model serving engine.
33 | 
34 | To learn how to set up serving in detail, check out the [clearml-serving repository](https://github.com/allegroai/clearml-serving). But for this repo, these steps should get you started:
35 | 
36 | 
37 | 
38 | NOTE: Make sure clearml-serving is installed in the virtual environment you're using.
39 | 
40 | 1. Create a serving task and note down its ID; we'll need it in several of the following steps.
41 | ```
42 | clearml-serving create --name "asteroid serving"
43 | ```
44 | 
45 | 2. Set up the serving stack, in this case using docker-compose (could also be k8s).
46 | 
47 | Clone the clearml-serving repository
48 | ```
49 | git clone https://github.com/allegroai/clearml-serving.git
50 | ```
51 | 
52 | Edit the environment variables in `docker/example.env` to include your ClearML credentials and the serving task ID we copied in step 1.
53 | 
54 | 3. If you need any extra Python packages installed, set them as a comma-separated list in the environment variable `CLEARML_EXTRA_PYTHON_PACKAGES` or overwrite the variable in the `docker-compose.yml` file.
55 | 
56 | 4. Fire it up!
57 | ```
58 | cd docker && docker-compose --env-file example.env -f docker-compose.yml up
59 | ```
60 | 
61 | 5. To deploy the model, go back to this repository and run
62 | ```
63 | clearml-serving --id __YOUR_SERVING_ID__ model add --engine xgboost --endpoint "asteroid" --preprocess "preprocess_serving.py" --name "model training - best_model" --project "Asteroid Hazard Classification"
64 | ```
65 | Change the model name or project accordingly if you're using your own instead of just following along here.
66 | 
67 | 6. Before you can start monitoring the deployed model, you need to tell ClearML which metrics it should log. For that, also use the CLI:
68 | ```
69 | clearml-serving --id __YOUR_SERVING_ID__ metrics add --endpoint "asteroid" --variable-scalar "Absolute Magnitude=11.139,18.14,25.12,32.1"
70 | ```
71 | ```
72 | clearml-serving --id __YOUR_SERVING_ID__ metrics add --endpoint "asteroid" --variable-scalar "Minimum Orbit Intersection=-0.000476,0.159,0.319,0.478"
73 | ```
74 | 7. Go to `http://localhost:3000/` and log in to Grafana with the default admin:admin combo, then change it immediately.
75 | Now you can go to dashboards and import the dashboard from this repository called `asteroid.json`.
76 | Feel free to add alerts and change settings as much as you like :)
77 | 
78 | 8. The Grafana alert used in the demo (very simple and NOT very good) can be made like this:
79 | ```
80 | ((100 * increase(asteroid:Minimum_Orbit_Intersection_bucket{}[1m]) / increase(asteroid:Minimum_Orbit_Intersection_sum{}[1m])) - (100 * increase(asteroid:Minimum_Orbit_Intersection_bucket{}[10m]) / increase(asteroid:Minimum_Orbit_Intersection_sum{}[10m]))) ^ 2
81 | ```
82 | You can change the feature name as well if you want to.
83 | ![Grafana screenshot](images/dashboard.png)
84 | ![Alert screenshot](images/alert.png)
85 | 
86 | ### Testing everything
87 | You can use either the `inference.py` script or the `inference.ipynb` notebook to send some mock data. The notebook should be run at least once to create the mock data.
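
For reference, a single request and response look roughly like this. This is a minimal sketch, not a replacement for `inference.py`: it assumes the serving stack from the steps above is running locally and that the mock data file `data/synthetic.csv` was already generated by the notebook, and the exact response values will differ.

```
import pandas as pd
import requests

# Grab one row of the same mock data that inference.py sends
row = pd.read_csv('data/synthetic.csv').iloc[0].to_dict()

response = requests.post(
    url='http://127.0.0.1:8080/serve/asteroid',
    headers={'accept': 'application/json', 'Content-Type': 'application/json'},
    json=row,
)
# preprocess_serving.py post-processes the model output into something like {"y": 0, "y_raw": 0.03}
print(response.status_code, response.json())
```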
88 | 89 | -------------------------------------------------------------------------------- /asteroid_detection/dashboards/asteroid.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "target": { 12 | "limit": 100, 13 | "matchAny": false, 14 | "tags": [], 15 | "type": "dashboard" 16 | }, 17 | "type": "dashboard" 18 | } 19 | ] 20 | }, 21 | "editable": true, 22 | "fiscalYearStartMonth": 0, 23 | "graphTooltip": 0, 24 | "id": 1, 25 | "links": [], 26 | "liveNow": false, 27 | "panels": [ 28 | { 29 | "cards": {}, 30 | "color": { 31 | "cardColor": "#b4ff00", 32 | "colorScale": "sqrt", 33 | "colorScheme": "interpolateSpectral", 34 | "exponent": 0.5, 35 | "mode": "spectrum" 36 | }, 37 | "dataFormat": "tsbuckets", 38 | "datasource": { 39 | "type": "prometheus", 40 | "uid": "PBFA97CFB590B2093" 41 | }, 42 | "description": "", 43 | "gridPos": { 44 | "h": 12, 45 | "w": 12, 46 | "x": 0, 47 | "y": 0 48 | }, 49 | "heatmap": {}, 50 | "hideZeroBuckets": false, 51 | "highlightCards": true, 52 | "id": 2, 53 | "legend": { 54 | "show": false 55 | }, 56 | "reverseYBuckets": false, 57 | "targets": [ 58 | { 59 | "datasource": { 60 | "type": "prometheus", 61 | "uid": "PBFA97CFB590B2093" 62 | }, 63 | "exemplar": true, 64 | "expr": "100 * increase(asteroid:Minimum_Orbit_Intersection_bucket{}[1m]) / increase(asteroid:Minimum_Orbit_Intersection_sum{}[1m])", 65 | "interval": "", 66 | "legendFormat": "{{le}}", 67 | "refId": "A" 68 | } 69 | ], 70 | "title": "Minimum Orbit Intersection Distribution", 71 | "tooltip": { 72 | "show": true, 73 | "showHistogram": false 74 | }, 75 | "type": "heatmap", 76 | "xAxis": { 77 | "show": true 78 | }, 79 | "yAxis": { 80 | "format": "short", 81 | "logBase": 1, 82 | "show": true 83 | }, 84 | "yBucketBound": "auto" 85 | }, 86 | { 87 | "cards": {}, 88 | "color": { 89 | "cardColor": "#b4ff00", 90 | "colorScale": "sqrt", 91 | "colorScheme": "interpolateSpectral", 92 | "exponent": 0.5, 93 | "mode": "spectrum" 94 | }, 95 | "dataFormat": "tsbuckets", 96 | "datasource": { 97 | "type": "prometheus", 98 | "uid": "PBFA97CFB590B2093" 99 | }, 100 | "gridPos": { 101 | "h": 12, 102 | "w": 12, 103 | "x": 12, 104 | "y": 0 105 | }, 106 | "heatmap": {}, 107 | "hideZeroBuckets": false, 108 | "highlightCards": true, 109 | "id": 4, 110 | "legend": { 111 | "show": false 112 | }, 113 | "reverseYBuckets": false, 114 | "targets": [ 115 | { 116 | "datasource": { 117 | "type": "prometheus", 118 | "uid": "PBFA97CFB590B2093" 119 | }, 120 | "exemplar": true, 121 | "expr": "100 * increase(asteroid:Absolute_Magnitude_bucket{}[1m]) / increase(asteroid:Absolute_Magnitude_sum{}[1m])", 122 | "format": "time_series", 123 | "interval": "", 124 | "legendFormat": "{{le}}", 125 | "refId": "A" 126 | } 127 | ], 128 | "title": "Absolute Magnitude Distribution", 129 | "tooltip": { 130 | "show": true, 131 | "showHistogram": false 132 | }, 133 | "type": "heatmap", 134 | "xAxis": { 135 | "show": true 136 | }, 137 | "yAxis": { 138 | "format": "short", 139 | "logBase": 1, 140 | "show": true 141 | }, 142 | "yBucketBound": "auto" 143 | } 144 | ], 145 | "refresh": "5s", 146 | "schemaVersion": 35, 147 | "style": "dark", 148 | "tags": [], 149 | "templating": { 150 | "list": [] 151 | }, 152 | "time": { 153 | "from": "now-3h", 154 | "to": "now" 155 | }, 156 | "timepicker": {}, 157 | "timezone": "", 
158 | "title": "Asteroid Dashboard", 159 | "uid": "axuPWb9nz", 160 | "version": 4, 161 | "weekStart": "" 162 | } -------------------------------------------------------------------------------- /asteroid_detection/data/database.py: -------------------------------------------------------------------------------- 1 | """This is a mock module and should be replaced with your actual database connector.""" 2 | from pathlib import Path 3 | import pandas as pd 4 | from pandasql import sqldf 5 | from datetime import datetime, timedelta 6 | 7 | 8 | def query_database_to_df(query='SELECT * FROM asteroids'): 9 | # Get the data as CSV 10 | data_path = Path('data/nasa.csv') 11 | out_path = Path('/tmp/nasa.csv') 12 | 13 | # Create a dataframe as mock for the database 14 | asteroids = pd.read_csv(data_path) 15 | 16 | # Add some mock dates 17 | asteroids['date'] = [datetime.now() - i*timedelta(days=1) for i in range(len(asteroids))] 18 | 19 | # Query the df base on the argument 20 | asteroids = sqldf(query, locals()) 21 | 22 | # Save resulting DF to disk so it can be added to a clearml dataset as a file 23 | asteroids.to_csv(out_path) 24 | 25 | return asteroids, out_path -------------------------------------------------------------------------------- /asteroid_detection/demo/inference.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | import argparse 5 | import pandas as pd 6 | from pathlib import Path 7 | 8 | parser = argparse.ArgumentParser(description='Run inference data to serving.') 9 | parser.add_argument('--drift', action='store_true', help='Send drifted data instead of normal data.') 10 | args = parser.parse_args() 11 | 12 | data_path = Path('data/') 13 | 14 | synthetic_df = pd.read_csv(data_path / 'synthetic.csv') 15 | synthetic_df_drift = pd.read_csv(data_path / 'synthetic_drift.csv') 16 | 17 | if args.drift: 18 | print('Sending drifted data') 19 | df = synthetic_df_drift 20 | else: 21 | print("Sending normal data") 22 | df = synthetic_df 23 | 24 | while True: 25 | response = requests.post( 26 | url='http://127.0.0.1:8080/serve/asteroid', 27 | headers={'accept': 'application/json', 'Content-Type': 'application/json'}, 28 | json=df.loc[random.randint(0, len(df) - 1), :].to_dict() 29 | ) 30 | if response.status_code != 200: 31 | print(f"Bad request! 
{response.content}") 32 | time.sleep(random.randrange(0, 1)) -------------------------------------------------------------------------------- /asteroid_detection/get_data.py: -------------------------------------------------------------------------------- 1 | from clearml import Task, Dataset 2 | 3 | import global_config 4 | from data import database 5 | 6 | 7 | task = Task.init( 8 | project_name=global_config.PROJECT_NAME, 9 | task_name='get data', 10 | task_type='data_processing', 11 | reuse_last_task_id=False 12 | ) 13 | 14 | config = { 15 | 'query_date': '2022-01-01' 16 | } 17 | task.connect(config) 18 | 19 | 20 | # Get the data and a path to the file 21 | query = 'SELECT * FROM asteroids WHERE strftime("%Y-%m-%d", `date`) <= strftime("%Y-%m-%d", "{}")'.format(config['query_date']) 22 | df, data_path = database.query_database_to_df(query=query) 23 | print(f"Dataset downloaded to: {data_path}") 24 | print(df.head()) 25 | 26 | # Create a ClearML dataset 27 | dataset = Dataset.create( 28 | dataset_name='raw_asteroid_dataset', 29 | dataset_project=global_config.PROJECT_NAME 30 | ) 31 | # Add the local files we downloaded earlier 32 | dataset.add_files(data_path) 33 | # Let's add some cool graphs as statistics in the plots section! 34 | dataset.get_logger().report_table(title='Asteroid Data', series='head', table_plot=df.head()) 35 | # Finalize and upload the data and labels of the dataset 36 | dataset.finalize(auto_upload=True) 37 | 38 | print(f"Created dataset with ID: {dataset.id}") 39 | print(f"Data size: {len(df)}") 40 | -------------------------------------------------------------------------------- /asteroid_detection/global_config.py: -------------------------------------------------------------------------------- 1 | PROJECT_NAME = 'Project Team NASA' 2 | PIPELINE_NAME = 'NASA Pipeline' -------------------------------------------------------------------------------- /asteroid_detection/images/alert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/alert.png -------------------------------------------------------------------------------- /asteroid_detection/images/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/dashboard.png -------------------------------------------------------------------------------- /asteroid_detection/images/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/diagram.png -------------------------------------------------------------------------------- /asteroid_detection/images/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/asteroid_detection/images/pipeline.png -------------------------------------------------------------------------------- /asteroid_detection/model_training.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import xgboost as xgb 6 | from clearml import Dataset, Task 7 | from 
sklearn.metrics import accuracy_score, recall_score 8 | from sklearn.model_selection import train_test_split 9 | from xgboost import plot_importance 10 | 11 | # Connecting ClearML with the current process, 12 | # from here on everything is logged automatically 13 | import global_config 14 | 15 | task = Task.init( 16 | project_name=global_config.PROJECT_NAME, 17 | task_name='model training', 18 | output_uri=True 19 | ) 20 | 21 | # Set default docker 22 | task.set_base_docker(docker_image="python:3.7") 23 | 24 | # Training args 25 | training_args = { 26 | 'eval_metric': "rmse", 27 | 'objective': 'reg:squarederror', 28 | 'test_size': 0.2, 29 | 'random_state': 42, 30 | 'num_boost_round': 100 31 | } 32 | task.connect(training_args) 33 | 34 | # Load our Dataset 35 | local_path = Dataset.get( 36 | dataset_name='preprocessed_asteroid_dataset', 37 | dataset_project=global_config.PROJECT_NAME 38 | ).get_local_copy() 39 | local_path = Path(local_path) 40 | # local_path = Path('data/preprocessed_data') 41 | X = pd.read_csv(local_path / 'X.csv', index_col=0) 42 | y = pd.read_csv(local_path / 'y.csv', index_col=0) 43 | 44 | # Split data 45 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=training_args['test_size'], random_state=training_args['random_state']) 46 | dtrain = xgb.DMatrix(X_train, label=y_train) 47 | dtest = xgb.DMatrix(X_test, label=y_test) 48 | 49 | # Train 50 | bst = xgb.train( 51 | training_args, 52 | dtrain, 53 | num_boost_round=training_args['num_boost_round'], 54 | evals=[(dtrain, "train"), (dtest, "test")], 55 | verbose_eval=0 56 | ) 57 | 58 | bst.save_model("best_model") 59 | plot_importance(bst) 60 | plt.show() 61 | 62 | preds = bst.predict(dtest) 63 | predictions = [round(value) for value in preds] 64 | accuracy = accuracy_score(y_test['Hazardous'].to_list(), predictions) 65 | recall = recall_score(y_test['Hazardous'].to_list(), predictions) 66 | print(f"Model trained with accuracy: {accuracy} and recall: {recall}") 67 | # Save the actual accuracy as an artifact so we can get it as part of the pipeline 68 | task.get_logger().report_scalar( 69 | title='Performance', 70 | series='Accuracy', 71 | value=accuracy, 72 | iteration=0 73 | ) 74 | task.get_logger().report_scalar( 75 | title='Performance', 76 | series='Recall', 77 | value=recall, 78 | iteration=0 79 | ) 80 | print("Done") 81 | -------------------------------------------------------------------------------- /asteroid_detection/pipeline.py: -------------------------------------------------------------------------------- 1 | from platform import node 2 | from clearml import Task 3 | from clearml.automation import PipelineController 4 | 5 | import global_config 6 | 7 | 8 | def pre_execute_callback_example(a_pipeline, a_node, current_param_override): 9 | # type (PipelineController, PipelineController.Node, dict) -> bool 10 | print('Cloning Task id={} with parameters: {}'.format(a_node.base_task_id, current_param_override)) 11 | # if we want to skip this node (and subtree of this node) we return False 12 | # return True to continue DAG execution 13 | return True 14 | 15 | 16 | def post_execute_callback_example(a_pipeline, a_node): 17 | # type (PipelineController, PipelineController.Node) -> None 18 | print('Completed Task id={}'.format(a_node.executed)) 19 | # if we need the actual executed Task: Task.get_task(task_id=a_node.executed) 20 | return 21 | 22 | 23 | def compare_metrics_and_publish_best(**kwargs): 24 | from clearml import OutputModel 25 | # Keep track of best node details 26 | current_best = dict() 
27 | 28 | # For each incoming node, compare against current best 29 | for node_name, training_task_id in kwargs.items(): 30 | # Get the original task based on the ID we got from the pipeline 31 | task = Task.get_task(task_id=training_task_id) 32 | accuracy = task.get_reported_scalars()['Performance']['Accuracy']['y'][0] 33 | model_id = task.get_models()['output'][0].id 34 | # Check if accuracy is better than current best, if so, overwrite current best 35 | if accuracy > current_best.get('accuracy', 0): 36 | current_best['accuracy'] = accuracy 37 | current_best['node_name'] = node_name 38 | current_best['model_id'] = model_id 39 | print(f"New current best model: {node_name}") 40 | 41 | # Print the final best model details and log it as an output model on this step 42 | print(f"Final best model: {current_best}") 43 | OutputModel(name="best_pipeline_model", base_model_id=current_best.get('model_id'), tags=['pipeline_winner']) 44 | 45 | 46 | # Connecting ClearML with the current pipeline, 47 | # from here on everything is logged automatically 48 | pipe = PipelineController( 49 | name=global_config.PIPELINE_NAME, 50 | project=global_config.PROJECT_NAME, 51 | version='0.0.1' 52 | ) 53 | 54 | pipe.set_default_execution_queue('CPU Queue') 55 | pipe.add_parameter('training_seeds', [42, 420, 500]) 56 | pipe.add_parameter('query_date', '2022-01-01') 57 | 58 | pipe.add_step( 59 | name='get_data', 60 | base_task_project=global_config.PROJECT_NAME, 61 | base_task_name='get data', 62 | parameter_override={'General/query_date': '${pipeline.query_date}'} 63 | ) 64 | pipe.add_step( 65 | name='preprocess_data', 66 | parents=['get_data'], 67 | base_task_project=global_config.PROJECT_NAME, 68 | base_task_name='preprocess data', 69 | pre_execute_callback=pre_execute_callback_example, 70 | post_execute_callback=post_execute_callback_example 71 | ) 72 | training_nodes = [] 73 | # Seeds should be pipeline arguments 74 | # Don't change these when doing new run 75 | for i, random_state in enumerate(pipe.get_parameters()['training_seeds']): 76 | node_name = f'model_training_{i}' 77 | training_nodes.append(node_name) 78 | pipe.add_step( 79 | name=node_name, 80 | parents=['preprocess_data'], 81 | base_task_project=global_config.PROJECT_NAME, 82 | base_task_name='model training', 83 | parameter_override={'General/num_boost_round': 250, 84 | 'General/test_size': 0.5, 85 | 'General/random_state': random_state} 86 | ) 87 | 88 | pipe.add_function_step( 89 | name='select_best_model', 90 | parents=training_nodes, 91 | function=compare_metrics_and_publish_best, 92 | function_kwargs={node_name: '${%s.id}' % node_name for node_name in training_nodes}, 93 | monitor_models=["best_pipeline_model"] 94 | ) 95 | 96 | 97 | # for debugging purposes use local jobs 98 | # pipe.start_locally(run_pipeline_steps_locally=True) 99 | # Starting the pipeline (in the background) 100 | pipe.start() 101 | 102 | print('Done!') 103 | -------------------------------------------------------------------------------- /asteroid_detection/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | from clearml import Dataset, Task 6 | 7 | import global_config 8 | 9 | task = Task.init( 10 | project_name=global_config.PROJECT_NAME, 11 | task_name='preprocess data', 12 | task_type='data_processing', 13 | reuse_last_task_id=False 14 | ) 15 | 16 | # Create the folder we'll output the preprocessed data into 17 | preprocessed_data_folder = 
Path('/tmp') 18 | if not os.path.exists(preprocessed_data_folder): 19 | os.makedirs(preprocessed_data_folder) 20 | 21 | # Get the dataset 22 | dataset = Dataset.get( 23 | dataset_project=global_config.PROJECT_NAME, 24 | dataset_name='raw_asteroid_dataset', 25 | ) 26 | local_folder = dataset.get_local_copy() 27 | print(f"Using dataset ID: {dataset.id}") 28 | 29 | # Clean up the data a little bit 30 | df = pd.read_csv((Path(local_folder) / 'nasa.csv')) 31 | df['avg_dia'] = df[['Est Dia in KM(min)', 'Est Dia in KM(max)']].mean(axis=1) 32 | X = df[['Absolute Magnitude', 'avg_dia', 'Relative Velocity km per hr', 'Miss Dist.(kilometers)', 'Orbit Uncertainity', 33 | 'Minimum Orbit Intersection', 'Jupiter Tisserand Invariant', 'Epoch Osculation', 'Eccentricity', 'Semi Major Axis', 34 | 'Inclination', 'Asc Node Longitude', 'Orbital Period', 'Perihelion Distance', 'Perihelion Arg', 35 | 'Aphelion Dist', 'Perihelion Time', 'Mean Anomaly', 'Mean Motion']] 36 | X.to_csv(path_or_buf=preprocessed_data_folder / 'X.csv') 37 | print(f"Preprocessed data X") 38 | print(X.head()) 39 | 40 | y = pd.DataFrame(df['Hazardous'].astype(int)) 41 | y.to_csv(path_or_buf=preprocessed_data_folder / 'y.csv') 42 | print(f"Preprocessed data y") 43 | print(y.head()) 44 | 45 | # Create a new version of the dataset, which is cleaned up 46 | new_dataset = Dataset.create( 47 | dataset_project=dataset.project, 48 | dataset_name='preprocessed_asteroid_dataset', 49 | parent_datasets=[dataset] 50 | ) 51 | new_dataset.add_files(preprocessed_data_folder / 'X.csv') 52 | new_dataset.add_files(preprocessed_data_folder / 'y.csv') 53 | new_dataset.get_logger().report_table(title='X data', series='head', table_plot=X.head()) 54 | new_dataset.get_logger().report_table(title='y data', series='head', table_plot=y.head()) 55 | new_dataset.finalize(auto_upload=True) 56 | 57 | # Log to console which dataset ID was created 58 | print(f"Created preprocessed dataset with ID: {new_dataset.id}") 59 | -------------------------------------------------------------------------------- /asteroid_detection/preprocess_serving.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import xgboost as xgb 6 | 7 | 8 | # Notice Preprocess class Must be named "Preprocess" 9 | class Preprocess(object): 10 | def __init__(self): 11 | # set internal state, this will be called only once. (i.e. 
not per request) 12 | pass 13 | 14 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 15 | df = pd.DataFrame(columns=body.keys()) 16 | df.loc[0] = body.values() 17 | df['avg_dia'] = df[['Est Dia in KM(min)', 'Est Dia in KM(max)']].mean(axis=1) 18 | X = df[['Absolute Magnitude', 'avg_dia', 'Relative Velocity km per hr', 'Miss Dist.(kilometers)', 'Orbit Uncertainity', 19 | 'Minimum Orbit Intersection', 'Jupiter Tisserand Invariant', 'Epoch Osculation', 'Eccentricity', 'Semi Major Axis', 20 | 'Inclination', 'Asc Node Longitude', 'Orbital Period', 'Perihelion Distance', 'Perihelion Arg', 21 | 'Aphelion Dist', 'Perihelion Time', 'Mean Anomaly', 'Mean Motion']] 22 | # we expect to get four valid numbers on the dict: x0, x1, x2, x3 23 | return xgb.DMatrix(X) 24 | 25 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 26 | # post process the data returned from the model inference engine 27 | # data is the return value from model.predict we will put is inside a return value as Y 28 | return dict(y=round(data[0]), y_raw=float(data[0])) 29 | -------------------------------------------------------------------------------- /asteroid_detection/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy~=1.24.1 2 | clearml~=1.9.1 3 | scikit-learn~=1.2.1 4 | xgboost~=1.7.3 5 | pandas~=1.5.3 6 | matplotlib~=3.6.3 7 | requests~=2.28.2 8 | pandasql~=0.7.3 -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/README.md: -------------------------------------------------------------------------------- 1 | # How Much Data Do You Really Need? 2 | ## [Quantifying Diminishing Returns of Annotated Data](https://towardsdatascience.com/how-much-data-do-you-really-need-8c02a59394b) 3 | 4 | Deep learning models are notorious for their endless appetite for training data. The process of acquiring high quality annotated data consumes many types of resources — mostly cash. The growing amounts of data as the machine learning projects progress, lead to other undesired consequences, such as slowing down all of R&D. Therefore, veteran project leaders always look at the overall performance gains brought upon by additional increments of their dataset. More often than not, especially if the new data is relatively similar to the existing one, one will encounter the phenomena of Diminishing Returns. 5 | 6 | The law of diminishing returns states that when you continuously add more and more input in a productive process, it will actually yield progressively smaller increases in output. This phenomena was mentioned by 18th century economist such as Turgot and Adam Smith and articulated in 1815 by the British economist David Ricardo. When addressing the influence of training data volume on model performance, the law of diminishing returns suggests that each increment in train set size will tend to contribute less to the predetermined success metrics. 7 | 8 | When a project leader is able to monitor, and even quantify, the diminishing returns effect in their machine learning project, they are able to attain finer degrees of control throughout its lifetime. For example: estimating how much data is required to reach the project goal; avoiding redundant training sessions; or even predicting whether the current model architecture will be able to achieve the target metric. 
This knowledge effectively provides a tool for optimal management of time, manpower, and computing resources. -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/category_prevalence.py: -------------------------------------------------------------------------------- 1 | """ 2 | PYTHON VERSION: python3.6 3 | 4 | Calculating and visualizing categories_prevalence in a dataset. 5 | The dataset metadeta should be given as a json file in either COCO of BDD format. 6 | 7 | Usage: 8 | 1. JSON_PATH=path/to/bdd100k_labels_images_train.json 9 | category_prevalence.py --dataset-format BDD --json-path JSON_PATH 10 | 2. JSON_PATH=path/to/coco/annotations/instances_train2017.json 11 | category_prevalence.py --dataset-format COCO --json-path JSON_PATH --wanted-categories person,bike,bird,dog 12 | 13 | Requirements: 14 | - trains 15 | - numpy 16 | - seaborn 17 | """ 18 | import argparse 19 | import json 20 | from pathlib import Path 21 | 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | import seaborn as sns 25 | from utils import voco_categories, bdd_things_categories 26 | from trains import Task 27 | 28 | 29 | Task.init( 30 | project_name="Quantify diminishing Returns", 31 | task_name="Class Distribution without axis labels", 32 | ) 33 | 34 | 35 | def bdd_class_distribution(json_path: Path) -> dict: 36 | """ 37 | Create and save a dictionary, with the key being the image name and the value the metadata. 38 | :param json_path: path to the BDD labels json file. 39 | :return: A dictionary. Key: Category name. Value: number of appearances. 40 | """ 41 | category_dict = {} 42 | with open(json_path, "r") as f: 43 | entries_list = np.asarray(json.load(f)) 44 | for entry in entries_list: 45 | for label in entry["labels"]: 46 | category = label["category"] 47 | 48 | category_dict[category] = category_dict.get(category, 0) + 1 49 | return category_dict 50 | 51 | 52 | def coco_id_to_category_name(categories: dict) -> dict: 53 | """ 54 | Creates a dictionary that gives the category name given its COCO id. 55 | :param categories: the categories dictionary from COCO's JSON file. 56 | :return: Dictionary: id -> category_name. 57 | """ 58 | id_to_name_dict = {entry["id"]: entry["name"] for entry in categories} 59 | return id_to_name_dict 60 | 61 | 62 | def coco_class_distribution(json_path: Path) -> dict: 63 | """ 64 | Counts number of accurences for each class in a dataset metadeta coded 65 | in COCO-style JSON (COCO's instance JSON file). 66 | :param json_path: Path to the JSON file contains the metadata. 67 | :return: A dictionary. Key: Category name. Value: number of appearances. 68 | """ 69 | category_dict = {} 70 | with open(json_path, "r") as f: 71 | data_dict = json.load(f) 72 | annotation_list = data_dict["annotations"] 73 | categories = data_dict["categories"] 74 | id_to_category_dict = coco_id_to_category_name(categories) 75 | for entry in annotation_list: 76 | category = id_to_category_dict[entry["category_id"]] 77 | category_dict[category] = category_dict.get(category, 0) + 1 78 | return category_dict 79 | 80 | 81 | def plot_doughnut(category_dict: dict): 82 | """ 83 | Plots a doughnut chart of the categories prevalence. 84 | :param category_dict: category name -> number of occurrence. 
85 | """ 86 | fig, ax = plt.subplots(figsize=(8, 4), subplot_kw=dict(aspect="equal")) 87 | fractions = list(category_dict.values()) 88 | wedges, texts = ax.pie(fractions, wedgeprops=dict(width=0.5), startangle=-40) 89 | legend_labels = [ 90 | f"{label}: {fraction}" for label, fraction in category_dict.items() 91 | ] 92 | ax.legend( 93 | wedges, 94 | legend_labels, 95 | title="Categories", 96 | loc="center left", 97 | bbox_to_anchor=(1, 0, 0.5, 1), 98 | ) 99 | ax.set_title("Class prevalence - BDD") 100 | plt.savefig(fname="class prevalence", dpi=200) 101 | plt.show() 102 | 103 | 104 | ########### 105 | ## Plots ## 106 | ########### 107 | 108 | 109 | def plot_hist(category_dict: dict): 110 | """ 111 | Plot labels histogram using seaborn. 112 | """ 113 | labels = list(category_dict) 114 | fractions = list(category_dict.values()) 115 | sns.barplot(x=labels, y=fractions) 116 | plt.show() 117 | 118 | 119 | def plot_bars_matplotlib(category_dict: dict): 120 | """ 121 | Plot labels histogram using matplotlib. 122 | """ 123 | labels = list(category_dict) 124 | fractions = list(category_dict.values()) 125 | index = np.arange(len(labels)) 126 | plt.bar(index, fractions) 127 | plt.xlabel("Class", fontsize=5) 128 | plt.ylabel("Number of Appearances", fontsize=5) 129 | plt.xticks(index, labels, fontsize=5, rotation=80) 130 | plt.title("Class Prevalence") 131 | plt.show() 132 | 133 | 134 | def plot_bars(category_dict: dict): 135 | """ 136 | Plot barplots which appears nicely on trains server as plotly object. 137 | :param category_dict: 138 | :return: 139 | """ 140 | labels = list(category_dict) 141 | fractions = list(category_dict.values()) 142 | plt.bar(labels, fractions) 143 | plt.xlabel("Class") 144 | plt.ylabel("Number of Appearances") 145 | plt.title("Class Prevalence") 146 | plt.show() 147 | 148 | 149 | def class_sieve(category_dict: dict, to_keep: set): 150 | """ 151 | In-place function that leave in 'category_dict' 152 | only the counting of the classes appearing in 'to_keep' 153 | :param category_dict: Key: Category name. Value: number of appearances. 154 | :param to_keep: set of labels to keep in 'category dict' 155 | """ 156 | return {key: value for key, value in category_dict.items() if key in to_keep} 157 | 158 | 159 | def parse_args(): 160 | parser = argparse.ArgumentParser(description=__doc__) 161 | parser.add_argument( 162 | "--dataset-format", 163 | choices=["COCO", "BDD"], 164 | help="the format of the dataset metadeta", 165 | ) 166 | parser.add_argument( 167 | "--json-path", 168 | type=Path, 169 | help="Path to the metadata, saved in json format. " 170 | "For example, in the BDD dataset, bdd100k_labels_images_train.json " 171 | "or bdd100k_labels_images_validation.json" 172 | "files are possible inputs.", 173 | ) 174 | parser.add_argument( 175 | "--wanted-categories", 176 | help="The categories on which you wish to calculate the statistics," 177 | "separated by a comma." 
178 | "If None, all the categories of the dataset will be considered.", 179 | ) 180 | return parser.parse_args() 181 | 182 | 183 | def main(): 184 | args = parse_args() 185 | if args.dataset_format == "COCO": 186 | category_count = coco_class_distribution(args.json_path) 187 | if not args.wanted_categories: 188 | wanted_categoris = voco_categories 189 | elif args.dataset_format == "BDD": 190 | category_count = bdd_class_distribution(args.json_path) 191 | if not args.wanted_categories: 192 | wanted_categoris = bdd_things_categories 193 | if args.wanted_categories: 194 | wanted_categoris = args.wanted_categories.split(",") 195 | category_count = class_sieve(category_count, wanted_categoris) 196 | # plot_bars(category_count) 197 | plot_doughnut(category_count) 198 | 199 | 200 | if __name__ == "__main__": 201 | main() 202 | -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/create_subsets.py: -------------------------------------------------------------------------------- 1 | """ 2 | PYTHON VERSION: python3.6 3 | 4 | Create subsets of a dataset's metadeta or data. Each subsets contains the smaller predecessor subsets. 5 | For example, all the data within a subset of 1% of the data, presented in the 10%-subset as well. 6 | All the in the data within the 10%-subset is contained within the 20%-subset and so on. 7 | 8 | Usage: 9 | category_prevalence.py --original-json-path path/to/bdd100k_labels_images_train.json --output-directory path/to/output/directory 10 | 11 | Requirements: 12 | - trains 13 | - numpy 14 | """ 15 | import json 16 | from argparse import ArgumentParser 17 | from typing import Sequence 18 | 19 | import numpy as np 20 | from pathlib import Path 21 | 22 | 23 | def get_datafile_and_number_of_entries(json_file, dataset_fomat: str): 24 | """ 25 | Given a read json file and a dataset format, this function 26 | return the metadata in the usable format and counts how many entries are there in the metadata. 27 | :param json_file: Metadata content in a python dictionary. 28 | :param dataset_fomat: format of the dataset metadata 29 | :return: tuple: (datafile, number of entries in this datafile) 30 | """ 31 | if dataset_fomat == "BDD": 32 | datafile = np.asarray(json_file) 33 | return datafile, len(datafile) 34 | elif dataset_fomat == "COCO": 35 | return json_file, len(json_file["images"]) 36 | 37 | 38 | def get_sub_dataset( 39 | image_array, 40 | entries_array: np.ndarray, 41 | fraction: float, 42 | number_of_entries: int, 43 | dataset_format: str, 44 | annotations_array=None, 45 | data_dict=None, 46 | ): 47 | if dataset_format == "BDD": 48 | return list(image_array[entries_array[: int(fraction * number_of_entries)]]) 49 | elif dataset_format == "COCO": 50 | image_entry_list = image_array[ 51 | entries_array[: int(fraction * number_of_entries)] 52 | ] 53 | annotation_entry_list = annotations_array[ 54 | entries_array[: int(fraction * number_of_entries)] 55 | ] 56 | sub_dataset_dict = { 57 | "info": data_dict["info"], 58 | "licenses": data_dict["licenses"], 59 | "images": list(image_entry_list), 60 | "annotations": list(annotation_entry_list), 61 | "categories": data_dict["categories"], 62 | } 63 | return sub_dataset_dict 64 | 65 | 66 | def create_subsets( 67 | input_json_path: Path, 68 | output_directory: Path, 69 | fraction_array: Sequence[float], 70 | dataset_format: str, 71 | ): 72 | """ 73 | Creates sub sets of BDD metadata. 74 | :param input_json_path: BDD labels JSON file. 
75 | :param output_directory: Folder to save the BDD metadata sub-sets. 76 | :param fraction_array: Array contains the sizes of the sub datasets. 77 | The sizes are brought as fractions of the original dataset. 78 | """ 79 | with open(input_json_path, "r") as f: 80 | datafile, number_of_entries = get_datafile_and_number_of_entries( 81 | json.load(f), dataset_format 82 | ) 83 | entries_array = np.random.permutation(number_of_entries) 84 | data_dict = datafile if dataset_format == "COCO" else None 85 | image_array = ( 86 | np.asarray(data_dict["images"]) if dataset_format == "COCO" else datafile 87 | ) 88 | annotations_array = ( 89 | np.asarray(data_dict["annotations"]) if dataset_format == "COCO" else None 90 | ) 91 | for fraction in fraction_array: 92 | with open(output_directory / f"fraction_of_{fraction}", "w") as outfile: 93 | sub_dataset = get_sub_dataset( 94 | image_array=image_array, 95 | entries_array=entries_array, 96 | fraction=fraction, 97 | number_of_entries=number_of_entries, 98 | dataset_format=dataset_format, 99 | annotations_array=annotations_array, 100 | data_dict=data_dict, 101 | ) 102 | json.dump(obj=sub_dataset, fp=outfile) 103 | 104 | 105 | def parse_args(): 106 | parser = ArgumentParser(description=__doc__) 107 | parser.add_argument( 108 | "--dataset-format", 109 | choices=["COCO", "BDD"], 110 | help="the format of the dataset metadeta", 111 | ) 112 | parser.add_argument( 113 | "--original-json-path", 114 | help="Path to json file. This file should hold all metadata (or data) instances" 115 | " as entries in a single Python list", 116 | type=Path, 117 | ) 118 | parser.add_argument( 119 | "--output-directory", type=Path, help="Folder to save the metadata sub-sets." 120 | ) 121 | parser.add_argument( 122 | "--fraction-array", 123 | type=list, 124 | default=[i / 10 for i in range(1, 11)], 125 | help="Array contains the sizes of the sub datasets." 
126 | " The sizes are brought as fractions of the original dataset.", 127 | ) 128 | return parser.parse_args() 129 | 130 | 131 | def main(): 132 | args = parse_args() 133 | create_subsets( 134 | input_json_path=args.original_json_path, 135 | output_directory=args.output_directory, 136 | fraction_array=args.fraction_array, 137 | dataset_format=args.dataset_format, 138 | ) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /how_much_data_do_you_really_need/utils.py: -------------------------------------------------------------------------------- 1 | voco_categories = [ 2 | "airplane", 3 | "bicycle", 4 | "bird", 5 | "boat", 6 | "bottle", 7 | "bus", 8 | "car", 9 | "cat", 10 | "chair", 11 | "cow", 12 | "dining table", 13 | "dog", 14 | "horse", 15 | "motorcycle", 16 | "person", 17 | "potted plant", 18 | "sheep", 19 | "couch", 20 | "train", 21 | "tv", 22 | ] 23 | 24 | bdd_things_categories = [ 25 | "bike", 26 | "bus", 27 | "car", 28 | "motor", 29 | "person", 30 | "traffic light", 31 | "traffic sign", 32 | "train", 33 | "truck", 34 | "rider" 35 | ] -------------------------------------------------------------------------------- /once_upon_a_repository/README.md: -------------------------------------------------------------------------------- 1 | # The Hero’s Journey to Deep Learning CodeBase 2 | ## [Blog I: Once Upon a Repository: How to Write Readable, Maintainable Code with PyTorch](https://medium.com/p/once-upon-a-repository-how-to-write-readable-maintainable-code-with-pytorch-951f03f6a829?source=email-679430f47f06--writer.postDistributed&sk=3a6953df05559b11fbbc35a258e75ec0) 3 | 4 | We all aim to write a maintainable and modular codebase that supports the R&D process from research to production. Key to an efficient and successful deep learning project, this is not an easy feat. That is why we decided to write this blog series -- to share our experience from numerous deep learning projects and demonstrate the way to achieve this goal using open source tools. 5 | 6 | Our first post in this series is a tutorial on how to leverage the PyTorch ecosystem and Allegro Trains experiments manager to easily write a readable and maintainable computer vision code tailored for your needs. We focus on two packages from the PyTorch ecosystem, Torchvision and Ignite. Torchvision is a popular package consisting of popular datasets wrappers, model architectures, and common image transformations for computer vision. Ignite is a new library that enables simple and clean adding of metrics reports, early-stopping, model checkpointing and other features to your training loop. In this post, we write a codebase that trains and evaluates a Mask-RCNN model on the COCO dataset. We then register the training data (loss, accuracy, etc) to a Pytorch native Tensorboard and use Allegro Trains experiment & autoML manager to manage and track our training experiments. Through these steps, we achieve a seamless, organized, and productive model training flow. 
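
To make the moving parts more concrete before diving into the code, here is a minimal, self-contained sketch of how an Ignite engine, TensorBoard and Trains fit together. It is not the repository's actual training code (that lives in `train_model.py` and `engines.py`); the project name, task name and the fake loss below are placeholders.

```
from ignite.engine import Engine, Events
from torch.utils.tensorboard import SummaryWriter
from trains import Task

# Initializing a Task is enough for Trains to automatically capture the TensorBoard scalars
task = Task.init(project_name='Toy Project', task_name='toy ignite example')
writer = SummaryWriter(log_dir='/tmp/tensorboard_logs')


def update_model(engine, batch):
    # A real update step would run the forward/backward pass and step the optimizer;
    # here we just return a fake, decreasing loss value
    return {'loss': 1.0 / (1 + engine.state.iteration)}


trainer = Engine(update_model)


@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    # Everything written to TensorBoard also shows up in the Trains web UI
    writer.add_scalar('training/loss', engine.state.output['loss'], engine.state.iteration)


trainer.run(data=[0] * 10, max_epochs=2)
```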
7 | -------------------------------------------------------------------------------- /once_upon_a_repository/engines.py: -------------------------------------------------------------------------------- 1 | import math 2 | import copy 3 | import torch 4 | 5 | from ignite.engine import Engine 6 | from torchvision_references import utils 7 | 8 | 9 | def create_trainer(model, device): 10 | def update_model(engine, batch): 11 | images, targets = copy.deepcopy(batch) 12 | images_model, targets_model = prepare_batch(batch, device=device) 13 | 14 | loss_dict = model(images_model, targets_model) 15 | losses = sum(loss for loss in loss_dict.values()) 16 | 17 | # reduce losses over all GPUs for logging purposes 18 | loss_dict_reduced = utils.reduce_dict(loss_dict) 19 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 20 | 21 | loss_value = losses_reduced.item() 22 | 23 | engine.state.optimizer.zero_grad() 24 | if not math.isfinite(loss_value): 25 | print("Loss is {}, resetting loss and skipping training iteration".format(loss_value)) 26 | print('Loss values were: ', loss_dict_reduced) 27 | print('Input labels were: ', [target['labels'] for target in targets]) 28 | print('Input boxes were: ', [target['boxes'] for target in targets]) 29 | loss_dict_reduced = {k: torch.tensor(0) for k, v in loss_dict_reduced.items()} 30 | else: 31 | losses.backward() 32 | engine.state.optimizer.step() 33 | 34 | if engine.state.warmup_scheduler is not None: 35 | engine.state.warmup_scheduler.step() 36 | 37 | images_model = targets_model = None 38 | 39 | return images, targets, loss_dict_reduced 40 | return Engine(update_model) 41 | 42 | 43 | def create_evaluator(model, device): 44 | def update_model(engine, batch): 45 | images, targets = prepare_batch(batch, device=device) 46 | images_model = copy.deepcopy(images) 47 | 48 | torch.cuda.synchronize() 49 | with torch.no_grad(): 50 | outputs = model(images_model) 51 | 52 | outputs = [{k: v.to(device) for k, v in t.items()} for t in outputs] 53 | 54 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 55 | engine.state.coco_evaluator.update(res) 56 | 57 | images_model = outputs = None 58 | 59 | return images, targets, res 60 | return Engine(update_model) 61 | 62 | 63 | def prepare_batch(batch, device=None): 64 | images, targets = batch 65 | images = list(image.to(device, non_blocking=True) for image in images) 66 | targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets] 67 | return images, targets 68 | -------------------------------------------------------------------------------- /once_upon_a_repository/inference_with_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | import torch 6 | from torch.utils.tensorboard import SummaryWriter 7 | 8 | from argparse import ArgumentParser 9 | from pathlib2 import Path 10 | 11 | from utilities import get_iou_types, draw_boxes, get_model_instance_segmentation, CocoLikeAnnotations 12 | from torchvision_references import utils 13 | from torchvision.transforms import functional as F 14 | 15 | from PIL import Image 16 | from transforms import get_transform 17 | 18 | from trains import Task 19 | task = Task.init(project_name='Object Detection with TRAINS, Ignite and TensorBoard', 20 | task_name='Inference with trained model') 21 | 22 | 23 | def rescale_box(box, image_size, orig_height, orig_width): 24 | rescale_height = float(orig_height) / image_size 25 | 
rescale_width = float(orig_width) / image_size 26 | box[0::2] *= rescale_width 27 | box[1::2] *= rescale_height 28 | return box 29 | 30 | 31 | def run(task_args): 32 | writer = SummaryWriter(log_dir=task_args.log_dir) 33 | input_checkpoint = torch.load(task_args.input_checkpoint) 34 | labels_enum = input_checkpoint.get('labels_enumeration') 35 | model_configuration = input_checkpoint.get('configuration') 36 | model_weights = input_checkpoint.get('model') 37 | image_size = model_configuration.get('image_size') 38 | 39 | # Set the training device to GPU if available - if not set it to CPU 40 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 41 | torch.backends.cudnn.benchmark = True if torch.cuda.is_available() else False # optimization for fixed input size 42 | 43 | model = get_model_instance_segmentation(model_configuration.get('num_classes'), 44 | model_configuration.get('mask_predictor_hidden_layer')) 45 | 46 | # if there is more than one GPU, parallelize the model 47 | if torch.cuda.device_count() > 1: 48 | print("{} GPUs were detected - we will use all of them".format(torch.cuda.device_count())) 49 | model = torch.nn.DataParallel(model) 50 | 51 | # copy the model to each device 52 | model.to(device) 53 | 54 | # Determine which IoU types the model supports 55 | iou_types = get_iou_types(model) 56 | use_mask = True if "segm" in iou_types else False 57 | 58 | # Load pretrained model weights 59 | model.load_state_dict(model_weights) 60 | 61 | # set the model to inference mode 62 | model.eval() 63 | 64 | images_paths = [] 65 | for file_type in ('*.png', '*.jpg', '*.jpeg'): 66 | images_paths.extend(glob.glob(os.path.join(task_args.input_dataset_root, file_type))) 67 | 68 | transforms = get_transform(train=False, image_size=image_size) 69 | 70 | path_to_json = os.path.join(task_args.output_dir, "inference_results.json") 71 | coco_like_anns = CocoLikeAnnotations() 72 | batch_images = [] 73 | batch_paths = [] 74 | batch_shapes = [] 75 | 76 | for i, image_path in enumerate(images_paths): 77 | img = Image.open(image_path).convert('RGB') 78 | batch_shapes.append({'height': img.height, 'width': img.width}) 79 | img, __ = transforms(img) 80 | batch_images.append(img) 81 | batch_paths.append(image_path) 82 | if len(batch_images) < task_args.batch_size: 83 | continue 84 | 85 | input_images = torch.stack(batch_images) 86 | 87 | with torch.no_grad(): 88 | torch_out = model(input_images.to(device)) 89 | 90 | for img_num, image in enumerate(input_images): 91 | valid_detections = torch_out[img_num].get('scores') >= task_args.detection_thresh 92 | img_boxes = torch_out[img_num].get('boxes')[valid_detections].cpu().numpy() 93 | img_labels_ids = torch_out[img_num].get('labels')[valid_detections].cpu().numpy() 94 | img_labels = [labels_enum[label]['name'] for label in img_labels_ids] 95 | image_id = (i + 1 - task_args.batch_size + img_num) 96 | orig_height = batch_shapes[img_num].get('height') 97 | orig_width = batch_shapes[img_num].get('width') 98 | 99 | coco_like_anns.update_images(file_name=Path(batch_paths[img_num]).name, 100 | height=orig_height, width=orig_width, 101 | id=image_id) 102 | 103 | for box, label, label_id in zip(img_boxes, img_labels, img_labels_ids): 104 | orig_box = rescale_box(image_size=image_size, orig_height=orig_height, orig_width=orig_width, box=box.copy()) 105 | coco_like_anns.update_annotations(box=orig_box, label_id=label_id, 106 | image_id=image_id) 107 | 108 | if ((i+1)/task_args.batch_size) % task_args.log_interval == 0: 109 | print('Batch {}: Saving 
detections of file {} to {}'.format(int((i+1)/task_args.batch_size), 110 | Path(batch_paths[img_num]).name, 111 | path_to_json)) 112 | 113 | if ((i+1)/task_args.batch_size) % task_args.debug_images_interval == 0: 114 | debug_image = draw_boxes(np.array(F.to_pil_image(image.cpu())), img_boxes, img_labels, color=(0, 150, 0)) 115 | writer.add_image("inference/image_{}".format(img_num), debug_image, ((i+1)/task_args.batch_size), 116 | dataformats='HWC') 117 | 118 | batch_images = [] 119 | batch_paths = [] 120 | 121 | coco_like_anns.dump_to_json(path_to_json=path_to_json) 122 | 123 | 124 | if __name__ == "__main__": 125 | parser = ArgumentParser() 126 | parser.add_argument('--batch_size', type=int, default=4, 127 | help='input batch size for training and validation (default: 4)') 128 | parser.add_argument('--detection_thresh', type=float, default=0.4, 129 | help='Inference confidence threshold') 130 | parser.add_argument('--log_interval', type=int, default=100, 131 | help='how many batches to wait before logging training status') 132 | parser.add_argument('--debug_images_interval', type=int, default=500, 133 | help='how many batches to wait before logging debug images') 134 | parser.add_argument('--input_dataset_root', type=str, 135 | default='/media/dan/bigdata/datasets/coco/2017/val2017', 136 | help='annotation file of test dataset') 137 | parser.add_argument('--input_checkpoint', type=str, default='/tmp/checkpoints/model_epoch_10.pth', 138 | help='Checkpoint to use for inference') 139 | parser.add_argument("--output_dir", type=str, default="/tmp/inference_results", 140 | help="output directory for saving models checkpoints") 141 | parser.add_argument("--log_dir", type=str, default="/tmp/tensorboard_logs", 142 | help="log directory for Tensorboard log output") 143 | args = parser.parse_args() 144 | 145 | if not os.path.exists(args.output_dir): 146 | utils.mkdir(args.output_dir) 147 | if not os.path.exists(args.log_dir): 148 | utils.mkdir(args.log_dir) 149 | 150 | run(args) 151 | -------------------------------------------------------------------------------- /once_upon_a_repository/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow == 10.2.0 2 | attrs == 19.3.0 3 | numpy == 1.22.0 4 | opencv_python == 4.2.0.32 5 | pathlib2 == 2.3.5 6 | pycocotools == 2.0.0 7 | pytorch_ignite == 0.2.1 8 | torch == 1.4.0 9 | torchvision == 0.5.0 10 | trains == 0.13.2 11 | tensorboard==2.1.0 12 | -------------------------------------------------------------------------------- /once_upon_a_repository/torchvision_references/coco_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import copy 5 | import torch 6 | import torch._six 7 | 8 | from pycocotools.cocoeval import COCOeval 9 | from pycocotools.coco import COCO 10 | import pycocotools.mask as mask_util 11 | 12 | from collections import defaultdict 13 | 14 | from torchvision_references import utils 15 | 16 | 17 | class CocoEvaluator(object): 18 | def __init__(self, coco_gt, iou_types): 19 | assert isinstance(iou_types, (list, tuple)) 20 | coco_gt = copy.deepcopy(coco_gt) 21 | self.coco_gt = coco_gt 22 | 23 | self.iou_types = iou_types 24 | self.coco_eval = {} 25 | for iou_type in iou_types: 26 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 27 | 28 | self.img_ids = [] 29 | self.eval_imgs = {k: [] for k in iou_types} 30 | 31 | def update(self, predictions): 32 | img_ids = 
list(np.unique(list(predictions.keys()))) 33 | self.img_ids.extend(img_ids) 34 | 35 | for iou_type in self.iou_types: 36 | results = self.prepare(predictions, iou_type) 37 | coco_dt = loadRes(self.coco_gt, results) if results else COCO() 38 | coco_eval = self.coco_eval[iou_type] 39 | 40 | coco_eval.cocoDt = coco_dt 41 | coco_eval.params.imgIds = list(img_ids) 42 | img_ids, eval_imgs = evaluate(coco_eval) 43 | 44 | self.eval_imgs[iou_type].append(eval_imgs) 45 | 46 | def synchronize_between_processes(self): 47 | for iou_type in self.iou_types: 48 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 49 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 50 | 51 | def accumulate(self): 52 | for coco_eval in self.coco_eval.values(): 53 | coco_eval.accumulate() 54 | 55 | def summarize(self): 56 | for iou_type, coco_eval in self.coco_eval.items(): 57 | print("IoU metric: {}".format(iou_type)) 58 | coco_eval.summarize() 59 | 60 | def prepare(self, predictions, iou_type): 61 | if iou_type == "bbox": 62 | return self.prepare_for_coco_detection(predictions) 63 | elif iou_type == "segm": 64 | return self.prepare_for_coco_segmentation(predictions) 65 | elif iou_type == "keypoints": 66 | return self.prepare_for_coco_keypoint(predictions) 67 | else: 68 | raise ValueError("Unknown iou type {}".format(iou_type)) 69 | 70 | def prepare_for_coco_detection(self, predictions): 71 | coco_results = [] 72 | for original_id, prediction in predictions.items(): 73 | if len(prediction) == 0: 74 | continue 75 | 76 | boxes = prediction["boxes"] 77 | boxes = convert_to_xywh(boxes).tolist() 78 | scores = prediction["scores"].tolist() 79 | labels = prediction["labels"].tolist() 80 | 81 | coco_results.extend( 82 | [ 83 | { 84 | "image_id": original_id, 85 | "category_id": labels[k], 86 | "bbox": box, 87 | "score": scores[k], 88 | } 89 | for k, box in enumerate(boxes) 90 | ] 91 | ) 92 | return coco_results 93 | 94 | def prepare_for_coco_segmentation(self, predictions): 95 | coco_results = [] 96 | for original_id, prediction in predictions.items(): 97 | if len(prediction) == 0: 98 | continue 99 | 100 | masks = prediction["masks"] 101 | masks = (masks > 0.5).type(torch.uint8) 102 | 103 | scores = prediction["scores"].tolist() 104 | labels = prediction["labels"].tolist() 105 | 106 | rles = [ 107 | mask_util.encode(np.array(mask.cpu()[0, :, :, np.newaxis], order="F"))[0] 108 | for mask in masks 109 | ] 110 | for rle in rles: 111 | rle["counts"] = rle["counts"].decode("utf-8") 112 | 113 | coco_results.extend( 114 | [ 115 | { 116 | "image_id": original_id, 117 | "category_id": labels[k], 118 | "segmentation": rle, 119 | "score": scores[k], 120 | } 121 | for k, rle in enumerate(rles) 122 | ] 123 | ) 124 | return coco_results 125 | 126 | def prepare_for_coco_keypoint(self, predictions): 127 | coco_results = [] 128 | for original_id, prediction in predictions.items(): 129 | if len(prediction) == 0: 130 | continue 131 | 132 | boxes = prediction["boxes"] 133 | boxes = convert_to_xywh(boxes).tolist() 134 | scores = prediction["scores"].tolist() 135 | labels = prediction["labels"].tolist() 136 | keypoints = prediction["keypoints"] 137 | keypoints = keypoints.flatten(start_dim=1).tolist() 138 | 139 | coco_results.extend( 140 | [ 141 | { 142 | "image_id": original_id, 143 | "category_id": labels[k], 144 | 'keypoints': keypoint, 145 | "score": scores[k], 146 | } 147 | for k, keypoint in enumerate(keypoints) 148 | ] 149 | ) 150 | return coco_results 151 | 152 | 153 | def 
convert_to_xywh(boxes): 154 | xmin, ymin, xmax, ymax = boxes.unbind(1) 155 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 156 | 157 | 158 | def merge(img_ids, eval_imgs): 159 | all_img_ids = utils.all_gather(img_ids) 160 | all_eval_imgs = utils.all_gather(eval_imgs) 161 | 162 | merged_img_ids = [] 163 | for p in all_img_ids: 164 | merged_img_ids.extend(p) 165 | 166 | merged_eval_imgs = [] 167 | for p in all_eval_imgs: 168 | merged_eval_imgs.append(p) 169 | 170 | merged_img_ids = np.array(merged_img_ids) 171 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 172 | 173 | # keep only unique (and in sorted order) images 174 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 175 | merged_eval_imgs = merged_eval_imgs[..., idx] 176 | 177 | return merged_img_ids, merged_eval_imgs 178 | 179 | 180 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 181 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 182 | img_ids = list(img_ids) 183 | eval_imgs = list(eval_imgs.flatten()) 184 | 185 | coco_eval.evalImgs = eval_imgs 186 | coco_eval.params.imgIds = img_ids 187 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 188 | 189 | 190 | ################################################################# 191 | # From pycocotools, just removed the prints and fixed 192 | # a Python3 bug about unicode not defined 193 | ################################################################# 194 | 195 | # Ideally, pycocotools wouldn't have hard-coded prints 196 | # so that we could avoid copy-pasting those two functions 197 | 198 | def createIndex(self): 199 | # create index 200 | # print('creating index...') 201 | anns, cats, imgs = {}, {}, {} 202 | imgToAnns, catToImgs = defaultdict(list), defaultdict(list) 203 | if 'annotations' in self.dataset: 204 | for ann in self.dataset['annotations']: 205 | imgToAnns[ann['image_id']].append(ann) 206 | anns[ann['id']] = ann 207 | 208 | if 'images' in self.dataset: 209 | for img in self.dataset['images']: 210 | imgs[img['id']] = img 211 | 212 | if 'categories' in self.dataset: 213 | for cat in self.dataset['categories']: 214 | cats[cat['id']] = cat 215 | 216 | if 'annotations' in self.dataset and 'categories' in self.dataset: 217 | for ann in self.dataset['annotations']: 218 | catToImgs[ann['category_id']].append(ann['image_id']) 219 | 220 | # print('index created!') 221 | 222 | # create class members 223 | self.anns = anns 224 | self.imgToAnns = imgToAnns 225 | self.catToImgs = catToImgs 226 | self.imgs = imgs 227 | self.cats = cats 228 | 229 | 230 | maskUtils = mask_util 231 | 232 | 233 | def loadRes(self, resFile): 234 | """ 235 | Load result file and return a result api object. 
236 | :param resFile (str) : file name of result file 237 | :return: res (obj) : result api object 238 | """ 239 | res = COCO() 240 | res.dataset['images'] = [img for img in self.dataset['images']] 241 | 242 | # print('Loading and preparing results...') 243 | # tic = time.time() 244 | if isinstance(resFile, torch._six.string_classes): 245 | anns = json.load(open(resFile)) 246 | elif type(resFile) == np.ndarray: 247 | anns = self.loadNumpyAnnotations(resFile) 248 | else: 249 | anns = resFile 250 | assert type(anns) == list, 'results in not an array of objects' 251 | annsImgIds = [ann['image_id'] for ann in anns] 252 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 253 | 'Results do not correspond to current coco set' 254 | if 'caption' in anns[0]: 255 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 256 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 257 | for id, ann in enumerate(anns): 258 | ann['id'] = id + 1 259 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 260 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 261 | for id, ann in enumerate(anns): 262 | bb = ann['bbox'] 263 | x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 264 | if 'segmentation' not in ann: 265 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 266 | ann['area'] = bb[2] * bb[3] 267 | ann['id'] = id + 1 268 | ann['iscrowd'] = 0 269 | elif 'segmentation' in anns[0]: 270 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 271 | for id, ann in enumerate(anns): 272 | # now only support compressed RLE format as segmentation results 273 | ann['area'] = maskUtils.area(ann['segmentation']) 274 | if 'bbox' not in ann: 275 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 276 | ann['id'] = id + 1 277 | ann['iscrowd'] = 0 278 | elif 'keypoints' in anns[0]: 279 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 280 | for id, ann in enumerate(anns): 281 | s = ann['keypoints'] 282 | x = s[0::3] 283 | y = s[1::3] 284 | x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) 285 | ann['area'] = (x2 - x1) * (y2 - y1) 286 | ann['id'] = id + 1 287 | ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] 288 | # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 289 | 290 | res.dataset['annotations'] = anns 291 | createIndex(res) 292 | return res 293 | 294 | 295 | def evaluate(self): 296 | ''' 297 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 298 | :return: None 299 | ''' 300 | # tic = time.time() 301 | # print('Running per image evaluation...') 302 | p = self.params 303 | # add backward compatibility if useSegm is specified in params 304 | if p.useSegm is not None: 305 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 306 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) 307 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 308 | p.imgIds = list(np.unique(p.imgIds)) 309 | if p.useCats: 310 | p.catIds = list(np.unique(p.catIds)) 311 | p.maxDets = sorted(p.maxDets) 312 | self.params = p 313 | 314 | self._prepare() 315 | # loop through images, area range, max detection number 316 | catIds = p.catIds if p.useCats else [-1] 317 | 318 | if p.iouType == 'segm' or p.iouType == 'bbox': 319 | computeIoU = self.computeIoU 320 | elif p.iouType == 'keypoints': 321 | computeIoU = self.computeOks 322 | self.ious = { 323 | (imgId, catId): computeIoU(imgId, catId) 324 | for imgId in p.imgIds 325 | for catId in catIds} 326 | 327 | evaluateImg = self.evaluateImg 328 | maxDet = p.maxDets[-1] 329 | evalImgs = [ 330 | evaluateImg(imgId, catId, areaRng, maxDet) 331 | for catId in catIds 332 | for areaRng in p.areaRng 333 | for imgId in p.imgIds 334 | ] 335 | # this is NOT in the pycocotools code, but could be done outside 336 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 337 | self._paramsEval = copy.deepcopy(self.params) 338 | # toc = time.time() 339 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 340 | return p.imgIds, evalImgs 341 | 342 | ################################################################# 343 | # end of straight copy from pycocotools, just removing the prints 344 | ################################################################# 345 | -------------------------------------------------------------------------------- /once_upon_a_repository/torchvision_references/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | import torchvision 7 | 8 | from pycocotools import mask as coco_mask 9 | from pycocotools.coco import COCO 10 | 11 | import transforms as T 12 | 13 | 14 | class FilterAndRemapCocoCategories(object): 15 | def __init__(self, categories, remap=True): 16 | self.categories = categories 17 | self.remap = remap 18 | 19 | def __call__(self, image, target): 20 | anno = target["annotations"] 21 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 22 | if not self.remap: 23 | target["annotations"] = anno 24 | return image, target 25 | anno = copy.deepcopy(anno) 26 | for obj in anno: 27 | obj["category_id"] = self.categories.index(obj["category_id"]) 28 | target["annotations"] = anno 29 | return image, target 30 | 31 | 32 | def convert_coco_poly_to_mask(segmentations, height, width): 33 | masks = [] 34 | for polygons in segmentations: 35 | rles = coco_mask.frPyObjects(polygons, height, width) 36 | mask = coco_mask.decode(rles) 37 | if len(mask.shape) < 3: 38 | mask = mask[..., None] 39 | mask = torch.as_tensor(mask, dtype=torch.uint8) 40 | mask = mask.any(dim=2) 41 | masks.append(mask) 42 | if masks: 43 | masks = torch.stack(masks, dim=0) 44 | else: 45 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 46 | return masks 47 | 48 | 49 | class ConvertCocoPolysToMask(object): 50 | def __call__(self, image, target): 51 | w, h = image.size 52 | 53 | image_id = target["image_id"] 54 | image_id = torch.tensor([image_id]) 55 | 56 | anno = target["annotations"] 57 | 58 | anno = [obj for obj in anno if obj['iscrowd'] == 0] 59 | 60 | boxes = [obj["bbox"] for obj in anno] 61 | # guard against no boxes via resizing 62 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 63 | boxes[:, 2:] += boxes[:, :2] 64 | boxes[:, 
0::2].clamp_(min=0, max=w) 65 | boxes[:, 1::2].clamp_(min=0, max=h) 66 | 67 | classes = [obj["category_id"] for obj in anno] 68 | classes = torch.tensor(classes, dtype=torch.int64) 69 | 70 | segmentations = [obj["segmentation"] for obj in anno] 71 | masks = convert_coco_poly_to_mask(segmentations, h, w) 72 | 73 | keypoints = None 74 | if anno and "keypoints" in anno[0]: 75 | keypoints = [obj["keypoints"] for obj in anno] 76 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 77 | num_keypoints = keypoints.shape[0] 78 | if num_keypoints: 79 | keypoints = keypoints.view(num_keypoints, -1, 3) 80 | 81 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 82 | boxes = boxes[keep] 83 | classes = classes[keep] 84 | masks = masks[keep] 85 | if keypoints is not None: 86 | keypoints = keypoints[keep] 87 | 88 | target = {} 89 | target["boxes"] = boxes 90 | target["labels"] = classes 91 | target["masks"] = masks 92 | target["image_id"] = image_id 93 | if keypoints is not None: 94 | target["keypoints"] = keypoints 95 | 96 | # for conversion to coco api 97 | area = torch.tensor([obj["area"] for obj in anno]) 98 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 99 | target["area"] = area 100 | target["iscrowd"] = iscrowd 101 | 102 | return image, target 103 | 104 | 105 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 106 | def _has_only_empty_bbox(anno): 107 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 108 | 109 | def _count_visible_keypoints(anno): 110 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 111 | 112 | min_keypoints_per_image = 10 113 | 114 | def _has_valid_annotation(anno): 115 | # if it's empty, there is no annotation 116 | if len(anno) == 0: 117 | return False 118 | # if all boxes have close to zero area, there is no annotation 119 | if _has_only_empty_bbox(anno): 120 | return False 121 | # keypoints task have a slight different critera for considering 122 | # if an annotation is valid 123 | if "keypoints" not in anno[0]: 124 | return True 125 | # for keypoint detection tasks, only consider valid images those 126 | # containing at least min_keypoints_per_image 127 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 128 | return True 129 | return False 130 | 131 | assert isinstance(dataset, torchvision.datasets.CocoDetection) 132 | ids = [] 133 | for ds_idx, img_id in enumerate(dataset.ids): 134 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 135 | anno = dataset.coco.loadAnns(ann_ids) 136 | if cat_list: 137 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 138 | if _has_valid_annotation(anno): 139 | ids.append(ds_idx) 140 | 141 | dataset = torch.utils.data.Subset(dataset, ids) 142 | return dataset 143 | 144 | 145 | def convert_to_coco_api(ds): 146 | coco_ds = COCO() 147 | ann_id = 0 148 | dataset = {'images': [], 'categories': [], 'annotations': []} 149 | categories = set() 150 | for img_idx in range(len(ds)): 151 | # find better way to get target 152 | # targets = ds.get_annotations(img_idx) 153 | img, targets = ds[img_idx] 154 | image_id = targets["image_id"].item() 155 | img_dict = {} 156 | img_dict['id'] = image_id 157 | img_dict['height'] = img.shape[-2] 158 | img_dict['width'] = img.shape[-1] 159 | dataset['images'].append(img_dict) 160 | bboxes = targets["boxes"] 161 | bboxes[:, 2:] -= bboxes[:, :2] 162 | bboxes = bboxes.tolist() 163 | labels = targets['labels'].tolist() 164 | areas = targets['area'].tolist() 165 | iscrowd = 
targets['iscrowd'].tolist() 166 | if 'masks' in targets: 167 | masks = targets['masks'] 168 | # make masks Fortran contiguous for coco_mask 169 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 170 | if 'keypoints' in targets: 171 | keypoints = targets['keypoints'] 172 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 173 | num_objs = len(bboxes) 174 | for i in range(num_objs): 175 | ann = {} 176 | ann['image_id'] = image_id 177 | ann['bbox'] = bboxes[i] 178 | ann['category_id'] = labels[i] 179 | categories.add(labels[i]) 180 | ann['area'] = areas[i] 181 | ann['iscrowd'] = iscrowd[i] 182 | ann['id'] = ann_id 183 | if 'masks' in targets: 184 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 185 | if 'keypoints' in targets: 186 | ann['keypoints'] = keypoints[i] 187 | ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) 188 | dataset['annotations'].append(ann) 189 | ann_id += 1 190 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 191 | coco_ds.dataset = dataset 192 | coco_ds.createIndex() 193 | return coco_ds 194 | 195 | 196 | def get_coco_api_from_dataset(dataset): 197 | for _ in range(10): 198 | if isinstance(dataset, torchvision.datasets.CocoDetection): 199 | break 200 | if isinstance(dataset, torch.utils.data.Subset): 201 | dataset = dataset.dataset 202 | if isinstance(dataset, torchvision.datasets.CocoDetection): 203 | return dataset.coco 204 | return convert_to_coco_api(dataset) 205 | 206 | 207 | class CocoDetection(torchvision.datasets.CocoDetection): 208 | def __init__(self, img_folder, ann_file, transforms): 209 | super(CocoDetection, self).__init__(img_folder, ann_file) 210 | self._transforms = transforms 211 | 212 | def __getitem__(self, idx): 213 | img, target = super(CocoDetection, self).__getitem__(idx) 214 | image_id = self.ids[idx] 215 | target = dict(image_id=image_id, annotations=target) 216 | if self._transforms is not None: 217 | img, target = self._transforms(img, target) 218 | return img, target 219 | 220 | 221 | def get_coco(root, image_set, transforms, mode='instances'): 222 | anno_file_template = "{}_{}2017.json" 223 | PATHS = { 224 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 225 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 226 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 227 | } 228 | 229 | t = [ConvertCocoPolysToMask()] 230 | 231 | if transforms is not None: 232 | t.append(transforms) 233 | transforms = T.Compose(t) 234 | 235 | img_folder, ann_file = PATHS[image_set] 236 | img_folder = os.path.join(root, img_folder) 237 | ann_file = os.path.join(root, ann_file) 238 | 239 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 240 | 241 | if image_set == "train": 242 | dataset = _coco_remove_images_without_annotations(dataset) 243 | 244 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 245 | 246 | return dataset 247 | 248 | 249 | def get_coco_kp(root, image_set, transforms): 250 | return get_coco(root, image_set, transforms, mode="person_keypoints") 251 | -------------------------------------------------------------------------------- /once_upon_a_repository/torchvision_references/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pickle 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | import errno 9 | import 
os 10 | 11 | 12 | def all_gather(data): 13 | """ 14 | Run all_gather on arbitrary picklable data (not necessarily tensors) 15 | Args: 16 | data: any picklable object 17 | Returns: 18 | list[data]: list of data gathered from each rank 19 | """ 20 | world_size = get_world_size() 21 | if world_size == 1: 22 | return [data] 23 | 24 | # serialized to a Tensor 25 | buffer = pickle.dumps(data) 26 | storage = torch.ByteStorage.from_buffer(buffer) 27 | tensor = torch.ByteTensor(storage).to("cuda") 28 | 29 | # obtain Tensor size of each rank 30 | local_size = torch.tensor([tensor.numel()], device="cuda") 31 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 32 | dist.all_gather(size_list, local_size) 33 | size_list = [int(size.item()) for size in size_list] 34 | max_size = max(size_list) 35 | 36 | # receiving Tensor from all ranks 37 | # we pad the tensor because torch all_gather does not support 38 | # gathering tensors of different shapes 39 | tensor_list = [] 40 | for _ in size_list: 41 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 42 | if local_size != max_size: 43 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 44 | tensor = torch.cat((tensor, padding), dim=0) 45 | dist.all_gather(tensor_list, tensor) 46 | 47 | data_list = [] 48 | for size, tensor in zip(size_list, tensor_list): 49 | buffer = tensor.cpu().numpy().tobytes()[:size] 50 | data_list.append(pickle.loads(buffer)) 51 | 52 | return data_list 53 | 54 | 55 | def reduce_dict(input_dict, average=True): 56 | """ 57 | Args: 58 | input_dict (dict): all the values will be reduced 59 | average (bool): whether to do average or sum 60 | Reduce the values in the dictionary from all processes so that all processes 61 | have the averaged results. Returns a dict with the same fields as 62 | input_dict, after reduction. 
63 | """ 64 | world_size = get_world_size() 65 | if world_size < 2: 66 | return input_dict 67 | with torch.no_grad(): 68 | names = [] 69 | values = [] 70 | # sort the keys so that they are consistent across processes 71 | for k in sorted(input_dict.keys()): 72 | names.append(k) 73 | values.append(input_dict[k]) 74 | values = torch.stack(values, dim=0) 75 | dist.all_reduce(values) 76 | if average: 77 | values /= world_size 78 | reduced_dict = {k: v for k, v in zip(names, values)} 79 | return reduced_dict 80 | 81 | 82 | def collate_fn(batch): 83 | return tuple(zip(*batch)) 84 | 85 | 86 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 87 | 88 | def f(x): 89 | if x >= warmup_iters: 90 | return 1 91 | alpha = float(x) / warmup_iters 92 | return warmup_factor * (1 - alpha) + alpha 93 | 94 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 95 | 96 | 97 | def mkdir(path): 98 | try: 99 | os.makedirs(path) 100 | except OSError as e: 101 | if e.errno != errno.EEXIST: 102 | raise 103 | 104 | 105 | def setup_for_distributed(is_master): 106 | """ 107 | This function disables printing when not in master process 108 | """ 109 | import builtins as __builtin__ 110 | builtin_print = __builtin__.print 111 | 112 | def print(*args, **kwargs): 113 | force = kwargs.pop('force', False) 114 | if is_master or force: 115 | builtin_print(*args, **kwargs) 116 | 117 | __builtin__.print = print 118 | 119 | 120 | def is_dist_avail_and_initialized(): 121 | if not dist.is_available(): 122 | return False 123 | if not dist.is_initialized(): 124 | return False 125 | return True 126 | 127 | 128 | def get_world_size(): 129 | if not is_dist_avail_and_initialized(): 130 | return 1 131 | return dist.get_world_size() 132 | 133 | 134 | def get_rank(): 135 | if not is_dist_avail_and_initialized(): 136 | return 0 137 | return dist.get_rank() 138 | 139 | 140 | def is_main_process(): 141 | return get_rank() == 0 142 | 143 | 144 | def save_on_master(*args, **kwargs): 145 | if is_main_process(): 146 | torch.save(*args, **kwargs) 147 | 148 | 149 | def init_distributed_mode(args): 150 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 151 | args.rank = int(os.environ["RANK"]) 152 | args.world_size = int(os.environ['WORLD_SIZE']) 153 | args.gpu = int(os.environ['LOCAL_RANK']) 154 | elif 'SLURM_PROCID' in os.environ: 155 | args.rank = int(os.environ['SLURM_PROCID']) 156 | args.gpu = args.rank % torch.cuda.device_count() 157 | else: 158 | print('Not using distributed mode') 159 | args.distributed = False 160 | return 161 | 162 | args.distributed = True 163 | 164 | torch.cuda.set_device(args.gpu) 165 | args.dist_backend = 'nccl' 166 | print('| distributed init (rank {}): {}'.format( 167 | args.rank, args.dist_url), flush=True) 168 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 169 | world_size=args.world_size, rank=args.rank) 170 | torch.distributed.barrier() 171 | setup_for_distributed(args.rank == 0) 172 | -------------------------------------------------------------------------------- /once_upon_a_repository/train_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 3 | from itertools import chain 4 | from operator import add 5 | 6 | import numpy as np 7 | import torch 8 | from PIL import Image 9 | from ignite.engine import Events 10 | from pathlib2 import Path 11 | from torch.utils.data import DataLoader 12 | from torch.utils.tensorboard import 
SummaryWriter 13 | from torchvision.datasets.coco import CocoDetection 14 | from trains import Task 15 | 16 | from engines import create_trainer, create_evaluator 17 | from torchvision_references import utils 18 | from torchvision_references.coco_eval import CocoEvaluator 19 | from torchvision_references.coco_utils import convert_to_coco_api 20 | from transforms import get_transform 21 | from utilities import draw_debug_images, draw_mask, get_model_instance_segmentation, safe_collate, get_iou_types 22 | 23 | task = Task.init(project_name='Object Detection with TRAINS, Ignite and TensorBoard', 24 | task_name='Train MaskRCNN with torchvision') 25 | 26 | configuration_data = {'image_size': 512, 'mask_predictor_hidden_layer': 256} 27 | configuration_data = task.connect_configuration(configuration_data) 28 | 29 | 30 | class CocoMask(CocoDetection): 31 | def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None, use_mask=True): 32 | super(CocoMask, self).__init__(root, annFile, transforms, target_transform, transform) 33 | self.transforms = transforms 34 | self.use_mask = use_mask 35 | 36 | def __getitem__(self, index): 37 | coco = self.coco 38 | img_id = self.ids[index] 39 | ann_ids = coco.getAnnIds(imgIds=img_id) 40 | target = coco.loadAnns(ann_ids) 41 | if len(ann_ids) == 0: 42 | return None 43 | 44 | path = coco.loadImgs(img_id)[0]['file_name'] 45 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 46 | 47 | # From boxes [x, y, w, h] to [x1, y1, x2, y2] 48 | new_target = {"image_id": torch.as_tensor(target[0]['image_id'], dtype=torch.int64), 49 | "area": torch.as_tensor([obj['area'] for obj in target], dtype=torch.float32), 50 | "iscrowd": torch.as_tensor([obj['iscrowd'] for obj in target], dtype=torch.int64), 51 | "boxes": torch.as_tensor([obj['bbox'][:2] + list(map(add, obj['bbox'][:2], obj['bbox'][2:])) 52 | for obj in target], dtype=torch.float32), 53 | "labels": torch.as_tensor([obj['category_id'] for obj in target], dtype=torch.int64)} 54 | if self.use_mask: 55 | mask = [coco.annToMask(ann) for ann in target] 56 | if len(mask) > 1: 57 | mask = np.stack(tuple(mask), axis=0) 58 | new_target["masks"] = torch.as_tensor(mask, dtype=torch.uint8) 59 | 60 | if self.transforms is not None: 61 | img, new_target = self.transforms(img, new_target) 62 | 63 | return img, new_target 64 | 65 | 66 | def get_data_loaders(train_ann_file, test_ann_file, batch_size, test_size, image_size, use_mask): 67 | # first, crate PyTorch dataset objects, for the train and validation data. 
68 | dataset = CocoMask( 69 | root=Path.joinpath(Path(train_ann_file).parent.parent, train_ann_file.split('_')[1].split('.')[0]), 70 | annFile=train_ann_file, 71 | transforms=get_transform(train=True, image_size=image_size), 72 | use_mask=use_mask) 73 | dataset_test = CocoMask( 74 | root=Path.joinpath(Path(test_ann_file).parent.parent, test_ann_file.split('_')[1].split('.')[0]), 75 | annFile=test_ann_file, 76 | transforms=get_transform(train=False, image_size=image_size), 77 | use_mask=use_mask) 78 | 79 | labels_enumeration = dataset.coco.cats 80 | 81 | indices_val = torch.randperm(len(dataset_test)).tolist() 82 | dataset_val = torch.utils.data.Subset(dataset_test, indices_val[:test_size]) 83 | 84 | # set train and validation data-loaders 85 | train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=6, 86 | collate_fn=safe_collate, pin_memory=True) 87 | val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False, num_workers=6, 88 | collate_fn=safe_collate, pin_memory=True) 89 | 90 | return train_loader, val_loader, labels_enumeration 91 | 92 | 93 | def run(task_args): 94 | # Define train and test datasets 95 | train_loader, val_loader, labels_enum = get_data_loaders(task_args.train_dataset_ann_file, 96 | task_args.val_dataset_ann_file, 97 | task_args.batch_size, 98 | task_args.test_size, 99 | configuration_data.get('image_size'), 100 | use_mask=True) 101 | val_dataset = list(chain.from_iterable(zip(*batch) for batch in iter(val_loader))) 102 | coco_api_val_dataset = convert_to_coco_api(val_dataset) 103 | num_classes = max(labels_enum.keys()) + 1 # number of classes plus one for background class 104 | configuration_data['num_classes'] = num_classes 105 | 106 | # Set the training device to GPU if available - if not set it to CPU 107 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 108 | torch.backends.cudnn.benchmark = True if torch.cuda.is_available() else False # optimization for fixed input size 109 | 110 | model = get_model_instance_segmentation(num_classes, configuration_data.get('mask_predictor_hidden_layer')) 111 | iou_types = get_iou_types(model) 112 | 113 | # if there is more than one GPU, parallelize the model 114 | if torch.cuda.device_count() > 1: 115 | print("{} GPUs were detected - we will use all of them".format(torch.cuda.device_count())) 116 | model = torch.nn.DataParallel(model) 117 | 118 | # copy the model to each device 119 | model.to(device) 120 | 121 | if task_args.input_checkpoint: 122 | print('Loading model checkpoint from '.format(task_args.input_checkpoint)) 123 | input_checkpoint = torch.load(task_args.input_checkpoint, map_location=torch.device(device)) 124 | model.load_state_dict(input_checkpoint['model']) 125 | 126 | writer = SummaryWriter(log_dir=task_args.log_dir) 127 | 128 | # define Ignite's train and evaluation engine 129 | trainer = create_trainer(model, device) 130 | evaluator = create_evaluator(model, device) 131 | 132 | @trainer.on(Events.STARTED) 133 | def on_training_started(engine): 134 | # construct an optimizer 135 | params = [p for p in model.parameters() if p.requires_grad] 136 | engine.state.optimizer = torch.optim.SGD(params, 137 | lr=task_args.lr, 138 | momentum=task_args.momentum, 139 | weight_decay=task_args.weight_decay) 140 | engine.state.scheduler = torch.optim.lr_scheduler.StepLR(engine.state.optimizer, step_size=3, gamma=0.1) 141 | if task_args.input_checkpoint and task_args.load_optimizer: 142 | 
engine.state.optimizer.load_state_dict(input_checkpoint['optimizer']) 143 | engine.state.scheduler.load_state_dict(input_checkpoint['lr_scheduler']) 144 | 145 | @trainer.on(Events.EPOCH_STARTED) 146 | def on_epoch_started(engine): 147 | model.train() 148 | engine.state.warmup_scheduler = None 149 | if engine.state.epoch == 1: 150 | warmup_iters = min(task_args.warmup_iterations, len(train_loader) - 1) 151 | print('Warm up period was set to {} iterations'.format(warmup_iters)) 152 | warmup_factor = 1. / warmup_iters 153 | engine.state.warmup_scheduler = utils.warmup_lr_scheduler(engine.state.optimizer, warmup_iters, warmup_factor) 154 | 155 | @trainer.on(Events.ITERATION_COMPLETED) 156 | def on_iteration_completed(engine): 157 | images, targets, loss_dict_reduced = engine.state.output 158 | if engine.state.iteration % task_args.log_interval == 0: 159 | loss = sum(loss for loss in loss_dict_reduced.values()).item() 160 | print("Epoch: {}, Iteration: {}, Loss: {}".format(engine.state.epoch, engine.state.iteration, loss)) 161 | for k, v in loss_dict_reduced.items(): 162 | writer.add_scalar("loss/{}".format(k), v.item(), engine.state.iteration) 163 | writer.add_scalar("loss/total_loss", sum(loss for loss in loss_dict_reduced.values()).item(), engine.state.iteration) 164 | writer.add_scalar("learning rate/lr", engine.state.optimizer.param_groups[0]['lr'], engine.state.iteration) 165 | 166 | if engine.state.iteration % task_args.debug_images_interval == 0: 167 | for n, debug_image in enumerate(draw_debug_images(images, targets)): 168 | writer.add_image("training/image_{}".format(n), debug_image, engine.state.iteration, dataformats='HWC') 169 | if 'masks' in targets[n]: 170 | writer.add_image("training/image_{}_mask".format(n), 171 | draw_mask(targets[n]), engine.state.iteration, dataformats='HW') 172 | images = targets = loss_dict_reduced = engine.state.output = None 173 | 174 | @trainer.on(Events.EPOCH_COMPLETED) 175 | def on_epoch_completed(engine): 176 | engine.state.scheduler.step() 177 | evaluator.run(val_loader) 178 | for res_type in evaluator.state.coco_evaluator.iou_types: 179 | average_precision_05 = evaluator.state.coco_evaluator.coco_eval[res_type].stats[1] 180 | writer.add_scalar("validation-{}/average precision 0_5".format(res_type), average_precision_05, 181 | engine.state.iteration) 182 | checkpoint_path = os.path.join(task_args.output_dir, 'model_epoch_{}.pth'.format(engine.state.epoch)) 183 | print('Saving model checkpoint') 184 | checkpoint = { 185 | 'model': model.state_dict(), 186 | 'optimizer': engine.state.optimizer.state_dict(), 187 | 'lr_scheduler': engine.state.scheduler.state_dict(), 188 | 'epoch': engine.state.epoch, 189 | 'configuration': configuration_data, 190 | 'labels_enumeration': labels_enum} 191 | utils.save_on_master(checkpoint, checkpoint_path) 192 | print('Model checkpoint from epoch {} was saved at {}'.format(engine.state.epoch, checkpoint_path)) 193 | evaluator.state = checkpoint = None 194 | 195 | @evaluator.on(Events.STARTED) 196 | def on_evaluation_started(engine): 197 | model.eval() 198 | engine.state.coco_evaluator = CocoEvaluator(coco_api_val_dataset, iou_types) 199 | 200 | @evaluator.on(Events.ITERATION_COMPLETED) 201 | def on_eval_iteration_completed(engine): 202 | images, targets, results = engine.state.output 203 | if engine.state.iteration % task_args.log_interval == 0: 204 | print("Evaluation: Iteration: {}".format(engine.state.iteration)) 205 | 206 | if engine.state.iteration % task_args.debug_images_interval == 0: 207 | for n, debug_image in 
enumerate(draw_debug_images(images, targets, results)): 208 | writer.add_image("evaluation/image_{}_{}".format(engine.state.iteration, n), 209 | debug_image, trainer.state.iteration, dataformats='HWC') 210 | if 'masks' in targets[n]: 211 | writer.add_image("evaluation/image_{}_{}_mask".format(engine.state.iteration, n), 212 | draw_mask(targets[n]), trainer.state.iteration, dataformats='HW') 213 | curr_image_id = int(targets[n]['image_id']) 214 | writer.add_image("evaluation/image_{}_{}_predicted_mask".format(engine.state.iteration, n), 215 | draw_mask(results[curr_image_id]).squeeze(), trainer.state.iteration, dataformats='HW') 216 | images = targets = results = engine.state.output = None 217 | 218 | @evaluator.on(Events.COMPLETED) 219 | def on_evaluation_completed(engine): 220 | # gather the stats from all processes 221 | engine.state.coco_evaluator.synchronize_between_processes() 222 | 223 | # accumulate predictions from all images 224 | engine.state.coco_evaluator.accumulate() 225 | engine.state.coco_evaluator.summarize() 226 | 227 | trainer.run(train_loader, max_epochs=task_args.epochs) 228 | writer.close() 229 | 230 | 231 | if __name__ == "__main__": 232 | parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) 233 | parser.add_argument('--warmup_iterations', type=int, default=5000, 234 | help='Number of iteration for warmup period (until reaching base learning rate)') 235 | parser.add_argument('--batch_size', type=int, default=4, 236 | help='input batch size for training and validation') 237 | parser.add_argument('--test_size', type=int, default=2000, 238 | help='number of frames from the test dataset to use for validation') 239 | parser.add_argument('--epochs', type=int, default=10, 240 | help='number of epochs to train') 241 | parser.add_argument('--log_interval', type=int, default=100, 242 | help='how many batches to wait before logging training status') 243 | parser.add_argument('--debug_images_interval', type=int, default=500, 244 | help='how many batches to wait before logging debug images') 245 | parser.add_argument('--train_dataset_ann_file', type=str, 246 | default='~/bigdata/coco/annotations/instances_train2017.json', 247 | help='annotation file of train dataset') 248 | parser.add_argument('--val_dataset_ann_file', type=str, default='~/bigdata/coco/annotations/instances_val2017.json', 249 | help='annotation file of test dataset') 250 | parser.add_argument('--input_checkpoint', type=str, default='', 251 | help='Loading model weights from this checkpoint.') 252 | parser.add_argument('--load_optimizer', default=False, type=bool, 253 | help='Use optimizer and lr_scheduler saved in the input checkpoint to resume training') 254 | parser.add_argument("--output_dir", type=str, default="/tmp/checkpoints", 255 | help="output directory for saving models checkpoints") 256 | parser.add_argument("--log_dir", type=str, default="/tmp/tensorboard_logs", 257 | help="log directory for Tensorboard log output") 258 | parser.add_argument("--lr", type=float, default=0.005, 259 | help="learning rate for optimizer") 260 | parser.add_argument("--momentum", type=float, default=0.9, 261 | help="momentum for optimizer") 262 | parser.add_argument("--weight_decay", type=float, default=0.0005, 263 | help="weight decay for optimizer") 264 | args = parser.parse_args() 265 | 266 | if not os.path.exists(args.output_dir): 267 | utils.mkdir(args.output_dir) 268 | if not os.path.exists(args.log_dir): 269 | utils.mkdir(args.log_dir) 270 | 271 | run(args) 272 | 
-------------------------------------------------------------------------------- /once_upon_a_repository/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from PIL import Image 4 | 5 | from torchvision.transforms import functional as F 6 | 7 | 8 | def get_transform(train, image_size): 9 | transforms = [Resize(size=(image_size, image_size)), ToTensor()] 10 | if train: 11 | transforms.append(RandomHorizontalFlip(0.5)) 12 | return Compose(transforms) 13 | 14 | 15 | def _flip_coco_person_keypoints(kps, width): 16 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 17 | flipped_data = kps[:, flip_inds] 18 | flipped_data[..., 0] = width - flipped_data[..., 0] 19 | # Maintain COCO convention that if visibility == 0, then x, y = 0 20 | inds = flipped_data[..., 2] == 0 21 | flipped_data[inds] = 0 22 | return flipped_data 23 | 24 | 25 | class ToTensor(object): 26 | def __call__(self, image, target): 27 | image = F.to_tensor(image) 28 | return image, target 29 | 30 | 31 | class Compose(object): 32 | def __init__(self, transforms): 33 | self.transforms = transforms 34 | 35 | def __call__(self, image, target=None): 36 | for t in self.transforms: 37 | image, target = t(image, target) 38 | return image, target 39 | 40 | 41 | class RandomHorizontalFlip(object): 42 | def __init__(self, prob): 43 | self.prob = prob 44 | 45 | def __call__(self, image, target): 46 | if random.random() < self.prob: 47 | height, width = image.shape[-2:] 48 | image = image.flip(-1) 49 | bbox = target["boxes"] 50 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 51 | target["boxes"] = bbox 52 | if "masks" in target: 53 | target["masks"] = target["masks"].flip(-1) 54 | if "keypoints" in target: 55 | keypoints = target["keypoints"] 56 | keypoints = _flip_coco_person_keypoints(keypoints, width) 57 | target["keypoints"] = keypoints 58 | return image, target 59 | 60 | 61 | class Resize(object): 62 | """Resize the input PIL image to given size. 63 | If boxes is not None, resize boxes accordingly. 64 | Args: 65 | size: (tuple or int) 66 | - if is tuple, resize image to the size. 67 | - if is int, resize the shorter side to the size while maintaining the aspect ratio. 68 | max_size: (int) when size is int, limit the image longer size to max_size. 69 | This is essential to limit the usage of GPU memory. 70 | random_interpolation: (bool) randomly choose a resize interpolation method. 71 | Returns: 72 | img: (PIL.Image) resized image. 73 | boxes: (tensor) resized boxes. 74 | Example: 75 | >> img, boxes = resize(img, boxes, 600) # resize shorter side to 600 76 | >> img, boxes = resize(img, boxes, (500,600)) # resize image size to (500,600) 77 | >> img, _ = resize(img, None, (500,600)) # resize image only 78 | """ 79 | def __init__(self, size, max_size=1000, random_interpolation=False): 80 | self.size = size 81 | self.max_size = max_size 82 | self.random_interpolation = random_interpolation 83 | 84 | def __call__(self, image, target): 85 | """Resize the input PIL image to given size. 86 | If boxes is not None, resize boxes accordingly. 87 | Args: 88 | image: (PIL.Image) image to be resized. 89 | target: (tensor) object boxes, sized [#obj,4]. 
90 | """ 91 | w, h = image.size 92 | if isinstance(self.size, int): 93 | size_min = min(w, h) 94 | size_max = max(w, h) 95 | sw = sh = float(self.size) / size_min 96 | if sw * size_max > self.max_size: 97 | sw = sh = float(self.max_size) / size_max 98 | ow = int(w * sw + 0.5) 99 | oh = int(h * sh + 0.5) 100 | else: 101 | ow, oh = self.size 102 | sw = float(ow) / w 103 | sh = float(oh) / h 104 | 105 | method = random.choice([ 106 | Image.BOX, 107 | Image.NEAREST, 108 | Image.HAMMING, 109 | Image.BICUBIC, 110 | Image.LANCZOS, 111 | Image.BILINEAR]) if self.random_interpolation else Image.BILINEAR 112 | image = image.resize((ow, oh), method) 113 | if target is not None and "masks" in target: 114 | resized_masks = torch.nn.functional.interpolate( 115 | input=target["masks"][None].float(), 116 | size=(512, 512), 117 | mode="nearest", 118 | )[0].type_as(target["masks"]) 119 | target["masks"] = resized_masks 120 | if target is not None and "boxes" in target: 121 | resized_boxes = target["boxes"] * torch.tensor([sw, sh, sw, sh]) 122 | target["boxes"] = resized_boxes 123 | return image, target 124 | -------------------------------------------------------------------------------- /once_upon_a_repository/utilities.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import attr 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import torchvision 8 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 9 | from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 10 | from torchvision.transforms import functional as F 11 | 12 | from torchvision_references import utils 13 | 14 | 15 | def safe_collate(batch): 16 | batch = list(filter(lambda x: x is not None, batch)) 17 | return utils.collate_fn(batch) 18 | 19 | 20 | def draw_boxes(im, boxes, labels, color=(150, 0, 0)): 21 | for box, draw_label in zip(boxes, labels): 22 | draw_box = box.astype('int') 23 | im = cv2.rectangle(im, tuple(draw_box[:2]), tuple(draw_box[2:]), color, 2) 24 | im = cv2.putText(im, str(draw_label), (draw_box[0], max(0, draw_box[1]-5)), 25 | cv2.FONT_HERSHEY_COMPLEX, 0.8, color, 2) 26 | return im 27 | 28 | 29 | def draw_debug_images(images, targets, predictions=None, score_thr=0.3): 30 | debug_images = [] 31 | for image, target in zip(images, targets): 32 | img = draw_boxes(np.array(F.to_pil_image(image.cpu())), 33 | [box.cpu().numpy() for box in target['boxes']], 34 | [label.item() for label in target['labels']]) 35 | if predictions: 36 | img = draw_boxes(img, 37 | [box.cpu().numpy() for box, score in 38 | zip(predictions[target['image_id'].item()]['boxes'], 39 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 40 | [label.item() for label, score in 41 | zip(predictions[target['image_id'].item()]['labels'], 42 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 43 | color=(0, 150, 0)) 44 | debug_images.append(img) 45 | return debug_images 46 | 47 | 48 | def draw_mask(target): 49 | masks = [channel*label for channel, label in zip(target['masks'].cpu().numpy(), target['labels'].cpu().numpy())] 50 | masks_sum = sum(masks) 51 | masks_out = masks_sum + 25*(masks_sum > 0) 52 | return (masks_out*int(255/masks_out.max())).astype('uint8') 53 | 54 | 55 | def get_model_instance_segmentation(num_classes, hidden_layer): 56 | # load an instance segmentation model pre-trained on COCO 57 | model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True) 58 | 59 | # get number of input features 
for the classifier 60 | in_features = model.roi_heads.box_predictor.cls_score.in_features 61 | # replace the pre-trained head with a new one 62 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 63 | 64 | # now get the number of input features for the mask classifier 65 | in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels 66 | 67 | # and replace the mask predictor with a new one 68 | model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes) 69 | return model 70 | 71 | 72 | def get_iou_types(model): 73 | model_without_ddp = model 74 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 75 | model_without_ddp = model.module 76 | iou_types = ["bbox"] 77 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 78 | iou_types.append("segm") 79 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 80 | iou_types.append("keypoints") 81 | return iou_types 82 | 83 | 84 | @attr.s(auto_attribs=True) 85 | class CocoLikeAnnotations(): 86 | def __attrs_post_init__(self): 87 | self.coco_like_json: dict = {'images': [], 'annotations': []} 88 | self._ann_id: int = 0 89 | 90 | def update_images(self, file_name, height, width, id): 91 | self.coco_like_json['images'].append({'file_name': file_name, 92 | 'height': height, 'width': width, 93 | 'id': id}) 94 | 95 | def update_annotations(self, box, label_id, image_id, is_crowd=0): 96 | segmentation, bbox, area = self.extract_coco_info(box) 97 | self.coco_like_json['annotations'].append({'segmentation': segmentation, 'bbox': bbox, 'area': area, 98 | 'category_id': int(label_id), 'id': self._ann_id, 'iscrowd': is_crowd, 99 | 'image_id': image_id}) 100 | self._ann_id += 1 101 | 102 | @staticmethod 103 | def extract_coco_info(box): 104 | segmentation = list(map(int, [box[0], box[1], box[0], box[3], box[2], box[3], box[2], box[1]])) 105 | bbox = list(map(int, np.append(box[:2], (box[2:] - box[:2])))) 106 | area = int(bbox[2] * bbox[3]) 107 | return segmentation, bbox, area 108 | 109 | def dump_to_json(self, path_to_json='/tmp/inference_results/inference_results.json'): 110 | with open(path_to_json, "w") as write_file: 111 | json.dump(self.coco_like_json, write_file) 112 | -------------------------------------------------------------------------------- /setting_up_allegroai_platform/pytorch.mnist_trains.py: -------------------------------------------------------------------------------- 1 | # TRAINS - Example of Pytorch mnist training integration 2 | # 3 | from __future__ import print_function 4 | import argparse 5 | import os 6 | from tempfile import gettempdir 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | from torchvision import datasets, transforms 13 | 14 | from trains import Task 15 | task = Task.init(project_name='pytorch mnist', task_name='train SGD 0.1') 16 | logger = task.get_logger() 17 | 18 | class Net(nn.Module): 19 | def __init__(self): 20 | super(Net, self).__init__() 21 | self.conv1 = nn.Conv2d(1, 20, 5, 1) 22 | self.conv2 = nn.Conv2d(20, 50, 5, 1) 23 | self.fc1 = nn.Linear(4 * 4 * 50, 500) 24 | self.fc2 = nn.Linear(500, 10) 25 | 26 | def forward(self, x): 27 | x = F.relu(self.conv1(x)) 28 | x = F.max_pool2d(x, 2, 2) 29 | x = F.relu(self.conv2(x)) 30 | x = F.max_pool2d(x, 2, 2) 31 | x = x.view(-1, 4 * 4 * 50) 32 | x = F.relu(self.fc1(x)) 33 | x = self.fc2(x) 34 | return F.log_softmax(x, dim=1) 35 | 36 | 37 | def train(args, model, device, 
train_loader, optimizer, epoch): 38 | model.train() 39 | for batch_idx, (data, target) in enumerate(train_loader): 40 | data, target = data.to(device), target.to(device) 41 | optimizer.zero_grad() 42 | output = model(data) 43 | loss = F.nll_loss(output, target) 44 | loss.backward() 45 | optimizer.step() 46 | if batch_idx % args.log_interval == 0: 47 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 48 | epoch, batch_idx * len(data), len(train_loader.dataset), 49 | 100. * batch_idx / len(train_loader), loss.item())) 50 | logger.report_scalar(title='Training',series='loss',value=loss.item(),iteration=int(100. * batch_idx / len(train_loader))) 51 | 52 | 53 | def test(args, model, device, test_loader, epoch): 54 | model.eval() 55 | test_loss = 0 56 | correct = 0 57 | with torch.no_grad(): 58 | for data, target in test_loader: 59 | data, target = data.to(device), target.to(device) 60 | output = model(data) 61 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 62 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 63 | correct += pred.eq(target.view_as(pred)).sum().item() 64 | 65 | test_loss /= len(test_loader.dataset) 66 | 67 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 68 | test_loss, correct, len(test_loader.dataset), 69 | 100. * correct / len(test_loader.dataset))) 70 | logger.report_scalar(title='Test',series='loss',value=test_loss,iteration=epoch) 71 | logger.report_scalar(title='Test', series='accuracy', value=correct / len(test_loader.dataset), iteration=epoch) 72 | 73 | def main(): 74 | # Training settings 75 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 76 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 77 | help='input batch size for training (default: 64)') 78 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 79 | help='input batch size for testing (default: 1000)') 80 | parser.add_argument('--epochs', type=int, default=5, metavar='N', 81 | help='number of epochs to train (default: 5)') 82 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 83 | help='learning rate (default: 0.01)') 84 | parser.add_argument('--momentum', type=float, default=0.1, metavar='M', 85 | help='SGD momentum (default: 0.1)') 86 | parser.add_argument('--no-cuda', action='store_true', default=False, 87 | help='disables CUDA training') 88 | parser.add_argument('--seed', type=int, default=1, metavar='S', 89 | help='random seed (default: 1)') 90 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 91 | help='how many batches to wait before logging training status') 92 | 93 | parser.add_argument('--save-model', action='store_true', default=True, 94 | help='For Saving the current Model') 95 | parser.add_argument('--task-name', type=str, default='train') 96 | args = parser.parse_args() 97 | use_cuda = not args.no_cuda and torch.cuda.is_available() 98 | 99 | task.set_name(args.task_name) 100 | torch.manual_seed(args.seed) 101 | 102 | device = torch.device("cuda" if use_cuda else "cpu") 103 | 104 | kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {} 105 | train_loader = torch.utils.data.DataLoader( 106 | datasets.MNIST('../data', train=True, download=True, 107 | transform=transforms.Compose([ 108 | transforms.ToTensor(), 109 | transforms.Normalize((0.1307,), (0.3081,)) 110 | ])), 111 | batch_size=args.batch_size, shuffle=True, **kwargs) 112 | test_loader = 
torch.utils.data.DataLoader( 113 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 114 | transforms.ToTensor(), 115 | transforms.Normalize((0.1307,), (0.3081,)) 116 | ])), 117 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 118 | 119 | model = Net().to(device) 120 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 121 | 122 | for epoch in range(1, args.epochs + 1): 123 | train(args, model, device, train_loader, optimizer, epoch) 124 | test(args, model, device, test_loader, epoch) 125 | 126 | if (args.save_model): 127 | torch.save(model.state_dict(), os.path.join(gettempdir(), "mnist_cnn.pt")) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /setting_up_allegroai_platform/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.2.0 2 | trains==0.14.1 3 | torchvision==0.2.1 4 | -------------------------------------------------------------------------------- /the_hero_rises/README.md: -------------------------------------------------------------------------------- 1 | # The Hero’s Journey to Deep Learning CodeBase 2 | ## [Blog IIB: The Hero Rises: Build Your Own SSD](https://allegro.ai/blog/the-hero-rises-build-your-own-ssd/) 3 | 4 | As state-of-the-art models keep changing, you need a modular machine learning codebase that can support and sustain R&D machine learning and deep learning efforts for years. In the first blog of this series, we demonstrated how to write readable and maintainable code that trains a Torchvision MaskRCNN model, harnessing Ignite’s framework. In our second post (part IIA), we detailed the fundamental differences between single-shot and two-shot detectors and why the single-shot approach is in the sweet spot of the speed/accuracy trade-off. So it’s only natural that in this post we show how to leverage the modular nature of the MaskRCNN codebase and extend it to train both MaskRCNN and SSD models. Thanks to that modularity, only minimal changes to the code are needed. 5 | 6 | Torchvision is a package that consists of popular datasets, model architectures, and common image transformations for computer vision. It contains, among other things, a model zoo of pre-trained models for image classification, object detection, person keypoint detection, semantic segmentation and instance segmentation, ready for out-of-the-box use. This makes a PyTorch user’s life significantly easier, as it shortens the time between an idea and a product. Or a research paper. Or a blog post. 7 | 8 | Torchvision does not contain implementations of single-shot object detection models, such as the popular SSD. So, we added one: an SSD implementation that uses a Torchvision model as a backbone for feature extraction. Since its release, many improvements have been proposed on top of the original SSD. However, we have focused on the original SSD meta-architecture for clarity and simplicity.
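
For a quick sense of how the pieces fit together, here is a minimal sketch (not one of the scripts in this folder) showing how the SSD wrapper defined in `SSD/ssd_model.py` is composed from a Torchvision backbone and the hard-negative-mining loss in `SSD/multibox_loss.py`. The class count and the VGG16 backbone below are illustrative assumptions; see `train_model.py` and `inference_with_model.py` for the full training and inference flows.

```python
import torch
import torchvision

from SSD.ssd_model import SSD
from SSD.multibox_loss import SSDLoss

num_classes = 21  # assumption for illustration: 20 object classes + background
backbone = torchvision.models.vgg16(pretrained=True)  # VGG and ResNet backbones are supported

# The SSD module wires the backbone into multi-scale prediction heads and keeps the
# matching loss as a member, so the training loop only ever calls model(images, targets).
model = SSD(backbone=backbone, num_classes=num_classes, loss_function=SSDLoss(num_classes))

# The anchor constants in ssd_model.py assume 512x512 inputs; for a different
# resolution, change_input_size() re-derives the feature-map sizes and steps
# from a dry forward pass through the feature extractor.
model.change_input_size(torch.rand(1, 3, 512, 512) * 255)
```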
9 | -------------------------------------------------------------------------------- /the_hero_rises/SSD/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/the_hero_rises/SSD/__init__.py -------------------------------------------------------------------------------- /the_hero_rises/SSD/box_coder.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import itertools 4 | 5 | from torchvision.ops.boxes import nms 6 | 7 | 8 | class SSDBoxCoder: 9 | def __init__(self, steps, box_sizes, aspect_ratios, fm_sizes): 10 | self.prior_boxes = self._get_default_boxes(steps, box_sizes, aspect_ratios, fm_sizes) 11 | 12 | @staticmethod 13 | def _get_default_boxes(steps, box_sizes, aspect_ratios, fm_sizes): 14 | boxes = [] 15 | for i, fm_size in enumerate(fm_sizes): 16 | for h, w in itertools.product(range(fm_size), repeat=2): 17 | cx = (w + 0.5) * steps[i] 18 | cy = (h + 0.5) * steps[i] 19 | 20 | s = box_sizes[i] 21 | boxes.append((cx, cy, s, s)) 22 | 23 | s = math.sqrt(box_sizes[i] * box_sizes[i + 1]) 24 | boxes.append((cx, cy, s, s)) 25 | 26 | s = box_sizes[i] 27 | for ar in aspect_ratios[i]: 28 | boxes.append((cx, cy, s * math.sqrt(ar), s / math.sqrt(ar))) 29 | boxes.append((cx, cy, s / math.sqrt(ar), s * math.sqrt(ar))) 30 | return torch.Tensor(boxes) 31 | 32 | def encode(self, boxes, labels): 33 | '''Encode target bounding boxes and class labels. 34 | SSD coding rules: 35 | tx = (x - anchor_x) / (variance[0]*anchor_w) 36 | ty = (y - anchor_y) / (variance[0]*anchor_h) 37 | tw = log(w / anchor_w) / variance[1] 38 | th = log(h / anchor_h) / variance[1] 39 | Args: 40 | boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4]. 41 | labels: (tensor) object class labels, sized [#obj,]. 42 | Returns: 43 | loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4]. 44 | cls_targets: (tensor) encoded class labels, sized [#anchors,]. 45 | Reference: 46 | https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py 47 | ''' 48 | def argmax(x): 49 | v, i = x.max(0) 50 | j = v.max(0)[1] 51 | return (i[j], j) 52 | 53 | device = labels.get_device() 54 | prior_boxes = self.prior_boxes.to(device) # xywh 55 | prior_boxes = change_box_order(prior_boxes, 'xywh2xyxy') 56 | 57 | ious = box_iou(prior_boxes, boxes) # [#anchors, #obj] 58 | # index = torch.LongTensor(len(prior_boxes)).fill_(-1).to(device) 59 | index = torch.full(size=torch.Size([prior_boxes.size()[0]]), fill_value=-1, dtype=torch.long, device=device) 60 | masked_ious = ious.clone() 61 | while True: 62 | i, j = argmax(masked_ious) 63 | if masked_ious[i, j] < 1e-6: 64 | break 65 | index[i] = j 66 | masked_ious[i, :] = 0 67 | masked_ious[:, j] = 0 68 | 69 | mask = (index < 0) & (ious.max(1)[0] >= 0.5) 70 | if mask.any(): 71 | index[mask] = ious[mask.nonzero().squeeze(dim=1)].max(1)[1] 72 | 73 | boxes = boxes[index.clamp(min=0)] # negative index not supported 74 | boxes = change_box_order(boxes, 'xyxy2xywh') 75 | prior_boxes = change_box_order(prior_boxes, 'xyxy2xywh') 76 | 77 | variances = (0.1, 0.2) 78 | loc_xy = (boxes[:,:2]-prior_boxes[:,:2]) / prior_boxes[:,2:] / variances[0] 79 | loc_wh = torch.log(boxes[:,2:]/prior_boxes[:,2:]) / variances[1] 80 | loc_targets = torch.cat([loc_xy,loc_wh], 1) 81 | # cls_targets = 1 + labels[index.clamp(min=0)] # TODO: why +1 ??? 
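# Note: the commented-out line above (kept from the reference implementation this file was
# adapted from) shifts the labels by +1 so that class 0 can be reserved for the background.
# Here the dataset labels are assumed to be 1-based already (0 = background), so they are
# used as-is, matching decode() below, which emits labels starting from 1.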
82 | cls_targets = labels[index.clamp(min=0)] 83 | cls_targets[index<0] = 0 84 | return loc_targets, cls_targets 85 | 86 | def decode(self, loc_preds, cls_preds, score_thresh=0.05, nms_thresh=0.45): 87 | """Decode predicted loc/cls back to real box locations and class labels. 88 | Args: 89 | loc_preds: (tensor) predicted loc, sized [8732,4]. 90 | cls_preds: (tensor) predicted conf, sized [8732,21]. 91 | score_thresh: (float) threshold for object confidence score. 92 | nms_thresh: (float) threshold for box nms. 93 | Returns: 94 | boxes: (tensor) bbox locations, sized [#obj,4]. 95 | labels: (tensor) class labels, sized [#obj,]. 96 | """ 97 | device = cls_preds.get_device() if cls_preds.get_device() >= 0 else torch.device('cpu') 98 | prior_boxes = self.prior_boxes.to(device) 99 | variances = (0.1, 0.2) 100 | xy = loc_preds[:, :2] * variances[0] * prior_boxes[:, 2:] + prior_boxes[:, :2] 101 | wh = torch.exp(loc_preds[:, 2:] * variances[1]) * prior_boxes[:, 2:] 102 | box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1) 103 | 104 | boxes = [] 105 | labels = [] 106 | scores = [] 107 | # num_classes = cls_preds.size(1) 108 | # for i in range(1, num_classes): 109 | # score = cls_preds[:, i] 110 | for i, cls_pred in enumerate(cls_preds.split(1, dim=1)[1:]): 111 | score = cls_pred.squeeze(dim=1) 112 | mask = (score > score_thresh).nonzero().squeeze(dim=1) 113 | if mask.sum() == torch.tensor(data=0, device=device): 114 | continue 115 | box = box_preds[mask] 116 | score = score[mask] 117 | 118 | # keep = box_nms(box, score, nms_thresh) 119 | keep = nms(box, score, nms_thresh) 120 | boxes.append(box[keep]) 121 | # labels.append(torch.LongTensor(len(box[keep])).fill_(i+1)) 122 | labels.append(torch.full_like(score[keep], fill_value=i+1, dtype=torch.long, device=device)) 123 | # labels.append(torch.full(size=torch.Size([score[keep].size()[0]]), fill_value=i+1, dtype=torch.long, 124 | # device=device)) 125 | 126 | scores.append(score[keep]) 127 | 128 | if not boxes: 129 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 130 | 131 | boxes = torch.cat(boxes, 0) 132 | labels = torch.cat(labels, 0) 133 | scores = torch.cat(scores, 0) 134 | return boxes, labels, scores 135 | 136 | 137 | def change_box_order(boxes, order): 138 | """Change box order between (xmin,ymin,xmax,ymax) and (xcenter,ycenter,width,height). 139 | 140 | Args: 141 | boxes: (tensor) bounding boxes, sized [N,4]. 142 | order: (str) either 'xyxy2xywh' or 'xywh2xyxy'. 143 | 144 | Returns: 145 | (tensor) converted bounding boxes, sized [N,4]. 146 | """ 147 | assert order in ['xyxy2xywh','xywh2xyxy'] 148 | a = boxes[:,:2] 149 | b = boxes[:,2:] 150 | if order == 'xyxy2xywh': 151 | return torch.cat([(a+b)/2,b-a], 1) 152 | return torch.cat([a-b/2,a+b/2], 1) 153 | 154 | 155 | def box_clamp(boxes, xmin, ymin, xmax, ymax): 156 | """Clamp boxes. 157 | 158 | Args: 159 | boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [N,4]. 160 | xmin: (number) min value of x. 161 | ymin: (number) min value of y. 162 | xmax: (number) max value of x. 163 | ymax: (number) max value of y. 164 | 165 | Returns: 166 | (tensor) clamped boxes. 167 | """ 168 | boxes[:,0].clamp_(min=xmin, max=xmax) 169 | boxes[:,1].clamp_(min=ymin, max=ymax) 170 | boxes[:,2].clamp_(min=xmin, max=xmax) 171 | boxes[:,3].clamp_(min=ymin, max=ymax) 172 | return boxes 173 | 174 | 175 | def box_iou(box1, box2): 176 | """Compute the intersection over union of two set of boxes. 177 | 178 | The box order must be (xmin, ymin, xmax, ymax). 
179 | 180 | Args: 181 | box1: (tensor) bounding boxes, sized [N,4]. 182 | box2: (tensor) bounding boxes, sized [M,4]. 183 | 184 | Return: 185 | (tensor) iou, sized [N,M]. 186 | 187 | Reference: 188 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 189 | """ 190 | # N = box1.size(0) 191 | # M = box2.size(0) 192 | 193 | lt = torch.max(box1[:,None,:2], box2[:,:2]) # [N,M,2] 194 | rb = torch.min(box1[:,None,2:], box2[:,2:]) # [N,M,2] 195 | 196 | wh = (rb-lt).clamp(min=0) # [N,M,2] 197 | inter = wh[:,:,0] * wh[:,:,1] # [N,M] 198 | 199 | area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1]) # [N,] 200 | area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1]) # [M,] 201 | iou = inter / (area1[:,None] + area2 - inter) 202 | return iou 203 | -------------------------------------------------------------------------------- /the_hero_rises/SSD/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class SSDLoss(nn.Module): 6 | def __init__(self, num_classes): 7 | super(SSDLoss, self).__init__() 8 | self.num_classes = num_classes 9 | 10 | def _hard_negative_mining(self, cls_loss, pos): 11 | """Return negative indices that is 3x the number as postive indices. 12 | Args: 13 | cls_loss: (tensor) cross entroy loss between cls_preds and cls_targets, sized [N,#anchors]. 14 | pos: (tensor) positive class mask, sized [N,#anchors]. 15 | Return: 16 | (tensor) negative indices, sized [N,#anchors]. 17 | """ 18 | cls_loss = cls_loss * (pos.float() - 1) 19 | 20 | _, idx = cls_loss.sort(1) # sort by negative losses 21 | _, rank = idx.sort(1) # [N,#anchors] 22 | 23 | num_neg = (3*pos.sum(1)).clamp(min=3) # [N,] 24 | neg = rank < num_neg[:, None] # [N,#anchors] 25 | return neg 26 | 27 | def forward(self, loc_preds, loc_targets, cls_preds, cls_targets): 28 | """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets). 29 | Args: 30 | loc_preds: (tensor) predicted locations, sized [N, #anchors, 4]. 31 | loc_targets: (tensor) encoded target locations, sized [N, #anchors, 4]. 32 | cls_preds: (tensor) predicted class confidences, sized [N, #anchors, #classes]. 33 | cls_targets: (tensor) encoded target labels, sized [N, #anchors]. 34 | loss: 35 | (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + CrossEntropyLoss(cls_preds, cls_targets). 36 | """ 37 | pos = cls_targets > 0 # [N,#anchors] 38 | batch_size = pos.size(0) 39 | num_pos = pos.sum().item() 40 | 41 | # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets) 42 | mask = pos.unsqueeze(2).expand_as(loc_preds) # [N,#anchors,4] 43 | loc_loss = F.smooth_l1_loss(loc_preds[mask], loc_targets[mask], reduction='sum') 44 | 45 | # cls_loss = CrossEntropyLoss(cls_preds, cls_targets) 46 | cls_loss = F.cross_entropy(cls_preds.view(-1, self.num_classes), 47 | cls_targets.view(-1), reduction='none') # [N*#anchors,] 48 | cls_loss = cls_loss.view(batch_size, -1) 49 | cls_loss[cls_targets < 0] = 0 # set ignored loss to 0 50 | neg = self._hard_negative_mining(cls_loss, pos) # [N,#anchors] 51 | cls_loss = cls_loss[pos | neg].sum() 52 | 53 | return {'loc_loss': loc_loss/num_pos, 'cls_loss': cls_loss/num_pos} 54 | -------------------------------------------------------------------------------- /the_hero_rises/SSD/ssd_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | SSD model on top of TorchVision feature extractor. 
3 | The constant values below are suitable for a 512x512 input image. Adapting them automatically to a different image size 4 | can be done by running the change_input_size method. 5 | 6 | requirements: PyTorch and TorchVision 7 | 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | import torchvision 14 | 15 | from SSD.box_coder import SSDBoxCoder 16 | 17 | # Ratio between the original image size and each feature map size. 18 | # I.e., how many pixel steps on the original image are equivalent to a single pixel step on the feature map. 19 | STEPS = (8, 16, 32, 64, 128, 256, 512) 20 | # Length of the shorter side of the anchor rectangles, for each feature map (plus one extra value used for interpolation). 21 | BOX_SIZES = (35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6) 22 | # Aspect ratios of the rectangular SSD anchors, besides 1:1 23 | ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2, 3), (2,), (2,)) 24 | # Feature map sizes. 25 | FM_SIZES = (64, 32, 16, 8, 4, 2, 1) 26 | # Number of anchors per spatial location, for each feature map 27 | NUM_ANCHORS = (4, 6, 6, 6, 6, 4, 4) 28 | # Number of channels in each feature map, i.e. the channel (third) dimension. 29 | IN_CHANNELS = (512, 1024, 512, 256, 256, 256, 256) 30 | 31 | 32 | class HeadsExtractor(nn.Module): 33 | def __init__(self, backbone): 34 | super(HeadsExtractor, self).__init__() 35 | 36 | def split_backbone(net): 37 | features_extraction = [x for x in net.children()][:-2] 38 | 39 | if type(net) == torchvision.models.vgg.VGG: 40 | features_extraction = [*features_extraction[0]] 41 | net_till_conv4_3 = features_extraction[:-8] 42 | rest_of_net = features_extraction[-7:-1] 43 | elif type(net) == torchvision.models.resnet.ResNet: 44 | net_till_conv4_3 = features_extraction[:-2] 45 | rest_of_net = features_extraction[-2] 46 | else: 47 | raise ValueError('We only support VGG and ResNet backbones') 48 | return nn.Sequential(*net_till_conv4_3), nn.Sequential(*rest_of_net) 49 | 50 | self.till_conv4_3, self.till_conv5_3 = split_backbone(backbone) 51 | self.norm4 = L2Norm(512, 20) 52 | 53 | self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1) 54 | self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1) 55 | self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1) 56 | 57 | self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 58 | self.conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 59 | 60 | self.conv8_1 = nn.Conv2d(1024, 256, kernel_size=1) 61 | self.conv8_2 = nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2) 62 | 63 | self.conv9_1 = nn.Conv2d(512, 128, kernel_size=1) 64 | self.conv9_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2) 65 | 66 | self.conv10_1 = nn.Conv2d(256, 128, kernel_size=1) 67 | self.conv10_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2) 68 | 69 | self.conv11_1 = nn.Conv2d(256, 128, kernel_size=1) 70 | self.conv11_2 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2) 71 | 72 | self.conv12_1 = nn.Conv2d(256, 128, kernel_size=1) 73 | self.conv12_2 = nn.Conv2d(128, 256, kernel_size=4, padding=1) 74 | 75 | def forward(self, x): 76 | hs = [] 77 | h = self.till_conv4_3(x) 78 | hs.append(self.norm4(h)) 79 | 80 | if type(self.till_conv5_3[-1]) != torchvision.models.resnet.Bottleneck: 81 | h = F.max_pool2d(h, kernel_size=2, stride=2, ceil_mode=True) 82 | h = self.till_conv5_3(h) 83 | h = F.max_pool2d(h, kernel_size=3, stride=1, padding=1, ceil_mode=True) 84 | 85 | h = F.relu(self.conv6(h)) 86 | h = F.relu(self.conv7(h)) 87 | else: 88 | h = self.till_conv5_3(h) 89 | hs.append(h) # conv7 90 | 91 | h = 
F.relu(self.conv8_1(h)) 92 | h = F.relu(self.conv8_2(h)) 93 | hs.append(h) # conv8_2 94 | 95 | h = F.relu(self.conv9_1(h)) 96 | h = F.relu(self.conv9_2(h)) 97 | hs.append(h) # conv9_2 98 | 99 | h = F.relu(self.conv10_1(h)) 100 | h = F.relu(self.conv10_2(h)) 101 | hs.append(h) # conv10_2 102 | 103 | h = F.relu(self.conv11_1(h)) 104 | h = F.relu(self.conv11_2(h)) 105 | hs.append(h) # conv11_2 106 | 107 | h = F.relu(self.conv12_1(h)) 108 | h = F.relu(self.conv12_2(h)) 109 | hs.append(h) # conv12_2 110 | return hs 111 | 112 | 113 | class SSD(nn.Module): 114 | def __init__(self, backbone, num_classes, loss_function, 115 | num_anchors=NUM_ANCHORS, 116 | in_channels=IN_CHANNELS, 117 | steps=STEPS, 118 | box_sizes=BOX_SIZES, 119 | aspect_ratios=ASPECT_RATIOS, 120 | fm_sizes=FM_SIZES, 121 | heads_extractor_class=HeadsExtractor): 122 | super(SSD, self).__init__() 123 | self.num_classes = num_classes 124 | self.num_anchors = num_anchors 125 | self.in_channels = in_channels 126 | self.steps = steps 127 | self.box_sizes = box_sizes 128 | self.aspect_ratios = aspect_ratios 129 | self.fm_sizes = fm_sizes 130 | 131 | self.extractor = heads_extractor_class(backbone) 132 | self.criterion = loss_function 133 | self.box_coder = SSDBoxCoder(self.steps, self.box_sizes, self.aspect_ratios, self.fm_sizes) 134 | 135 | self._create_heads() 136 | 137 | def _create_heads(self): 138 | self.loc_layers = nn.ModuleList() 139 | self.cls_layers = nn.ModuleList() 140 | for i in range(len(self.in_channels)): 141 | self.loc_layers += [nn.Conv2d(self.in_channels[i], self.num_anchors[i] * 4, kernel_size=3, padding=1)] 142 | self.cls_layers += [nn.Conv2d(self.in_channels[i], self.num_anchors[i] * self.num_classes, kernel_size=3, 143 | padding=1)] 144 | 145 | def change_input_size(self, x): 146 | heads = self.extractor(x) 147 | self.fm_sizes = tuple([head.shape[-1] for head in heads]) 148 | image_size = x.shape[-1] 149 | self.steps = tuple([image_size//fm for fm in self.fm_sizes]) 150 | self.box_coder = SSDBoxCoder(self.steps, self.box_sizes, self.aspect_ratios, self.fm_sizes) 151 | 152 | def forward(self, images, targets=None): 153 | if self.training and targets is None: 154 | raise ValueError("In training mode, targets should be passed") 155 | loc_preds = [] 156 | cls_preds = [] 157 | input_images = torch.stack(images) if isinstance(images, list) else images 158 | extracted_batch = self.extractor(input_images) 159 | for i, x in enumerate(extracted_batch): 160 | loc_pred = self.loc_layers[i](x) 161 | loc_pred = loc_pred.permute(0, 2, 3, 1).contiguous() 162 | loc_preds.append(loc_pred.view(loc_pred.size(0), -1, 4)) 163 | 164 | cls_pred = self.cls_layers[i](x) 165 | cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous() 166 | cls_preds.append(cls_pred.view(cls_pred.size(0), -1, self.num_classes)) 167 | 168 | loc_preds = torch.cat(loc_preds, 1) 169 | cls_preds = torch.cat(cls_preds, 1) 170 | 171 | if self.training: 172 | encoded_targets = [self.box_coder.encode(target['boxes'], target['labels']) for target in targets] 173 | loc_targets = torch.stack([encoded_target[0] for encoded_target in encoded_targets]) 174 | cls_targets = torch.stack([encoded_target[1] for encoded_target in encoded_targets]) 175 | losses = self.criterion(loc_preds, loc_targets, cls_preds, cls_targets) 176 | return losses 177 | 178 | detections = [] 179 | for batch, (loc, cls) in enumerate(zip(loc_preds.split(split_size=1, dim=0), 180 | cls_preds.split(split_size=1, dim=0))): 181 | boxes, labels, scores = self.box_coder.decode(loc.squeeze(), 
F.softmax(cls.squeeze(), dim=1)) 182 | detections.append({'boxes': boxes, 'labels': labels, 'scores': scores}) 183 | 184 | return detections 185 | 186 | 187 | class L2Norm(nn.Module): 188 | """L2Norm layer across all channels.""" 189 | 190 | def __init__(self, in_features, scale): 191 | super(L2Norm, self).__init__() 192 | self.weight = nn.Parameter(torch.Tensor(in_features)) 193 | self.reset_parameters(scale) 194 | 195 | def reset_parameters(self, scale): 196 | nn.init.constant_(self.weight, scale) 197 | 198 | def forward(self, x): 199 | x = F.normalize(x, dim=1) 200 | scale = self.weight[None, :, None, None] 201 | return scale * x 202 | 203 | 204 | 205 | # Based on https://github.com/kuangliu/torchcv/tree/master/examples/ssd 206 | -------------------------------------------------------------------------------- /the_hero_rises/engines.py: -------------------------------------------------------------------------------- 1 | import math 2 | import copy 3 | import torch 4 | 5 | from ignite.engine import Engine 6 | from torchvision_references import utils 7 | 8 | 9 | def create_trainer(model, device): 10 | def update_model(engine, batch): 11 | images, targets = copy.deepcopy(batch) 12 | images_model, targets_model = prepare_batch(batch, device=device) 13 | 14 | loss_dict = model(images_model, targets_model) 15 | losses = sum(loss for loss in loss_dict.values()) 16 | 17 | # reduce losses over all GPUs for logging purposes 18 | loss_dict_reduced = utils.reduce_dict(loss_dict) 19 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 20 | 21 | loss_value = losses_reduced.item() 22 | 23 | engine.state.optimizer.zero_grad() 24 | if not math.isfinite(loss_value): 25 | print("Loss is {}, resetting loss and skipping training iteration".format(loss_value)) 26 | print('Loss values were: ', loss_dict_reduced) 27 | print('Input labels were: ', [target['labels'] for target in targets]) 28 | print('Input boxes were: ', [target['boxes'] for target in targets]) 29 | loss_dict_reduced = {k: torch.tensor(0) for k, v in loss_dict_reduced.items()} 30 | else: 31 | losses.backward() 32 | engine.state.optimizer.step() 33 | 34 | if engine.state.warmup_scheduler is not None: 35 | engine.state.warmup_scheduler.step() 36 | 37 | images_model = targets_model = None 38 | 39 | return images, targets, loss_dict_reduced 40 | return Engine(update_model) 41 | 42 | 43 | def create_evaluator(model, device): 44 | def update_model(engine, batch): 45 | images, targets = prepare_batch(batch, device=device) 46 | images_model = copy.deepcopy(images) 47 | 48 | torch.cuda.synchronize() 49 | with torch.no_grad(): 50 | outputs = model(images_model) 51 | 52 | outputs = [{k: v.to(device) for k, v in t.items()} for t in outputs] 53 | 54 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 55 | engine.state.coco_evaluator.update(res) 56 | 57 | images_model = outputs = None 58 | 59 | return images, targets, res 60 | return Engine(update_model) 61 | 62 | 63 | def prepare_batch(batch, device=None): 64 | images, targets = batch 65 | images = list(image.to(device, non_blocking=True) for image in images) 66 | targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets] 67 | return images, targets 68 | -------------------------------------------------------------------------------- /the_hero_rises/inference_with_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | import torch 6 
| from torch.utils.tensorboard import SummaryWriter 7 | 8 | from argparse import ArgumentParser 9 | from pathlib2 import Path 10 | 11 | from utilities import get_iou_types, draw_boxes, get_model_instance_segmentation, CocoLikeAnnotations, get_backbone 12 | from torchvision_references import utils 13 | from torchvision.transforms import functional as F 14 | 15 | from PIL import Image 16 | from transforms import get_transform 17 | 18 | from SSD.ssd_model import SSD 19 | from SSD.multibox_loss import SSDLoss 20 | 21 | from trains import Task 22 | task = Task.init(project_name='Object Detection with TRAINS, Ignite and TensorBoard', 23 | task_name='Inference with trained SSD model') 24 | 25 | 26 | def rescale_box(box, image_size, orig_height, orig_width): 27 | rescale_height = float(orig_height) / image_size 28 | rescale_width = float(orig_width) / image_size 29 | box[:2] *= rescale_width 30 | box[2:] *= rescale_height 31 | return box 32 | 33 | 34 | def run(task_args): 35 | writer = SummaryWriter(log_dir=task_args.log_dir) 36 | input_checkpoint = torch.load(task_args.input_checkpoint) 37 | labels_enum = input_checkpoint.get('labels_enumeration') 38 | model_configuration = input_checkpoint.get('configuration') 39 | model_weights = input_checkpoint.get('model') 40 | image_size = model_configuration.get('image_size') 41 | 42 | # Set the device to GPU if available - if not, set it to CPU 43 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 44 | torch.backends.cudnn.benchmark = True if torch.cuda.is_available() else False # optimization for fixed input size 45 | 46 | # Get the relevant model based on the checkpoint configuration 47 | num_classes = model_configuration.get('num_classes') 48 | if model_configuration.get('model_type') == 'maskrcnn': 49 | model = get_model_instance_segmentation(num_classes, model_configuration.get('mask_predictor_hidden_layer')) 50 | elif model_configuration.get('model_type') == 'ssd': 51 | backbone = get_backbone(model_configuration.get('ssd_backbone')) 52 | model = SSD(backbone=backbone, num_classes=num_classes, loss_function=SSDLoss(num_classes)) 53 | model.change_input_size(torch.rand(size=(1, 3, model_configuration.get('image_size'), model_configuration.get('image_size')))*255) 54 | else: 55 | raise ValueError('Only "maskrcnn" and "ssd" are supported as model type') 56 | 57 | # if there is more than one GPU, parallelize the model 58 | if torch.cuda.device_count() > 1: 59 | print("{} GPUs were detected - we will use all of them".format(torch.cuda.device_count())) 60 | model = torch.nn.DataParallel(model) 61 | 62 | # copy the model to each device 63 | model.to(device) 64 | 65 | # Determine the IoU types supported by the model 66 | iou_types = get_iou_types(model) 67 | use_mask = True if "segm" in iou_types else False 68 | 69 | # Load pretrained model weights 70 | model.load_state_dict(model_weights) 71 | 72 | # set the model to inference mode 73 | model.eval() 74 | 75 | images_paths = [] 76 | for file_type in ('*.png', '*.jpg', '*.jpeg'): 77 | images_paths.extend(glob.glob(os.path.join(task_args.input_dataset_root, file_type))) 78 | 79 | transforms = get_transform(train=False, image_size=image_size) 80 | 81 | path_to_json = os.path.join(task_args.output_dir, "inference_results.json") 82 | coco_like_anns = CocoLikeAnnotations() 83 | batch_images = [] 84 | batch_paths = [] 85 | batch_shapes = [] 86 | 87 | for i, image_path in enumerate(images_paths): 88 | img = Image.open(image_path).convert('RGB') 89 | batch_shapes.append({'height': img.height, 'width': 
img.width}) 90 | img, __ = transforms(img) 91 | batch_images.append(img) 92 | batch_paths.append(image_path) 93 | if len(batch_images) < task_args.batch_size: 94 | continue 95 | 96 | input_images = torch.stack(batch_images) 97 | 98 | with torch.no_grad(): 99 | torch_out = model(input_images.to(device)) 100 | 101 | for img_num, image in enumerate(input_images): 102 | valid_detections = torch_out[img_num].get('scores') >= args.detection_thresh 103 | img_boxes = torch_out[img_num].get('boxes')[valid_detections].cpu().numpy() 104 | img_labels_ids = torch_out[img_num].get('labels')[valid_detections].cpu().numpy() 105 | img_labels = [labels_enum[label]['name'] for label in img_labels_ids] 106 | image_id = (i + 1 - task_args.batch_size + img_num) 107 | orig_height = batch_shapes[img_num].get('height') 108 | orig_width = batch_shapes[img_num].get('width') 109 | 110 | coco_like_anns.update_images(file_name=Path(batch_paths[img_num]).name, 111 | height=orig_height, width=orig_width, 112 | id=image_id) 113 | 114 | for box, label, label_id in zip(img_boxes, img_labels, img_labels_ids): 115 | orig_box = rescale_box(image_size=image_size, orig_height=orig_height, orig_width=orig_width, box=box.copy()) 116 | coco_like_anns.update_annotations(box=orig_box, label_id=label_id, 117 | image_id=image_id) 118 | 119 | if ((i+1)/task_args.batch_size) % task_args.log_interval == 0: 120 | print('Batch {}: Saving detections of file {} to {}'.format(int((i+1)/task_args.batch_size), 121 | Path(batch_paths[img_num]).name, 122 | path_to_json)) 123 | 124 | if ((i+1)/task_args.batch_size) % task_args.debug_images_interval == 0: 125 | debug_image = draw_boxes(np.array(F.to_pil_image(image.cpu())), img_boxes, img_labels, color=(0, 150, 0)) 126 | writer.add_image("inference/image_{}".format(img_num), debug_image, ((i+1)/task_args.batch_size), 127 | dataformats='HWC') 128 | 129 | batch_images = [] 130 | batch_paths = [] 131 | 132 | coco_like_anns.dump_to_json(path_to_json=path_to_json) 133 | 134 | 135 | if __name__ == "__main__": 136 | parser = ArgumentParser() 137 | parser.add_argument('--batch_size', type=int, default=4, 138 | help='input batch size for training and validation (default: 4)') 139 | parser.add_argument('--detection_thresh', type=float, default=0.4, 140 | help='Inference confidence threshold') 141 | parser.add_argument('--log_interval', type=int, default=100, 142 | help='how many batches to wait before logging training status') 143 | parser.add_argument('--debug_images_interval', type=int, default=500, 144 | help='how many batches to wait before logging debug images') 145 | parser.add_argument('--input_dataset_root', type=str, 146 | default='/media/dan/bigdata/datasets/coco/2017/val2017', 147 | help='annotation file of test dataset') 148 | parser.add_argument('--input_checkpoint', type=str, default='/tmp/checkpoints/model_epoch_10.pth', 149 | help='Checkpoint to use for inference') 150 | parser.add_argument("--output_dir", type=str, default="/tmp/inference_results", 151 | help="output directory for saving models checkpoints") 152 | parser.add_argument("--log_dir", type=str, default="/tmp/tensorboard_logs", 153 | help="log directory for Tensorboard log output") 154 | args = parser.parse_args() 155 | 156 | if not os.path.exists(args.output_dir): 157 | utils.mkdir(args.output_dir) 158 | if not os.path.exists(args.log_dir): 159 | utils.mkdir(args.log_dir) 160 | 161 | run(args) 162 | -------------------------------------------------------------------------------- /the_hero_rises/requirements.txt: 
-------------------------------------------------------------------------------- 1 | Pillow == 10.2.0 2 | attrs == 19.3.0 3 | numpy == 1.22.0 4 | opencv_python == 4.2.0.32 5 | pathlib2 == 2.3.5 6 | pycocotools == 2.0.0 7 | pytorch_ignite == 0.2.1 8 | torch == 1.3.1 9 | torchvision == 0.4.2 10 | trains == 0.13.1 11 | tensorboard==2.1.0 12 | -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/the_hero_rises/torchvision_references/__init__.py -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/coco_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import copy 5 | import torch 6 | import torch._six 7 | 8 | from pycocotools.cocoeval import COCOeval 9 | from pycocotools.coco import COCO 10 | import pycocotools.mask as mask_util 11 | 12 | from collections import defaultdict 13 | 14 | from torchvision_references import utils 15 | 16 | 17 | class CocoEvaluator(object): 18 | def __init__(self, coco_gt, iou_types): 19 | assert isinstance(iou_types, (list, tuple)) 20 | coco_gt = copy.deepcopy(coco_gt) 21 | self.coco_gt = coco_gt 22 | 23 | self.iou_types = iou_types 24 | self.coco_eval = {} 25 | for iou_type in iou_types: 26 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 27 | 28 | self.img_ids = [] 29 | self.eval_imgs = {k: [] for k in iou_types} 30 | 31 | def update(self, predictions): 32 | img_ids = list(np.unique(list(predictions.keys()))) 33 | self.img_ids.extend(img_ids) 34 | 35 | for iou_type in self.iou_types: 36 | results = self.prepare(predictions, iou_type) 37 | coco_dt = loadRes(self.coco_gt, results) if results else COCO() 38 | coco_eval = self.coco_eval[iou_type] 39 | 40 | coco_eval.cocoDt = coco_dt 41 | coco_eval.params.imgIds = list(img_ids) 42 | img_ids, eval_imgs = evaluate(coco_eval) 43 | 44 | self.eval_imgs[iou_type].append(eval_imgs) 45 | 46 | def synchronize_between_processes(self): 47 | for iou_type in self.iou_types: 48 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 49 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 50 | 51 | def accumulate(self): 52 | for coco_eval in self.coco_eval.values(): 53 | coco_eval.accumulate() 54 | 55 | def summarize(self): 56 | for iou_type, coco_eval in self.coco_eval.items(): 57 | print("IoU metric: {}".format(iou_type)) 58 | coco_eval.summarize() 59 | 60 | def prepare(self, predictions, iou_type): 61 | if iou_type == "bbox": 62 | return self.prepare_for_coco_detection(predictions) 63 | elif iou_type == "segm": 64 | return self.prepare_for_coco_segmentation(predictions) 65 | elif iou_type == "keypoints": 66 | return self.prepare_for_coco_keypoint(predictions) 67 | else: 68 | raise ValueError("Unknown iou type {}".format(iou_type)) 69 | 70 | def prepare_for_coco_detection(self, predictions): 71 | coco_results = [] 72 | for original_id, prediction in predictions.items(): 73 | if len(prediction) == 0: 74 | continue 75 | 76 | boxes = prediction["boxes"] 77 | boxes = convert_to_xywh(boxes).tolist() 78 | scores = prediction["scores"].tolist() 79 | labels = prediction["labels"].tolist() 80 | 81 | coco_results.extend( 82 | [ 83 | { 84 | "image_id": original_id, 85 
| "category_id": labels[k], 86 | "bbox": box, 87 | "score": scores[k], 88 | } 89 | for k, box in enumerate(boxes) 90 | ] 91 | ) 92 | return coco_results 93 | 94 | def prepare_for_coco_segmentation(self, predictions): 95 | coco_results = [] 96 | for original_id, prediction in predictions.items(): 97 | if len(prediction) == 0: 98 | continue 99 | 100 | masks = prediction["masks"] 101 | masks = (masks > 0.5).type(torch.uint8) 102 | 103 | scores = prediction["scores"].tolist() 104 | labels = prediction["labels"].tolist() 105 | 106 | rles = [ 107 | mask_util.encode(np.array(mask.cpu()[0, :, :, np.newaxis], order="F"))[0] 108 | for mask in masks 109 | ] 110 | for rle in rles: 111 | rle["counts"] = rle["counts"].decode("utf-8") 112 | 113 | coco_results.extend( 114 | [ 115 | { 116 | "image_id": original_id, 117 | "category_id": labels[k], 118 | "segmentation": rle, 119 | "score": scores[k], 120 | } 121 | for k, rle in enumerate(rles) 122 | ] 123 | ) 124 | return coco_results 125 | 126 | def prepare_for_coco_keypoint(self, predictions): 127 | coco_results = [] 128 | for original_id, prediction in predictions.items(): 129 | if len(prediction) == 0: 130 | continue 131 | 132 | boxes = prediction["boxes"] 133 | boxes = convert_to_xywh(boxes).tolist() 134 | scores = prediction["scores"].tolist() 135 | labels = prediction["labels"].tolist() 136 | keypoints = prediction["keypoints"] 137 | keypoints = keypoints.flatten(start_dim=1).tolist() 138 | 139 | coco_results.extend( 140 | [ 141 | { 142 | "image_id": original_id, 143 | "category_id": labels[k], 144 | 'keypoints': keypoint, 145 | "score": scores[k], 146 | } 147 | for k, keypoint in enumerate(keypoints) 148 | ] 149 | ) 150 | return coco_results 151 | 152 | 153 | def convert_to_xywh(boxes): 154 | xmin, ymin, xmax, ymax = boxes.unbind(1) 155 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 156 | 157 | 158 | def merge(img_ids, eval_imgs): 159 | all_img_ids = utils.all_gather(img_ids) 160 | all_eval_imgs = utils.all_gather(eval_imgs) 161 | 162 | merged_img_ids = [] 163 | for p in all_img_ids: 164 | merged_img_ids.extend(p) 165 | 166 | merged_eval_imgs = [] 167 | for p in all_eval_imgs: 168 | merged_eval_imgs.append(p) 169 | 170 | merged_img_ids = np.array(merged_img_ids) 171 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 172 | 173 | # keep only unique (and in sorted order) images 174 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 175 | merged_eval_imgs = merged_eval_imgs[..., idx] 176 | 177 | return merged_img_ids, merged_eval_imgs 178 | 179 | 180 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 181 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 182 | img_ids = list(img_ids) 183 | eval_imgs = list(eval_imgs.flatten()) 184 | 185 | coco_eval.evalImgs = eval_imgs 186 | coco_eval.params.imgIds = img_ids 187 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 188 | 189 | 190 | ################################################################# 191 | # From pycocotools, just removed the prints and fixed 192 | # a Python3 bug about unicode not defined 193 | ################################################################# 194 | 195 | # Ideally, pycocotools wouldn't have hard-coded prints 196 | # so that we could avoid copy-pasting those two functions 197 | 198 | def createIndex(self): 199 | # create index 200 | # print('creating index...') 201 | anns, cats, imgs = {}, {}, {} 202 | imgToAnns, catToImgs = defaultdict(list), defaultdict(list) 203 | if 'annotations' in self.dataset: 204 | for ann in 
self.dataset['annotations']: 205 | imgToAnns[ann['image_id']].append(ann) 206 | anns[ann['id']] = ann 207 | 208 | if 'images' in self.dataset: 209 | for img in self.dataset['images']: 210 | imgs[img['id']] = img 211 | 212 | if 'categories' in self.dataset: 213 | for cat in self.dataset['categories']: 214 | cats[cat['id']] = cat 215 | 216 | if 'annotations' in self.dataset and 'categories' in self.dataset: 217 | for ann in self.dataset['annotations']: 218 | catToImgs[ann['category_id']].append(ann['image_id']) 219 | 220 | # print('index created!') 221 | 222 | # create class members 223 | self.anns = anns 224 | self.imgToAnns = imgToAnns 225 | self.catToImgs = catToImgs 226 | self.imgs = imgs 227 | self.cats = cats 228 | 229 | 230 | maskUtils = mask_util 231 | 232 | 233 | def loadRes(self, resFile): 234 | """ 235 | Load result file and return a result api object. 236 | :param resFile (str) : file name of result file 237 | :return: res (obj) : result api object 238 | """ 239 | res = COCO() 240 | res.dataset['images'] = [img for img in self.dataset['images']] 241 | 242 | # print('Loading and preparing results...') 243 | # tic = time.time() 244 | if isinstance(resFile, torch._six.string_classes): 245 | anns = json.load(open(resFile)) 246 | elif type(resFile) == np.ndarray: 247 | anns = self.loadNumpyAnnotations(resFile) 248 | else: 249 | anns = resFile 250 | assert type(anns) == list, 'results in not an array of objects' 251 | annsImgIds = [ann['image_id'] for ann in anns] 252 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 253 | 'Results do not correspond to current coco set' 254 | if 'caption' in anns[0]: 255 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 256 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 257 | for id, ann in enumerate(anns): 258 | ann['id'] = id + 1 259 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 260 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 261 | for id, ann in enumerate(anns): 262 | bb = ann['bbox'] 263 | x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 264 | if 'segmentation' not in ann: 265 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 266 | ann['area'] = bb[2] * bb[3] 267 | ann['id'] = id + 1 268 | ann['iscrowd'] = 0 269 | elif 'segmentation' in anns[0]: 270 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 271 | for id, ann in enumerate(anns): 272 | # now only support compressed RLE format as segmentation results 273 | ann['area'] = maskUtils.area(ann['segmentation']) 274 | if 'bbox' not in ann: 275 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 276 | ann['id'] = id + 1 277 | ann['iscrowd'] = 0 278 | elif 'keypoints' in anns[0]: 279 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 280 | for id, ann in enumerate(anns): 281 | s = ann['keypoints'] 282 | x = s[0::3] 283 | y = s[1::3] 284 | x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) 285 | ann['area'] = (x2 - x1) * (y2 - y1) 286 | ann['id'] = id + 1 287 | ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] 288 | # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 289 | 290 | res.dataset['annotations'] = anns 291 | createIndex(res) 292 | return res 293 | 294 | 295 | def evaluate(self): 296 | ''' 297 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 298 | :return: None 299 | ''' 300 | # tic = time.time() 301 | # print('Running per image 
evaluation...') 302 | p = self.params 303 | # add backward compatibility if useSegm is specified in params 304 | if p.useSegm is not None: 305 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 306 | print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) 307 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 308 | p.imgIds = list(np.unique(p.imgIds)) 309 | if p.useCats: 310 | p.catIds = list(np.unique(p.catIds)) 311 | p.maxDets = sorted(p.maxDets) 312 | self.params = p 313 | 314 | self._prepare() 315 | # loop through images, area range, max detection number 316 | catIds = p.catIds if p.useCats else [-1] 317 | 318 | if p.iouType == 'segm' or p.iouType == 'bbox': 319 | computeIoU = self.computeIoU 320 | elif p.iouType == 'keypoints': 321 | computeIoU = self.computeOks 322 | self.ious = { 323 | (imgId, catId): computeIoU(imgId, catId) 324 | for imgId in p.imgIds 325 | for catId in catIds} 326 | 327 | evaluateImg = self.evaluateImg 328 | maxDet = p.maxDets[-1] 329 | evalImgs = [ 330 | evaluateImg(imgId, catId, areaRng, maxDet) 331 | for catId in catIds 332 | for areaRng in p.areaRng 333 | for imgId in p.imgIds 334 | ] 335 | # this is NOT in the pycocotools code, but could be done outside 336 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 337 | self._paramsEval = copy.deepcopy(self.params) 338 | # toc = time.time() 339 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 340 | return p.imgIds, evalImgs 341 | 342 | ################################################################# 343 | # end of straight copy from pycocotools, just removing the prints 344 | ################################################################# 345 | -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | import torchvision 7 | 8 | from pycocotools import mask as coco_mask 9 | from pycocotools.coco import COCO 10 | 11 | import transforms as T 12 | 13 | 14 | class FilterAndRemapCocoCategories(object): 15 | def __init__(self, categories, remap=True): 16 | self.categories = categories 17 | self.remap = remap 18 | 19 | def __call__(self, image, target): 20 | anno = target["annotations"] 21 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 22 | if not self.remap: 23 | target["annotations"] = anno 24 | return image, target 25 | anno = copy.deepcopy(anno) 26 | for obj in anno: 27 | obj["category_id"] = self.categories.index(obj["category_id"]) 28 | target["annotations"] = anno 29 | return image, target 30 | 31 | 32 | def convert_coco_poly_to_mask(segmentations, height, width): 33 | masks = [] 34 | for polygons in segmentations: 35 | rles = coco_mask.frPyObjects(polygons, height, width) 36 | mask = coco_mask.decode(rles) 37 | if len(mask.shape) < 3: 38 | mask = mask[..., None] 39 | mask = torch.as_tensor(mask, dtype=torch.uint8) 40 | mask = mask.any(dim=2) 41 | masks.append(mask) 42 | if masks: 43 | masks = torch.stack(masks, dim=0) 44 | else: 45 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 46 | return masks 47 | 48 | 49 | class ConvertCocoPolysToMask(object): 50 | def __call__(self, image, target): 51 | w, h = image.size 52 | 53 | image_id = target["image_id"] 54 | image_id = torch.tensor([image_id]) 55 | 56 | anno = target["annotations"] 57 | 58 | anno = [obj for obj in anno if 
obj['iscrowd'] == 0] 59 | 60 | boxes = [obj["bbox"] for obj in anno] 61 | # guard against no boxes via resizing 62 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 63 | boxes[:, 2:] += boxes[:, :2] 64 | boxes[:, 0::2].clamp_(min=0, max=w) 65 | boxes[:, 1::2].clamp_(min=0, max=h) 66 | 67 | classes = [obj["category_id"] for obj in anno] 68 | classes = torch.tensor(classes, dtype=torch.int64) 69 | 70 | segmentations = [obj["segmentation"] for obj in anno] 71 | masks = convert_coco_poly_to_mask(segmentations, h, w) 72 | 73 | keypoints = None 74 | if anno and "keypoints" in anno[0]: 75 | keypoints = [obj["keypoints"] for obj in anno] 76 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 77 | num_keypoints = keypoints.shape[0] 78 | if num_keypoints: 79 | keypoints = keypoints.view(num_keypoints, -1, 3) 80 | 81 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 82 | boxes = boxes[keep] 83 | classes = classes[keep] 84 | masks = masks[keep] 85 | if keypoints is not None: 86 | keypoints = keypoints[keep] 87 | 88 | target = {} 89 | target["boxes"] = boxes 90 | target["labels"] = classes 91 | target["masks"] = masks 92 | target["image_id"] = image_id 93 | if keypoints is not None: 94 | target["keypoints"] = keypoints 95 | 96 | # for conversion to coco api 97 | area = torch.tensor([obj["area"] for obj in anno]) 98 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 99 | target["area"] = area 100 | target["iscrowd"] = iscrowd 101 | 102 | return image, target 103 | 104 | 105 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 106 | def _has_only_empty_bbox(anno): 107 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 108 | 109 | def _count_visible_keypoints(anno): 110 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 111 | 112 | min_keypoints_per_image = 10 113 | 114 | def _has_valid_annotation(anno): 115 | # if it's empty, there is no annotation 116 | if len(anno) == 0: 117 | return False 118 | # if all boxes have close to zero area, there is no annotation 119 | if _has_only_empty_bbox(anno): 120 | return False 121 | # keypoints task have a slight different critera for considering 122 | # if an annotation is valid 123 | if "keypoints" not in anno[0]: 124 | return True 125 | # for keypoint detection tasks, only consider valid images those 126 | # containing at least min_keypoints_per_image 127 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 128 | return True 129 | return False 130 | 131 | assert isinstance(dataset, torchvision.datasets.CocoDetection) 132 | ids = [] 133 | for ds_idx, img_id in enumerate(dataset.ids): 134 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 135 | anno = dataset.coco.loadAnns(ann_ids) 136 | if cat_list: 137 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 138 | if _has_valid_annotation(anno): 139 | ids.append(ds_idx) 140 | 141 | dataset = torch.utils.data.Subset(dataset, ids) 142 | return dataset 143 | 144 | 145 | def convert_to_coco_api(ds): 146 | coco_ds = COCO() 147 | ann_id = 0 148 | dataset = {'images': [], 'categories': [], 'annotations': []} 149 | categories = set() 150 | for img_idx in range(len(ds)): 151 | # find better way to get target 152 | # targets = ds.get_annotations(img_idx) 153 | img, targets = ds[img_idx] 154 | image_id = targets["image_id"].item() 155 | img_dict = {} 156 | img_dict['id'] = image_id 157 | img_dict['height'] = img.shape[-2] 158 | img_dict['width'] = img.shape[-1] 159 | 
dataset['images'].append(img_dict) 160 | bboxes = targets["boxes"] 161 | bboxes[:, 2:] -= bboxes[:, :2] 162 | bboxes = bboxes.tolist() 163 | labels = targets['labels'].tolist() 164 | areas = targets['area'].tolist() 165 | iscrowd = targets['iscrowd'].tolist() 166 | if 'masks' in targets: 167 | masks = targets['masks'] 168 | # make masks Fortran contiguous for coco_mask 169 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 170 | if 'keypoints' in targets: 171 | keypoints = targets['keypoints'] 172 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 173 | num_objs = len(bboxes) 174 | for i in range(num_objs): 175 | ann = {} 176 | ann['image_id'] = image_id 177 | ann['bbox'] = bboxes[i] 178 | ann['category_id'] = labels[i] 179 | categories.add(labels[i]) 180 | ann['area'] = areas[i] 181 | ann['iscrowd'] = iscrowd[i] 182 | ann['id'] = ann_id 183 | if 'masks' in targets: 184 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 185 | if 'keypoints' in targets: 186 | ann['keypoints'] = keypoints[i] 187 | ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) 188 | dataset['annotations'].append(ann) 189 | ann_id += 1 190 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 191 | coco_ds.dataset = dataset 192 | coco_ds.createIndex() 193 | return coco_ds 194 | 195 | 196 | def get_coco_api_from_dataset(dataset): 197 | for _ in range(10): 198 | if isinstance(dataset, torchvision.datasets.CocoDetection): 199 | break 200 | if isinstance(dataset, torch.utils.data.Subset): 201 | dataset = dataset.dataset 202 | if isinstance(dataset, torchvision.datasets.CocoDetection): 203 | return dataset.coco 204 | return convert_to_coco_api(dataset) 205 | 206 | 207 | class CocoDetection(torchvision.datasets.CocoDetection): 208 | def __init__(self, img_folder, ann_file, transforms): 209 | super(CocoDetection, self).__init__(img_folder, ann_file) 210 | self._transforms = transforms 211 | 212 | def __getitem__(self, idx): 213 | img, target = super(CocoDetection, self).__getitem__(idx) 214 | image_id = self.ids[idx] 215 | target = dict(image_id=image_id, annotations=target) 216 | if self._transforms is not None: 217 | img, target = self._transforms(img, target) 218 | return img, target 219 | 220 | 221 | def get_coco(root, image_set, transforms, mode='instances'): 222 | anno_file_template = "{}_{}2017.json" 223 | PATHS = { 224 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 225 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 226 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 227 | } 228 | 229 | t = [ConvertCocoPolysToMask()] 230 | 231 | if transforms is not None: 232 | t.append(transforms) 233 | transforms = T.Compose(t) 234 | 235 | img_folder, ann_file = PATHS[image_set] 236 | img_folder = os.path.join(root, img_folder) 237 | ann_file = os.path.join(root, ann_file) 238 | 239 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 240 | 241 | if image_set == "train": 242 | dataset = _coco_remove_images_without_annotations(dataset) 243 | 244 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 245 | 246 | return dataset 247 | 248 | 249 | def get_coco_kp(root, image_set, transforms): 250 | return get_coco(root, image_set, transforms, mode="person_keypoints") 251 | -------------------------------------------------------------------------------- /the_hero_rises/torchvision_references/utils.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pickle 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | import errno 9 | import os 10 | 11 | 12 | def all_gather(data): 13 | """ 14 | Run all_gather on arbitrary picklable data (not necessarily tensors) 15 | Args: 16 | data: any picklable object 17 | Returns: 18 | list[data]: list of data gathered from each rank 19 | """ 20 | world_size = get_world_size() 21 | if world_size == 1: 22 | return [data] 23 | 24 | # serialized to a Tensor 25 | buffer = pickle.dumps(data) 26 | storage = torch.ByteStorage.from_buffer(buffer) 27 | tensor = torch.ByteTensor(storage).to("cuda") 28 | 29 | # obtain Tensor size of each rank 30 | local_size = torch.tensor([tensor.numel()], device="cuda") 31 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 32 | dist.all_gather(size_list, local_size) 33 | size_list = [int(size.item()) for size in size_list] 34 | max_size = max(size_list) 35 | 36 | # receiving Tensor from all ranks 37 | # we pad the tensor because torch all_gather does not support 38 | # gathering tensors of different shapes 39 | tensor_list = [] 40 | for _ in size_list: 41 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 42 | if local_size != max_size: 43 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 44 | tensor = torch.cat((tensor, padding), dim=0) 45 | dist.all_gather(tensor_list, tensor) 46 | 47 | data_list = [] 48 | for size, tensor in zip(size_list, tensor_list): 49 | buffer = tensor.cpu().numpy().tobytes()[:size] 50 | data_list.append(pickle.loads(buffer)) 51 | 52 | return data_list 53 | 54 | 55 | def reduce_dict(input_dict, average=True): 56 | """ 57 | Args: 58 | input_dict (dict): all the values will be reduced 59 | average (bool): whether to do average or sum 60 | Reduce the values in the dictionary from all processes so that all processes 61 | have the averaged results. Returns a dict with the same fields as 62 | input_dict, after reduction. 
63 | """ 64 | world_size = get_world_size() 65 | if world_size < 2: 66 | return input_dict 67 | with torch.no_grad(): 68 | names = [] 69 | values = [] 70 | # sort the keys so that they are consistent across processes 71 | for k in sorted(input_dict.keys()): 72 | names.append(k) 73 | values.append(input_dict[k]) 74 | values = torch.stack(values, dim=0) 75 | dist.all_reduce(values) 76 | if average: 77 | values /= world_size 78 | reduced_dict = {k: v for k, v in zip(names, values)} 79 | return reduced_dict 80 | 81 | 82 | def collate_fn(batch): 83 | return tuple(zip(*batch)) 84 | 85 | 86 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 87 | 88 | def f(x): 89 | if x >= warmup_iters: 90 | return 1 91 | alpha = float(x) / warmup_iters 92 | return warmup_factor * (1 - alpha) + alpha 93 | 94 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 95 | 96 | 97 | def mkdir(path): 98 | try: 99 | os.makedirs(path) 100 | except OSError as e: 101 | if e.errno != errno.EEXIST: 102 | raise 103 | 104 | 105 | def setup_for_distributed(is_master): 106 | """ 107 | This function disables printing when not in master process 108 | """ 109 | import builtins as __builtin__ 110 | builtin_print = __builtin__.print 111 | 112 | def print(*args, **kwargs): 113 | force = kwargs.pop('force', False) 114 | if is_master or force: 115 | builtin_print(*args, **kwargs) 116 | 117 | __builtin__.print = print 118 | 119 | 120 | def is_dist_avail_and_initialized(): 121 | if not dist.is_available(): 122 | return False 123 | if not dist.is_initialized(): 124 | return False 125 | return True 126 | 127 | 128 | def get_world_size(): 129 | if not is_dist_avail_and_initialized(): 130 | return 1 131 | return dist.get_world_size() 132 | 133 | 134 | def get_rank(): 135 | if not is_dist_avail_and_initialized(): 136 | return 0 137 | return dist.get_rank() 138 | 139 | 140 | def is_main_process(): 141 | return get_rank() == 0 142 | 143 | 144 | def save_on_master(*args, **kwargs): 145 | if is_main_process(): 146 | torch.save(*args, **kwargs) 147 | 148 | 149 | def init_distributed_mode(args): 150 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 151 | args.rank = int(os.environ["RANK"]) 152 | args.world_size = int(os.environ['WORLD_SIZE']) 153 | args.gpu = int(os.environ['LOCAL_RANK']) 154 | elif 'SLURM_PROCID' in os.environ: 155 | args.rank = int(os.environ['SLURM_PROCID']) 156 | args.gpu = args.rank % torch.cuda.device_count() 157 | else: 158 | print('Not using distributed mode') 159 | args.distributed = False 160 | return 161 | 162 | args.distributed = True 163 | 164 | torch.cuda.set_device(args.gpu) 165 | args.dist_backend = 'nccl' 166 | print('| distributed init (rank {}): {}'.format( 167 | args.rank, args.dist_url), flush=True) 168 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 169 | world_size=args.world_size, rank=args.rank) 170 | torch.distributed.barrier() 171 | setup_for_distributed(args.rank == 0) 172 | -------------------------------------------------------------------------------- /the_hero_rises/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | from PIL import Image 4 | 5 | from torchvision.transforms import functional as F 6 | 7 | 8 | def get_transform(train, image_size): 9 | transforms = [Resize(size=(image_size, image_size)), ToTensor()] 10 | if train: 11 | transforms.append(RandomHorizontalFlip(0.5)) 12 | return Compose(transforms) 13 | 14 | 15 | def 
_flip_coco_person_keypoints(kps, width): 16 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 17 | flipped_data = kps[:, flip_inds] 18 | flipped_data[..., 0] = width - flipped_data[..., 0] 19 | # Maintain COCO convention that if visibility == 0, then x, y = 0 20 | inds = flipped_data[..., 2] == 0 21 | flipped_data[inds] = 0 22 | return flipped_data 23 | 24 | 25 | class ToTensor(object): 26 | def __call__(self, image, target): 27 | image = F.to_tensor(image) 28 | return image, target 29 | 30 | 31 | class Compose(object): 32 | def __init__(self, transforms): 33 | self.transforms = transforms 34 | 35 | def __call__(self, image, target=None): 36 | for t in self.transforms: 37 | image, target = t(image, target) 38 | return image, target 39 | 40 | 41 | class RandomHorizontalFlip(object): 42 | def __init__(self, prob): 43 | self.prob = prob 44 | 45 | def __call__(self, image, target): 46 | if random.random() < self.prob: 47 | height, width = image.shape[-2:] 48 | image = image.flip(-1) 49 | bbox = target["boxes"] 50 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 51 | target["boxes"] = bbox 52 | if "masks" in target: 53 | target["masks"] = target["masks"].flip(-1) 54 | if "keypoints" in target: 55 | keypoints = target["keypoints"] 56 | keypoints = _flip_coco_person_keypoints(keypoints, width) 57 | target["keypoints"] = keypoints 58 | return image, target 59 | 60 | 61 | class Resize(object): 62 | """Resize the input PIL image to given size. 63 | If boxes is not None, resize boxes accordingly. 64 | Args: 65 | size: (tuple or int) 66 | - if is tuple, resize image to the size. 67 | - if is int, resize the shorter side to the size while maintaining the aspect ratio. 68 | max_size: (int) when size is int, limit the image longer size to max_size. 69 | This is essential to limit the usage of GPU memory. 70 | random_interpolation: (bool) randomly choose a resize interpolation method. 71 | Returns: 72 | img: (PIL.Image) resized image. 73 | boxes: (tensor) resized boxes. 74 | Example: 75 | >> img, boxes = resize(img, boxes, 600) # resize shorter side to 600 76 | >> img, boxes = resize(img, boxes, (500,600)) # resize image size to (500,600) 77 | >> img, _ = resize(img, None, (500,600)) # resize image only 78 | """ 79 | def __init__(self, size, max_size=1000, random_interpolation=False): 80 | self.size = size 81 | self.max_size = max_size 82 | self.random_interpolation = random_interpolation 83 | 84 | def __call__(self, image, target): 85 | """Resize the input PIL image to given size. 86 | If boxes is not None, resize boxes accordingly. 87 | Args: 88 | image: (PIL.Image) image to be resized. 89 | target: (tensor) object boxes, sized [#obj,4]. 
90 | """ 91 | w, h = image.size 92 | if isinstance(self.size, int): 93 | size_min = min(w, h) 94 | size_max = max(w, h) 95 | sw = sh = float(self.size) / size_min 96 | if sw * size_max > self.max_size: 97 | sw = sh = float(self.max_size) / size_max 98 | ow = int(w * sw + 0.5) 99 | oh = int(h * sh + 0.5) 100 | else: 101 | ow, oh = self.size 102 | sw = float(ow) / w 103 | sh = float(oh) / h 104 | 105 | method = random.choice([ 106 | Image.BOX, 107 | Image.NEAREST, 108 | Image.HAMMING, 109 | Image.BICUBIC, 110 | Image.LANCZOS, 111 | Image.BILINEAR]) if self.random_interpolation else Image.BILINEAR 112 | image = image.resize((ow, oh), method) 113 | if target is not None and "masks" in target: 114 | resized_masks = torch.nn.functional.interpolate( 115 | input=target["masks"][None].float(), 116 | size=(512, 512), 117 | mode="nearest", 118 | )[0].type_as(target["masks"]) 119 | target["masks"] = resized_masks 120 | if target is not None and "boxes" in target: 121 | resized_boxes = target["boxes"] * torch.tensor([sw, sh, sw, sh]) 122 | target["boxes"] = resized_boxes 123 | return image, target 124 | -------------------------------------------------------------------------------- /the_hero_rises/utilities.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import attr 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import torchvision 8 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 9 | from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 10 | from torchvision.models.resnet import resnet50, resnet101, resnet152 11 | from torchvision.models.vgg import vgg16 12 | from torchvision.transforms import functional as F 13 | 14 | from torchvision_references import utils 15 | 16 | 17 | def safe_collate(batch): 18 | batch = list(filter(lambda x: x is not None, batch)) 19 | return utils.collate_fn(batch) 20 | 21 | 22 | def draw_boxes(im, boxes, labels, color=(150, 0, 0)): 23 | for box, draw_label in zip(boxes, labels): 24 | draw_box = box.astype('int') 25 | im = cv2.rectangle(im, tuple(draw_box[:2]), tuple(draw_box[2:]), color, 2) 26 | im = cv2.putText(im, str(draw_label), (draw_box[0], max(0, draw_box[1]-5)), 27 | cv2.FONT_HERSHEY_COMPLEX, 0.8, color, 2) 28 | return im 29 | 30 | 31 | def draw_debug_images(images, targets, predictions=None, score_thr=0.3): 32 | debug_images = [] 33 | for image, target in zip(images, targets): 34 | img = draw_boxes(np.array(F.to_pil_image(image.cpu())), 35 | [box.cpu().numpy() for box in target['boxes']], 36 | [label.item() for label in target['labels']]) 37 | if predictions: 38 | img = draw_boxes(img, 39 | [box.cpu().numpy() for box, score in 40 | zip(predictions[target['image_id'].item()]['boxes'], 41 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 42 | [label.item() for label, score in 43 | zip(predictions[target['image_id'].item()]['labels'], 44 | predictions[target['image_id'].item()]['scores']) if score >= score_thr], 45 | color=(0, 150, 0)) 46 | debug_images.append(img) 47 | return debug_images 48 | 49 | 50 | def draw_mask(target): 51 | masks = [channel*label for channel, label in zip(target['masks'].cpu().numpy(), target['labels'].cpu().numpy())] 52 | masks_sum = sum(masks) 53 | masks_out = masks_sum + 25*(masks_sum > 0) 54 | return (masks_out*int(255/masks_out.max())).astype('uint8') 55 | 56 | 57 | def get_backbone(backbone_name): 58 | if backbone_name == 'vgg16': 59 | return vgg16(pretrained=True) 60 | elif backbone_name == 
'resnet50': 61 | return resnet50(pretrained=True) 62 | elif backbone_name == 'resnet101': 63 | return resnet101(pretrained=True) 64 | elif backbone_name == 'resnet152': 65 | return resnet152(pretrained=True) 66 | else: 67 | raise ValueError('Only "vgg16", "resnet50", "resnet101" and "resnet152" are supported backbone names') 68 | 69 | 70 | def get_model_instance_segmentation(num_classes, hidden_layer): 71 | # load an instance segmentation model pre-trained on COCO 72 | model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True) 73 | 74 | # get number of input features for the classifier 75 | in_features = model.roi_heads.box_predictor.cls_score.in_features 76 | # replace the pre-trained head with a new one 77 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 78 | 79 | # now get the number of input features for the mask classifier 80 | in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels 81 | 82 | # and replace the mask predictor with a new one 83 | model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes) 84 | return model 85 | 86 | 87 | def get_iou_types(model): 88 | model_without_ddp = model 89 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 90 | model_without_ddp = model.module 91 | iou_types = ["bbox"] 92 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 93 | iou_types.append("segm") 94 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 95 | iou_types.append("keypoints") 96 | return iou_types 97 | 98 | 99 | @attr.s(auto_attribs=True) 100 | class CocoLikeAnnotations(): 101 | def __attrs_post_init__(self): 102 | self.coco_like_json: dict = {'images': [], 'annotations': []} 103 | self._ann_id: int = 0 104 | 105 | def update_images(self, file_name, height, width, id): 106 | self.coco_like_json['images'].append({'file_name': file_name, 107 | 'height': height, 'width': width, 108 | 'id': id}) 109 | 110 | def update_annotations(self, box, label_id, image_id, is_crowd=0): 111 | segmentation, bbox, area = self.extract_coco_info(box) 112 | self.coco_like_json['annotations'].append({'segmentation': segmentation, 'bbox': bbox, 'area': area, 113 | 'category_id': int(label_id), 'id': self._ann_id, 'iscrowd': is_crowd, 114 | 'image_id': image_id}) 115 | self._ann_id += 1 116 | 117 | @staticmethod 118 | def extract_coco_info(box): 119 | segmentation = list(map(int, [box[0], box[1], box[0], box[3], box[2], box[3], box[2], box[1]])) 120 | bbox = list(map(int, np.append(box[:2], (box[2:] - box[:2])))) 121 | area = int(bbox[2] * bbox[3]) 122 | return segmentation, bbox, area 123 | 124 | def dump_to_json(self, path_to_json='/tmp/inference_results/inference_results.json'): 125 | with open(path_to_json, "w") as write_file: 126 | json.dump(self.coco_like_json, write_file) 127 | -------------------------------------------------------------------------------- /urbansounds8k/.gitignore: -------------------------------------------------------------------------------- 1 | tensorboard_logs 2 | .venv 3 | dataset 4 | .idea -------------------------------------------------------------------------------- /urbansounds8k/README.md: -------------------------------------------------------------------------------- 1 | # Audio Classification Example 2 | 3 | ## Setting the scene 4 | Audio signals are all around us. 
As such, there is an increasing interest in audio classification for various scenarios, 5 | from fire alarm detection for hearing-impaired people, through engine sound analysis for maintenance purposes, 6 | to baby monitoring. Though audio signals are temporal in nature, in many cases it is possible to leverage recent 7 | advancements in the field of image classification and use popular, high-performing convolutional neural networks for 8 | audio classification. In this blog post we will demonstrate such an example by using the popular method of converting 9 | the audio signal into the frequency domain. 10 | 11 | This example is based on a series of blogposts that show how to leverage PyTorch's ecosystem to easily jumpstart your 12 | ML/DL project. You can find the [Image Classification](https://clear.ml/blog/ml-dl-engineering-made-easy-with-pytorch-and-allegro-trains/), 13 | [Hyperparameter Optimization](https://clear.ml/blog/accelerate-hyperparameter-optimization-with-pytorchs-ecosystem-tools/) and 14 | the original [Audio Classification](https://clear.ml/blog/audio-classification-with-pytorchs-ecosystem-tools/) blogposts here. 15 | 16 | 17 | ## Urbansounds Structure 18 | The urbansounds dataset consists of the actual data and a CSV file containing the metadata. For each sample, the CSV file keeps track of the location and the label. 19 | The data itself is organized into multiple folds, or equal splits of the data. In testing, we use one fold to validate our model on, while all the other folds are used for training. 20 | 21 | 22 | ## ClearML Workflow 23 | 24 | 25 | ![ClearML flow diagram](assets/diagram.png) 26 | 27 | 28 | ### Getting the data 29 | The first script downloads the data from the official urbansounds sources, or in this case a subset that is hosted by ClearML just to play around with. The metadata is converted into a format that is easier for us to work with, and then the files as well as the metadata are uploaded as a ClearML Dataset. 30 | 31 | The Dataset is a special sort of task, so we can also generate some interesting logs and plots, such as a histogram, and attach them to the task just like we would do for any other task. 32 | 33 | ![ClearML Histogram Dataset](https://imgur.com/4TTovpG.png) 34 | 35 | ### Preprocessing the data 36 | In order to train a model on the data, we want to convert the audio samples (.wav files) to images by creating their mel spectrograms. For more information on how this works, read the section below. 37 | 38 | In the end we convert each .wav file into a spectrogram image and save the image with the same filename in the same folder. Then we create a new ClearML dataset from this data. We make it a new version (child) of the previous dataset, so the .wav files themselves won't actually be uploaded again; they simply refer to the previously uploaded dataset. ClearML Data will only upload the newly created image files. 39 | 40 | The metadata from the CSV file is again added to the dataset as an artifact. We can just get it as a pandas dataframe when we need it. 41 | 42 | ### Training 43 | Finally, we get this latest dataset version, download the data itself and get the pandas dataframe containing the metadata. Based on the fold number, we divide the data into train and test sets and train a machine learning model on it. We then log the output scalars and plot a confusion matrix so we can see the model's performance in the ClearML webUI and compare it easily to other experiment runs.
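A condensed sketch of that flow (the full implementation lives in `training.py`) might look roughly like this; it only uses the ClearML calls already present in the example scripts:

```Python
from clearml import Task, Dataset

# Get the latest preprocessed dataset version and download a local copy of the spectrograms
preprocessed_dataset = Dataset.get(
    dataset_project='examples/Urbansounds',
    dataset_name='UrbanSounds example',
    dataset_tags=['preprocessed']
)
local_data_path = preprocessed_dataset.get_local_copy()

# The metadata dataframe was attached to the dataset task as an artifact in the previous step
metadata = Task.get_task(task_id=preprocessed_dataset.id).artifacts['metadata'].get()

# Folds 1-9 are used for training, fold 10 is held out for testing
train_metadata = metadata[metadata['fold'].isin(range(1, 10))]
test_metadata = metadata[metadata['fold'] == 10]
```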
44 | 45 | 46 | 47 | ## Audio Classification with Convolutional Neural Networks 48 | 49 | In recent years, Convolutional Neural Networks (CNNs) have proven very effective in image classification tasks, which gave rise to the design of various architectures, such as Inception, ResNet, ResNext, Mobilenet and more. These CNNs achieve state-of-the-art results on image classification tasks and offer a variety of ready-to-use pre-trained backbones. As such, if we are able to transfer audio classification tasks into the image domain, we will be able to leverage this rich variety of backbones for our needs. 50 | 51 | As mentioned before, instead of directly using the sound file as an amplitude-vs-time signal, we wish to convert the audio signal into an image. The following preprocessing was done using [this script](https://github.com/allegroai/trains/blob/master/examples/frameworks/pytorch/notebooks/audio/audio_preprocessing_example.ipynb) on the [YesNo](https://pytorch.org/audio/datasets.html#yesno) dataset that is included in the [torchaudio built-in datasets](https://pytorch.org/audio/datasets.html). 52 | 53 | As a first stage of preprocessing we will: 54 | 55 | - **Read the audio file** – using torchaudio 56 | - **Resample the audio signal to a fixed sample rate** – This makes sure that all the signals we use have the same sample rate. Theoretically, the maximum frequency that can be represented by a sampled signal is a little less than half the sample rate (known as the [Nyquist frequency](https://en.wikipedia.org/wiki/Nyquist_frequency)). As 20 kHz is the highest frequency generally audible by humans, a sampling rate of 44100 Hz is considered the most popular choice. However, in many cases removing the higher frequencies is considered acceptable for the sake of reducing the amount of data per audio file. As such, a sampling rate of 22050 Hz has been reasonably popular for low-bitrate MP3 files. In our example we will use this sample rate. 57 | - **Create a mono audio signal** – For simplicity, we will make sure all signals we use have the same number of channels. 58 | 59 | **The code for such preprocessing looks like this:** 60 | 61 | ```Python 62 | yesno_data = torchaudio.datasets.YESNO('./data', download=True) 63 | number_of_samples = 3 64 | fixed_sample_rate = 22050 65 | for n in range(number_of_samples): 66 | audio, sample_rate, labels = yesno_data[n] 67 | resample_transform = torchaudio.transforms.Resample( 68 | orig_freq=sample_rate, new_freq=fixed_sample_rate) 69 | audio_mono = torch.mean(resample_transform(audio), 70 | dim=0, keepdim=True) 71 | 72 | plt.figure() 73 | plt.plot(audio_mono[0,:]) 74 | 75 | ``` 76 | 77 | **The resulting matplotlib plot looks like this:** 78 | 79 | Audio signal time series from the YESNO dataset 80 | 81 | ![https://clear.ml/wp-content/uploads/2020/10/Audio-signal-time-series-from-the-YESNO-dataset.png](https://clear.ml/wp-content/uploads/2020/10/Audio-signal-time-series-from-the-YESNO-dataset.png) 82 | 83 | Now it is time to transform this time-series signal into the image domain. We will do that by converting it into a spectrogram, which is a visual representation of the spectrum of frequencies of a signal as it varies with time. For that purpose we will use a log-scaled mel spectrogram. A mel spectrogram is a spectrogram where the frequencies are converted to the mel scale, which takes into account the fact that humans are better at detecting differences in lower frequencies than higher frequencies.
The mel scale converts the frequencies so that equal distances in pitch sound equally distant to a human listener. **So let's use torchaudio transforms and add the following lines to our snippet:** 84 | 85 | ```Python 86 | melspectogram_transform = \ 87 | torchaudio.transforms.MelSpectrogram( 88 | sample_rate=fixed_sample_rate, n_mels=128) 89 | melspectogram_db_transform = torchaudio.transforms.AmplitudeToDB() 90 | 91 | melspectogram = melspectogram_transform(audio_mono) 92 | plt.figure() 93 | plt.imshow(melspectogram.squeeze().numpy(), cmap='hot') 94 | 95 | melspectogram_db = melspectogram_db_transform(melspectogram) 96 | plt.figure() 97 | plt.imshow(melspectogram_db.squeeze().numpy(), cmap='hot') 98 | 99 | ``` 100 | 101 | **Now the audio file is represented as a two-dimensional spectrogram image:** 102 | 103 | Mel spectrogram (upper image) and its log-scale version (lower image) 104 | 105 | ![https://clear.ml/wp-content/uploads/2020/10/Mel-spectrogram-and-its-log-scale-version.png](https://clear.ml/wp-content/uploads/2020/10/Mel-spectrogram-and-its-log-scale-version.png) 106 | 107 | That's exactly what we wanted to achieve. The audio classification problem is now transformed into an image classification problem. -------------------------------------------------------------------------------- /urbansounds8k/assets/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-blogs/c0a10f52de341e7feedc1bd718ff0539f98fdced/urbansounds8k/assets/diagram.png -------------------------------------------------------------------------------- /urbansounds8k/get_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pathlib import Path 3 | from clearml import Task, Dataset, StorageManager 4 | 5 | task = Task.init(project_name='examples/Urbansounds', 6 | task_name='download data') 7 | 8 | configuration = { 9 | 'selected_classes': ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 10 | 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music'] 11 | } 12 | task.connect(configuration) 13 | 14 | 15 | def get_urbansound8k(): 16 | # Download UrbanSound8K dataset (https://urbansounddataset.weebly.com/urbansound8k.html) 17 | # For simplicity we will use here a subset of that dataset using clearml StorageManager 18 | path_to_urbansound8k = StorageManager.get_local_copy( 19 | "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip", 20 | extract_archive=True) 21 | path_to_urbansound8k_csv = Path(path_to_urbansound8k) / 'UrbanSound8K' / 'metadata' / 'UrbanSound8K.csv' 22 | path_to_urbansound8k_audio = Path(path_to_urbansound8k) / 'UrbanSound8K' / 'audio' 23 | 24 | return path_to_urbansound8k_csv, path_to_urbansound8k_audio 25 | 26 | 27 | def log_dataset_statistics(dataset, metadata): 28 | histogram_data = metadata['class'].value_counts() 29 | dataset.get_logger().report_table( 30 | title='Raw Dataset Metadata', 31 | series='Raw Dataset Metadata', 32 | table_plot=metadata 33 | ) 34 | dataset.get_logger().report_histogram( 35 | title='Class distribution', 36 | series='Class distribution', 37 | values=histogram_data, 38 | iteration=0, 39 | xlabels=histogram_data.index.tolist(), 40 | yaxis='Amount of samples' 41 | ) 42 | 43 | 44 | def build_clearml_dataset(): 45 | # Get a local copy of both the data and the labels 46 | path_to_urbansound8k_csv, path_to_urbansound8k_audio = get_urbansound8k() 47 | urbansound8k_metadata =
pd.read_csv(path_to_urbansound8k_csv) 48 | # Subset the data to only include the classes we want 49 | urbansound8k_metadata = \ 50 | urbansound8k_metadata[urbansound8k_metadata['class'].isin(configuration['selected_classes'])] 51 | 52 | # Create a pandas dataframe containing labels and other info we need later (fold is for train test split) 53 | metadata = pd.DataFrame({ 54 | 'fold': urbansound8k_metadata.loc[:, 'fold'], 55 | 'filepath': ('fold' + urbansound8k_metadata.loc[:, 'fold'].astype(str) 56 | + '/' + urbansound8k_metadata.loc[:, 'slice_file_name'].astype(str)), 57 | 'label': urbansound8k_metadata.loc[:, 'classID'] 58 | }) 59 | 60 | # Now create a clearml dataset to start versioning our changes and make it much easier to get the right data 61 | # in other tasks as well as on different machines 62 | dataset = Dataset.create( 63 | dataset_name='UrbanSounds example', 64 | dataset_project='examples/Urbansounds', 65 | dataset_tags=['raw'] 66 | ) 67 | 68 | # Add the local files we downloaded earlier 69 | dataset.add_files(path_to_urbansound8k_audio) 70 | # Add the metadata in pandas format, we can now see it in the webUI and have it be easily accessible 71 | dataset._task.upload_artifact(name='metadata', artifact_object=metadata) 72 | # Let's add some cool graphs as statistics in the plots section! 73 | log_dataset_statistics(dataset, urbansound8k_metadata) 74 | # Finalize and upload the data and labels of the dataset 75 | dataset.finalize(auto_upload=True) 76 | 77 | 78 | if __name__ == '__main__': 79 | build_clearml_dataset() 80 | -------------------------------------------------------------------------------- /urbansounds8k/preprocessing.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from pathlib import Path 3 | 4 | import matplotlib as mpl 5 | import numpy as np 6 | from tqdm import tqdm 7 | import torchaudio 8 | import torch 9 | from clearml import Task, Dataset 10 | 11 | task = Task.init(project_name='examples/Urbansounds', 12 | task_name='preprocessing') 13 | 14 | # Let's preprocess the data and create a new ClearML dataset from it, so we can track it around 15 | # The cool thing is, we can easily debug, by using, you guessed it: debug samples! We can log both 16 | # the original sound and its processed mel spectrogram as debug samples, so we can manually check 17 | # if everything went as planned. 
18 | 19 | 20 | class PreProcessor: 21 | def __init__(self): 22 | self.configuration = { 23 | 'number_of_mel_filters': 64, 24 | 'resample_freq': 22050 25 | } 26 | task.connect(self.configuration) 27 | 28 | def preprocess_sample(self, sample, original_sample_freq): 29 | if self.configuration['resample_freq'] > 0: 30 | resample_transform = torchaudio.transforms.Resample(orig_freq=original_sample_freq, 31 | new_freq=self.configuration['resample_freq']) 32 | sample = resample_transform(sample) 33 | 34 | # This will convert audio files with two channels into one 35 | sample = torch.mean(sample, dim=0, keepdim=True) 36 | 37 | # Convert audio to log-scale Mel spectrogram 38 | melspectrogram_transform = torchaudio.transforms.MelSpectrogram( 39 | sample_rate=self.configuration['resample_freq'], 40 | n_mels=self.configuration['number_of_mel_filters'] 41 | ) 42 | melspectrogram = melspectrogram_transform(sample) 43 | melspectogram_db = torchaudio.transforms.AmplitudeToDB()(melspectrogram) 44 | 45 | # Make sure all spectrograms are the same size 46 | fixed_length = 3 * (self.configuration['resample_freq'] // 200) 47 | if melspectogram_db.shape[2] < fixed_length: 48 | melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2])) 49 | else: 50 | melspectogram_db = melspectogram_db[:, :, :fixed_length] 51 | 52 | return melspectogram_db 53 | 54 | 55 | class DataSetBuilder: 56 | def __init__(self): 57 | self.configuration = { 58 | 'dataset_path': 'dataset' 59 | } 60 | task.connect(self.configuration) 61 | 62 | self.original_dataset = Dataset.get( 63 | dataset_project='examples/Urbansounds', 64 | dataset_name='UrbanSounds example', 65 | dataset_tags=['raw'], 66 | alias='Raw Dataset' 67 | ) 68 | # This will return the pandas dataframe we added in the previous task 69 | self.metadata = Task.get_task(task_id=self.original_dataset._task.id).artifacts['metadata'].get() 70 | # This will download the data and return a local path to the data 71 | self.original_dataset_path = \ 72 | Path(self.original_dataset.get_mutable_local_copy(self.configuration['dataset_path'], overwrite=True)) 73 | 74 | # Prepare a preprocessor that will handle each sample one by one 75 | self.preprocessor = PreProcessor() 76 | 77 | # Get ready for the new one 78 | self.preprocessed_dataset = None 79 | 80 | def log_dataset_statistics(self): 81 | histogram_data = self.metadata['label'].value_counts() 82 | self.preprocessed_dataset.get_logger().report_table( 83 | title='Raw Dataset Metadata', 84 | series='Raw Dataset Metadata', 85 | table_plot=self.metadata 86 | ) 87 | self.preprocessed_dataset.get_logger().report_histogram( 88 | title='Class distribution', 89 | series='Class distribution', 90 | values=histogram_data, 91 | iteration=0, 92 | xlabels=histogram_data.index.tolist(), 93 | yaxis='Amount of samples' 94 | ) 95 | 96 | def build_dataset(self): 97 | # Let's create a new dataset that is a child of the original one 98 | # We'll add the preprocessed samples to the original dataset, leading to a new version 99 | # Providing the parent dataset allows us to keep a clear lineage of our data 100 | self.preprocessed_dataset = Dataset.create( 101 | dataset_name='UrbanSounds example', 102 | dataset_project='examples/Urbansounds', 103 | dataset_tags=["preprocessed"], 104 | parent_datasets=[self.original_dataset.id] 105 | ) 106 | 107 | # loop through the metadata entries and preprocess each sample, then add some of them as debug samples to 108 | # manually double check in the UI that everything has worked 
(you can watch the spectrogram and listen to the 109 | # audio side by side in the debug sample UI) 110 | for i, (_, data) in tqdm(enumerate(self.metadata.iterrows())): 111 | _, audio_file_path, label = data.tolist() 112 | sample, sample_freq = torchaudio.load(self.original_dataset_path / audio_file_path, normalize=True) 113 | spectrogram = self.preprocessor.preprocess_sample(sample, sample_freq) 114 | # Get only the filename and replace the extension, we're saving an image here 115 | new_file_name = os.path.basename(audio_file_path).replace('.wav', '.npy') 116 | # Get the correct folder, basically the original dataset folder + the new filename 117 | spectrogram_path = self.original_dataset_path / os.path.dirname(audio_file_path) / new_file_name 118 | # Save the numpy array to disk 119 | np.save(spectrogram_path, spectrogram) 120 | 121 | # Log every 10th sample as a debug sample to the UI, so we can manually check it 122 | if i % 10 == 0: 123 | # Convert the numpy array to a viewable JPEG 124 | rgb_image = mpl.colormaps['viridis'](spectrogram[0, :, :].detach().numpy() * 255)[:, :, :3] 125 | title = os.path.splitext(os.path.basename(audio_file_path))[0] 126 | 127 | # Report the image and the original sound, so they can be viewed side by side 128 | self.preprocessed_dataset.get_logger().report_image( 129 | title=title, 130 | series='spectrogram', 131 | image=rgb_image 132 | ) 133 | self.preprocessed_dataset.get_logger().report_media( 134 | title=title, 135 | series='original_audio', 136 | local_path=self.original_dataset_path / audio_file_path 137 | ) 138 | # The original data path will now also have the spectrograms in its filetree. 139 | # So that's why we add it here to fill up the new dataset with. 140 | self.preprocessed_dataset.add_files(self.original_dataset_path) 141 | # Again add some visualizations to the task 142 | self.log_dataset_statistics() 143 | # We still want the metadata 144 | self.preprocessed_dataset._task.upload_artifact(name='metadata', artifact_object=self.metadata) 145 | self.preprocessed_dataset.finalize(auto_upload=True) 146 | 147 | 148 | if __name__ == '__main__': 149 | datasetbuilder = DataSetBuilder() 150 | datasetbuilder.build_dataset() 151 | -------------------------------------------------------------------------------- /urbansounds8k/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml>=1.3.0 2 | torchaudio>=0.10.2 3 | torch>=1.10.2 4 | pandas>=1.3 5 | torchvision 6 | matplotlib 7 | tqdm 8 | sklearn 9 | tensorboard -------------------------------------------------------------------------------- /urbansounds8k/training.py: -------------------------------------------------------------------------------- 1 | import PIL 2 | import io 3 | import os 4 | from tempfile import gettempdir 5 | import matplotlib.pyplot as plt 6 | from torchvision import models 7 | from sklearn.metrics import ConfusionMatrixDisplay, f1_score 8 | from torchvision.transforms import ToTensor 9 | import torchaudio 10 | import torch 11 | import torch.optim as optim 12 | from torch.utils.data import Dataset as TorchDataset 13 | import torch.nn as nn 14 | from torch.utils.tensorboard import SummaryWriter 15 | import numpy as np 16 | from argparse import ArgumentParser 17 | 18 | from clearml import Task, Dataset 19 | task = Task.init(project_name='examples/Urbansounds', 20 | task_name='training') 21 | 22 | 23 | parser = ArgumentParser() 24 | parser.add_argument('--dropout', type=float, default=0.30) 25 | parser.add_argument('--base_lr', 
type=float, default=0.002) 26 | parser.add_argument('--number_of_epochs', type=int, default=10) 27 | parser.add_argument('--batch_size', type=int, default=4) 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | configuration_dict = { 33 | 'dropout': args.dropout, 34 | 'base_lr': args.base_lr, 35 | 'number_of_epochs': args.number_of_epochs, 36 | 'batch_size': args.batch_size 37 | } 38 | 39 | 40 | class ClearMLDataLoader(TorchDataset): 41 | def __init__(self, dataset_name, project_name, folder_filter): 42 | clearml_dataset = Dataset.get( 43 | dataset_name=dataset_name, 44 | dataset_project=project_name, 45 | dataset_tags=["preprocessed"], 46 | alias='Preprocessed Dataset' 47 | ) 48 | self.img_dir = clearml_dataset.get_local_copy() 49 | self.img_metadata = Task.get_task(task_id=clearml_dataset.id).artifacts['metadata'].get() 50 | self.img_metadata = self.img_metadata[self.img_metadata['fold'].isin(folder_filter)] 51 | # We just removed some rows by filtering on class, this will make gaps in the dataframe index 52 | # (e.g. 57 won't exist anymore) so we reindex to make it a full range again, otherwise we'll get errors later 53 | # when selecting a row by index 54 | self.img_metadata = self.img_metadata.reset_index(drop=True) 55 | 56 | def __len__(self): 57 | return len(self.img_metadata) 58 | 59 | def __getitem__(self, idx): 60 | sound_path = os.path.join(self.img_dir, self.img_metadata.loc[idx, 'filepath']) 61 | img_path = sound_path.replace('.wav', '.npy') 62 | image = np.load(img_path) 63 | label = self.img_metadata.loc[idx, 'label'] 64 | return sound_path, image, label 65 | 66 | 67 | train_dataset = ClearMLDataLoader('UrbanSounds example', 'examples/Urbansounds', set(range(1, 10))) 68 | test_dataset = ClearMLDataLoader('UrbanSounds example', 'examples/Urbansounds', {10}) 69 | print(len(train_dataset), len(test_dataset)) 70 | train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=configuration_dict.get('batch_size', 4), 71 | shuffle=True, pin_memory=True, num_workers=1) 72 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=configuration_dict.get('batch_size', 4), 73 | shuffle=False, pin_memory=False, num_workers=1) 74 | 75 | classes = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 76 | 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music'] 77 | 78 | 79 | model = models.resnet18(pretrained=True) 80 | model.conv1 = nn.Conv2d(1, model.conv1.out_channels, kernel_size=model.conv1.kernel_size[0], 81 | stride=model.conv1.stride[0], padding=model.conv1.padding[0]) 82 | num_ftrs = model.fc.in_features 83 | model.fc = nn.Sequential(*[nn.Dropout(p=configuration_dict.get('dropout', 0.25)), nn.Linear(num_ftrs, len(classes))]) 84 | 85 | optimizer = optim.SGD(model.parameters(), lr=configuration_dict.get('base_lr', 0.001), momentum=0.9) 86 | scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=configuration_dict.get('number_of_epochs') // 3, gamma=0.1) 87 | criterion = nn.CrossEntropyLoss() 88 | 89 | device = torch.cuda.current_device() if torch.cuda.is_available() else torch.device('cpu') 90 | print('Device to use: {}'.format(device)) 91 | model.to(device) 92 | 93 | tensorboard_writer = SummaryWriter('./tensorboard_logs') 94 | 95 | 96 | def plot_signal(signal, title, cmap=None): 97 | fig = plt.figure() 98 | if signal.ndim == 1: 99 | plt.plot(signal) 100 | else: 101 | plt.imshow(signal, cmap=cmap) 102 | plt.title(title) 103 | 104 | plot_buf = io.BytesIO() 105 | plt.savefig(plot_buf, format='jpeg') 106 | plot_buf.seek(0) 107 | 
plt.close(fig) 108 | return ToTensor()(PIL.Image.open(plot_buf)) 109 | 110 | 111 | def train(model, epoch): 112 | model.train() 113 | for batch_idx, (_, inputs, labels) in enumerate(train_loader): 114 | inputs = inputs.to(device) 115 | labels = labels.to(device) 116 | 117 | # zero the parameter gradients 118 | optimizer.zero_grad() 119 | 120 | # forward + backward + optimize 121 | outputs = model(inputs) 122 | _, predicted = torch.max(outputs, 1) 123 | loss = criterion(outputs, labels) 124 | loss.backward() 125 | optimizer.step() 126 | 127 | iteration = epoch * len(train_loader) + batch_idx 128 | if batch_idx % log_interval == 0: # print training stats 129 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}' 130 | .format(epoch, batch_idx * len(inputs), len(train_loader), 131 | 100. * batch_idx / len(train_loader), loss)) 132 | tensorboard_writer.add_scalar('training loss/loss', loss, iteration) 133 | tensorboard_writer.add_scalar('learning rate/lr', optimizer.param_groups[0]['lr'], iteration) 134 | 135 | if batch_idx % debug_interval == 0: # report debug image every "debug_interval" mini-batches 136 | for n, (inp, pred, label) in enumerate(zip(inputs, predicted, labels)): 137 | series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()]) 138 | tensorboard_writer.add_image('Train MelSpectrogram samples/{}_{}_{}'.format(batch_idx, n, series), 139 | plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration) 140 | 141 | 142 | def test_model(model, epoch): 143 | model.eval() 144 | all_predictions = [] 145 | all_labels = [] 146 | with torch.no_grad(): 147 | for idx, (sound_paths, inputs, labels) in enumerate(test_loader): 148 | inputs = inputs.to(device) 149 | labels = labels.to(device) 150 | 151 | outputs = model(inputs) 152 | 153 | _, predicted = torch.max(outputs, 1) 154 | for pred, label in zip(predicted.cpu(), labels.cpu()): 155 | all_predictions.append(int(pred)) 156 | all_labels.append(int(label)) 157 | 158 | iteration = (epoch + 1) * len(train_loader) 159 | if idx % debug_interval == 0: # report debug image every "debug_interval" mini-batches 160 | 161 | for n, (sound_path, inp, pred, label) in enumerate(zip(sound_paths, inputs, predicted, labels)): 162 | sound, sample_rate = torchaudio.load(sound_path, normalize=True) 163 | series = 'label_{}_pred_{}'.format(classes[label.cpu()], classes[pred.cpu()]) 164 | tensorboard_writer.add_audio('Test audio samples/{}_{}_{}'.format(idx, n, series), 165 | sound.reshape(1, -1), iteration, int(sample_rate)) 166 | tensorboard_writer.add_image('Test MelSpectrogram samples/{}_{}_{}'.format(idx, n, series), 167 | plot_signal(inp.cpu().numpy().squeeze(), series, 'hot'), iteration) 168 | 169 | tensorboard_writer.add_scalar('f1_score/total', 170 | f1_score(all_labels, all_predictions, average='weighted'), iteration) 171 | ConfusionMatrixDisplay.from_predictions(all_labels, all_predictions) 172 | 173 | 174 | log_interval = 10 175 | debug_interval = 25 176 | for epoch in range(configuration_dict.get('number_of_epochs', 10)): 177 | train(model, epoch) 178 | test_model(model, epoch) 179 | scheduler.step() 180 | 181 | # save model 182 | torch.save(model, os.path.join(gettempdir(), "urbansounds_model.pt")) 183 | --------------------------------------------------------------------------------