├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── configs
    └── centernet_detection_config.yml
├── requirements.txt
└── src
    ├── __init__.py
    ├── callbacks.py
    ├── coco.py
    ├── data_preparation.py
    ├── dataset.py
    ├── debug.py
    ├── experiment.py
    ├── losses
        ├── __init__.py
        └── ctdet_loss.py
    ├── metrics.py
    ├── models
        ├── __init__.py
        ├── centernet.py
        └── resnet.py
    ├── transforms.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | data/
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | env/
 14 | build/
 15 | builds/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # dotenv
 86 | .env
 87 | 
 88 | # virtualenv
 89 | .venv
 90 | venv/
 91 | ENV/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | 
107 | 
108 | .DS_Store
109 | .idea
110 | .code
111 | 
112 | *.bak
113 | *.csv
114 | *.tsv
115 | *.ipynb
116 | 
117 | tmp/
118 | logs/
119 | data/
120 | !catalyst/data
121 | examples/data/
122 | # Examples - mock data
123 | !examples/distilbert_text_classification/input/*.csv
124 | !examples/_tests_distilbert_text_classification/input/*.csv
125 | examples/logs/
126 | notebooks/
127 | 
128 | _nogit*
129 | 
130 | ### VisualStudioCode ###
131 | .vscode/*
132 | .vscode/settings.json
133 | !.vscode/tasks.json
134 | !.vscode/launch.json
135 | !.vscode/extensions.json
136 | 
137 | ### VisualStudioCode Patch ###
138 | # Ignore all local history of files
139 | .history
140 | 
141 | # End of https://www.gitignore.io/api/visualstudiocode
142 | 
143 | 
144 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG CATALYST_VERSION="19.11"
 2 | 
 3 | # "-fp16" or ""
 4 | ARG CATALYST_WITH_FP16="-fp16"
 5 | 
 6 | FROM catalystteam/catalyst:${CATALYST_VERSION}${CATALYST_WITH_FP16}
 7 | # Set up locale to prevent bugs with encoding
 8 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
 9 | 
10 | COPY requirements.txt .
11 | RUN pip install -r requirements.txt --no-cache-dir && rm requirements.txt
12 | 
13 | CMD mkdir -p /workspace
14 | WORKDIR /workspace
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright 2019 Sergey Kolesnikov.  All rights reserved.
  2 | 
  3 |                                  Apache License
  4 |                            Version 2.0, January 2004
  5 |                         http://www.apache.org/licenses/
  6 | 
  7 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  8 | 
  9 |    1. Definitions.
 10 | 
 11 |       "License" shall mean the terms and conditions for use, reproduction,
 12 |       and distribution as defined by Sections 1 through 9 of this document.
 13 | 
 14 |       "Licensor" shall mean the copyright owner or entity authorized by
 15 |       the copyright owner that is granting the License.
 16 | 
 17 |       "Legal Entity" shall mean the union of the acting entity and all
 18 |       other entities that control, are controlled by, or are under common
 19 |       control with that entity. For the purposes of this definition,
 20 |       "control" means (i) the power, direct or indirect, to cause the
 21 |       direction or management of such entity, whether by contract or
 22 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 23 |       outstanding shares, or (iii) beneficial ownership of such entity.
 24 | 
 25 |       "You" (or "Your") shall mean an individual or Legal Entity
 26 |       exercising permissions granted by this License.
 27 | 
 28 |       "Source" form shall mean the preferred form for making modifications,
 29 |       including but not limited to software source code, documentation
 30 |       source, and configuration files.
 31 | 
 32 |       "Object" form shall mean any form resulting from mechanical
 33 |       transformation or translation of a Source form, including but
 34 |       not limited to compiled object code, generated documentation,
 35 |       and conversions to other media types.
 36 | 
 37 |       "Work" shall mean the work of authorship, whether in Source or
 38 |       Object form, made available under the License, as indicated by a
 39 |       copyright notice that is included in or attached to the work
 40 |       (an example is provided in the Appendix below).
 41 | 
 42 |       "Derivative Works" shall mean any work, whether in Source or Object
 43 |       form, that is based on (or derived from) the Work and for which the
 44 |       editorial revisions, annotations, elaborations, or other modifications
 45 |       represent, as a whole, an original work of authorship. For the purposes
 46 |       of this License, Derivative Works shall not include works that remain
 47 |       separable from, or merely link (or bind by name) to the interfaces of,
 48 |       the Work and Derivative Works thereof.
 49 | 
 50 |       "Contribution" shall mean any work of authorship, including
 51 |       the original version of the Work and any modifications or additions
 52 |       to that Work or Derivative Works thereof, that is intentionally
 53 |       submitted to Licensor for inclusion in the Work by the copyright owner
 54 |       or by an individual or Legal Entity authorized to submit on behalf of
 55 |       the copyright owner. For the purposes of this definition, "submitted"
 56 |       means any form of electronic, verbal, or written communication sent
 57 |       to the Licensor or its representatives, including but not limited to
 58 |       communication on electronic mailing lists, source code control systems,
 59 |       and issue tracking systems that are managed by, or on behalf of, the
 60 |       Licensor for the purpose of discussing and improving the Work, but
 61 |       excluding communication that is conspicuously marked or otherwise
 62 |       designated in writing by the copyright owner as "Not a Contribution."
 63 | 
 64 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 65 |       on behalf of whom a Contribution has been received by Licensor and
 66 |       subsequently incorporated within the Work.
 67 | 
 68 |    2. Grant of Copyright License. Subject to the terms and conditions of
 69 |       this License, each Contributor hereby grants to You a perpetual,
 70 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 71 |       copyright license to reproduce, prepare Derivative Works of,
 72 |       publicly display, publicly perform, sublicense, and distribute the
 73 |       Work and such Derivative Works in Source or Object form.
 74 | 
 75 |    3. Grant of Patent License. Subject to the terms and conditions of
 76 |       this License, each Contributor hereby grants to You a perpetual,
 77 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 78 |       (except as stated in this section) patent license to make, have made,
 79 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 80 |       where such license applies only to those patent claims licensable
 81 |       by such Contributor that are necessarily infringed by their
 82 |       Contribution(s) alone or by combination of their Contribution(s)
 83 |       with the Work to which such Contribution(s) was submitted. If You
 84 |       institute patent litigation against any entity (including a
 85 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 86 |       or a Contribution incorporated within the Work constitutes direct
 87 |       or contributory patent infringement, then any patent licenses
 88 |       granted to You under this License for that Work shall terminate
 89 |       as of the date such litigation is filed.
 90 | 
 91 |    4. Redistribution. You may reproduce and distribute copies of the
 92 |       Work or Derivative Works thereof in any medium, with or without
 93 |       modifications, and in Source or Object form, provided that You
 94 |       meet the following conditions:
 95 | 
 96 |       (a) You must give any other recipients of the Work or
 97 |           Derivative Works a copy of this License; and
 98 | 
 99 |       (b) You must cause any modified files to carry prominent notices
100 |           stating that You changed the files; and
101 | 
102 |       (c) You must retain, in the Source form of any Derivative Works
103 |           that You distribute, all copyright, patent, trademark, and
104 |           attribution notices from the Source form of the Work,
105 |           excluding those notices that do not pertain to any part of
106 |           the Derivative Works; and
107 | 
108 |       (d) If the Work includes a "NOTICE" text file as part of its
109 |           distribution, then any Derivative Works that You distribute must
110 |           include a readable copy of the attribution notices contained
111 |           within such NOTICE file, excluding those notices that do not
112 |           pertain to any part of the Derivative Works, in at least one
113 |           of the following places: within a NOTICE text file distributed
114 |           as part of the Derivative Works; within the Source form or
115 |           documentation, if provided along with the Derivative Works; or,
116 |           within a display generated by the Derivative Works, if and
117 |           wherever such third-party notices normally appear. The contents
118 |           of the NOTICE file are for informational purposes only and
119 |           do not modify the License. You may add Your own attribution
120 |           notices within Derivative Works that You distribute, alongside
121 |           or as an addendum to the NOTICE text from the Work, provided
122 |           that such additional attribution notices cannot be construed
123 |           as modifying the License.
124 | 
125 |       You may add Your own copyright statement to Your modifications and
126 |       may provide additional or different license terms and conditions
127 |       for use, reproduction, or distribution of Your modifications, or
128 |       for any such Derivative Works as a whole, provided Your use,
129 |       reproduction, and distribution of the Work otherwise complies with
130 |       the conditions stated in this License.
131 | 
132 |    5. Submission of Contributions. Unless You explicitly state otherwise,
133 |       any Contribution intentionally submitted for inclusion in the Work
134 |       by You to the Licensor shall be under the terms and conditions of
135 |       this License, without any additional terms or conditions.
136 |       Notwithstanding the above, nothing herein shall supersede or modify
137 |       the terms of any separate license agreement you may have executed
138 |       with Licensor regarding such Contributions.
139 | 
140 |    6. Trademarks. This License does not grant permission to use the trade
141 |       names, trademarks, service marks, or product names of the Licensor,
142 |       except as required for reasonable and customary use in describing the
143 |       origin of the Work and reproducing the content of the NOTICE file.
144 | 
145 |    7. Disclaimer of Warranty. Unless required by applicable law or
146 |       agreed to in writing, Licensor provides the Work (and each
147 |       Contributor provides its Contributions) on an "AS IS" BASIS,
148 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 |       implied, including, without limitation, any warranties or conditions
150 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 |       PARTICULAR PURPOSE. You are solely responsible for determining the
152 |       appropriateness of using or redistributing the Work and assume any
153 |       risks associated with Your exercise of permissions under this License.
154 | 
155 |    8. Limitation of Liability. In no event and under no legal theory,
156 |       whether in tort (including negligence), contract, or otherwise,
157 |       unless required by applicable law (such as deliberate and grossly
158 |       negligent acts) or agreed to in writing, shall any Contributor be
159 |       liable to You for damages, including any direct, indirect, special,
160 |       incidental, or consequential damages of any character arising as a
161 |       result of this License or out of the use or inability to use the
162 |       Work (including but not limited to damages for loss of goodwill,
163 |       work stoppage, computer failure or malfunction, or any and all
164 |       other commercial damages or losses), even if such Contributor
165 |       has been advised of the possibility of such damages.
166 | 
167 |    9. Accepting Warranty or Additional Liability. While redistributing
168 |       the Work or Derivative Works thereof, You may choose to offer,
169 |       and charge a fee for, acceptance of support, warranty, indemnity,
170 |       or other liability obligations and/or rights consistent with this
171 |       License. However, in accepting such obligations, You may act only
172 |       on Your own behalf and on Your sole responsibility, not on behalf
173 |       of any other Contributor, and only if You agree to indemnify,
174 |       defend, and hold each Contributor harmless for any liability
175 |       incurred by, or claims asserted against, such Contributor by reason
176 |       of your accepting any such warranty or additional liability.
177 | 
178 |    END OF TERMS AND CONDITIONS
179 | 
180 |    APPENDIX: How to apply the Apache License to your work.
181 | 
182 |       To apply the Apache License to your work, attach the following
183 |       boilerplate notice, with the fields enclosed by brackets "[]"
184 |       replaced with your own identifying information. (Don't include
185 |       the brackets!)  The text should be enclosed in the appropriate
186 |       comment syntax for the file format. We also recommend that a
187 |       file or class name and description of purpose be included on the
188 |       same "printed page" as the copyright notice for easier
189 |       identification within third-party archives.
190 | 
191 |    Copyright [yyyy] [name of copyright owner]
192 | 
193 |    Licensed under the Apache License, Version 2.0 (the "License");
194 |    you may not use this file except in compliance with the License.
195 |    You may obtain a copy of the License at
196 | 
197 |        http://www.apache.org/licenses/LICENSE-2.0
198 | 
199 |    Unless required by applicable law or agreed to in writing, software
200 |    distributed under the License is distributed on an "AS IS" BASIS,
201 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 |    See the License for the specific language governing permissions and
203 |    limitations under the License.
204 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: docker clean
2 | 
3 | docker: ./requirements.txt
4 | 	docker build -t catalyst-detection:latest . -f ./Dockerfile --no-cache
5 | 
6 | clean:
7 | 	rm -rf build/
8 | 	docker rmi -f catalyst-detection:latest
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <div align="center">
 2 | 
 3 | [![Catalyst logo](https://raw.githubusercontent.com/catalyst-team/catalyst-pics/master/pics/catalyst_logo.png)](https://github.com/catalyst-team/catalyst)
 4 | 
 5 | **Accelerated DL R&D**
 6 | 
 7 | [![Build Status](http://66.248.205.49:8111/app/rest/builds/buildType:id:Catalyst_Deploy/statusIcon.svg)](http://66.248.205.49:8111/project.html?projectId=Catalyst&tab=projectOverview&guest=1)
 8 | [![CodeFactor](https://www.codefactor.io/repository/github/catalyst-team/catalyst/badge)](https://www.codefactor.io/repository/github/catalyst-team/catalyst)
 9 | [![Pipi version](https://img.shields.io/pypi/v/catalyst.svg)](https://pypi.org/project/catalyst/)
10 | [![Docs](https://img.shields.io/badge/dynamic/json.svg?label=docs&url=https%3A%2F%2Fpypi.org%2Fpypi%2Fcatalyst%2Fjson&query=%24.info.version&colorB=brightgreen&prefix=v)](https://catalyst-team.github.io/catalyst/index.html)
11 | [![PyPI Status](https://pepy.tech/badge/catalyst)](https://pepy.tech/project/catalyst)
12 | 
13 | [![Twitter](https://img.shields.io/badge/news-twitter-499feb)](https://twitter.com/CatalystTeam)
14 | [![Telegram](https://img.shields.io/badge/channel-telegram-blue)](https://t.me/catalyst_team)
15 | [![Slack](https://img.shields.io/badge/Catalyst-slack-success)](https://join.slack.com/t/catalyst-team-devs/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw)
16 | [![Github contributors](https://img.shields.io/github/contributors/catalyst-team/catalyst.svg?logo=github&logoColor=white)](https://github.com/catalyst-team/catalyst/graphs/contributors)
17 | 
18 | </div>
19 | 
20 | PyTorch framework for Deep Learning research and development.
21 | It was developed with a focus on reproducibility,
22 | fast experimentation and code/ideas reusing.
23 | Being able to research/develop something new,
24 | rather than write another regular train loop. <br/>
25 | Break the cycle - use the Catalyst!
26 | 
27 | Project [manifest](https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md). Part of [PyTorch Ecosystem](https://pytorch.org/ecosystem/). Part of [Catalyst Ecosystem](https://docs.google.com/presentation/d/1D-yhVOg6OXzjo9K_-IS5vSHLPIUxp1PEkFGnpRcNCNU/edit?usp=sharing):
28 | - [Alchemy](https://github.com/catalyst-team/alchemy) - Experiments logging & visualization
29 | - [Catalyst](https://github.com/catalyst-team/catalyst) - Accelerated Deep Learning Research and Development
30 | - [Reaction](https://github.com/catalyst-team/reaction) - Convenient Deep Learning models serving
31 | 
32 | [Catalyst at AI Landscape](https://landscape.lfai.foundation/selected=catalyst).
33 | 
34 | ---
35 | 
36 | # Catalyst.Detection [![Build Status](https://travis-ci.com/catalyst-team/detection.svg?branch=master)](https://travis-ci.com/catalyst-team/detection) [![Github contributors](https://img.shields.io/github/contributors/catalyst-team/detection.svg?logo=github&logoColor=white)](https://github.com/catalyst-team/detection/graphs/contributors)
37 | 
38 | > *Note: this repo uses advanced Catalyst Config API and could be a bit out-of-day right now. 
39 | > Use [Catalyst's minimal examples section](https://github.com/catalyst-team/catalyst#minimal-examples) for a starting point and up-to-day use cases, please.*
40 | 
41 | Based on [Objects as points](https://arxiv.org/abs/1904.07850) article by [Xingyi Zhou](https://arxiv.org/search/cs?searchtype=author&query=Zhou%2C+X), [Dequan Wang](https://arxiv.org/search/cs?searchtype=author&query=Wang%2C+D), [Philipp Krähenbühl](https://arxiv.org/search/cs?searchtype=author&query=Kr%C3%A4henb%C3%BChl%2C+P)
42 | 
43 | ### Training in your dataset
44 | 0. Install requirements ```pip install -r requirements.txt```
45 | 
46 | 1. Copy all images to one directory or two different directories for train and validation.
47 | 
48 | 1. Create ```markup_train.json``` as json file in MSCOCO format using ```COCODetectionFactory``` from ```data_preparation.py```. This class may be copied to your dataset generator. See documentation in code comments.  If your dataset are already in this format, go to next step.
49 | 
50 | 1. Specify perameters and in ```config/centernet_detection_config.yml```.
51 | 
52 | 1. Run catalyst ```catalyst-dl run --config=./configs/centernet_detection_config.yml```
53 | 
54 | 1. When you change dataset, you must delete cache files ```markup_*.json.cache``` because this files contain preprocessed bounding boxes info.
55 | 


--------------------------------------------------------------------------------
/configs/centernet_detection_config.yml:
--------------------------------------------------------------------------------
  1 | shared:
  2 |   classes: &classes ["person"]
  3 |   num_classes: &num_classes 1
  4 | 
  5 |   image_size: &image_size [224, 224]
  6 |   down_ratio: &down_ratio 4 # (height of input image / height of predicted heatmap)
  7 |   max_objs: &max_objs 15 # max objects detected per image, passed to DetectorCallback
  8 | 
  9 |   num_epochs: &num_epochs 200
 10 |   lr: &lr 0.001
 11 |   weight_decay: &wd 0.0001
 12 | 
 13 |   hm_weight: &hm_weight 1.0
 14 |   wh_weight: &wh_weight 10.0
 15 |   off_weight: &off_weight 10.0
 16 | 
 17 | model_params:
 18 |  model: ResnetCenterNet
 19 |  num_classes: *num_classes
 20 |  embedding_dim: 128
 21 |  arch: "ResnetFPNUnet"
 22 |  down_ratio: *down_ratio
 23 |  backbone_params:
 24 |    arch: resnet18
 25 |    pretrained: true
 26 | 
 27 | runner_params:
 28 |   input_key: "input"
 29 |   output_key: null
 30 | 
 31 | args:
 32 |   expdir: src
 33 |   logdir: logs
 34 | 
 35 | stages:
 36 |   state_params:
 37 |     main_metric: &main_metric "loss"
 38 |     minimize_metric: &minimize_metric true
 39 | 
 40 |   data_params:
 41 |     num_workers: 0
 42 |     batch_size: 5
 43 |     max_objs: *max_objs
 44 |     down_ratio: *down_ratio
 45 | 
 46 |     # default values, will be used if something aren't specified
 47 |     annotation_file: ./data/annotation.json
 48 |     images_dir: ./data/images
 49 | 
 50 |     # You may specify next parameters, data source will be overwritten
 51 |     #train_annotation_file: ./data_train/annotation.json
 52 |     #valid_annotation_file: ./data_valid/annotation.json
 53 |     #train_images_dir: ./data_train/images/
 54 |     #valid_images_dir: ./data_valid/images
 55 | 
 56 |     num_classes: *num_classes
 57 |     image_size: *image_size
 58 | 
 59 |     sampler_params:
 60 |       drop_last: true
 61 |       shuffle: per_epoch
 62 | 
 63 |   criterion_params:
 64 |     _key_value: True
 65 | 
 66 |     l_hm:
 67 |       criterion: CenterNetDetectionLoss
 68 |     l1_wh:
 69 |       criterion: RegL1Loss
 70 |     l1_reg:
 71 |       criterion: RegL1Loss
 72 | 
 73 |   scheduler_params:
 74 |      scheduler: MultiStepLR
 75 |      milestones: [12, 40]
 76 |      gamma: 0.8
 77 | 
 78 |   stage1:
 79 |     state_params:
 80 |       num_epochs: *num_epochs
 81 | 
 82 |     optimizer_params:
 83 |       optimizer: Lookahead
 84 |       base_optimizer_params:
 85 |         optimizer: RAdam
 86 |         lr: *lr
 87 |         weight_decay: *wd
 88 |       no_bias_weight_decay: True
 89 | 
 90 |     callbacks_params:
 91 |       loss_hm:
 92 |         callback: CriterionCallback
 93 |         input_key: hm
 94 |         output_key: hm
 95 |         prefix: loss_hm
 96 |         criterion_key: l_hm
 97 |         multiplier: *hm_weight
 98 | 
 99 |       loss_wh:
100 |         callback: CriterionCallback
101 |         input_key:
102 |           reg_mask: targets_mask_key
103 |           ind: targets_ind_key
104 |           wh: targets_key
105 |         output_key:
106 |           wh: outputs_key
107 |         prefix: loss_wh
108 |         criterion_key: l1_wh
109 |         multiplier: *wh_weight
110 | 
111 |       loss_reg:
112 |         callback: CriterionCallback
113 |         input_key:
114 |           reg_mask: targets_mask_key
115 |           ind: targets_ind_key
116 |           reg: targets_key
117 |         output_key:
118 |           reg: outputs_key
119 |         prefix: loss_reg
120 |         criterion_key: l1_reg
121 |         multiplier: *off_weight
122 | 
123 |       loss_aggregator:
124 |         callback: CriterionAggregatorCallback
125 |         prefix: &aggregated_loss loss
126 |         loss_keys: ["loss_hm", "loss_wh", "loss_reg"]
127 |         loss_aggregate_fn: "sum"
128 |         multiplier: 1.0
129 | 
130 |       optimizer:
131 |         callback: OptimizerCallback
132 |         grad_clip_params:
133 |           func: clip_grad_value_
134 |           clip_value: 5.0
135 |         loss_key: *aggregated_loss
136 | 
137 |       scheduler:
138 |         callback: SchedulerCallback
139 |         reduce_metric: *main_metric
140 | 
141 |       decoder:
142 |         callback: DecoderCallback
143 |         down_ratio: *down_ratio
144 |         max_objs: *max_objs
145 | 
146 |       # mAP:
147 |       #  callback: MeanAPCallback
148 |       #  num_classes: *num_classes
149 | 
150 |       saver:
151 |         callback: CheckpointCallback
152 |         save_n_best: 3
153 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | albumentations==0.2.3
2 | packaging==19.2
3 | numpy==1.17.4
4 | pycocotools==2.0.0
5 | torch==1.3.0
6 | catalyst==20.1


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | # from .runner import Runner
 3 | from catalyst.dl import SupervisedRunner as Runner
 4 | from catalyst.dl import registry
 5 | 
 6 | from .experiment import Experiment
 7 | 
 8 | from .callbacks import DecoderCallback, MeanAPCallback
 9 | from .losses import CenterNetDetectionLoss, \
10 |     RegL1Loss, MSEIndLoss, BCEIndLoss, FocalIndLoss
11 | from . import models
12 | 
13 | 
14 | registry.Criterion(CenterNetDetectionLoss)
15 | registry.Criterion(RegL1Loss)
16 | registry.Criterion(MSEIndLoss)
17 | registry.Criterion(BCEIndLoss)
18 | registry.Criterion(FocalIndLoss)
19 | 
20 | registry.Callback(DecoderCallback)
21 | registry.Callback(MeanAPCallback)
22 | 
23 | registry.MODELS.add_from_module(models)
24 | 


--------------------------------------------------------------------------------
/src/callbacks.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Dict
  2 | 
  3 | import numpy as np
  4 | from catalyst.dl import Callback, RunnerState, CallbackOrder, CriterionCallback
  5 | from catalyst.utils import detach
  6 | 
  7 | from .losses.ctdet_loss import decode_centernet_predictions
  8 | from .metrics import class_agnostic_mean_ap, calculate_map, construct_mAP_list_from_bboxes
  9 | 
 10 | 
 11 | class DecoderCallback(Callback):
 12 |     def __init__(self, down_ratio: int = 1, max_objs: int = 80):
 13 |         super().__init__(order=CallbackOrder.Metric - 1)
 14 |         self.down_ratio = down_ratio
 15 |         self.max_objs = max_objs
 16 | 
 17 |     def on_batch_end(self, state: RunnerState):
 18 |         if state.loader_name.startswith("valid"):
 19 |             detections = decode_centernet_predictions(
 20 |                 state.output["hm"],
 21 |                 state.output["wh"],
 22 |                 state.output["reg"],
 23 |                 K=self.max_objs
 24 |             )
 25 |             detections = detach(detections).reshape(
 26 |                 (detections.shape[0], -1, detections.shape[2])
 27 |             )
 28 |             detections[:, :, :4] *= self.down_ratio
 29 | 
 30 |             bboxes = detections[:, :, :4].astype(int)
 31 |             scores = detections[:, :, 4]
 32 |             labels = detections[:, :, 5].astype(int)
 33 | 
 34 |             result = dict(
 35 |                 bboxes=bboxes,
 36 |                 labels=labels,
 37 |                 scores=scores,
 38 |             )
 39 |             state.output.update(result)
 40 | 
 41 | 
 42 | class MeanAPCallback(Callback):
 43 |     def __init__(
 44 |             self,
 45 |             num_classes: int = None,
 46 |             prefix: str = "mAP",
 47 |             bboxes_key: str = "bboxes",
 48 |             scores_key: str = "scores",
 49 |             labels_key: str = "labels",
 50 |             iou_threshold: float = 0.9
 51 |     ):
 52 |         super().__init__(order=CallbackOrder.Metric)
 53 |         self.prefix = prefix
 54 |         self.classes = list(range(num_classes))
 55 |         self.mean_mAP = []
 56 | 
 57 |         self.bboxes_key = bboxes_key
 58 |         self.scores_key = scores_key
 59 |         self.labels_key = labels_key
 60 |         # List (dictionary value) contains of pairs of correct/not correct bboxes and model confidence by class
 61 |         self.classes_predictions: Dict[str, List[(bool, float)]] = {c: [] for c in range(num_classes)}
 62 |         self.iou_threshold = iou_threshold
 63 | 
 64 |     def on_batch_end(self, state: RunnerState):
 65 |         if state.loader_name.startswith("valid"):
 66 |             bboxes = state.output[self.bboxes_key]
 67 |             scores = state.output[self.scores_key]
 68 |             labels = state.output[self.labels_key]
 69 | 
 70 |             gt_bboxes = [
 71 |                 np.array(item_bboxes.detach().cpu())
 72 |                 for item_bboxes in state.input[self.bboxes_key]]
 73 |             gt_labels = [
 74 |                 np.array(item_label.detach().cpu())
 75 |                 for item_label in state.input[self.labels_key]
 76 |             ]
 77 | 
 78 |             for i, _class in enumerate(self.classes):
 79 |                 predict_bboxes_batch = []
 80 |                 predict_scores_batch = []
 81 | 
 82 |                 target_bboxes_batch = []
 83 |                 for batch_elem in zip(bboxes, scores, labels, gt_bboxes, gt_labels):
 84 |                     bboxes_, scores_, labels_, gt_bboxes_, gt_labels_ = batch_elem
 85 | 
 86 |                     bboxes_ = bboxes_[scores_ > 0]
 87 |                     labels_ = labels_[scores_ > 0]
 88 |                     scores_ = scores_[scores_ > 0]
 89 | 
 90 |                     mask = (labels_ == i)
 91 |                     predict_bboxes_batch.append(bboxes_[mask])
 92 |                     predict_scores_batch.append(scores_[mask])
 93 | 
 94 |                     gt_mask = gt_labels_ == i
 95 |                     target_bboxes_batch.append(gt_bboxes_[gt_mask])
 96 | 
 97 |                 if len(predict_bboxes_batch) != 0:
 98 |                     per_box_correctness = [
 99 |                         construct_mAP_list_from_bboxes(img_pred_bboxes.reshape(-1, 4), img_scores,
100 |                                                        img_gt_bboxes.reshape(-1, 4), self.iou_threshold)
101 |                         for img_pred_bboxes, img_scores, img_gt_bboxes
102 |                         in zip(predict_bboxes_batch, predict_scores_batch, target_bboxes_batch)
103 |                     ]
104 |                     for answers in per_box_correctness:
105 |                         self.classes_predictions[_class].extend(answers)
106 | 
107 |             mean_value = class_agnostic_mean_ap(bboxes, scores, gt_bboxes)
108 |             self.mean_mAP.append(mean_value)
109 | 
110 |     def on_loader_end(self, state: RunnerState):
111 |         if state.loader_name.startswith("valid"):
112 |             all_predictions = []
113 |             for class_name, predictions in self.classes_predictions.items():
114 |                 # metric_name = f"{self.prefix}/{class_name}"
115 |                 # mAP = calculate_map(predictions)
116 |                 # state.metrics.epoch_values[state.loader_name][metric_name] = mAP
117 |                 all_predictions.extend(predictions)
118 | 
119 |             # mean_AP = calculate_map(all_predictions)
120 |             # state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean'] = mean_AP
121 | 
122 |             ap_with_false_negatives = calculate_map(all_predictions, use_false_negatives=True)
123 |             state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean_with_fn'] = ap_with_false_negatives
124 | 
125 |             # old mAP
126 |             # state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean_old'] = np.mean(self.mean_mAP)
127 |             self.mean_mAP = []
128 |             self.classes_predictions: Dict[str, List[(bool, float)]] = {c: [] for c in self.classes}
129 | 
130 | 
131 | __all__ = ["DecoderCallback", "MeanAPCallback"]
132 | 


--------------------------------------------------------------------------------
/src/coco.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import numpy as np
  4 | import pickle
  5 | from typing import Any
  6 | 
  7 | from pycocotools.coco import COCO
  8 | from torch.utils.data import Dataset
  9 | 
 10 | 
 11 | class DetectionMSCOCODataset(Dataset):
 12 |     def __init__(self, annotation_file: str, image_dir: str):
 13 | 
 14 |         self._annotation_file = annotation_file
 15 |         self._image_dir = image_dir
 16 |         self._cache_file = self._annotation_file + ".cache"
 17 | 
 18 |         self._coco = COCO(self._annotation_file)
 19 | 
 20 |         self._img_ids = self._coco.getImgIds()
 21 |         self._cat_ids = self._coco.getCatIds()
 22 |         self._ann_ids = self._coco.getAnnIds()
 23 | 
 24 |         self._data = "coco"
 25 |         self._classes = {
 26 |             ind: cat_id for ind, cat_id in enumerate(self._cat_ids)
 27 |         }
 28 |         self._coco_to_class_map = {
 29 |             value: key for key, value in self._classes.items()
 30 |         }
 31 | 
 32 |         self._load_data()
 33 |         self._db_inds = np.arange(len(self._image_names))
 34 | 
 35 |         self._load_coco_data()
 36 | 
 37 |     def _load_data(self):
 38 |         print("loading from cache file: {}".format(self._cache_file))
 39 |         if not os.path.exists(self._cache_file):
 40 |             print("No cache file found...")
 41 |             self._extract_data()
 42 |             with open(self._cache_file, "wb") as f:
 43 |                 pickle.dump([self._detections, self._image_names], f)
 44 |             print("Cache file created")
 45 |         else:
 46 |             with open(self._cache_file, "rb") as f:
 47 |                 self._detections, self._image_names = pickle.load(f)
 48 | 
 49 |     def _load_coco_data(self):
 50 |         with open(self._annotation_file, "r") as f:
 51 |             data = json.load(f)
 52 | 
 53 |         coco_ids = self._coco.getImgIds()
 54 |         eval_ids = {
 55 |             self._coco.loadImgs(coco_id)[0]["file_name"]: coco_id
 56 |             for coco_id in coco_ids
 57 |             }
 58 | 
 59 |         self._coco_categories = data["categories"]
 60 |         self._coco_eval_ids = eval_ids
 61 | 
 62 |     def class_name(self, cid):
 63 |         cat_id = self._classes[cid]
 64 |         cat = self._coco.loadCats([cat_id])[0]
 65 |         return cat["name"]
 66 | 
 67 |     def _extract_data(self):
 68 | 
 69 |         self._image_names = [
 70 |             self._coco.loadImgs(img_id)[0]["file_name"]
 71 |             for img_id in self._img_ids
 72 |         ]
 73 |         self._detections = {}
 74 |         for ind, (coco_image_id, image_name) in enumerate(zip(self._img_ids, self._image_names)):
 75 |             image = self._coco.loadImgs(coco_image_id)[0]
 76 |             bboxes = []
 77 |             categories = []
 78 | 
 79 |             for cat_id in self._cat_ids:
 80 |                 annotation_ids = self._coco.getAnnIds(imgIds=image["id"], catIds=cat_id)
 81 |                 annotations = self._coco.loadAnns(annotation_ids)
 82 |                 category = self._coco_to_class_map[cat_id]
 83 |                 for annotation in annotations:
 84 |                     bbox = np.array(annotation["bbox"])
 85 |                     bbox[[2, 3]] += bbox[[0, 1]]
 86 |                     bboxes.append(bbox)
 87 | 
 88 |                     categories.append(category)
 89 | 
 90 |             self._detections[image_name] = [{
 91 |                 'bbox': bbox.astype(np.float32),
 92 |                 'category_id': category,
 93 |                 'category_name': self.class_name(category)
 94 |             } for bbox, category in zip(bboxes, categories)]
 95 | 
 96 |     def __getitem__(self, ind: int) -> Any:
 97 |         image_name = self._image_names[ind]
 98 | 
 99 |         return {
100 |             'image_name': os.path.join(self._image_dir, image_name),
101 |             'detections': self._detections[image_name]
102 |         }
103 | 
104 |     def __len__(self) -> int:
105 |         return len(self._img_ids)
106 | 
107 |     def get_num_classes(self) -> int:
108 |         return len(self._cat_ids)
109 | 


--------------------------------------------------------------------------------
/src/data_preparation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import json
  4 | 
  5 | from typing import Any, List, Optional, Dict, Union
  6 | 
  7 | 
  8 | class _UniqueIdsController:
  9 |     def __init__(self):
 10 |         self._ids = set()
 11 |         self._last_id = -1
 12 | 
 13 |     def has(self, object_id: int) -> bool:
 14 |         return object_id in self._ids
 15 | 
 16 |     def get_new_id(self) -> int:
 17 |         self._last_id += 1
 18 |         return self._last_id
 19 | 
 20 |     def force_add_id(self, object_id: int) -> bool:
 21 |         if object_id in self._ids:
 22 |             return False
 23 |         else:
 24 |             self._last_id = max(self._last_id, object_id)
 25 |             self._ids.add(object_id)
 26 |             return True
 27 | 
 28 | 
 29 | class COCODetectionFactory:
 30 |     """
 31 |     # Create dataset
 32 |     dataset = COCODetectionFactory()
 33 | 
 34 |     # set info to dataset
 35 |     dataset.set_info(description='my_dataset', url='http://localhost.com',
 36 |                      version='0.0.1', year=2019, contributor='Avi2011class',
 37 |                      date_created='20.12.2019')
 38 | 
 39 |     # add interesting licenses
 40 |     dataset.add_licence(name='GPLv3', url='https://en.wikipedia.org/wiki/GPL_License')
 41 |     dataset.add_licence(name='MIT', url='https://en.wikipedia.org/wiki/MIT_License')
 42 | 
 43 |     # add categories of objects
 44 |     dataset.add_category(category_id=10, name='man', supercategory='person')  # with fixed id, 10 is returned
 45 |     dataset.add_category(name='woman', supercategory='person')  # with auto selected id, 11 is returned
 46 |     dataset.add_category(category_id=5, name='child', supercategory='person')  # with fixed id, 5 is returned
 47 |     # dataset.add_category(category_id=5, name='dog', supercategory='not person') will raises RuntimeError,
 48 |     # id=11 already exists
 49 | 
 50 |     # add images to dataset
 51 |     dataset.add_image(image_id=0, file_name='photo_01.jpg')  # with fixed id, 0 is returned
 52 |     dataset.add_image(file_name='photo_02.jpg')  # without fixed id, 1 is returned
 53 |     # dataset.add_image(image_id=1, file_name='photo_01.jpg') will raise RuntimeError, id is already exists
 54 | 
 55 |     # Adds annotations to dataset
 56 | 
 57 |     # add annotation to image with image_id=0 and category_id=0
 58 |     dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_id=11)
 59 | 
 60 |     # add annotation to image with auto found image_id=1 and auto found category_id=10
 61 |     dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0,
 62 |                      image_file_name='photo_02.jpg', category_name='man')
 63 | 
 64 |     # add annotation to image with image_id=0 and category_id=102, category=102 may be created in future
 65 |     dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_id=102)
 66 | 
 67 |     # raises RuntimeError because category_name doesn't exists
 68 |     # dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_name='blabla')
 69 | 
 70 |     # save dataset to annotation file
 71 |     annotation_file_content = dataset.get_json()
 72 |     with open('annotation_file.json', 'w') as f:
 73 |         f.write(annotation_file_content)
 74 |     """
 75 | 
 76 |     def __init__(self):
 77 |         self._output = {
 78 |             "info": {},
 79 |             "licenses": [],
 80 |             "categories": [],
 81 |             "images": [],
 82 |             "annotations": []
 83 |         }
 84 |         self._licence_ids = _UniqueIdsController()
 85 |         self._image_ids = _UniqueIdsController()
 86 |         self._category_ids = _UniqueIdsController()
 87 |         self._annotation_ids = _UniqueIdsController()
 88 | 
 89 |         self._category_names_to_idx = {}
 90 |         self._image_names_to_idx = {}
 91 |         self._annotation_names_to_idx = {}
 92 | 
 93 |     def set_info(self,
 94 |                  description: str = "",
 95 |                  url: str = "",
 96 |                  version: str = "",
 97 |                  year: Union[str, int] = "",
 98 |                  contributor: str = "",
 99 |                  date_created: str = ""):
100 |         """Set information in mscoco format
101 |         Args:
102 |             description (str): dataset description
103 |             url (str): dataset url
104 |             version (str): dataset version
105 |             year (Union[str, int]): dataset year
106 |             contributor (str): contribution info
107 |             date_created (str): date
108 | 
109 |         Return:
110 |             COCODetectionFactory: reference to current COCODetectionFactory object
111 |         """
112 | 
113 |         self._output['info'] = {
114 |             'description': description,
115 |             'url': url,
116 |             'version': version,
117 |             'year': year,
118 |             'contributor': contributor,
119 |             'date_created': date_created
120 |         }
121 | 
122 |         return self
123 | 
124 |     def add_licence(self,
125 |                     licence_id: Optional[int] = None,
126 |                     name: str = "",
127 |                     url: str = "") -> Any:
128 |         """Adds license to dataset, dataset may contain more then one license, it will be stored as list
129 | 
130 |         Args:
131 |             licence_id (optional[int]): id of license, must be unique, if None random unique value will be used
132 |             name (str): name of license
133 |             url(str): url to license
134 | 
135 |         Returns:
136 |             int: id of license
137 |         """
138 | 
139 |         if licence_id is None:
140 |             licence_id = self._licence_ids.get_new_id()
141 |         elif self._licence_ids.has(licence_id):
142 |             raise RuntimeError('License ids must be unique, but \"{}\" already exists'.format(licence_id))
143 | 
144 |         self._output['licenses'].append({
145 |             'id': licence_id,
146 |             'name': name,
147 |             'url': url
148 |         })
149 |         return licence_id
150 | 
151 |     def add_category(self,
152 |                      category_id: Optional[int] = None,
153 |                      name: str = "",
154 |                      supercategory: str = ""):
155 |         """ Adds category to dataset
156 | 
157 |         Args:
158 |             category_id (int): id of category, must be unique, if None random unique value will be used
159 |             name (str): name of category, must be unique for dataset
160 |             supercategory (str): name of supercategory
161 |         Returns:
162 |              int: id of category
163 |         """
164 |         if category_id is None:
165 |             category_id = self._category_ids.get_new_id()
166 |         elif self._category_ids.has(category_id):
167 |             raise RuntimeError('Category ids must be unique, but \"{}\" already exists'.format(category_id))
168 | 
169 |         if name in self._image_names_to_idx:
170 |             raise RuntimeError('Category names must be unique, but \"{}\" already exists'.format(name))
171 |         self._category_names_to_idx[name] = category_id
172 | 
173 |         self._output['categories'].append({
174 |             'id': category_id,
175 |             'name': name,
176 |             'supercategory': supercategory
177 |         })
178 |         return category_id
179 | 
180 |     def add_image(self,
181 |                   image_id: Optional[int] = None,
182 |                   file_name: str = "",
183 |                   height: Optional[int] = None,
184 |                   width: Optional[int] = None) -> Any:
185 |         """ Adds image to dataset
186 | 
187 |         Args:
188 |             image_id (Optional[int]): id of image, must be unique, if None random unique value will be used
189 |             file_name (str): filename where image stored, must be unique for dataset
190 |             height (Optional[int]): height of image
191 |             width (optional[int]): width of image
192 |         Returns:
193 |             int: id of image
194 |         """
195 | 
196 |         if image_id is None:
197 |             image_id = self._image_ids.get_new_id()
198 |         elif self._category_ids.has(image_id):
199 |             raise RuntimeError('Image ids must be unique, but \"{}\" already exists'.format(category_id))
200 | 
201 |         if file_name in self._image_names_to_idx:
202 |             raise RuntimeError('Image file names must be unique, but \"{}\" already exists'.format(name))
203 |         self._image_names_to_idx[file_name] = image_id
204 | 
205 |         self._output['images'].append({
206 |             'id': image_id,
207 |             'file_name': file_name,
208 |             'height': height if height is not None else -1,
209 |             'width': width if width is not None else -1
210 |         })
211 | 
212 |         return image_id
213 |     
214 |     def add_bbox(self,
215 |                  bbox_left: int = 0,
216 |                  bbox_top: int = 0,
217 |                  bbox_width: int = 0,
218 |                  bbox_height: int = 0,
219 |                  image_id: Optional[int] = None,
220 |                  image_file_name: Optional[str] = None,
221 |                  category_id: Optional[int] = None,
222 |                  category_name: Optional[str] = None,
223 |                  iscrowd: bool = False):
224 |         """Adds bounding box to image in dataset
225 |         
226 |         One of image_id and image_file_name must be specified. If both image_id will be used
227 |         One of category_id and category_name must be specified. If both category_id will be used
228 | 
229 |         Args:
230 |             bbox_left (int):
231 |             bbox_top (int):
232 |             bbox_width (int):
233 |             bbox_height (int):
234 |             image_id (Optional[int]): if None may be computed from image_file_name. Image_id may be unknown
235 |             image_file_name (Optional[str]): None or name of file added to dataset
236 |             category_id (Optional[int]): if None may be computed from category_name. Category_id may be unknown
237 |             category_name (Optional[str]): None or name of category added to dataset
238 |             iscrowd (bool):
239 |         Returns:
240 |             int: id of bbox
241 |         """
242 | 
243 |         if image_id is None and image_file_name is None:
244 |             raise RuntimeError("One of image_id and image_file_name must be specified")
245 | 
246 |         if image_id is None:
247 |             if image_file_name in self._image_names_to_idx:
248 |                 image_id = self._image_names_to_idx[image_file_name]
249 |             else:
250 |                 raise RuntimeError("Unknown image file name \"{}\"".format(image_file_name))
251 | 
252 |         if category_id is None and category_name is None:
253 |             raise RuntimeError("One of category_id and category_name must be specified")
254 | 
255 |         if category_id is None:
256 |             if category_name in self._category_names_to_idx:
257 |                 category_id = self._category_names_to_idx[category_name]
258 |             else:
259 |                 raise RuntimeError("Unknown category name \"{}\"".format(category_name))
260 | 
261 |         new_id = self._annotation_ids.get_new_id()
262 |         self._output['annotations'].append({
263 |             'id': new_id,
264 |             'image_id': image_id,
265 |             'category_id': category_id,
266 |             'segmentation': [],
267 |             'area': 0,
268 |             'bbox': [bbox_left, bbox_top, bbox_width, bbox_height],
269 |             'iscrowd': iscrowd,
270 |         })
271 |         return new_id
272 | 
273 |     def get_dict(self) -> Dict[str, Any]:
274 |         """
275 |         Args:
276 | 
277 |         Returns:
278 |             Dict[str, Any]: dict in mscoco format
279 |         """
280 |         return self._output
281 | 
282 |     def get_json(self, **kwargs) -> str:
283 |         """
284 |         Args:
285 |             **kwargs: passed to json.dumps
286 |         Returns:
287 |             str: dataset in json format
288 |         """
289 |         return json.dumps(self._output, **kwargs)
290 | 


--------------------------------------------------------------------------------
/src/dataset.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, Optional, Any, Tuple
  2 | 
  3 | import cv2
  4 | import math
  5 | import torch
  6 | from torch.utils.data import Dataset
  7 | import numpy as np
  8 | 
  9 | from .coco import DetectionMSCOCODataset
 10 | from catalyst import utils
 11 | 
 12 | cv2.setNumThreads(1)
 13 | cv2.ocl.setUseOpenCL(False)
 14 | 
 15 | 
 16 | def get_affine_transform(
 17 |         center,
 18 |         scale,
 19 |         rot,
 20 |         output_size,
 21 |         shift=np.array([0, 0], dtype=np.float32),
 22 |         inv=0
 23 | ):
 24 |     if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
 25 |         scale = np.array([scale, scale], dtype=np.float32)
 26 | 
 27 |     scale_tmp = scale
 28 |     src_w = scale_tmp[0]
 29 |     dst_w = output_size[0]
 30 |     dst_h = output_size[1]
 31 | 
 32 |     rot_rad = np.pi * rot / 180
 33 |     src_dir = get_dir([0, src_w * -0.5], rot_rad)
 34 |     dst_dir = np.array([0, dst_w * -0.5], np.float32)
 35 | 
 36 |     src = np.zeros((3, 2), dtype=np.float32)
 37 |     dst = np.zeros((3, 2), dtype=np.float32)
 38 |     src[0, :] = center + scale_tmp * shift
 39 |     src[1, :] = center + src_dir + scale_tmp * shift
 40 |     dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
 41 |     dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
 42 | 
 43 |     src[2:, :] = get_3rd_point(src[0, :], src[1, :])
 44 |     dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
 45 | 
 46 |     if inv:
 47 |         trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
 48 |     else:
 49 |         trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
 50 | 
 51 |     return trans
 52 | 
 53 | 
 54 | def affine_transform(point: np.array, transform_matrix: np.array) -> np.array:
 55 |     new_pt = np.array([point[0], point[1], 1.], dtype=np.float32).T
 56 |     new_pt = np.dot(transform_matrix, new_pt)
 57 |     return new_pt[:2]
 58 | 
 59 | 
 60 | def get_3rd_point(a, b):
 61 |     direct = a - b
 62 |     return b + np.array([-direct[1], direct[0]], dtype=np.float32)
 63 | 
 64 | 
 65 | def get_dir(src_point, rot_rad):
 66 |     sn, cs = np.sin(rot_rad), np.cos(rot_rad)
 67 | 
 68 |     src_result = [0, 0]
 69 |     src_result[0] = src_point[0] * cs - src_point[1] * sn
 70 |     src_result[1] = src_point[0] * sn + src_point[1] * cs
 71 | 
 72 |     return src_result
 73 | 
 74 | 
 75 | def gaussian_radius(det_size, min_overlap=0.7):
 76 |     height, width = det_size
 77 | 
 78 |     a1 = 1
 79 |     b1 = (height + width)
 80 |     c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
 81 |     sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
 82 |     r1 = (b1 + sq1) / 2
 83 | 
 84 |     a2 = 4
 85 |     b2 = 2 * (height + width)
 86 |     c2 = (1 - min_overlap) * width * height
 87 |     sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
 88 |     r2 = (b2 + sq2) / 2
 89 | 
 90 |     a3 = 4 * min_overlap
 91 |     b3 = -2 * min_overlap * (height + width)
 92 |     c3 = (min_overlap - 1) * width * height
 93 |     sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
 94 |     r3 = (b3 + sq3) / 2
 95 |     return min(r1, r2, r3)
 96 | 
 97 | 
 98 | def gaussian2D(shape, sigma=1):
 99 |     m, n = [(ss - 1.) / 2. for ss in shape]
100 |     y, x = np.ogrid[-m:m + 1, -n:n + 1]
101 | 
102 |     h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
103 |     h[h < np.finfo(h.dtype).eps * h.max()] = 0
104 |     return h
105 | 
106 | 
107 | def draw_umich_gaussian(heatmap, center, radius, k=1):
108 |     diameter = 2 * radius + 1
109 |     gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
110 | 
111 |     x, y = int(center[0]), int(center[1])
112 | 
113 |     height, width = heatmap.shape[0:2]
114 | 
115 |     left, right = min(x, radius), min(width - x, radius + 1)
116 |     top, bottom = min(y, radius), min(height - y, radius + 1)
117 | 
118 |     masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
119 |     masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
120 |     if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:  # TODO debug
121 |         np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
122 |     return heatmap
123 | 
124 | 
125 | class DetectionDataset(Dataset):
126 |     def __init__(self,
127 |                  annotation_file: str,
128 |                  images_dir: str,
129 |                  down_ratio: int,
130 |                  max_objects: int,
131 |                  num_classes: Optional[int] = None,
132 |                  image_size: Tuple[int, int] = (224, 224),
133 |                  transform: Optional[Any] = None,
134 |                  **kwargs
135 |                  ):
136 |         super(DetectionDataset, self).__init__()
137 | 
138 |         self._annotations_dataset = DetectionMSCOCODataset(annotation_file, images_dir)
139 | 
140 |         self._num_classes = num_classes
141 |         if self._num_classes is None:
142 |             self._num_classes = self._annotations_dataset.get_num_classes()
143 | 
144 |         self._down_ratio = down_ratio
145 |         self._max_objects = max_objects
146 | 
147 |         assert image_size[0] == image_size[1], "Only square image are now supported"
148 |         self.image_size = image_size[0]
149 |         self.transform = transform
150 | 
151 |     def __len__(self) -> int:
152 |         return len(self._annotations_dataset)
153 | 
154 |     def __getitem__(self, idx: int) -> Dict[str, Any]:
155 |         annotation = self._annotations_dataset[idx]
156 |         image_name = annotation['image_name']
157 |         detections = annotation['detections']
158 | 
159 |         image = utils.imread(image_name)
160 |         x_scale, y_scale = self.image_size / image.shape[1], self.image_size / image.shape[0]
161 | 
162 |         image = cv2.resize(image, (self.image_size, self.image_size), cv2.INTER_LINEAR)
163 | 
164 |         detections = [
165 |             {
166 |                 'category_id': detection['category_id'],
167 |                 'category_name': detection['category_name'],
168 |                 'bbox': detection['bbox'].copy()
169 |             } for detection in detections
170 |         ]
171 | 
172 |         for detection in detections:
173 |             detection['bbox'][0::2] *= x_scale
174 |             detection['bbox'][1::2] *= y_scale
175 | 
176 |         bboxes = []
177 |         labels = []
178 |         for detection in detections:
179 |             median_x = (detection['bbox'][0] + detection['bbox'][2]) // 2
180 |             median_y = (detection['bbox'][1] + detection['bbox'][3]) // 2
181 | 
182 |             # CenterNet are VERY bad when center of detected objects not in the images
183 |             # Let's delete this bboxes
184 |             if not (0 <= median_x <= image.shape[1]) or not (0 <= median_y <= image.shape[0]):
185 |                 continue
186 | 
187 |             detection['bbox'][0::2] = np.clip(detection['bbox'][0::2], 0, image.shape[1])
188 |             detection['bbox'][1::2] = np.clip(detection['bbox'][1::2], 0, image.shape[0])
189 | 
190 |             bboxes.append(detection['bbox'])
191 |             labels.append(detection['category_id'])
192 | 
193 |         bboxes = np.array(bboxes)
194 |         labels = np.array(labels)
195 | 
196 |         if self.transform is not None:
197 |             result = self.transform(
198 |                 image=image,
199 |                 bboxes=bboxes,
200 |                 labels=labels,
201 |             )
202 |         else:
203 |             result = dict(
204 |                 image=image,
205 |                 bboxes=bboxes,
206 |                 labels=labels,
207 |             )
208 | 
209 |         image = result["image"].astype(np.uint8)
210 |         bboxes = result["bboxes"]
211 |         labels = result["labels"]
212 | 
213 |         input_height, input_width = image.shape[0], image.shape[1]
214 | 
215 |         # Normalization
216 |         input = (image.astype(np.float32) / 255.) * 2. - 1.
217 |         input = input.transpose(2, 0, 1)
218 | 
219 |         output_height = input_height // self._down_ratio
220 |         output_width = input_width // self._down_ratio
221 |         # trans_output = get_affine_transform(center, scale, 0, [output_width, output_height])
222 | 
223 |         heatmap = np.zeros((self._num_classes, output_height, output_width), dtype=np.float32)
224 |         width_height = np.zeros((self._max_objects, 2), dtype=np.float32)
225 | 
226 |         reg = np.zeros((self._max_objects, 2), dtype=np.float32)
227 |         ind = np.zeros(self._max_objects, dtype=np.int64)
228 |         reg_mask = np.zeros(self._max_objects, dtype=np.uint8)
229 | 
230 |         draw_gaussian = draw_umich_gaussian
231 | 
232 |         new_bboxes = []
233 |         num_objs = min(len(bboxes), self._max_objects)
234 |         for i in range(num_objs):
235 |             bbox = np.array(bboxes[i], dtype=np.float32) / self._down_ratio
236 |             class_id = labels[i]
237 | 
238 |             bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_width - 1)
239 |             bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_height - 1)
240 |             h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
241 |             new_bboxes.append(bbox)
242 | 
243 |             if h > 0 and w > 0:
244 |                 radius = gaussian_radius((math.ceil(h), math.ceil(w)))
245 |                 radius = max(0, int(radius))
246 |                 _center = np.array(
247 |                     [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
248 |                     dtype=np.float32
249 |                 )
250 |                 _center_int = _center.astype(np.int32)
251 |                 draw_gaussian(heatmap[class_id], _center_int, radius)
252 |                 width_height[i] = 1. * w, 1. * h
253 |                 ind[i] = _center_int[1] * output_width + _center_int[0]
254 |                 reg[i] = _center - _center_int
255 |                 reg_mask[i] = 1
256 | 
257 |         result = {
258 |             "filename": image_name,
259 |             "input": torch.from_numpy(input),
260 |             "hm": torch.from_numpy(heatmap),
261 |             "reg_mask": torch.from_numpy(reg_mask),
262 |             "ind": torch.from_numpy(ind),
263 |             "wh": torch.from_numpy(width_height),
264 |             "reg": torch.from_numpy(reg),
265 |             "bboxes": np.array(bboxes),
266 |             "labels": np.array(labels),
267 |         }
268 | 
269 |         return result
270 | 


--------------------------------------------------------------------------------
/src/debug.py:
--------------------------------------------------------------------------------
 1 | from catalyst.dl import Callback, RunnerState, CallbackOrder, CriterionCallback
 2 | 
 3 | import torch
 4 | from torch.nn import Module
 5 | 
 6 | 
 7 | class MyDebugCallback(Callback):
 8 |     def __init__(self):
 9 |         super(MyDebugCallback, self).__init__(order=CallbackOrder.Metric + 1)
10 | 
11 |     def on_epoch_end(self, state: RunnerState) -> None:
12 |         print('Input:')
13 |         print(state.input.keys())
14 |         print('Output')
15 |         print(state.output.keys())
16 |         print('-' * 40)
17 | 
18 | 
19 | class MyDebugCriterion(Module):
20 |     def __init__(self):
21 |         super(MyDebugCriterion, self).__init__()
22 | 
23 |     def forward(self, *args, **kwargs):
24 |         print('Args:')
25 |         print(', '.join(list(map(str, map(type, args)))))
26 |         print('Kwargs:')
27 |         print(', '.join(kwargs.keys()))
28 |         print('*' * 40)
29 |         return torch.zeros((1, ), dtype=torch.float32)
30 | 


--------------------------------------------------------------------------------
/src/experiment.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | import torch
 4 | from catalyst.data.collate_fn import FilteringCollateFn
 5 | from catalyst.dl import ConfigExperiment
 6 | 
 7 | from .dataset import DetectionDataset
 8 | from .transforms import train_transform, valid_transform, infer_transform
 9 | 
10 | torch.multiprocessing.set_sharing_strategy("file_system")
11 | 
12 | 
13 | class Experiment(ConfigExperiment):
14 |     def get_datasets(
15 |             self,
16 |             stage: str,
17 |             **kwargs,
18 |     ):
19 |         def process_kwargs_by_default_values(parameter, default_parameter):
20 |             if parameter not in kwargs:
21 |                 if default_parameter not in kwargs:
22 |                     raise ValueError('You must specify \"{}\" or default value(\"{}\") in config'
23 |                                      .format(parameter, default_parameter))
24 |                 else:
25 |                     kwargs[parameter] = kwargs[default_parameter]
26 | 
27 |         process_kwargs_by_default_values('train_annotation_file', 'annotation_file')
28 |         process_kwargs_by_default_values('valid_annotation_file', 'annotation_file')
29 |         process_kwargs_by_default_values('train_images_dir', 'images_dir')
30 |         process_kwargs_by_default_values('valid_images_dir', 'images_dir')
31 | 
32 |         if kwargs['train_annotation_file'] == kwargs['valid_annotation_file']:
33 |             warnings.warn("Valid is now equal to train, is it expected?", RuntimeWarning)
34 | 
35 |         train_dataset = DetectionDataset(annotation_file=kwargs['train_annotation_file'],
36 |                                          images_dir=kwargs['train_images_dir'],
37 |                                          down_ratio=kwargs['down_ratio'],
38 |                                          max_objects=kwargs['max_objs'],
39 |                                          num_classes=kwargs['num_classes'],
40 |                                          image_size=kwargs['image_size'],
41 |                                          transform=train_transform(kwargs['image_size'][0])
42 |                                          )
43 | 
44 |         valid_dataset = DetectionDataset(annotation_file=kwargs['valid_annotation_file'],
45 |                                          images_dir=kwargs['valid_images_dir'],
46 |                                          down_ratio=kwargs['down_ratio'],
47 |                                          max_objects=kwargs['max_objs'],
48 |                                          num_classes=kwargs['num_classes'],
49 |                                          image_size=kwargs['image_size'],
50 |                                          transform=valid_transform(kwargs['image_size'][0])
51 |                                          )
52 | 
53 |         return {
54 |             'train': {
55 |                 'dataset': train_dataset,
56 |                 'collate_fn': FilteringCollateFn('bboxes', 'labels')
57 |             },
58 |             'valid': {
59 |                 'dataset': valid_dataset,
60 |                 'collate_fn': FilteringCollateFn('bboxes', 'labels')
61 |             },
62 |         }
63 | 


--------------------------------------------------------------------------------
/src/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .ctdet_loss import CenterNetDetectionLoss, \
2 |     RegL1Loss, MSEIndLoss, BCEIndLoss, FocalIndLoss, \
3 |     decode_centernet_predictions
4 | 


--------------------------------------------------------------------------------
/src/losses/ctdet_loss.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | def _neg_loss(outputs: torch.Tensor, targets: torch.Tensor):
  7 |     """
  8 |      Modified focal loss. Exactly the same as CornerNet.
  9 |     Runs faster and costs a little bit more memory
 10 | 
 11 |     Arguments:
 12 |         outputs (torch.Tensor): BATCH x C x H x W
 13 |         targets (torch.Tensor): BATCH x C x H x W
 14 |     """
 15 |     pos_inds = targets.eq(1).float()
 16 |     neg_inds = targets.lt(1).float()
 17 | 
 18 |     neg_weights = torch.pow(1 - targets, 4)
 19 | 
 20 |     loss = 0
 21 | 
 22 |     pos_loss = torch.log(outputs) * torch.pow(1 - outputs, 2) * pos_inds
 23 |     neg_loss = torch.log(1 - outputs) * torch.pow(outputs, 2) * neg_weights * neg_inds
 24 | 
 25 |     num_pos = pos_inds.float().sum()
 26 |     pos_loss = pos_loss.sum()
 27 |     neg_loss = neg_loss.sum()
 28 | 
 29 |     if num_pos == 0:
 30 |         loss = loss - neg_loss
 31 |     else:
 32 |         loss = loss - (pos_loss + neg_loss) / num_pos
 33 |     return loss
 34 | 
 35 | 
 36 | def _sigmoid(x):
 37 |     y = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
 38 |     return y
 39 | 
 40 | 
 41 | def _gather_feat(feat, ind, mask=None):
 42 |     dim = feat.size(2)
 43 |     ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
 44 |     feat = feat.gather(1, ind)
 45 |     if mask is not None:
 46 |         mask = mask.unsqueeze(2).expand_as(feat)
 47 |         feat = feat[mask]
 48 |         feat = feat.view(-1, dim)
 49 |     return feat
 50 | 
 51 | 
 52 | def _tranpose_and_gather_feat(feat, ind):
 53 |     feat = feat.permute(0, 2, 3, 1).contiguous()
 54 |     feat = feat.view(feat.size(0), -1, feat.size(3))
 55 |     feat = _gather_feat(feat, ind)
 56 |     return feat
 57 | 
 58 | 
 59 | def _nms(heat, kernel=3):
 60 |     pad = (kernel - 1) // 2
 61 | 
 62 |     hmax = nn.functional.max_pool2d(
 63 |         heat, (kernel, kernel), stride=1, padding=pad)
 64 |     keep = (hmax == heat).float()
 65 |     return heat * keep
 66 | 
 67 | 
 68 | def _topk(scores, K=40):
 69 |     batch, cat, height, width = scores.size()
 70 | 
 71 |     topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
 72 | 
 73 |     topk_inds = topk_inds % (height * width)
 74 |     topk_ys = (topk_inds / width).int().float()
 75 |     topk_xs = (topk_inds % width).int().float()
 76 | 
 77 |     topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
 78 |     topk_clses = (topk_ind / K).int()
 79 |     topk_inds = _gather_feat(
 80 |         topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
 81 |     topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
 82 |     topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
 83 | 
 84 |     return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
 85 | 
 86 | 
 87 | def decode_centernet_predictions(
 88 |     heat, wh, reg=None, K=100
 89 | ):
 90 |     with torch.no_grad():
 91 |         batch, cat, height, width = heat.size()
 92 |         # mask = reg_mask.unsqueeze(2).expand_as(pred).float()
 93 | 
 94 |         heat = torch.sigmoid(heat)
 95 |         # perform nms on heatmaps
 96 |         heat = _nms(heat)
 97 | 
 98 |         scores, inds, clses, ys, xs = _topk(heat, K=K)
 99 |         if reg is not None:
100 |             reg = _tranpose_and_gather_feat(reg, inds)
101 |             reg = reg.view(batch, K, 2)
102 |             xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
103 |             ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
104 |         else:
105 |             xs = xs.view(batch, K, 1) + 0.5
106 |             ys = ys.view(batch, K, 1) + 0.5
107 |         wh = _tranpose_and_gather_feat(wh, inds).view(batch, K, 2)
108 | 
109 |         clses = clses.view(batch, K, 1).float()
110 |         scores = scores.view(batch, K, 1)
111 |         bboxes = torch.cat([xs - wh[..., 0:1] / 2,
112 |                             ys - wh[..., 1:2] / 2,
113 |                             xs + wh[..., 0:1] / 2,
114 |                             ys + wh[..., 1:2] / 2], dim=2)
115 |         detections = torch.cat([bboxes, scores, clses], dim=2)
116 | 
117 |     return detections
118 | 
119 | 
120 | class FocalLoss(nn.Module):
121 |     def __init__(self):
122 |         super(FocalLoss, self).__init__()
123 |         self.neg_loss = _neg_loss
124 | 
125 |     def forward(self, outputs, targets):
126 |         return self.neg_loss(outputs, targets)
127 | 
128 | 
129 | class RegL1Loss(nn.Module):
130 |     def __init__(
131 |         self,
132 |         key: str = "",
133 |         mask_key: str = "reg_mask",
134 |         ind_key: str = "ind",
135 |         debug: bool = False
136 |     ):
137 |         super(RegL1Loss, self).__init__()
138 |         self.key = key
139 |         self.mask_key = mask_key
140 |         self.ind_key = ind_key
141 |         self.debug = debug
142 | 
143 |     # def forward(self, outputs, targets):
144 |     #    result = self._forward(
145 |     #        outputs[self.key], targets[self.mask_key],
146 |     #        targets[self.ind_key], targets[self.key]
147 |     #    )
148 |     #    return result
149 | 
150 |     def forward(self, outputs_key, targets_mask_key, targets_ind_key, targets_key):
151 |         result = self._forward(
152 |             outputs_key, targets_mask_key, targets_ind_key, targets_key
153 |         )
154 |         return result
155 | 
156 |     def _forward(self, output, mask, ind, target):
157 |         pred = _tranpose_and_gather_feat(output, ind)
158 |         mask = mask.unsqueeze(2).expand_as(pred).float()
159 | 
160 |         if self.debug:
161 |             import ipdb; ipdb.set_trace()
162 |         loss = F.l1_loss(pred * mask, target * mask)
163 |         loss = loss / (mask.sum() + 1e-4)
164 |         return loss
165 | 
166 | 
167 | class CenterNetDetectionLoss(nn.Module):
168 |     def __init__(self):
169 |         super(CenterNetDetectionLoss, self).__init__()
170 |         self.focal = FocalLoss()
171 | 
172 |     def forward(self, outputs, targets):
173 |         loss = self.focal(_sigmoid(outputs), targets)
174 |         return loss
175 | 
176 | 
177 | class MSEIndLoss(nn.Module):
178 |     def __init__(
179 |         self,
180 |         key: str,
181 |         mask_key: str = "reg_mask",
182 |         ind_key: str = "ind",
183 |         debug: bool = False,
184 |         reduction: str = "mean"
185 |     ):
186 |         super(MSEIndLoss, self).__init__()
187 |         self.key = key
188 |         self.mask_key = mask_key
189 |         self.ind_key = ind_key
190 |         self.debug = debug
191 | 
192 |         self.loss = nn.MSELoss(reduction=reduction)
193 | 
194 |     def forward(self, outputs, targets):
195 |         result = self._forward(
196 |             outputs[self.key], targets[self.mask_key],
197 |             targets[self.ind_key], targets[self.key]
198 |         )
199 | 
200 |         return result
201 | 
202 |     def _forward(self, output, mask, ind, target):
203 |         pred = _tranpose_and_gather_feat(output, ind)
204 |         _mask = mask.unsqueeze(2).expand_as(pred).float()
205 | 
206 |         if self.debug:
207 |             import ipdb; ipdb.set_trace()
208 |         loss = self.loss(_sigmoid(pred) * _mask, target.unsqueeze(2) * _mask)
209 |         # loss = loss / (_mask.sum() + 1e-4)
210 |         return loss
211 | 
212 | 
213 | class BCEIndLoss(nn.Module):
214 |     def __init__(
215 |         self,
216 |         key: str,
217 |         mask_key: str = "reg_mask",
218 |         ind_key: str = "ind",
219 |         debug: bool = False
220 |     ):
221 |         super(BCEIndLoss, self).__init__()
222 |         self.key = key
223 |         self.mask_key = mask_key
224 |         self.ind_key = ind_key
225 |         self.loss = nn.BCELoss()
226 |         self.debug = debug
227 | 
228 |     def forward(self, outputs, targets):
229 |         result = self._forward(
230 |             outputs[self.key], targets[self.mask_key],
231 |             targets[self.ind_key], targets[self.key]
232 |         )
233 | 
234 |         return result
235 | 
236 |     def _forward(self, output, mask, ind, target):
237 |         pred = _tranpose_and_gather_feat(output, ind)
238 |         _mask = mask.unsqueeze(2).expand_as(pred).float()
239 |         if self.debug:
240 |             import ipdb; ipdb.set_trace()
241 | 
242 |         loss = self.loss(_sigmoid(pred) * _mask, target * _mask)
243 |         # loss = loss / (mask.sum() + 1e-4)
244 |         return loss
245 | 
246 | 
247 | class FocalIndLoss(nn.Module):
248 |     def __init__(
249 |         self,
250 |         key: str,
251 |         mask_key: str = "reg_mask",
252 |         ind_key: str = "ind",
253 |         debug: bool = False
254 |     ):
255 |         super(FocalIndLoss, self).__init__()
256 |         self.key = key
257 |         self.mask_key = mask_key
258 |         self.ind_key = ind_key
259 |         self.loss = FocalLoss()
260 |         self.debug = debug
261 | 
262 |     def forward(self, outputs, targets):
263 |         result = self._forward(
264 |             outputs[self.key], targets[self.mask_key],
265 |             targets[self.ind_key], targets[self.key]
266 |         )
267 | 
268 |         return result
269 | 
270 |     def _forward(self, output, mask, ind, target):
271 |         pred = _tranpose_and_gather_feat(output, ind)
272 |         _mask = mask.unsqueeze(2).expand_as(pred).float()
273 |         if self.debug:
274 |             import ipdb; ipdb.set_trace()
275 | 
276 |         loss = self.loss(_sigmoid(pred) * _mask, target * _mask)
277 |         # loss = loss / (mask.sum() + 1e-4)
278 |         return loss
279 | 


--------------------------------------------------------------------------------
/src/metrics.py:
--------------------------------------------------------------------------------
  1 | from typing import Tuple, List
  2 | 
  3 | import numpy as np
  4 | from sklearn.metrics import average_precision_score
  5 | 
  6 | 
  7 | def construct_mAP_list_from_bboxes(predicted_bboxes, scores, gt_bboxes, iou_threshold=.9) -> List[Tuple[bool, float]]:
  8 |     """
  9 |     Args:
 10 |         predicted_bboxes (np.array): predictions
 11 |         scores (np.array): model confidences
 12 |         gt_bboxes (np.array): ground truth bboxes
 13 |         iou_threshold (float): between 0 and 1, iou threshold to mAP metric
 14 | 
 15 |     Returns:
 16 |         List[Tuple[bool, float]]: mAP list
 17 |     """
 18 | 
 19 |     ious_matrix = bbox_iou(predicted_bboxes, gt_bboxes)
 20 |     result = _construct_list_for_map(ious_matrix, scores, iou_thresh=iou_threshold)
 21 |     return result
 22 | 
 23 | 
 24 | def _construct_list_for_map(ious_matrix, scores, iou_thresh=.9) -> List[Tuple[bool, float]]:
 25 |     """
 26 |     Args:
 27 |         ious_matrix (np.array): array ious between predicted and ground-truth objects
 28 |         scores (np.array): array of shape (n) with model confidences for objects
 29 |         iou_thresh (float): between 0 and 1, iou threshold to mAP metric
 30 |     Returns:
 31 |         List[Tuple[bool, float]]: mAP list
 32 |     """
 33 | 
 34 |     ious_thresholded = ious_matrix > iou_thresh
 35 |     correct_bboxes = np.where(ious_thresholded.sum(axis=1).astype(bool))[0]
 36 |     incorrect_bboxes = np.where(~ious_thresholded.sum(axis=1).astype(bool))[0]
 37 |     fn_bboxes = np.where(ious_thresholded.sum(axis=0) == 0)[0]
 38 | 
 39 |     result = []
 40 |     result.extend([(True, scores[i]) for i in correct_bboxes])
 41 |     result.extend([(False, scores[i]) for i in incorrect_bboxes])
 42 |     result.extend([(True, 0) for _ in fn_bboxes])
 43 |     return result
 44 | 
 45 | 
 46 | def calculate_map(predictions: List[Tuple[bool, float]], use_false_negatives: bool = False) -> float:
 47 |     """Calculates average precision metric for list of predictions with confidences
 48 | 
 49 |     Args:
 50 |         predictions (List[Tuple[...]]):  List of Tuples containing all predicted bboxes with scores and corrent/incorrect flag
 51 |         use_false_negatives (bool): Flag to use false negatives in metric
 52 |     Returns:
 53 |          float: average precision
 54 |     """
 55 |     predictions = np.array(predictions)
 56 |     if not use_false_negatives:
 57 |         predictions = predictions[predictions[:, 1] > 0]
 58 |     true_labels = predictions[:, 0].astype(int)
 59 |     scores = predictions[:, 1]
 60 | 
 61 |     # Corner cases, critical for sklearn realisation
 62 |     if len(predictions) == 0:
 63 |         return 0
 64 |     if len(predictions) == 1:
 65 |         return 1
 66 | 
 67 |     result = average_precision_score(true_labels, scores)
 68 |     return result
 69 | 
 70 | 
 71 | def bbox_iou(predicted, target) -> np.ndarray:
 72 |     p_xmin, p_ymin, p_xmax, p_ymax = np.hsplit(predicted, 4)
 73 |     t_xmin, t_ymin, t_xmax, t_ymax = np.hsplit(target, 4)
 74 | 
 75 |     int_xmin = np.maximum(p_xmin, t_xmin.T)
 76 |     int_xmax = np.minimum(p_xmax, t_xmax.T)
 77 |     int_ymin = np.maximum(p_ymin, t_ymin.T)
 78 |     int_ymax = np.minimum(p_ymax, t_ymax.T)
 79 | 
 80 |     int_area = np.maximum(int_ymax - int_ymin, 0) \
 81 |                * np.maximum(int_xmax - int_xmin, 0)
 82 | 
 83 |     un_xmin = np.minimum(p_xmin, t_xmin.T)
 84 |     un_xmax = np.maximum(p_xmax, t_xmax.T)
 85 |     un_ymin = np.minimum(p_ymin, t_ymin.T)
 86 |     un_ymax = np.maximum(p_ymax, t_ymax.T)
 87 | 
 88 |     un_area = np.maximum(un_ymax - un_ymin, 0) \
 89 |               * np.maximum(un_xmax - un_xmin, 0)
 90 | 
 91 |     return int_area / un_area
 92 | 
 93 | 
 94 | def image_stats(pred_bboxes, scores, gt_bboxes, thresholds, iou_threshold=.5):
 95 |     ious = bbox_iou(pred_bboxes, gt_bboxes)
 96 | 
 97 |     true_positives, false_positives = \
 98 |         image_positives_stats(ious, scores, thresholds, iou_threshold)
 99 | 
100 |     false_negatives = image_false_negatives(ious, scores, thresholds,
101 |                                             iou_threshold=iou_threshold)
102 | 
103 |     stats = np.hstack((true_positives, false_positives, false_negatives))
104 | 
105 |     return stats
106 | 
107 | 
108 | def image_positives_stats(
109 |         ious: np.ndarray,
110 |         scores,
111 |         thresholds,
112 |         iou_threshold
113 | ) -> Tuple[np.ndarray, np.ndarray]:
114 |     pred_bbox_max_iou = np.max(ious, axis=1, initial=0)
115 | 
116 |     potential_tp = pred_bbox_max_iou >= iou_threshold
117 |     potential_fp = ~potential_tp
118 | 
119 |     mask: np.ndarray = thresholds[:, np.newaxis] <= scores[np.newaxis, :]
120 |     true_positives = mask.compress(potential_tp, axis=1).sum(axis=1)
121 |     false_positives = mask.compress(potential_fp, axis=1).sum(axis=1)
122 | 
123 |     return true_positives, false_positives
124 | 
125 | 
126 | def image_false_negatives(
127 |         ious: np.ndarray,
128 |         scores,
129 |         thresholds,
130 |         iou_threshold
131 | ):
132 |     n_pred, n_gt = ious.shape
133 | 
134 |     if n_gt == 0:
135 |         return np.zeros(thresholds.shape)
136 | 
137 |     if len(thresholds) == 0 or n_pred == 0:
138 |         return np.full(thresholds.shape, n_gt)
139 | 
140 |     gt_max_iou_idx = ious.argmax(axis=0)
141 | 
142 |     always_fn = \
143 |         ious[gt_max_iou_idx, np.arange(len(gt_max_iou_idx))] < iou_threshold
144 | 
145 |     gt_bbox_max_iou_bbox_score = \
146 |         scores.take(gt_max_iou_idx.compress(~always_fn))
147 |     fn = (thresholds[:, np.newaxis]
148 |           > gt_bbox_max_iou_bbox_score[np.newaxis, :]).sum(axis=1)
149 | 
150 |     return always_fn.sum() + fn
151 | 
152 | 
153 | def class_agnostic_mean_ap(
154 |         pred_bboxes, pred_bbox_score, gt_bboxes,
155 |         sort_scores=True, iou_threshold=0.9
156 | ):
157 |     if len(pred_bboxes):
158 |         pass
159 | 
160 |     thresholds = np.concatenate(pred_bbox_score)
161 |     if sort_scores:
162 |         thresholds = np.sort(thresholds)[::-1]
163 | 
164 |     per_item_stats = [
165 |         image_stats(img_pred_bboxes.reshape(-1, 4), img_scores,
166 |                     img_gt_bboxes.reshape(-1, 4), thresholds, iou_threshold)
167 |         for img_pred_bboxes, img_scores, img_gt_bboxes
168 |         in zip(pred_bboxes, pred_bbox_score, gt_bboxes)
169 |     ]
170 | 
171 |     tp, fp, fn = np.hsplit(np.sum(per_item_stats, axis=0), 3)
172 | 
173 |     all_real_positives = tp + fn
174 |     all_real_positives[all_real_positives == 0] = 1
175 | 
176 |     recall = tp / all_real_positives
177 | 
178 |     all_pred_positives = tp + fp
179 |     all_pred_positives[all_pred_positives == 0] = 1
180 | 
181 |     precision = tp / all_pred_positives
182 | 
183 |     precisions = []
184 |     for recall_threshold in np.linspace(0, 1, 11):
185 |         precisions.append(
186 |             np.max(precision[recall <= recall_threshold], initial=0))
187 | 
188 |     mAP = np.mean(precisions) if len(precisions) > 0 else 0
189 | 
190 |     return mAP
191 | 


--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResnetCenterNet
2 | 


--------------------------------------------------------------------------------
/src/models/centernet.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Callable
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | 
 7 | class CenterNet(nn.Module):
 8 |     def __init__(
 9 |         self,
10 |         num_classes: int,
11 |         model_fn: Callable,
12 |         down_ratio: int = 1,
13 |         embedding_dim: int = 128,
14 |         model_params: dict = None,
15 |         backbone_key: str = None
16 |     ):
17 |         super().__init__()
18 |         self.num_classes = num_classes
19 |         self.embedding_dim = embedding_dim
20 | 
21 |         model_params = model_params or {}
22 |         self.backbone = model_fn(**model_params)
23 |         self.backbone_key = backbone_key
24 | 
25 |         self.down_sampler = nn.Conv2d(embedding_dim, embedding_dim, kernel_size=(3, 3),
26 |                                       padding=1, stride=down_ratio, bias=True)
27 | 
28 |         self.head_heatmap = nn.Conv2d(embedding_dim, self.num_classes, kernel_size=(3, 3),
29 |                                       padding=1, bias=True)
30 |         self.head_heatmap.bias.data.fill_(-4.)
31 |         self.head_width_height = nn.Conv2d(embedding_dim, 2, kernel_size=(3, 3), padding=1, bias=True)
32 |         self.head_offset_regularizer = nn.Conv2d(embedding_dim, 2, kernel_size=(3, 3), padding=1, bias=True)
33 | 
34 |     def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
35 |         value = self.backbone(x)
36 |         if self.backbone_key is not None:
37 |             value = value[self.backbone_key]
38 | 
39 |         features = torch.relu_(self.down_sampler(torch.relu_(value)))
40 | 
41 |         value = {
42 |             "hm": self.head_heatmap(features),
43 |             "wh": self.head_width_height(features),
44 |             "reg": self.head_offset_regularizer(features),
45 |         }
46 |         return value
47 | 
48 |     def predict(self, x: torch.Tensor):
49 |         """
50 |         Method to trace
51 |         """
52 |         value = self.forward(x)
53 |         return value["hm"], value["wh"], value["reg"]
54 | 
55 | 
56 | __all__ = ["CenterNet"]
57 | 


--------------------------------------------------------------------------------
/src/models/resnet.py:
--------------------------------------------------------------------------------
 1 | from catalyst.contrib.models import segmentation
 2 | 
 3 | from .centernet import CenterNet
 4 | 
 5 | 
 6 | class ResnetCenterNet(CenterNet):
 7 |     def __init__(
 8 |         self,
 9 |         num_classes: int,
10 |         down_ratio: int = 1,
11 |         embedding_dim: int = 128,
12 |         arch: str = "ResnetFPNUnet",
13 |         backbone_params: dict = None,
14 |     ):
15 | 
16 |         model_fn = segmentation.__dict__[arch]
17 |         backbone_params = backbone_params or {}
18 |         model_params = {"num_classes": embedding_dim, **backbone_params}
19 |         super().__init__(
20 |             num_classes=num_classes,
21 |             model_fn=model_fn,
22 |             down_ratio=down_ratio,
23 |             embedding_dim=embedding_dim,
24 |             model_params=model_params
25 |         )
26 | 


--------------------------------------------------------------------------------
/src/transforms.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import albumentations as A
 3 | 
 4 | 
 5 | BBOX_PARAMS = dict(
 6 |     format="pascal_voc",
 7 |     min_visibility=0.2,
 8 |     label_fields=["labels"],
 9 | )
10 | 
11 | 
12 | def pre_transform(image_size: int = 512):
13 |     result = [
14 |         A.LongestMaxSize(image_size),
15 |         A.PadIfNeeded(
16 |             min_height=image_size,
17 |             min_width=image_size,
18 |             border_mode=cv2.BORDER_CONSTANT,
19 |             value=(0, 0, 0)),
20 |     ]
21 | 
22 |     return A.Compose(result, bbox_params=BBOX_PARAMS)
23 | 
24 | 
25 | def augmentations(image_size: int):
26 |     channel_augs = [
27 |         A.HueSaturationValue(p=0.5),
28 |         A.ChannelShuffle(p=0.5),
29 |     ]
30 | 
31 |     result = [
32 |         # *pre_transform(image_size),
33 |         A.OneOf([
34 |             A.IAAAdditiveGaussianNoise(),
35 |             A.GaussNoise(),
36 |         ], p=0.5),
37 |         A.OneOf([
38 |             A.MotionBlur(blur_limit=3, p=0.7),
39 |             A.MedianBlur(blur_limit=3, p=1.0),
40 |             A.Blur(blur_limit=3, p=0.7),
41 |         ], p=0.5),
42 |         A.OneOf(channel_augs),
43 |         A.OneOf([
44 |             A.CLAHE(clip_limit=2),
45 |             A.IAASharpen(),
46 |             A.IAAEmboss(),
47 |         ], p=0.5),
48 |         A.RandomBrightnessContrast(
49 |             brightness_limit=0.5,
50 |             contrast_limit=0.5,
51 |             p=0.5
52 |         ),
53 |         A.RandomGamma(p=0.5),
54 |         A.OneOf([
55 |             A.MedianBlur(p=0.5),
56 |             A.MotionBlur(p=0.5)
57 |         ]),
58 |         A.RandomGamma(gamma_limit=(85, 115), p=0.5),
59 |     ]
60 |     return A.Compose(result, bbox_params=BBOX_PARAMS)
61 | 
62 | 
63 | def train_transform(image_size: int):
64 |     result = A.Compose([
65 |         *pre_transform(image_size),
66 |         *augmentations(image_size),
67 |     ], bbox_params=BBOX_PARAMS)
68 |     return result
69 | 
70 | 
71 | def valid_transform(image_size: int):
72 |     result = A.Compose([
73 |         *pre_transform(image_size),
74 |     ], bbox_params=BBOX_PARAMS)
75 |     return result
76 | 
77 | 
78 | def infer_transform(image_size: int):
79 |     result = A.Compose(pre_transform(image_size))
80 |     return result
81 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | 
5 | def detach(tensor: torch.Tensor) -> np.ndarray:
6 |     return tensor.detach().cpu().numpy()
7 | 


--------------------------------------------------------------------------------