├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── configs
└── centernet_detection_config.yml
├── requirements.txt
└── src
├── __init__.py
├── callbacks.py
├── coco.py
├── data_preparation.py
├── dataset.py
├── debug.py
├── experiment.py
├── losses
├── __init__.py
└── ctdet_loss.py
├── metrics.py
├── models
├── __init__.py
├── centernet.py
└── resnet.py
├── transforms.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | data/
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | build/
15 | builds/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # dotenv
86 | .env
87 |
88 | # virtualenv
89 | .venv
90 | venv/
91 | ENV/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 |
107 |
108 | .DS_Store
109 | .idea
110 | .code
111 |
112 | *.bak
113 | *.csv
114 | *.tsv
115 | *.ipynb
116 |
117 | tmp/
118 | logs/
119 | data/
120 | !catalyst/data
121 | examples/data/
122 | # Examples - mock data
123 | !examples/distilbert_text_classification/input/*.csv
124 | !examples/_tests_distilbert_text_classification/input/*.csv
125 | examples/logs/
126 | notebooks/
127 |
128 | _nogit*
129 |
130 | ### VisualStudioCode ###
131 | .vscode/*
132 | .vscode/settings.json
133 | !.vscode/tasks.json
134 | !.vscode/launch.json
135 | !.vscode/extensions.json
136 |
137 | ### VisualStudioCode Patch ###
138 | # Ignore all local history of files
139 | .history
140 |
141 | # End of https://www.gitignore.io/api/visualstudiocode
142 |
143 |
144 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG CATALYST_VERSION="19.11"
2 |
3 | # "-fp16" or ""
4 | ARG CATALYST_WITH_FP16="-fp16"
5 |
6 | FROM catalystteam/catalyst:${CATALYST_VERSION}${CATALYST_WITH_FP16}
7 | # Set up locale to prevent bugs with encoding
8 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
9 |
10 | COPY requirements.txt .
11 | RUN pip install -r requirements.txt --no-cache-dir && rm requirements.txt
12 |
13 | CMD mkdir -p /workspace
14 | WORKDIR /workspace
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2019 Sergey Kolesnikov. All rights reserved.
2 |
3 | Apache License
4 | Version 2.0, January 2004
5 | http://www.apache.org/licenses/
6 |
7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8 |
9 | 1. Definitions.
10 |
11 | "License" shall mean the terms and conditions for use, reproduction,
12 | and distribution as defined by Sections 1 through 9 of this document.
13 |
14 | "Licensor" shall mean the copyright owner or entity authorized by
15 | the copyright owner that is granting the License.
16 |
17 | "Legal Entity" shall mean the union of the acting entity and all
18 | other entities that control, are controlled by, or are under common
19 | control with that entity. For the purposes of this definition,
20 | "control" means (i) the power, direct or indirect, to cause the
21 | direction or management of such entity, whether by contract or
22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
23 | outstanding shares, or (iii) beneficial ownership of such entity.
24 |
25 | "You" (or "Your") shall mean an individual or Legal Entity
26 | exercising permissions granted by this License.
27 |
28 | "Source" form shall mean the preferred form for making modifications,
29 | including but not limited to software source code, documentation
30 | source, and configuration files.
31 |
32 | "Object" form shall mean any form resulting from mechanical
33 | transformation or translation of a Source form, including but
34 | not limited to compiled object code, generated documentation,
35 | and conversions to other media types.
36 |
37 | "Work" shall mean the work of authorship, whether in Source or
38 | Object form, made available under the License, as indicated by a
39 | copyright notice that is included in or attached to the work
40 | (an example is provided in the Appendix below).
41 |
42 | "Derivative Works" shall mean any work, whether in Source or Object
43 | form, that is based on (or derived from) the Work and for which the
44 | editorial revisions, annotations, elaborations, or other modifications
45 | represent, as a whole, an original work of authorship. For the purposes
46 | of this License, Derivative Works shall not include works that remain
47 | separable from, or merely link (or bind by name) to the interfaces of,
48 | the Work and Derivative Works thereof.
49 |
50 | "Contribution" shall mean any work of authorship, including
51 | the original version of the Work and any modifications or additions
52 | to that Work or Derivative Works thereof, that is intentionally
53 | submitted to Licensor for inclusion in the Work by the copyright owner
54 | or by an individual or Legal Entity authorized to submit on behalf of
55 | the copyright owner. For the purposes of this definition, "submitted"
56 | means any form of electronic, verbal, or written communication sent
57 | to the Licensor or its representatives, including but not limited to
58 | communication on electronic mailing lists, source code control systems,
59 | and issue tracking systems that are managed by, or on behalf of, the
60 | Licensor for the purpose of discussing and improving the Work, but
61 | excluding communication that is conspicuously marked or otherwise
62 | designated in writing by the copyright owner as "Not a Contribution."
63 |
64 | "Contributor" shall mean Licensor and any individual or Legal Entity
65 | on behalf of whom a Contribution has been received by Licensor and
66 | subsequently incorporated within the Work.
67 |
68 | 2. Grant of Copyright License. Subject to the terms and conditions of
69 | this License, each Contributor hereby grants to You a perpetual,
70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71 | copyright license to reproduce, prepare Derivative Works of,
72 | publicly display, publicly perform, sublicense, and distribute the
73 | Work and such Derivative Works in Source or Object form.
74 |
75 | 3. Grant of Patent License. Subject to the terms and conditions of
76 | this License, each Contributor hereby grants to You a perpetual,
77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78 | (except as stated in this section) patent license to make, have made,
79 | use, offer to sell, sell, import, and otherwise transfer the Work,
80 | where such license applies only to those patent claims licensable
81 | by such Contributor that are necessarily infringed by their
82 | Contribution(s) alone or by combination of their Contribution(s)
83 | with the Work to which such Contribution(s) was submitted. If You
84 | institute patent litigation against any entity (including a
85 | cross-claim or counterclaim in a lawsuit) alleging that the Work
86 | or a Contribution incorporated within the Work constitutes direct
87 | or contributory patent infringement, then any patent licenses
88 | granted to You under this License for that Work shall terminate
89 | as of the date such litigation is filed.
90 |
91 | 4. Redistribution. You may reproduce and distribute copies of the
92 | Work or Derivative Works thereof in any medium, with or without
93 | modifications, and in Source or Object form, provided that You
94 | meet the following conditions:
95 |
96 | (a) You must give any other recipients of the Work or
97 | Derivative Works a copy of this License; and
98 |
99 | (b) You must cause any modified files to carry prominent notices
100 | stating that You changed the files; and
101 |
102 | (c) You must retain, in the Source form of any Derivative Works
103 | that You distribute, all copyright, patent, trademark, and
104 | attribution notices from the Source form of the Work,
105 | excluding those notices that do not pertain to any part of
106 | the Derivative Works; and
107 |
108 | (d) If the Work includes a "NOTICE" text file as part of its
109 | distribution, then any Derivative Works that You distribute must
110 | include a readable copy of the attribution notices contained
111 | within such NOTICE file, excluding those notices that do not
112 | pertain to any part of the Derivative Works, in at least one
113 | of the following places: within a NOTICE text file distributed
114 | as part of the Derivative Works; within the Source form or
115 | documentation, if provided along with the Derivative Works; or,
116 | within a display generated by the Derivative Works, if and
117 | wherever such third-party notices normally appear. The contents
118 | of the NOTICE file are for informational purposes only and
119 | do not modify the License. You may add Your own attribution
120 | notices within Derivative Works that You distribute, alongside
121 | or as an addendum to the NOTICE text from the Work, provided
122 | that such additional attribution notices cannot be construed
123 | as modifying the License.
124 |
125 | You may add Your own copyright statement to Your modifications and
126 | may provide additional or different license terms and conditions
127 | for use, reproduction, or distribution of Your modifications, or
128 | for any such Derivative Works as a whole, provided Your use,
129 | reproduction, and distribution of the Work otherwise complies with
130 | the conditions stated in this License.
131 |
132 | 5. Submission of Contributions. Unless You explicitly state otherwise,
133 | any Contribution intentionally submitted for inclusion in the Work
134 | by You to the Licensor shall be under the terms and conditions of
135 | this License, without any additional terms or conditions.
136 | Notwithstanding the above, nothing herein shall supersede or modify
137 | the terms of any separate license agreement you may have executed
138 | with Licensor regarding such Contributions.
139 |
140 | 6. Trademarks. This License does not grant permission to use the trade
141 | names, trademarks, service marks, or product names of the Licensor,
142 | except as required for reasonable and customary use in describing the
143 | origin of the Work and reproducing the content of the NOTICE file.
144 |
145 | 7. Disclaimer of Warranty. Unless required by applicable law or
146 | agreed to in writing, Licensor provides the Work (and each
147 | Contributor provides its Contributions) on an "AS IS" BASIS,
148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 | implied, including, without limitation, any warranties or conditions
150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 | PARTICULAR PURPOSE. You are solely responsible for determining the
152 | appropriateness of using or redistributing the Work and assume any
153 | risks associated with Your exercise of permissions under this License.
154 |
155 | 8. Limitation of Liability. In no event and under no legal theory,
156 | whether in tort (including negligence), contract, or otherwise,
157 | unless required by applicable law (such as deliberate and grossly
158 | negligent acts) or agreed to in writing, shall any Contributor be
159 | liable to You for damages, including any direct, indirect, special,
160 | incidental, or consequential damages of any character arising as a
161 | result of this License or out of the use or inability to use the
162 | Work (including but not limited to damages for loss of goodwill,
163 | work stoppage, computer failure or malfunction, or any and all
164 | other commercial damages or losses), even if such Contributor
165 | has been advised of the possibility of such damages.
166 |
167 | 9. Accepting Warranty or Additional Liability. While redistributing
168 | the Work or Derivative Works thereof, You may choose to offer,
169 | and charge a fee for, acceptance of support, warranty, indemnity,
170 | or other liability obligations and/or rights consistent with this
171 | License. However, in accepting such obligations, You may act only
172 | on Your own behalf and on Your sole responsibility, not on behalf
173 | of any other Contributor, and only if You agree to indemnify,
174 | defend, and hold each Contributor harmless for any liability
175 | incurred by, or claims asserted against, such Contributor by reason
176 | of your accepting any such warranty or additional liability.
177 |
178 | END OF TERMS AND CONDITIONS
179 |
180 | APPENDIX: How to apply the Apache License to your work.
181 |
182 | To apply the Apache License to your work, attach the following
183 | boilerplate notice, with the fields enclosed by brackets "[]"
184 | replaced with your own identifying information. (Don't include
185 | the brackets!) The text should be enclosed in the appropriate
186 | comment syntax for the file format. We also recommend that a
187 | file or class name and description of purpose be included on the
188 | same "printed page" as the copyright notice for easier
189 | identification within third-party archives.
190 |
191 | Copyright [yyyy] [name of copyright owner]
192 |
193 | Licensed under the Apache License, Version 2.0 (the "License");
194 | you may not use this file except in compliance with the License.
195 | You may obtain a copy of the License at
196 |
197 | http://www.apache.org/licenses/LICENSE-2.0
198 |
199 | Unless required by applicable law or agreed to in writing, software
200 | distributed under the License is distributed on an "AS IS" BASIS,
201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 | See the License for the specific language governing permissions and
203 | limitations under the License.
204 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: docker clean
2 |
3 | docker: ./requirements.txt
4 | docker build -t catalyst-detection:latest . -f ./Dockerfile --no-cache
5 |
6 | clean:
7 | rm -rf build/
8 | docker rmi -f catalyst-detection:latest
9 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://github.com/catalyst-team/catalyst)
4 |
5 | **Accelerated DL R&D**
6 |
7 | [](http://66.248.205.49:8111/project.html?projectId=Catalyst&tab=projectOverview&guest=1)
8 | [](https://www.codefactor.io/repository/github/catalyst-team/catalyst)
9 | [](https://pypi.org/project/catalyst/)
10 | [](https://catalyst-team.github.io/catalyst/index.html)
11 | [](https://pepy.tech/project/catalyst)
12 |
13 | [](https://twitter.com/CatalystTeam)
14 | [](https://t.me/catalyst_team)
15 | [](https://join.slack.com/t/catalyst-team-devs/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw)
16 | [](https://github.com/catalyst-team/catalyst/graphs/contributors)
17 |
18 |
19 |
20 | PyTorch framework for Deep Learning research and development.
21 | It was developed with a focus on reproducibility,
22 | fast experimentation and code/ideas reusing.
23 | Being able to research/develop something new,
24 | rather than write another regular train loop.
25 | Break the cycle - use the Catalyst!
26 |
27 | Project [manifest](https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md). Part of [PyTorch Ecosystem](https://pytorch.org/ecosystem/). Part of [Catalyst Ecosystem](https://docs.google.com/presentation/d/1D-yhVOg6OXzjo9K_-IS5vSHLPIUxp1PEkFGnpRcNCNU/edit?usp=sharing):
28 | - [Alchemy](https://github.com/catalyst-team/alchemy) - Experiments logging & visualization
29 | - [Catalyst](https://github.com/catalyst-team/catalyst) - Accelerated Deep Learning Research and Development
30 | - [Reaction](https://github.com/catalyst-team/reaction) - Convenient Deep Learning models serving
31 |
32 | [Catalyst at AI Landscape](https://landscape.lfai.foundation/selected=catalyst).
33 |
34 | ---
35 |
36 | # Catalyst.Detection [](https://travis-ci.com/catalyst-team/detection) [](https://github.com/catalyst-team/detection/graphs/contributors)
37 |
38 | > *Note: this repo uses advanced Catalyst Config API and could be a bit out-of-day right now.
39 | > Use [Catalyst's minimal examples section](https://github.com/catalyst-team/catalyst#minimal-examples) for a starting point and up-to-day use cases, please.*
40 |
41 | Based on [Objects as points](https://arxiv.org/abs/1904.07850) article by [Xingyi Zhou](https://arxiv.org/search/cs?searchtype=author&query=Zhou%2C+X), [Dequan Wang](https://arxiv.org/search/cs?searchtype=author&query=Wang%2C+D), [Philipp Krähenbühl](https://arxiv.org/search/cs?searchtype=author&query=Kr%C3%A4henb%C3%BChl%2C+P)
42 |
43 | ### Training in your dataset
44 | 0. Install requirements ```pip install -r requirements.txt```
45 |
46 | 1. Copy all images to one directory or two different directories for train and validation.
47 |
48 | 1. Create ```markup_train.json``` as json file in MSCOCO format using ```COCODetectionFactory``` from ```data_preparation.py```. This class may be copied to your dataset generator. See documentation in code comments. If your dataset are already in this format, go to next step.
49 |
50 | 1. Specify perameters and in ```config/centernet_detection_config.yml```.
51 |
52 | 1. Run catalyst ```catalyst-dl run --config=./configs/centernet_detection_config.yml```
53 |
54 | 1. When you change dataset, you must delete cache files ```markup_*.json.cache``` because this files contain preprocessed bounding boxes info.
55 |
--------------------------------------------------------------------------------
/configs/centernet_detection_config.yml:
--------------------------------------------------------------------------------
1 | shared:
2 | classes: &classes ["person"]
3 | num_classes: &num_classes 1
4 |
5 | image_size: &image_size [224, 224]
6 | down_ratio: &down_ratio 4 # (height of input image / height of predicted heatmap)
7 | max_objs: &max_objs 15 # max objects detected per image, passed to DetectorCallback
8 |
9 | num_epochs: &num_epochs 200
10 | lr: &lr 0.001
11 | weight_decay: &wd 0.0001
12 |
13 | hm_weight: &hm_weight 1.0
14 | wh_weight: &wh_weight 10.0
15 | off_weight: &off_weight 10.0
16 |
17 | model_params:
18 | model: ResnetCenterNet
19 | num_classes: *num_classes
20 | embedding_dim: 128
21 | arch: "ResnetFPNUnet"
22 | down_ratio: *down_ratio
23 | backbone_params:
24 | arch: resnet18
25 | pretrained: true
26 |
27 | runner_params:
28 | input_key: "input"
29 | output_key: null
30 |
31 | args:
32 | expdir: src
33 | logdir: logs
34 |
35 | stages:
36 | state_params:
37 | main_metric: &main_metric "loss"
38 | minimize_metric: &minimize_metric true
39 |
40 | data_params:
41 | num_workers: 0
42 | batch_size: 5
43 | max_objs: *max_objs
44 | down_ratio: *down_ratio
45 |
46 | # default values, will be used if something aren't specified
47 | annotation_file: ./data/annotation.json
48 | images_dir: ./data/images
49 |
50 | # You may specify next parameters, data source will be overwritten
51 | #train_annotation_file: ./data_train/annotation.json
52 | #valid_annotation_file: ./data_valid/annotation.json
53 | #train_images_dir: ./data_train/images/
54 | #valid_images_dir: ./data_valid/images
55 |
56 | num_classes: *num_classes
57 | image_size: *image_size
58 |
59 | sampler_params:
60 | drop_last: true
61 | shuffle: per_epoch
62 |
63 | criterion_params:
64 | _key_value: True
65 |
66 | l_hm:
67 | criterion: CenterNetDetectionLoss
68 | l1_wh:
69 | criterion: RegL1Loss
70 | l1_reg:
71 | criterion: RegL1Loss
72 |
73 | scheduler_params:
74 | scheduler: MultiStepLR
75 | milestones: [12, 40]
76 | gamma: 0.8
77 |
78 | stage1:
79 | state_params:
80 | num_epochs: *num_epochs
81 |
82 | optimizer_params:
83 | optimizer: Lookahead
84 | base_optimizer_params:
85 | optimizer: RAdam
86 | lr: *lr
87 | weight_decay: *wd
88 | no_bias_weight_decay: True
89 |
90 | callbacks_params:
91 | loss_hm:
92 | callback: CriterionCallback
93 | input_key: hm
94 | output_key: hm
95 | prefix: loss_hm
96 | criterion_key: l_hm
97 | multiplier: *hm_weight
98 |
99 | loss_wh:
100 | callback: CriterionCallback
101 | input_key:
102 | reg_mask: targets_mask_key
103 | ind: targets_ind_key
104 | wh: targets_key
105 | output_key:
106 | wh: outputs_key
107 | prefix: loss_wh
108 | criterion_key: l1_wh
109 | multiplier: *wh_weight
110 |
111 | loss_reg:
112 | callback: CriterionCallback
113 | input_key:
114 | reg_mask: targets_mask_key
115 | ind: targets_ind_key
116 | reg: targets_key
117 | output_key:
118 | reg: outputs_key
119 | prefix: loss_reg
120 | criterion_key: l1_reg
121 | multiplier: *off_weight
122 |
123 | loss_aggregator:
124 | callback: CriterionAggregatorCallback
125 | prefix: &aggregated_loss loss
126 | loss_keys: ["loss_hm", "loss_wh", "loss_reg"]
127 | loss_aggregate_fn: "sum"
128 | multiplier: 1.0
129 |
130 | optimizer:
131 | callback: OptimizerCallback
132 | grad_clip_params:
133 | func: clip_grad_value_
134 | clip_value: 5.0
135 | loss_key: *aggregated_loss
136 |
137 | scheduler:
138 | callback: SchedulerCallback
139 | reduce_metric: *main_metric
140 |
141 | decoder:
142 | callback: DecoderCallback
143 | down_ratio: *down_ratio
144 | max_objs: *max_objs
145 |
146 | # mAP:
147 | # callback: MeanAPCallback
148 | # num_classes: *num_classes
149 |
150 | saver:
151 | callback: CheckpointCallback
152 | save_n_best: 3
153 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | albumentations==0.2.3
2 | packaging==19.2
3 | numpy==1.17.4
4 | pycocotools==2.0.0
5 | torch==1.3.0
6 | catalyst==20.1
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # from .runner import Runner
3 | from catalyst.dl import SupervisedRunner as Runner
4 | from catalyst.dl import registry
5 |
6 | from .experiment import Experiment
7 |
8 | from .callbacks import DecoderCallback, MeanAPCallback
9 | from .losses import CenterNetDetectionLoss, \
10 | RegL1Loss, MSEIndLoss, BCEIndLoss, FocalIndLoss
11 | from . import models
12 |
13 |
14 | registry.Criterion(CenterNetDetectionLoss)
15 | registry.Criterion(RegL1Loss)
16 | registry.Criterion(MSEIndLoss)
17 | registry.Criterion(BCEIndLoss)
18 | registry.Criterion(FocalIndLoss)
19 |
20 | registry.Callback(DecoderCallback)
21 | registry.Callback(MeanAPCallback)
22 |
23 | registry.MODELS.add_from_module(models)
24 |
--------------------------------------------------------------------------------
/src/callbacks.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 |
3 | import numpy as np
4 | from catalyst.dl import Callback, RunnerState, CallbackOrder, CriterionCallback
5 | from catalyst.utils import detach
6 |
7 | from .losses.ctdet_loss import decode_centernet_predictions
8 | from .metrics import class_agnostic_mean_ap, calculate_map, construct_mAP_list_from_bboxes
9 |
10 |
11 | class DecoderCallback(Callback):
12 | def __init__(self, down_ratio: int = 1, max_objs: int = 80):
13 | super().__init__(order=CallbackOrder.Metric - 1)
14 | self.down_ratio = down_ratio
15 | self.max_objs = max_objs
16 |
17 | def on_batch_end(self, state: RunnerState):
18 | if state.loader_name.startswith("valid"):
19 | detections = decode_centernet_predictions(
20 | state.output["hm"],
21 | state.output["wh"],
22 | state.output["reg"],
23 | K=self.max_objs
24 | )
25 | detections = detach(detections).reshape(
26 | (detections.shape[0], -1, detections.shape[2])
27 | )
28 | detections[:, :, :4] *= self.down_ratio
29 |
30 | bboxes = detections[:, :, :4].astype(int)
31 | scores = detections[:, :, 4]
32 | labels = detections[:, :, 5].astype(int)
33 |
34 | result = dict(
35 | bboxes=bboxes,
36 | labels=labels,
37 | scores=scores,
38 | )
39 | state.output.update(result)
40 |
41 |
42 | class MeanAPCallback(Callback):
43 | def __init__(
44 | self,
45 | num_classes: int = None,
46 | prefix: str = "mAP",
47 | bboxes_key: str = "bboxes",
48 | scores_key: str = "scores",
49 | labels_key: str = "labels",
50 | iou_threshold: float = 0.9
51 | ):
52 | super().__init__(order=CallbackOrder.Metric)
53 | self.prefix = prefix
54 | self.classes = list(range(num_classes))
55 | self.mean_mAP = []
56 |
57 | self.bboxes_key = bboxes_key
58 | self.scores_key = scores_key
59 | self.labels_key = labels_key
60 | # List (dictionary value) contains of pairs of correct/not correct bboxes and model confidence by class
61 | self.classes_predictions: Dict[str, List[(bool, float)]] = {c: [] for c in range(num_classes)}
62 | self.iou_threshold = iou_threshold
63 |
64 | def on_batch_end(self, state: RunnerState):
65 | if state.loader_name.startswith("valid"):
66 | bboxes = state.output[self.bboxes_key]
67 | scores = state.output[self.scores_key]
68 | labels = state.output[self.labels_key]
69 |
70 | gt_bboxes = [
71 | np.array(item_bboxes.detach().cpu())
72 | for item_bboxes in state.input[self.bboxes_key]]
73 | gt_labels = [
74 | np.array(item_label.detach().cpu())
75 | for item_label in state.input[self.labels_key]
76 | ]
77 |
78 | for i, _class in enumerate(self.classes):
79 | predict_bboxes_batch = []
80 | predict_scores_batch = []
81 |
82 | target_bboxes_batch = []
83 | for batch_elem in zip(bboxes, scores, labels, gt_bboxes, gt_labels):
84 | bboxes_, scores_, labels_, gt_bboxes_, gt_labels_ = batch_elem
85 |
86 | bboxes_ = bboxes_[scores_ > 0]
87 | labels_ = labels_[scores_ > 0]
88 | scores_ = scores_[scores_ > 0]
89 |
90 | mask = (labels_ == i)
91 | predict_bboxes_batch.append(bboxes_[mask])
92 | predict_scores_batch.append(scores_[mask])
93 |
94 | gt_mask = gt_labels_ == i
95 | target_bboxes_batch.append(gt_bboxes_[gt_mask])
96 |
97 | if len(predict_bboxes_batch) != 0:
98 | per_box_correctness = [
99 | construct_mAP_list_from_bboxes(img_pred_bboxes.reshape(-1, 4), img_scores,
100 | img_gt_bboxes.reshape(-1, 4), self.iou_threshold)
101 | for img_pred_bboxes, img_scores, img_gt_bboxes
102 | in zip(predict_bboxes_batch, predict_scores_batch, target_bboxes_batch)
103 | ]
104 | for answers in per_box_correctness:
105 | self.classes_predictions[_class].extend(answers)
106 |
107 | mean_value = class_agnostic_mean_ap(bboxes, scores, gt_bboxes)
108 | self.mean_mAP.append(mean_value)
109 |
110 | def on_loader_end(self, state: RunnerState):
111 | if state.loader_name.startswith("valid"):
112 | all_predictions = []
113 | for class_name, predictions in self.classes_predictions.items():
114 | # metric_name = f"{self.prefix}/{class_name}"
115 | # mAP = calculate_map(predictions)
116 | # state.metrics.epoch_values[state.loader_name][metric_name] = mAP
117 | all_predictions.extend(predictions)
118 |
119 | # mean_AP = calculate_map(all_predictions)
120 | # state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean'] = mean_AP
121 |
122 | ap_with_false_negatives = calculate_map(all_predictions, use_false_negatives=True)
123 | state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean_with_fn'] = ap_with_false_negatives
124 |
125 | # old mAP
126 | # state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean_old'] = np.mean(self.mean_mAP)
127 | self.mean_mAP = []
128 | self.classes_predictions: Dict[str, List[(bool, float)]] = {c: [] for c in self.classes}
129 |
130 |
131 | __all__ = ["DecoderCallback", "MeanAPCallback"]
132 |
--------------------------------------------------------------------------------
/src/coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 | import pickle
5 | from typing import Any
6 |
7 | from pycocotools.coco import COCO
8 | from torch.utils.data import Dataset
9 |
10 |
11 | class DetectionMSCOCODataset(Dataset):
12 | def __init__(self, annotation_file: str, image_dir: str):
13 |
14 | self._annotation_file = annotation_file
15 | self._image_dir = image_dir
16 | self._cache_file = self._annotation_file + ".cache"
17 |
18 | self._coco = COCO(self._annotation_file)
19 |
20 | self._img_ids = self._coco.getImgIds()
21 | self._cat_ids = self._coco.getCatIds()
22 | self._ann_ids = self._coco.getAnnIds()
23 |
24 | self._data = "coco"
25 | self._classes = {
26 | ind: cat_id for ind, cat_id in enumerate(self._cat_ids)
27 | }
28 | self._coco_to_class_map = {
29 | value: key for key, value in self._classes.items()
30 | }
31 |
32 | self._load_data()
33 | self._db_inds = np.arange(len(self._image_names))
34 |
35 | self._load_coco_data()
36 |
37 | def _load_data(self):
38 | print("loading from cache file: {}".format(self._cache_file))
39 | if not os.path.exists(self._cache_file):
40 | print("No cache file found...")
41 | self._extract_data()
42 | with open(self._cache_file, "wb") as f:
43 | pickle.dump([self._detections, self._image_names], f)
44 | print("Cache file created")
45 | else:
46 | with open(self._cache_file, "rb") as f:
47 | self._detections, self._image_names = pickle.load(f)
48 |
49 | def _load_coco_data(self):
50 | with open(self._annotation_file, "r") as f:
51 | data = json.load(f)
52 |
53 | coco_ids = self._coco.getImgIds()
54 | eval_ids = {
55 | self._coco.loadImgs(coco_id)[0]["file_name"]: coco_id
56 | for coco_id in coco_ids
57 | }
58 |
59 | self._coco_categories = data["categories"]
60 | self._coco_eval_ids = eval_ids
61 |
62 | def class_name(self, cid):
63 | cat_id = self._classes[cid]
64 | cat = self._coco.loadCats([cat_id])[0]
65 | return cat["name"]
66 |
67 | def _extract_data(self):
68 |
69 | self._image_names = [
70 | self._coco.loadImgs(img_id)[0]["file_name"]
71 | for img_id in self._img_ids
72 | ]
73 | self._detections = {}
74 | for ind, (coco_image_id, image_name) in enumerate(zip(self._img_ids, self._image_names)):
75 | image = self._coco.loadImgs(coco_image_id)[0]
76 | bboxes = []
77 | categories = []
78 |
79 | for cat_id in self._cat_ids:
80 | annotation_ids = self._coco.getAnnIds(imgIds=image["id"], catIds=cat_id)
81 | annotations = self._coco.loadAnns(annotation_ids)
82 | category = self._coco_to_class_map[cat_id]
83 | for annotation in annotations:
84 | bbox = np.array(annotation["bbox"])
85 | bbox[[2, 3]] += bbox[[0, 1]]
86 | bboxes.append(bbox)
87 |
88 | categories.append(category)
89 |
90 | self._detections[image_name] = [{
91 | 'bbox': bbox.astype(np.float32),
92 | 'category_id': category,
93 | 'category_name': self.class_name(category)
94 | } for bbox, category in zip(bboxes, categories)]
95 |
96 | def __getitem__(self, ind: int) -> Any:
97 | image_name = self._image_names[ind]
98 |
99 | return {
100 | 'image_name': os.path.join(self._image_dir, image_name),
101 | 'detections': self._detections[image_name]
102 | }
103 |
104 | def __len__(self) -> int:
105 | return len(self._img_ids)
106 |
107 | def get_num_classes(self) -> int:
108 | return len(self._cat_ids)
109 |
--------------------------------------------------------------------------------
/src/data_preparation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 |
5 | from typing import Any, List, Optional, Dict, Union
6 |
7 |
8 | class _UniqueIdsController:
9 | def __init__(self):
10 | self._ids = set()
11 | self._last_id = -1
12 |
13 | def has(self, object_id: int) -> bool:
14 | return object_id in self._ids
15 |
16 | def get_new_id(self) -> int:
17 | self._last_id += 1
18 | return self._last_id
19 |
20 | def force_add_id(self, object_id: int) -> bool:
21 | if object_id in self._ids:
22 | return False
23 | else:
24 | self._last_id = max(self._last_id, object_id)
25 | self._ids.add(object_id)
26 | return True
27 |
28 |
29 | class COCODetectionFactory:
30 | """
31 | # Create dataset
32 | dataset = COCODetectionFactory()
33 |
34 | # set info to dataset
35 | dataset.set_info(description='my_dataset', url='http://localhost.com',
36 | version='0.0.1', year=2019, contributor='Avi2011class',
37 | date_created='20.12.2019')
38 |
39 | # add interesting licenses
40 | dataset.add_licence(name='GPLv3', url='https://en.wikipedia.org/wiki/GPL_License')
41 | dataset.add_licence(name='MIT', url='https://en.wikipedia.org/wiki/MIT_License')
42 |
43 | # add categories of objects
44 | dataset.add_category(category_id=10, name='man', supercategory='person') # with fixed id, 10 is returned
45 | dataset.add_category(name='woman', supercategory='person') # with auto selected id, 11 is returned
46 | dataset.add_category(category_id=5, name='child', supercategory='person') # with fixed id, 5 is returned
47 | # dataset.add_category(category_id=5, name='dog', supercategory='not person') will raises RuntimeError,
48 | # id=11 already exists
49 |
50 | # add images to dataset
51 | dataset.add_image(image_id=0, file_name='photo_01.jpg') # with fixed id, 0 is returned
52 | dataset.add_image(file_name='photo_02.jpg') # without fixed id, 1 is returned
53 | # dataset.add_image(image_id=1, file_name='photo_01.jpg') will raise RuntimeError, id is already exists
54 |
55 | # Adds annotations to dataset
56 |
57 | # add annotation to image with image_id=0 and category_id=0
58 | dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_id=11)
59 |
60 | # add annotation to image with auto found image_id=1 and auto found category_id=10
61 | dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0,
62 | image_file_name='photo_02.jpg', category_name='man')
63 |
64 | # add annotation to image with image_id=0 and category_id=102, category=102 may be created in future
65 | dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_id=102)
66 |
67 | # raises RuntimeError because category_name doesn't exists
68 | # dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_name='blabla')
69 |
70 | # save dataset to annotation file
71 | annotation_file_content = dataset.get_json()
72 | with open('annotation_file.json', 'w') as f:
73 | f.write(annotation_file_content)
74 | """
75 |
76 | def __init__(self):
77 | self._output = {
78 | "info": {},
79 | "licenses": [],
80 | "categories": [],
81 | "images": [],
82 | "annotations": []
83 | }
84 | self._licence_ids = _UniqueIdsController()
85 | self._image_ids = _UniqueIdsController()
86 | self._category_ids = _UniqueIdsController()
87 | self._annotation_ids = _UniqueIdsController()
88 |
89 | self._category_names_to_idx = {}
90 | self._image_names_to_idx = {}
91 | self._annotation_names_to_idx = {}
92 |
93 | def set_info(self,
94 | description: str = "",
95 | url: str = "",
96 | version: str = "",
97 | year: Union[str, int] = "",
98 | contributor: str = "",
99 | date_created: str = ""):
100 | """Set information in mscoco format
101 | Args:
102 | description (str): dataset description
103 | url (str): dataset url
104 | version (str): dataset version
105 | year (Union[str, int]): dataset year
106 | contributor (str): contribution info
107 | date_created (str): date
108 |
109 | Return:
110 | COCODetectionFactory: reference to current COCODetectionFactory object
111 | """
112 |
113 | self._output['info'] = {
114 | 'description': description,
115 | 'url': url,
116 | 'version': version,
117 | 'year': year,
118 | 'contributor': contributor,
119 | 'date_created': date_created
120 | }
121 |
122 | return self
123 |
124 | def add_licence(self,
125 | licence_id: Optional[int] = None,
126 | name: str = "",
127 | url: str = "") -> Any:
128 | """Adds license to dataset, dataset may contain more then one license, it will be stored as list
129 |
130 | Args:
131 | licence_id (optional[int]): id of license, must be unique, if None random unique value will be used
132 | name (str): name of license
133 | url(str): url to license
134 |
135 | Returns:
136 | int: id of license
137 | """
138 |
139 | if licence_id is None:
140 | licence_id = self._licence_ids.get_new_id()
141 | elif self._licence_ids.has(licence_id):
142 | raise RuntimeError('License ids must be unique, but \"{}\" already exists'.format(licence_id))
143 |
144 | self._output['licenses'].append({
145 | 'id': licence_id,
146 | 'name': name,
147 | 'url': url
148 | })
149 | return licence_id
150 |
151 | def add_category(self,
152 | category_id: Optional[int] = None,
153 | name: str = "",
154 | supercategory: str = ""):
155 | """ Adds category to dataset
156 |
157 | Args:
158 | category_id (int): id of category, must be unique, if None random unique value will be used
159 | name (str): name of category, must be unique for dataset
160 | supercategory (str): name of supercategory
161 | Returns:
162 | int: id of category
163 | """
164 | if category_id is None:
165 | category_id = self._category_ids.get_new_id()
166 | elif self._category_ids.has(category_id):
167 | raise RuntimeError('Category ids must be unique, but \"{}\" already exists'.format(category_id))
168 |
169 | if name in self._image_names_to_idx:
170 | raise RuntimeError('Category names must be unique, but \"{}\" already exists'.format(name))
171 | self._category_names_to_idx[name] = category_id
172 |
173 | self._output['categories'].append({
174 | 'id': category_id,
175 | 'name': name,
176 | 'supercategory': supercategory
177 | })
178 | return category_id
179 |
180 | def add_image(self,
181 | image_id: Optional[int] = None,
182 | file_name: str = "",
183 | height: Optional[int] = None,
184 | width: Optional[int] = None) -> Any:
185 | """ Adds image to dataset
186 |
187 | Args:
188 | image_id (Optional[int]): id of image, must be unique, if None random unique value will be used
189 | file_name (str): filename where image stored, must be unique for dataset
190 | height (Optional[int]): height of image
191 | width (optional[int]): width of image
192 | Returns:
193 | int: id of image
194 | """
195 |
196 | if image_id is None:
197 | image_id = self._image_ids.get_new_id()
198 | elif self._category_ids.has(image_id):
199 | raise RuntimeError('Image ids must be unique, but \"{}\" already exists'.format(category_id))
200 |
201 | if file_name in self._image_names_to_idx:
202 | raise RuntimeError('Image file names must be unique, but \"{}\" already exists'.format(name))
203 | self._image_names_to_idx[file_name] = image_id
204 |
205 | self._output['images'].append({
206 | 'id': image_id,
207 | 'file_name': file_name,
208 | 'height': height if height is not None else -1,
209 | 'width': width if width is not None else -1
210 | })
211 |
212 | return image_id
213 |
214 | def add_bbox(self,
215 | bbox_left: int = 0,
216 | bbox_top: int = 0,
217 | bbox_width: int = 0,
218 | bbox_height: int = 0,
219 | image_id: Optional[int] = None,
220 | image_file_name: Optional[str] = None,
221 | category_id: Optional[int] = None,
222 | category_name: Optional[str] = None,
223 | iscrowd: bool = False):
224 | """Adds bounding box to image in dataset
225 |
226 | One of image_id and image_file_name must be specified. If both image_id will be used
227 | One of category_id and category_name must be specified. If both category_id will be used
228 |
229 | Args:
230 | bbox_left (int):
231 | bbox_top (int):
232 | bbox_width (int):
233 | bbox_height (int):
234 | image_id (Optional[int]): if None may be computed from image_file_name. Image_id may be unknown
235 | image_file_name (Optional[str]): None or name of file added to dataset
236 | category_id (Optional[int]): if None may be computed from category_name. Category_id may be unknown
237 | category_name (Optional[str]): None or name of category added to dataset
238 | iscrowd (bool):
239 | Returns:
240 | int: id of bbox
241 | """
242 |
243 | if image_id is None and image_file_name is None:
244 | raise RuntimeError("One of image_id and image_file_name must be specified")
245 |
246 | if image_id is None:
247 | if image_file_name in self._image_names_to_idx:
248 | image_id = self._image_names_to_idx[image_file_name]
249 | else:
250 | raise RuntimeError("Unknown image file name \"{}\"".format(image_file_name))
251 |
252 | if category_id is None and category_name is None:
253 | raise RuntimeError("One of category_id and category_name must be specified")
254 |
255 | if category_id is None:
256 | if category_name in self._category_names_to_idx:
257 | category_id = self._category_names_to_idx[category_name]
258 | else:
259 | raise RuntimeError("Unknown category name \"{}\"".format(category_name))
260 |
261 | new_id = self._annotation_ids.get_new_id()
262 | self._output['annotations'].append({
263 | 'id': new_id,
264 | 'image_id': image_id,
265 | 'category_id': category_id,
266 | 'segmentation': [],
267 | 'area': 0,
268 | 'bbox': [bbox_left, bbox_top, bbox_width, bbox_height],
269 | 'iscrowd': iscrowd,
270 | })
271 | return new_id
272 |
273 | def get_dict(self) -> Dict[str, Any]:
274 | """
275 | Args:
276 |
277 | Returns:
278 | Dict[str, Any]: dict in mscoco format
279 | """
280 | return self._output
281 |
282 | def get_json(self, **kwargs) -> str:
283 | """
284 | Args:
285 | **kwargs: passed to json.dumps
286 | Returns:
287 | str: dataset in json format
288 | """
289 | return json.dumps(self._output, **kwargs)
290 |
--------------------------------------------------------------------------------
/src/dataset.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Any, Tuple
2 |
3 | import cv2
4 | import math
5 | import torch
6 | from torch.utils.data import Dataset
7 | import numpy as np
8 |
9 | from .coco import DetectionMSCOCODataset
10 | from catalyst import utils
11 |
12 | cv2.setNumThreads(1)
13 | cv2.ocl.setUseOpenCL(False)
14 |
15 |
16 | def get_affine_transform(
17 | center,
18 | scale,
19 | rot,
20 | output_size,
21 | shift=np.array([0, 0], dtype=np.float32),
22 | inv=0
23 | ):
24 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
25 | scale = np.array([scale, scale], dtype=np.float32)
26 |
27 | scale_tmp = scale
28 | src_w = scale_tmp[0]
29 | dst_w = output_size[0]
30 | dst_h = output_size[1]
31 |
32 | rot_rad = np.pi * rot / 180
33 | src_dir = get_dir([0, src_w * -0.5], rot_rad)
34 | dst_dir = np.array([0, dst_w * -0.5], np.float32)
35 |
36 | src = np.zeros((3, 2), dtype=np.float32)
37 | dst = np.zeros((3, 2), dtype=np.float32)
38 | src[0, :] = center + scale_tmp * shift
39 | src[1, :] = center + src_dir + scale_tmp * shift
40 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
41 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
42 |
43 | src[2:, :] = get_3rd_point(src[0, :], src[1, :])
44 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
45 |
46 | if inv:
47 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
48 | else:
49 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
50 |
51 | return trans
52 |
53 |
54 | def affine_transform(point: np.array, transform_matrix: np.array) -> np.array:
55 | new_pt = np.array([point[0], point[1], 1.], dtype=np.float32).T
56 | new_pt = np.dot(transform_matrix, new_pt)
57 | return new_pt[:2]
58 |
59 |
60 | def get_3rd_point(a, b):
61 | direct = a - b
62 | return b + np.array([-direct[1], direct[0]], dtype=np.float32)
63 |
64 |
65 | def get_dir(src_point, rot_rad):
66 | sn, cs = np.sin(rot_rad), np.cos(rot_rad)
67 |
68 | src_result = [0, 0]
69 | src_result[0] = src_point[0] * cs - src_point[1] * sn
70 | src_result[1] = src_point[0] * sn + src_point[1] * cs
71 |
72 | return src_result
73 |
74 |
75 | def gaussian_radius(det_size, min_overlap=0.7):
76 | height, width = det_size
77 |
78 | a1 = 1
79 | b1 = (height + width)
80 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
81 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
82 | r1 = (b1 + sq1) / 2
83 |
84 | a2 = 4
85 | b2 = 2 * (height + width)
86 | c2 = (1 - min_overlap) * width * height
87 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
88 | r2 = (b2 + sq2) / 2
89 |
90 | a3 = 4 * min_overlap
91 | b3 = -2 * min_overlap * (height + width)
92 | c3 = (min_overlap - 1) * width * height
93 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
94 | r3 = (b3 + sq3) / 2
95 | return min(r1, r2, r3)
96 |
97 |
98 | def gaussian2D(shape, sigma=1):
99 | m, n = [(ss - 1.) / 2. for ss in shape]
100 | y, x = np.ogrid[-m:m + 1, -n:n + 1]
101 |
102 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
103 | h[h < np.finfo(h.dtype).eps * h.max()] = 0
104 | return h
105 |
106 |
107 | def draw_umich_gaussian(heatmap, center, radius, k=1):
108 | diameter = 2 * radius + 1
109 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
110 |
111 | x, y = int(center[0]), int(center[1])
112 |
113 | height, width = heatmap.shape[0:2]
114 |
115 | left, right = min(x, radius), min(width - x, radius + 1)
116 | top, bottom = min(y, radius), min(height - y, radius + 1)
117 |
118 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
119 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
120 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
121 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
122 | return heatmap
123 |
124 |
125 | class DetectionDataset(Dataset):
126 | def __init__(self,
127 | annotation_file: str,
128 | images_dir: str,
129 | down_ratio: int,
130 | max_objects: int,
131 | num_classes: Optional[int] = None,
132 | image_size: Tuple[int, int] = (224, 224),
133 | transform: Optional[Any] = None,
134 | **kwargs
135 | ):
136 | super(DetectionDataset, self).__init__()
137 |
138 | self._annotations_dataset = DetectionMSCOCODataset(annotation_file, images_dir)
139 |
140 | self._num_classes = num_classes
141 | if self._num_classes is None:
142 | self._num_classes = self._annotations_dataset.get_num_classes()
143 |
144 | self._down_ratio = down_ratio
145 | self._max_objects = max_objects
146 |
147 | assert image_size[0] == image_size[1], "Only square image are now supported"
148 | self.image_size = image_size[0]
149 | self.transform = transform
150 |
151 | def __len__(self) -> int:
152 | return len(self._annotations_dataset)
153 |
154 | def __getitem__(self, idx: int) -> Dict[str, Any]:
155 | annotation = self._annotations_dataset[idx]
156 | image_name = annotation['image_name']
157 | detections = annotation['detections']
158 |
159 | image = utils.imread(image_name)
160 | x_scale, y_scale = self.image_size / image.shape[1], self.image_size / image.shape[0]
161 |
162 | image = cv2.resize(image, (self.image_size, self.image_size), cv2.INTER_LINEAR)
163 |
164 | detections = [
165 | {
166 | 'category_id': detection['category_id'],
167 | 'category_name': detection['category_name'],
168 | 'bbox': detection['bbox'].copy()
169 | } for detection in detections
170 | ]
171 |
172 | for detection in detections:
173 | detection['bbox'][0::2] *= x_scale
174 | detection['bbox'][1::2] *= y_scale
175 |
176 | bboxes = []
177 | labels = []
178 | for detection in detections:
179 | median_x = (detection['bbox'][0] + detection['bbox'][2]) // 2
180 | median_y = (detection['bbox'][1] + detection['bbox'][3]) // 2
181 |
182 | # CenterNet are VERY bad when center of detected objects not in the images
183 | # Let's delete this bboxes
184 | if not (0 <= median_x <= image.shape[1]) or not (0 <= median_y <= image.shape[0]):
185 | continue
186 |
187 | detection['bbox'][0::2] = np.clip(detection['bbox'][0::2], 0, image.shape[1])
188 | detection['bbox'][1::2] = np.clip(detection['bbox'][1::2], 0, image.shape[0])
189 |
190 | bboxes.append(detection['bbox'])
191 | labels.append(detection['category_id'])
192 |
193 | bboxes = np.array(bboxes)
194 | labels = np.array(labels)
195 |
196 | if self.transform is not None:
197 | result = self.transform(
198 | image=image,
199 | bboxes=bboxes,
200 | labels=labels,
201 | )
202 | else:
203 | result = dict(
204 | image=image,
205 | bboxes=bboxes,
206 | labels=labels,
207 | )
208 |
209 | image = result["image"].astype(np.uint8)
210 | bboxes = result["bboxes"]
211 | labels = result["labels"]
212 |
213 | input_height, input_width = image.shape[0], image.shape[1]
214 |
215 | # Normalization
216 | input = (image.astype(np.float32) / 255.) * 2. - 1.
217 | input = input.transpose(2, 0, 1)
218 |
219 | output_height = input_height // self._down_ratio
220 | output_width = input_width // self._down_ratio
221 | # trans_output = get_affine_transform(center, scale, 0, [output_width, output_height])
222 |
223 | heatmap = np.zeros((self._num_classes, output_height, output_width), dtype=np.float32)
224 | width_height = np.zeros((self._max_objects, 2), dtype=np.float32)
225 |
226 | reg = np.zeros((self._max_objects, 2), dtype=np.float32)
227 | ind = np.zeros(self._max_objects, dtype=np.int64)
228 | reg_mask = np.zeros(self._max_objects, dtype=np.uint8)
229 |
230 | draw_gaussian = draw_umich_gaussian
231 |
232 | new_bboxes = []
233 | num_objs = min(len(bboxes), self._max_objects)
234 | for i in range(num_objs):
235 | bbox = np.array(bboxes[i], dtype=np.float32) / self._down_ratio
236 | class_id = labels[i]
237 |
238 | bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_width - 1)
239 | bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_height - 1)
240 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
241 | new_bboxes.append(bbox)
242 |
243 | if h > 0 and w > 0:
244 | radius = gaussian_radius((math.ceil(h), math.ceil(w)))
245 | radius = max(0, int(radius))
246 | _center = np.array(
247 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
248 | dtype=np.float32
249 | )
250 | _center_int = _center.astype(np.int32)
251 | draw_gaussian(heatmap[class_id], _center_int, radius)
252 | width_height[i] = 1. * w, 1. * h
253 | ind[i] = _center_int[1] * output_width + _center_int[0]
254 | reg[i] = _center - _center_int
255 | reg_mask[i] = 1
256 |
257 | result = {
258 | "filename": image_name,
259 | "input": torch.from_numpy(input),
260 | "hm": torch.from_numpy(heatmap),
261 | "reg_mask": torch.from_numpy(reg_mask),
262 | "ind": torch.from_numpy(ind),
263 | "wh": torch.from_numpy(width_height),
264 | "reg": torch.from_numpy(reg),
265 | "bboxes": np.array(bboxes),
266 | "labels": np.array(labels),
267 | }
268 |
269 | return result
270 |
--------------------------------------------------------------------------------
/src/debug.py:
--------------------------------------------------------------------------------
1 | from catalyst.dl import Callback, RunnerState, CallbackOrder, CriterionCallback
2 |
3 | import torch
4 | from torch.nn import Module
5 |
6 |
7 | class MyDebugCallback(Callback):
8 | def __init__(self):
9 | super(MyDebugCallback, self).__init__(order=CallbackOrder.Metric + 1)
10 |
11 | def on_epoch_end(self, state: RunnerState) -> None:
12 | print('Input:')
13 | print(state.input.keys())
14 | print('Output')
15 | print(state.output.keys())
16 | print('-' * 40)
17 |
18 |
19 | class MyDebugCriterion(Module):
20 | def __init__(self):
21 | super(MyDebugCriterion, self).__init__()
22 |
23 | def forward(self, *args, **kwargs):
24 | print('Args:')
25 | print(', '.join(list(map(str, map(type, args)))))
26 | print('Kwargs:')
27 | print(', '.join(kwargs.keys()))
28 | print('*' * 40)
29 | return torch.zeros((1, ), dtype=torch.float32)
30 |
--------------------------------------------------------------------------------
/src/experiment.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import torch
4 | from catalyst.data.collate_fn import FilteringCollateFn
5 | from catalyst.dl import ConfigExperiment
6 |
7 | from .dataset import DetectionDataset
8 | from .transforms import train_transform, valid_transform, infer_transform
9 |
10 | torch.multiprocessing.set_sharing_strategy("file_system")
11 |
12 |
13 | class Experiment(ConfigExperiment):
14 | def get_datasets(
15 | self,
16 | stage: str,
17 | **kwargs,
18 | ):
19 | def process_kwargs_by_default_values(parameter, default_parameter):
20 | if parameter not in kwargs:
21 | if default_parameter not in kwargs:
22 | raise ValueError('You must specify \"{}\" or default value(\"{}\") in config'
23 | .format(parameter, default_parameter))
24 | else:
25 | kwargs[parameter] = kwargs[default_parameter]
26 |
27 | process_kwargs_by_default_values('train_annotation_file', 'annotation_file')
28 | process_kwargs_by_default_values('valid_annotation_file', 'annotation_file')
29 | process_kwargs_by_default_values('train_images_dir', 'images_dir')
30 | process_kwargs_by_default_values('valid_images_dir', 'images_dir')
31 |
32 | if kwargs['train_annotation_file'] == kwargs['valid_annotation_file']:
33 | warnings.warn("Valid is now equal to train, is it expected?", RuntimeWarning)
34 |
35 | train_dataset = DetectionDataset(annotation_file=kwargs['train_annotation_file'],
36 | images_dir=kwargs['train_images_dir'],
37 | down_ratio=kwargs['down_ratio'],
38 | max_objects=kwargs['max_objs'],
39 | num_classes=kwargs['num_classes'],
40 | image_size=kwargs['image_size'],
41 | transform=train_transform(kwargs['image_size'][0])
42 | )
43 |
44 | valid_dataset = DetectionDataset(annotation_file=kwargs['valid_annotation_file'],
45 | images_dir=kwargs['valid_images_dir'],
46 | down_ratio=kwargs['down_ratio'],
47 | max_objects=kwargs['max_objs'],
48 | num_classes=kwargs['num_classes'],
49 | image_size=kwargs['image_size'],
50 | transform=valid_transform(kwargs['image_size'][0])
51 | )
52 |
53 | return {
54 | 'train': {
55 | 'dataset': train_dataset,
56 | 'collate_fn': FilteringCollateFn('bboxes', 'labels')
57 | },
58 | 'valid': {
59 | 'dataset': valid_dataset,
60 | 'collate_fn': FilteringCollateFn('bboxes', 'labels')
61 | },
62 | }
63 |
--------------------------------------------------------------------------------
/src/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .ctdet_loss import CenterNetDetectionLoss, \
2 | RegL1Loss, MSEIndLoss, BCEIndLoss, FocalIndLoss, \
3 | decode_centernet_predictions
4 |
--------------------------------------------------------------------------------
/src/losses/ctdet_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | def _neg_loss(outputs: torch.Tensor, targets: torch.Tensor):
7 | """
8 | Modified focal loss. Exactly the same as CornerNet.
9 | Runs faster and costs a little bit more memory
10 |
11 | Arguments:
12 | outputs (torch.Tensor): BATCH x C x H x W
13 | targets (torch.Tensor): BATCH x C x H x W
14 | """
15 | pos_inds = targets.eq(1).float()
16 | neg_inds = targets.lt(1).float()
17 |
18 | neg_weights = torch.pow(1 - targets, 4)
19 |
20 | loss = 0
21 |
22 | pos_loss = torch.log(outputs) * torch.pow(1 - outputs, 2) * pos_inds
23 | neg_loss = torch.log(1 - outputs) * torch.pow(outputs, 2) * neg_weights * neg_inds
24 |
25 | num_pos = pos_inds.float().sum()
26 | pos_loss = pos_loss.sum()
27 | neg_loss = neg_loss.sum()
28 |
29 | if num_pos == 0:
30 | loss = loss - neg_loss
31 | else:
32 | loss = loss - (pos_loss + neg_loss) / num_pos
33 | return loss
34 |
35 |
36 | def _sigmoid(x):
37 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
38 | return y
39 |
40 |
41 | def _gather_feat(feat, ind, mask=None):
42 | dim = feat.size(2)
43 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
44 | feat = feat.gather(1, ind)
45 | if mask is not None:
46 | mask = mask.unsqueeze(2).expand_as(feat)
47 | feat = feat[mask]
48 | feat = feat.view(-1, dim)
49 | return feat
50 |
51 |
52 | def _tranpose_and_gather_feat(feat, ind):
53 | feat = feat.permute(0, 2, 3, 1).contiguous()
54 | feat = feat.view(feat.size(0), -1, feat.size(3))
55 | feat = _gather_feat(feat, ind)
56 | return feat
57 |
58 |
59 | def _nms(heat, kernel=3):
60 | pad = (kernel - 1) // 2
61 |
62 | hmax = nn.functional.max_pool2d(
63 | heat, (kernel, kernel), stride=1, padding=pad)
64 | keep = (hmax == heat).float()
65 | return heat * keep
66 |
67 |
68 | def _topk(scores, K=40):
69 | batch, cat, height, width = scores.size()
70 |
71 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
72 |
73 | topk_inds = topk_inds % (height * width)
74 | topk_ys = (topk_inds / width).int().float()
75 | topk_xs = (topk_inds % width).int().float()
76 |
77 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
78 | topk_clses = (topk_ind / K).int()
79 | topk_inds = _gather_feat(
80 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
81 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
82 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
83 |
84 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
85 |
86 |
87 | def decode_centernet_predictions(
88 | heat, wh, reg=None, K=100
89 | ):
90 | with torch.no_grad():
91 | batch, cat, height, width = heat.size()
92 | # mask = reg_mask.unsqueeze(2).expand_as(pred).float()
93 |
94 | heat = torch.sigmoid(heat)
95 | # perform nms on heatmaps
96 | heat = _nms(heat)
97 |
98 | scores, inds, clses, ys, xs = _topk(heat, K=K)
99 | if reg is not None:
100 | reg = _tranpose_and_gather_feat(reg, inds)
101 | reg = reg.view(batch, K, 2)
102 | xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
103 | ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
104 | else:
105 | xs = xs.view(batch, K, 1) + 0.5
106 | ys = ys.view(batch, K, 1) + 0.5
107 | wh = _tranpose_and_gather_feat(wh, inds).view(batch, K, 2)
108 |
109 | clses = clses.view(batch, K, 1).float()
110 | scores = scores.view(batch, K, 1)
111 | bboxes = torch.cat([xs - wh[..., 0:1] / 2,
112 | ys - wh[..., 1:2] / 2,
113 | xs + wh[..., 0:1] / 2,
114 | ys + wh[..., 1:2] / 2], dim=2)
115 | detections = torch.cat([bboxes, scores, clses], dim=2)
116 |
117 | return detections
118 |
119 |
120 | class FocalLoss(nn.Module):
121 | def __init__(self):
122 | super(FocalLoss, self).__init__()
123 | self.neg_loss = _neg_loss
124 |
125 | def forward(self, outputs, targets):
126 | return self.neg_loss(outputs, targets)
127 |
128 |
129 | class RegL1Loss(nn.Module):
130 | def __init__(
131 | self,
132 | key: str = "",
133 | mask_key: str = "reg_mask",
134 | ind_key: str = "ind",
135 | debug: bool = False
136 | ):
137 | super(RegL1Loss, self).__init__()
138 | self.key = key
139 | self.mask_key = mask_key
140 | self.ind_key = ind_key
141 | self.debug = debug
142 |
143 | # def forward(self, outputs, targets):
144 | # result = self._forward(
145 | # outputs[self.key], targets[self.mask_key],
146 | # targets[self.ind_key], targets[self.key]
147 | # )
148 | # return result
149 |
150 | def forward(self, outputs_key, targets_mask_key, targets_ind_key, targets_key):
151 | result = self._forward(
152 | outputs_key, targets_mask_key, targets_ind_key, targets_key
153 | )
154 | return result
155 |
156 | def _forward(self, output, mask, ind, target):
157 | pred = _tranpose_and_gather_feat(output, ind)
158 | mask = mask.unsqueeze(2).expand_as(pred).float()
159 |
160 | if self.debug:
161 | import ipdb; ipdb.set_trace()
162 | loss = F.l1_loss(pred * mask, target * mask)
163 | loss = loss / (mask.sum() + 1e-4)
164 | return loss
165 |
166 |
167 | class CenterNetDetectionLoss(nn.Module):
168 | def __init__(self):
169 | super(CenterNetDetectionLoss, self).__init__()
170 | self.focal = FocalLoss()
171 |
172 | def forward(self, outputs, targets):
173 | loss = self.focal(_sigmoid(outputs), targets)
174 | return loss
175 |
176 |
177 | class MSEIndLoss(nn.Module):
178 | def __init__(
179 | self,
180 | key: str,
181 | mask_key: str = "reg_mask",
182 | ind_key: str = "ind",
183 | debug: bool = False,
184 | reduction: str = "mean"
185 | ):
186 | super(MSEIndLoss, self).__init__()
187 | self.key = key
188 | self.mask_key = mask_key
189 | self.ind_key = ind_key
190 | self.debug = debug
191 |
192 | self.loss = nn.MSELoss(reduction=reduction)
193 |
194 | def forward(self, outputs, targets):
195 | result = self._forward(
196 | outputs[self.key], targets[self.mask_key],
197 | targets[self.ind_key], targets[self.key]
198 | )
199 |
200 | return result
201 |
202 | def _forward(self, output, mask, ind, target):
203 | pred = _tranpose_and_gather_feat(output, ind)
204 | _mask = mask.unsqueeze(2).expand_as(pred).float()
205 |
206 | if self.debug:
207 | import ipdb; ipdb.set_trace()
208 | loss = self.loss(_sigmoid(pred) * _mask, target.unsqueeze(2) * _mask)
209 | # loss = loss / (_mask.sum() + 1e-4)
210 | return loss
211 |
212 |
213 | class BCEIndLoss(nn.Module):
214 | def __init__(
215 | self,
216 | key: str,
217 | mask_key: str = "reg_mask",
218 | ind_key: str = "ind",
219 | debug: bool = False
220 | ):
221 | super(BCEIndLoss, self).__init__()
222 | self.key = key
223 | self.mask_key = mask_key
224 | self.ind_key = ind_key
225 | self.loss = nn.BCELoss()
226 | self.debug = debug
227 |
228 | def forward(self, outputs, targets):
229 | result = self._forward(
230 | outputs[self.key], targets[self.mask_key],
231 | targets[self.ind_key], targets[self.key]
232 | )
233 |
234 | return result
235 |
236 | def _forward(self, output, mask, ind, target):
237 | pred = _tranpose_and_gather_feat(output, ind)
238 | _mask = mask.unsqueeze(2).expand_as(pred).float()
239 | if self.debug:
240 | import ipdb; ipdb.set_trace()
241 |
242 | loss = self.loss(_sigmoid(pred) * _mask, target * _mask)
243 | # loss = loss / (mask.sum() + 1e-4)
244 | return loss
245 |
246 |
247 | class FocalIndLoss(nn.Module):
248 | def __init__(
249 | self,
250 | key: str,
251 | mask_key: str = "reg_mask",
252 | ind_key: str = "ind",
253 | debug: bool = False
254 | ):
255 | super(FocalIndLoss, self).__init__()
256 | self.key = key
257 | self.mask_key = mask_key
258 | self.ind_key = ind_key
259 | self.loss = FocalLoss()
260 | self.debug = debug
261 |
262 | def forward(self, outputs, targets):
263 | result = self._forward(
264 | outputs[self.key], targets[self.mask_key],
265 | targets[self.ind_key], targets[self.key]
266 | )
267 |
268 | return result
269 |
270 | def _forward(self, output, mask, ind, target):
271 | pred = _tranpose_and_gather_feat(output, ind)
272 | _mask = mask.unsqueeze(2).expand_as(pred).float()
273 | if self.debug:
274 | import ipdb; ipdb.set_trace()
275 |
276 | loss = self.loss(_sigmoid(pred) * _mask, target * _mask)
277 | # loss = loss / (mask.sum() + 1e-4)
278 | return loss
279 |
--------------------------------------------------------------------------------
/src/metrics.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, List
2 |
3 | import numpy as np
4 | from sklearn.metrics import average_precision_score
5 |
6 |
7 | def construct_mAP_list_from_bboxes(predicted_bboxes, scores, gt_bboxes, iou_threshold=.9) -> List[Tuple[bool, float]]:
8 | """
9 | Args:
10 | predicted_bboxes (np.array): predictions
11 | scores (np.array): model confidences
12 | gt_bboxes (np.array): ground truth bboxes
13 | iou_threshold (float): between 0 and 1, iou threshold to mAP metric
14 |
15 | Returns:
16 | List[Tuple[bool, float]]: mAP list
17 | """
18 |
19 | ious_matrix = bbox_iou(predicted_bboxes, gt_bboxes)
20 | result = _construct_list_for_map(ious_matrix, scores, iou_thresh=iou_threshold)
21 | return result
22 |
23 |
24 | def _construct_list_for_map(ious_matrix, scores, iou_thresh=.9) -> List[Tuple[bool, float]]:
25 | """
26 | Args:
27 | ious_matrix (np.array): array ious between predicted and ground-truth objects
28 | scores (np.array): array of shape (n) with model confidences for objects
29 | iou_thresh (float): between 0 and 1, iou threshold to mAP metric
30 | Returns:
31 | List[Tuple[bool, float]]: mAP list
32 | """
33 |
34 | ious_thresholded = ious_matrix > iou_thresh
35 | correct_bboxes = np.where(ious_thresholded.sum(axis=1).astype(bool))[0]
36 | incorrect_bboxes = np.where(~ious_thresholded.sum(axis=1).astype(bool))[0]
37 | fn_bboxes = np.where(ious_thresholded.sum(axis=0) == 0)[0]
38 |
39 | result = []
40 | result.extend([(True, scores[i]) for i in correct_bboxes])
41 | result.extend([(False, scores[i]) for i in incorrect_bboxes])
42 | result.extend([(True, 0) for _ in fn_bboxes])
43 | return result
44 |
45 |
46 | def calculate_map(predictions: List[Tuple[bool, float]], use_false_negatives: bool = False) -> float:
47 | """Calculates average precision metric for list of predictions with confidences
48 |
49 | Args:
50 | predictions (List[Tuple[...]]): List of Tuples containing all predicted bboxes with scores and corrent/incorrect flag
51 | use_false_negatives (bool): Flag to use false negatives in metric
52 | Returns:
53 | float: average precision
54 | """
55 | predictions = np.array(predictions)
56 | if not use_false_negatives:
57 | predictions = predictions[predictions[:, 1] > 0]
58 | true_labels = predictions[:, 0].astype(int)
59 | scores = predictions[:, 1]
60 |
61 | # Corner cases, critical for sklearn realisation
62 | if len(predictions) == 0:
63 | return 0
64 | if len(predictions) == 1:
65 | return 1
66 |
67 | result = average_precision_score(true_labels, scores)
68 | return result
69 |
70 |
71 | def bbox_iou(predicted, target) -> np.ndarray:
72 | p_xmin, p_ymin, p_xmax, p_ymax = np.hsplit(predicted, 4)
73 | t_xmin, t_ymin, t_xmax, t_ymax = np.hsplit(target, 4)
74 |
75 | int_xmin = np.maximum(p_xmin, t_xmin.T)
76 | int_xmax = np.minimum(p_xmax, t_xmax.T)
77 | int_ymin = np.maximum(p_ymin, t_ymin.T)
78 | int_ymax = np.minimum(p_ymax, t_ymax.T)
79 |
80 | int_area = np.maximum(int_ymax - int_ymin, 0) \
81 | * np.maximum(int_xmax - int_xmin, 0)
82 |
83 | un_xmin = np.minimum(p_xmin, t_xmin.T)
84 | un_xmax = np.maximum(p_xmax, t_xmax.T)
85 | un_ymin = np.minimum(p_ymin, t_ymin.T)
86 | un_ymax = np.maximum(p_ymax, t_ymax.T)
87 |
88 | un_area = np.maximum(un_ymax - un_ymin, 0) \
89 | * np.maximum(un_xmax - un_xmin, 0)
90 |
91 | return int_area / un_area
92 |
93 |
94 | def image_stats(pred_bboxes, scores, gt_bboxes, thresholds, iou_threshold=.5):
95 | ious = bbox_iou(pred_bboxes, gt_bboxes)
96 |
97 | true_positives, false_positives = \
98 | image_positives_stats(ious, scores, thresholds, iou_threshold)
99 |
100 | false_negatives = image_false_negatives(ious, scores, thresholds,
101 | iou_threshold=iou_threshold)
102 |
103 | stats = np.hstack((true_positives, false_positives, false_negatives))
104 |
105 | return stats
106 |
107 |
108 | def image_positives_stats(
109 | ious: np.ndarray,
110 | scores,
111 | thresholds,
112 | iou_threshold
113 | ) -> Tuple[np.ndarray, np.ndarray]:
114 | pred_bbox_max_iou = np.max(ious, axis=1, initial=0)
115 |
116 | potential_tp = pred_bbox_max_iou >= iou_threshold
117 | potential_fp = ~potential_tp
118 |
119 | mask: np.ndarray = thresholds[:, np.newaxis] <= scores[np.newaxis, :]
120 | true_positives = mask.compress(potential_tp, axis=1).sum(axis=1)
121 | false_positives = mask.compress(potential_fp, axis=1).sum(axis=1)
122 |
123 | return true_positives, false_positives
124 |
125 |
126 | def image_false_negatives(
127 | ious: np.ndarray,
128 | scores,
129 | thresholds,
130 | iou_threshold
131 | ):
132 | n_pred, n_gt = ious.shape
133 |
134 | if n_gt == 0:
135 | return np.zeros(thresholds.shape)
136 |
137 | if len(thresholds) == 0 or n_pred == 0:
138 | return np.full(thresholds.shape, n_gt)
139 |
140 | gt_max_iou_idx = ious.argmax(axis=0)
141 |
142 | always_fn = \
143 | ious[gt_max_iou_idx, np.arange(len(gt_max_iou_idx))] < iou_threshold
144 |
145 | gt_bbox_max_iou_bbox_score = \
146 | scores.take(gt_max_iou_idx.compress(~always_fn))
147 | fn = (thresholds[:, np.newaxis]
148 | > gt_bbox_max_iou_bbox_score[np.newaxis, :]).sum(axis=1)
149 |
150 | return always_fn.sum() + fn
151 |
152 |
153 | def class_agnostic_mean_ap(
154 | pred_bboxes, pred_bbox_score, gt_bboxes,
155 | sort_scores=True, iou_threshold=0.9
156 | ):
157 | if len(pred_bboxes):
158 | pass
159 |
160 | thresholds = np.concatenate(pred_bbox_score)
161 | if sort_scores:
162 | thresholds = np.sort(thresholds)[::-1]
163 |
164 | per_item_stats = [
165 | image_stats(img_pred_bboxes.reshape(-1, 4), img_scores,
166 | img_gt_bboxes.reshape(-1, 4), thresholds, iou_threshold)
167 | for img_pred_bboxes, img_scores, img_gt_bboxes
168 | in zip(pred_bboxes, pred_bbox_score, gt_bboxes)
169 | ]
170 |
171 | tp, fp, fn = np.hsplit(np.sum(per_item_stats, axis=0), 3)
172 |
173 | all_real_positives = tp + fn
174 | all_real_positives[all_real_positives == 0] = 1
175 |
176 | recall = tp / all_real_positives
177 |
178 | all_pred_positives = tp + fp
179 | all_pred_positives[all_pred_positives == 0] = 1
180 |
181 | precision = tp / all_pred_positives
182 |
183 | precisions = []
184 | for recall_threshold in np.linspace(0, 1, 11):
185 | precisions.append(
186 | np.max(precision[recall <= recall_threshold], initial=0))
187 |
188 | mAP = np.mean(precisions) if len(precisions) > 0 else 0
189 |
190 | return mAP
191 |
--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResnetCenterNet
2 |
--------------------------------------------------------------------------------
/src/models/centernet.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Callable
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 |
7 | class CenterNet(nn.Module):
8 | def __init__(
9 | self,
10 | num_classes: int,
11 | model_fn: Callable,
12 | down_ratio: int = 1,
13 | embedding_dim: int = 128,
14 | model_params: dict = None,
15 | backbone_key: str = None
16 | ):
17 | super().__init__()
18 | self.num_classes = num_classes
19 | self.embedding_dim = embedding_dim
20 |
21 | model_params = model_params or {}
22 | self.backbone = model_fn(**model_params)
23 | self.backbone_key = backbone_key
24 |
25 | self.down_sampler = nn.Conv2d(embedding_dim, embedding_dim, kernel_size=(3, 3),
26 | padding=1, stride=down_ratio, bias=True)
27 |
28 | self.head_heatmap = nn.Conv2d(embedding_dim, self.num_classes, kernel_size=(3, 3),
29 | padding=1, bias=True)
30 | self.head_heatmap.bias.data.fill_(-4.)
31 | self.head_width_height = nn.Conv2d(embedding_dim, 2, kernel_size=(3, 3), padding=1, bias=True)
32 | self.head_offset_regularizer = nn.Conv2d(embedding_dim, 2, kernel_size=(3, 3), padding=1, bias=True)
33 |
34 | def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
35 | value = self.backbone(x)
36 | if self.backbone_key is not None:
37 | value = value[self.backbone_key]
38 |
39 | features = torch.relu_(self.down_sampler(torch.relu_(value)))
40 |
41 | value = {
42 | "hm": self.head_heatmap(features),
43 | "wh": self.head_width_height(features),
44 | "reg": self.head_offset_regularizer(features),
45 | }
46 | return value
47 |
48 | def predict(self, x: torch.Tensor):
49 | """
50 | Method to trace
51 | """
52 | value = self.forward(x)
53 | return value["hm"], value["wh"], value["reg"]
54 |
55 |
56 | __all__ = ["CenterNet"]
57 |
--------------------------------------------------------------------------------
/src/models/resnet.py:
--------------------------------------------------------------------------------
1 | from catalyst.contrib.models import segmentation
2 |
3 | from .centernet import CenterNet
4 |
5 |
6 | class ResnetCenterNet(CenterNet):
7 | def __init__(
8 | self,
9 | num_classes: int,
10 | down_ratio: int = 1,
11 | embedding_dim: int = 128,
12 | arch: str = "ResnetFPNUnet",
13 | backbone_params: dict = None,
14 | ):
15 |
16 | model_fn = segmentation.__dict__[arch]
17 | backbone_params = backbone_params or {}
18 | model_params = {"num_classes": embedding_dim, **backbone_params}
19 | super().__init__(
20 | num_classes=num_classes,
21 | model_fn=model_fn,
22 | down_ratio=down_ratio,
23 | embedding_dim=embedding_dim,
24 | model_params=model_params
25 | )
26 |
--------------------------------------------------------------------------------
/src/transforms.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import albumentations as A
3 |
4 |
5 | BBOX_PARAMS = dict(
6 | format="pascal_voc",
7 | min_visibility=0.2,
8 | label_fields=["labels"],
9 | )
10 |
11 |
12 | def pre_transform(image_size: int = 512):
13 | result = [
14 | A.LongestMaxSize(image_size),
15 | A.PadIfNeeded(
16 | min_height=image_size,
17 | min_width=image_size,
18 | border_mode=cv2.BORDER_CONSTANT,
19 | value=(0, 0, 0)),
20 | ]
21 |
22 | return A.Compose(result, bbox_params=BBOX_PARAMS)
23 |
24 |
25 | def augmentations(image_size: int):
26 | channel_augs = [
27 | A.HueSaturationValue(p=0.5),
28 | A.ChannelShuffle(p=0.5),
29 | ]
30 |
31 | result = [
32 | # *pre_transform(image_size),
33 | A.OneOf([
34 | A.IAAAdditiveGaussianNoise(),
35 | A.GaussNoise(),
36 | ], p=0.5),
37 | A.OneOf([
38 | A.MotionBlur(blur_limit=3, p=0.7),
39 | A.MedianBlur(blur_limit=3, p=1.0),
40 | A.Blur(blur_limit=3, p=0.7),
41 | ], p=0.5),
42 | A.OneOf(channel_augs),
43 | A.OneOf([
44 | A.CLAHE(clip_limit=2),
45 | A.IAASharpen(),
46 | A.IAAEmboss(),
47 | ], p=0.5),
48 | A.RandomBrightnessContrast(
49 | brightness_limit=0.5,
50 | contrast_limit=0.5,
51 | p=0.5
52 | ),
53 | A.RandomGamma(p=0.5),
54 | A.OneOf([
55 | A.MedianBlur(p=0.5),
56 | A.MotionBlur(p=0.5)
57 | ]),
58 | A.RandomGamma(gamma_limit=(85, 115), p=0.5),
59 | ]
60 | return A.Compose(result, bbox_params=BBOX_PARAMS)
61 |
62 |
63 | def train_transform(image_size: int):
64 | result = A.Compose([
65 | *pre_transform(image_size),
66 | *augmentations(image_size),
67 | ], bbox_params=BBOX_PARAMS)
68 | return result
69 |
70 |
71 | def valid_transform(image_size: int):
72 | result = A.Compose([
73 | *pre_transform(image_size),
74 | ], bbox_params=BBOX_PARAMS)
75 | return result
76 |
77 |
78 | def infer_transform(image_size: int):
79 | result = A.Compose(pre_transform(image_size))
80 | return result
81 |
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | def detach(tensor: torch.Tensor) -> np.ndarray:
6 | return tensor.detach().cpu().numpy()
7 |
--------------------------------------------------------------------------------