├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── configs └── centernet_detection_config.yml ├── requirements.txt └── src ├── __init__.py ├── callbacks.py ├── coco.py ├── data_preparation.py ├── dataset.py ├── debug.py ├── experiment.py ├── losses ├── __init__.py └── ctdet_loss.py ├── metrics.py ├── models ├── __init__.py ├── centernet.py └── resnet.py ├── transforms.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | data/ 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | builds/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | 107 | 108 | .DS_Store 109 | .idea 110 | .code 111 | 112 | *.bak 113 | *.csv 114 | *.tsv 115 | *.ipynb 116 | 117 | tmp/ 118 | logs/ 119 | data/ 120 | !catalyst/data 121 | examples/data/ 122 | # Examples - mock data 123 | !examples/distilbert_text_classification/input/*.csv 124 | !examples/_tests_distilbert_text_classification/input/*.csv 125 | examples/logs/ 126 | notebooks/ 127 | 128 | _nogit* 129 | 130 | ### VisualStudioCode ### 131 | .vscode/* 132 | .vscode/settings.json 133 | !.vscode/tasks.json 134 | !.vscode/launch.json 135 | !.vscode/extensions.json 136 | 137 | ### VisualStudioCode Patch ### 138 | # Ignore all local history of files 139 | .history 140 | 141 | # End of https://www.gitignore.io/api/visualstudiocode 142 | 143 | 144 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CATALYST_VERSION="19.11" 2 | 3 | # "-fp16" or "" 4 | ARG CATALYST_WITH_FP16="-fp16" 5 | 6 | FROM catalystteam/catalyst:${CATALYST_VERSION}${CATALYST_WITH_FP16} 7 | # Set up locale to prevent bugs with encoding 8 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 9 | 10 | COPY requirements.txt . 11 | RUN pip install -r requirements.txt --no-cache-dir && rm requirements.txt 12 | 13 | CMD mkdir -p /workspace 14 | WORKDIR /workspace 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Sergey Kolesnikov. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright [yyyy] [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: docker clean 2 | 3 | docker: ./requirements.txt 4 | docker build -t catalyst-detection:latest . -f ./Dockerfile --no-cache 5 | 6 | clean: 7 | rm -rf build/ 8 | docker rmi -f catalyst-detection:latest 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | [![Catalyst logo](https://raw.githubusercontent.com/catalyst-team/catalyst-pics/master/pics/catalyst_logo.png)](https://github.com/catalyst-team/catalyst) 4 | 5 | **Accelerated DL R&D** 6 | 7 | [![Build Status](http://66.248.205.49:8111/app/rest/builds/buildType:id:Catalyst_Deploy/statusIcon.svg)](http://66.248.205.49:8111/project.html?projectId=Catalyst&tab=projectOverview&guest=1) 8 | [![CodeFactor](https://www.codefactor.io/repository/github/catalyst-team/catalyst/badge)](https://www.codefactor.io/repository/github/catalyst-team/catalyst) 9 | [![Pipi version](https://img.shields.io/pypi/v/catalyst.svg)](https://pypi.org/project/catalyst/) 10 | [![Docs](https://img.shields.io/badge/dynamic/json.svg?label=docs&url=https%3A%2F%2Fpypi.org%2Fpypi%2Fcatalyst%2Fjson&query=%24.info.version&colorB=brightgreen&prefix=v)](https://catalyst-team.github.io/catalyst/index.html) 11 | [![PyPI Status](https://pepy.tech/badge/catalyst)](https://pepy.tech/project/catalyst) 12 | 13 | [![Twitter](https://img.shields.io/badge/news-twitter-499feb)](https://twitter.com/CatalystTeam) 14 | [![Telegram](https://img.shields.io/badge/channel-telegram-blue)](https://t.me/catalyst_team) 15 | [![Slack](https://img.shields.io/badge/Catalyst-slack-success)](https://join.slack.com/t/catalyst-team-devs/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw) 16 | [![Github contributors](https://img.shields.io/github/contributors/catalyst-team/catalyst.svg?logo=github&logoColor=white)](https://github.com/catalyst-team/catalyst/graphs/contributors) 17 | 18 |
19 | 20 | PyTorch framework for Deep Learning research and development. 21 | It was developed with a focus on reproducibility, 22 | fast experimentation and code/ideas reusing. 23 | Being able to research/develop something new, 24 | rather than write another regular train loop.
25 | Break the cycle - use the Catalyst! 26 | 27 | Project [manifest](https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md). Part of [PyTorch Ecosystem](https://pytorch.org/ecosystem/). Part of [Catalyst Ecosystem](https://docs.google.com/presentation/d/1D-yhVOg6OXzjo9K_-IS5vSHLPIUxp1PEkFGnpRcNCNU/edit?usp=sharing): 28 | - [Alchemy](https://github.com/catalyst-team/alchemy) - Experiments logging & visualization 29 | - [Catalyst](https://github.com/catalyst-team/catalyst) - Accelerated Deep Learning Research and Development 30 | - [Reaction](https://github.com/catalyst-team/reaction) - Convenient Deep Learning models serving 31 | 32 | [Catalyst at AI Landscape](https://landscape.lfai.foundation/selected=catalyst). 33 | 34 | --- 35 | 36 | # Catalyst.Detection [![Build Status](https://travis-ci.com/catalyst-team/detection.svg?branch=master)](https://travis-ci.com/catalyst-team/detection) [![Github contributors](https://img.shields.io/github/contributors/catalyst-team/detection.svg?logo=github&logoColor=white)](https://github.com/catalyst-team/detection/graphs/contributors) 37 | 38 | > *Note: this repo uses advanced Catalyst Config API and could be a bit out-of-day right now. 39 | > Use [Catalyst's minimal examples section](https://github.com/catalyst-team/catalyst#minimal-examples) for a starting point and up-to-day use cases, please.* 40 | 41 | Based on [Objects as points](https://arxiv.org/abs/1904.07850) article by [Xingyi Zhou](https://arxiv.org/search/cs?searchtype=author&query=Zhou%2C+X), [Dequan Wang](https://arxiv.org/search/cs?searchtype=author&query=Wang%2C+D), [Philipp Krähenbühl](https://arxiv.org/search/cs?searchtype=author&query=Kr%C3%A4henb%C3%BChl%2C+P) 42 | 43 | ### Training in your dataset 44 | 0. Install requirements ```pip install -r requirements.txt``` 45 | 46 | 1. Copy all images to one directory or two different directories for train and validation. 47 | 48 | 1. Create ```markup_train.json``` as json file in MSCOCO format using ```COCODetectionFactory``` from ```data_preparation.py```. This class may be copied to your dataset generator. See documentation in code comments. If your dataset are already in this format, go to next step. 49 | 50 | 1. Specify perameters and in ```config/centernet_detection_config.yml```. 51 | 52 | 1. Run catalyst ```catalyst-dl run --config=./configs/centernet_detection_config.yml``` 53 | 54 | 1. When you change dataset, you must delete cache files ```markup_*.json.cache``` because this files contain preprocessed bounding boxes info. 55 | -------------------------------------------------------------------------------- /configs/centernet_detection_config.yml: -------------------------------------------------------------------------------- 1 | shared: 2 | classes: &classes ["person"] 3 | num_classes: &num_classes 1 4 | 5 | image_size: &image_size [224, 224] 6 | down_ratio: &down_ratio 4 # (height of input image / height of predicted heatmap) 7 | max_objs: &max_objs 15 # max objects detected per image, passed to DetectorCallback 8 | 9 | num_epochs: &num_epochs 200 10 | lr: &lr 0.001 11 | weight_decay: &wd 0.0001 12 | 13 | hm_weight: &hm_weight 1.0 14 | wh_weight: &wh_weight 10.0 15 | off_weight: &off_weight 10.0 16 | 17 | model_params: 18 | model: ResnetCenterNet 19 | num_classes: *num_classes 20 | embedding_dim: 128 21 | arch: "ResnetFPNUnet" 22 | down_ratio: *down_ratio 23 | backbone_params: 24 | arch: resnet18 25 | pretrained: true 26 | 27 | runner_params: 28 | input_key: "input" 29 | output_key: null 30 | 31 | args: 32 | expdir: src 33 | logdir: logs 34 | 35 | stages: 36 | state_params: 37 | main_metric: &main_metric "loss" 38 | minimize_metric: &minimize_metric true 39 | 40 | data_params: 41 | num_workers: 0 42 | batch_size: 5 43 | max_objs: *max_objs 44 | down_ratio: *down_ratio 45 | 46 | # default values, will be used if something aren't specified 47 | annotation_file: ./data/annotation.json 48 | images_dir: ./data/images 49 | 50 | # You may specify next parameters, data source will be overwritten 51 | #train_annotation_file: ./data_train/annotation.json 52 | #valid_annotation_file: ./data_valid/annotation.json 53 | #train_images_dir: ./data_train/images/ 54 | #valid_images_dir: ./data_valid/images 55 | 56 | num_classes: *num_classes 57 | image_size: *image_size 58 | 59 | sampler_params: 60 | drop_last: true 61 | shuffle: per_epoch 62 | 63 | criterion_params: 64 | _key_value: True 65 | 66 | l_hm: 67 | criterion: CenterNetDetectionLoss 68 | l1_wh: 69 | criterion: RegL1Loss 70 | l1_reg: 71 | criterion: RegL1Loss 72 | 73 | scheduler_params: 74 | scheduler: MultiStepLR 75 | milestones: [12, 40] 76 | gamma: 0.8 77 | 78 | stage1: 79 | state_params: 80 | num_epochs: *num_epochs 81 | 82 | optimizer_params: 83 | optimizer: Lookahead 84 | base_optimizer_params: 85 | optimizer: RAdam 86 | lr: *lr 87 | weight_decay: *wd 88 | no_bias_weight_decay: True 89 | 90 | callbacks_params: 91 | loss_hm: 92 | callback: CriterionCallback 93 | input_key: hm 94 | output_key: hm 95 | prefix: loss_hm 96 | criterion_key: l_hm 97 | multiplier: *hm_weight 98 | 99 | loss_wh: 100 | callback: CriterionCallback 101 | input_key: 102 | reg_mask: targets_mask_key 103 | ind: targets_ind_key 104 | wh: targets_key 105 | output_key: 106 | wh: outputs_key 107 | prefix: loss_wh 108 | criterion_key: l1_wh 109 | multiplier: *wh_weight 110 | 111 | loss_reg: 112 | callback: CriterionCallback 113 | input_key: 114 | reg_mask: targets_mask_key 115 | ind: targets_ind_key 116 | reg: targets_key 117 | output_key: 118 | reg: outputs_key 119 | prefix: loss_reg 120 | criterion_key: l1_reg 121 | multiplier: *off_weight 122 | 123 | loss_aggregator: 124 | callback: CriterionAggregatorCallback 125 | prefix: &aggregated_loss loss 126 | loss_keys: ["loss_hm", "loss_wh", "loss_reg"] 127 | loss_aggregate_fn: "sum" 128 | multiplier: 1.0 129 | 130 | optimizer: 131 | callback: OptimizerCallback 132 | grad_clip_params: 133 | func: clip_grad_value_ 134 | clip_value: 5.0 135 | loss_key: *aggregated_loss 136 | 137 | scheduler: 138 | callback: SchedulerCallback 139 | reduce_metric: *main_metric 140 | 141 | decoder: 142 | callback: DecoderCallback 143 | down_ratio: *down_ratio 144 | max_objs: *max_objs 145 | 146 | # mAP: 147 | # callback: MeanAPCallback 148 | # num_classes: *num_classes 149 | 150 | saver: 151 | callback: CheckpointCallback 152 | save_n_best: 3 153 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.2.3 2 | packaging==19.2 3 | numpy==1.17.4 4 | pycocotools==2.0.0 5 | torch==1.3.0 6 | catalyst==20.1 -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # from .runner import Runner 3 | from catalyst.dl import SupervisedRunner as Runner 4 | from catalyst.dl import registry 5 | 6 | from .experiment import Experiment 7 | 8 | from .callbacks import DecoderCallback, MeanAPCallback 9 | from .losses import CenterNetDetectionLoss, \ 10 | RegL1Loss, MSEIndLoss, BCEIndLoss, FocalIndLoss 11 | from . import models 12 | 13 | 14 | registry.Criterion(CenterNetDetectionLoss) 15 | registry.Criterion(RegL1Loss) 16 | registry.Criterion(MSEIndLoss) 17 | registry.Criterion(BCEIndLoss) 18 | registry.Criterion(FocalIndLoss) 19 | 20 | registry.Callback(DecoderCallback) 21 | registry.Callback(MeanAPCallback) 22 | 23 | registry.MODELS.add_from_module(models) 24 | -------------------------------------------------------------------------------- /src/callbacks.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | 3 | import numpy as np 4 | from catalyst.dl import Callback, RunnerState, CallbackOrder, CriterionCallback 5 | from catalyst.utils import detach 6 | 7 | from .losses.ctdet_loss import decode_centernet_predictions 8 | from .metrics import class_agnostic_mean_ap, calculate_map, construct_mAP_list_from_bboxes 9 | 10 | 11 | class DecoderCallback(Callback): 12 | def __init__(self, down_ratio: int = 1, max_objs: int = 80): 13 | super().__init__(order=CallbackOrder.Metric - 1) 14 | self.down_ratio = down_ratio 15 | self.max_objs = max_objs 16 | 17 | def on_batch_end(self, state: RunnerState): 18 | if state.loader_name.startswith("valid"): 19 | detections = decode_centernet_predictions( 20 | state.output["hm"], 21 | state.output["wh"], 22 | state.output["reg"], 23 | K=self.max_objs 24 | ) 25 | detections = detach(detections).reshape( 26 | (detections.shape[0], -1, detections.shape[2]) 27 | ) 28 | detections[:, :, :4] *= self.down_ratio 29 | 30 | bboxes = detections[:, :, :4].astype(int) 31 | scores = detections[:, :, 4] 32 | labels = detections[:, :, 5].astype(int) 33 | 34 | result = dict( 35 | bboxes=bboxes, 36 | labels=labels, 37 | scores=scores, 38 | ) 39 | state.output.update(result) 40 | 41 | 42 | class MeanAPCallback(Callback): 43 | def __init__( 44 | self, 45 | num_classes: int = None, 46 | prefix: str = "mAP", 47 | bboxes_key: str = "bboxes", 48 | scores_key: str = "scores", 49 | labels_key: str = "labels", 50 | iou_threshold: float = 0.9 51 | ): 52 | super().__init__(order=CallbackOrder.Metric) 53 | self.prefix = prefix 54 | self.classes = list(range(num_classes)) 55 | self.mean_mAP = [] 56 | 57 | self.bboxes_key = bboxes_key 58 | self.scores_key = scores_key 59 | self.labels_key = labels_key 60 | # List (dictionary value) contains of pairs of correct/not correct bboxes and model confidence by class 61 | self.classes_predictions: Dict[str, List[(bool, float)]] = {c: [] for c in range(num_classes)} 62 | self.iou_threshold = iou_threshold 63 | 64 | def on_batch_end(self, state: RunnerState): 65 | if state.loader_name.startswith("valid"): 66 | bboxes = state.output[self.bboxes_key] 67 | scores = state.output[self.scores_key] 68 | labels = state.output[self.labels_key] 69 | 70 | gt_bboxes = [ 71 | np.array(item_bboxes.detach().cpu()) 72 | for item_bboxes in state.input[self.bboxes_key]] 73 | gt_labels = [ 74 | np.array(item_label.detach().cpu()) 75 | for item_label in state.input[self.labels_key] 76 | ] 77 | 78 | for i, _class in enumerate(self.classes): 79 | predict_bboxes_batch = [] 80 | predict_scores_batch = [] 81 | 82 | target_bboxes_batch = [] 83 | for batch_elem in zip(bboxes, scores, labels, gt_bboxes, gt_labels): 84 | bboxes_, scores_, labels_, gt_bboxes_, gt_labels_ = batch_elem 85 | 86 | bboxes_ = bboxes_[scores_ > 0] 87 | labels_ = labels_[scores_ > 0] 88 | scores_ = scores_[scores_ > 0] 89 | 90 | mask = (labels_ == i) 91 | predict_bboxes_batch.append(bboxes_[mask]) 92 | predict_scores_batch.append(scores_[mask]) 93 | 94 | gt_mask = gt_labels_ == i 95 | target_bboxes_batch.append(gt_bboxes_[gt_mask]) 96 | 97 | if len(predict_bboxes_batch) != 0: 98 | per_box_correctness = [ 99 | construct_mAP_list_from_bboxes(img_pred_bboxes.reshape(-1, 4), img_scores, 100 | img_gt_bboxes.reshape(-1, 4), self.iou_threshold) 101 | for img_pred_bboxes, img_scores, img_gt_bboxes 102 | in zip(predict_bboxes_batch, predict_scores_batch, target_bboxes_batch) 103 | ] 104 | for answers in per_box_correctness: 105 | self.classes_predictions[_class].extend(answers) 106 | 107 | mean_value = class_agnostic_mean_ap(bboxes, scores, gt_bboxes) 108 | self.mean_mAP.append(mean_value) 109 | 110 | def on_loader_end(self, state: RunnerState): 111 | if state.loader_name.startswith("valid"): 112 | all_predictions = [] 113 | for class_name, predictions in self.classes_predictions.items(): 114 | # metric_name = f"{self.prefix}/{class_name}" 115 | # mAP = calculate_map(predictions) 116 | # state.metrics.epoch_values[state.loader_name][metric_name] = mAP 117 | all_predictions.extend(predictions) 118 | 119 | # mean_AP = calculate_map(all_predictions) 120 | # state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean'] = mean_AP 121 | 122 | ap_with_false_negatives = calculate_map(all_predictions, use_false_negatives=True) 123 | state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean_with_fn'] = ap_with_false_negatives 124 | 125 | # old mAP 126 | # state.metrics.epoch_values[state.loader_name][f'{self.prefix}/_mean_old'] = np.mean(self.mean_mAP) 127 | self.mean_mAP = [] 128 | self.classes_predictions: Dict[str, List[(bool, float)]] = {c: [] for c in self.classes} 129 | 130 | 131 | __all__ = ["DecoderCallback", "MeanAPCallback"] 132 | -------------------------------------------------------------------------------- /src/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import pickle 5 | from typing import Any 6 | 7 | from pycocotools.coco import COCO 8 | from torch.utils.data import Dataset 9 | 10 | 11 | class DetectionMSCOCODataset(Dataset): 12 | def __init__(self, annotation_file: str, image_dir: str): 13 | 14 | self._annotation_file = annotation_file 15 | self._image_dir = image_dir 16 | self._cache_file = self._annotation_file + ".cache" 17 | 18 | self._coco = COCO(self._annotation_file) 19 | 20 | self._img_ids = self._coco.getImgIds() 21 | self._cat_ids = self._coco.getCatIds() 22 | self._ann_ids = self._coco.getAnnIds() 23 | 24 | self._data = "coco" 25 | self._classes = { 26 | ind: cat_id for ind, cat_id in enumerate(self._cat_ids) 27 | } 28 | self._coco_to_class_map = { 29 | value: key for key, value in self._classes.items() 30 | } 31 | 32 | self._load_data() 33 | self._db_inds = np.arange(len(self._image_names)) 34 | 35 | self._load_coco_data() 36 | 37 | def _load_data(self): 38 | print("loading from cache file: {}".format(self._cache_file)) 39 | if not os.path.exists(self._cache_file): 40 | print("No cache file found...") 41 | self._extract_data() 42 | with open(self._cache_file, "wb") as f: 43 | pickle.dump([self._detections, self._image_names], f) 44 | print("Cache file created") 45 | else: 46 | with open(self._cache_file, "rb") as f: 47 | self._detections, self._image_names = pickle.load(f) 48 | 49 | def _load_coco_data(self): 50 | with open(self._annotation_file, "r") as f: 51 | data = json.load(f) 52 | 53 | coco_ids = self._coco.getImgIds() 54 | eval_ids = { 55 | self._coco.loadImgs(coco_id)[0]["file_name"]: coco_id 56 | for coco_id in coco_ids 57 | } 58 | 59 | self._coco_categories = data["categories"] 60 | self._coco_eval_ids = eval_ids 61 | 62 | def class_name(self, cid): 63 | cat_id = self._classes[cid] 64 | cat = self._coco.loadCats([cat_id])[0] 65 | return cat["name"] 66 | 67 | def _extract_data(self): 68 | 69 | self._image_names = [ 70 | self._coco.loadImgs(img_id)[0]["file_name"] 71 | for img_id in self._img_ids 72 | ] 73 | self._detections = {} 74 | for ind, (coco_image_id, image_name) in enumerate(zip(self._img_ids, self._image_names)): 75 | image = self._coco.loadImgs(coco_image_id)[0] 76 | bboxes = [] 77 | categories = [] 78 | 79 | for cat_id in self._cat_ids: 80 | annotation_ids = self._coco.getAnnIds(imgIds=image["id"], catIds=cat_id) 81 | annotations = self._coco.loadAnns(annotation_ids) 82 | category = self._coco_to_class_map[cat_id] 83 | for annotation in annotations: 84 | bbox = np.array(annotation["bbox"]) 85 | bbox[[2, 3]] += bbox[[0, 1]] 86 | bboxes.append(bbox) 87 | 88 | categories.append(category) 89 | 90 | self._detections[image_name] = [{ 91 | 'bbox': bbox.astype(np.float32), 92 | 'category_id': category, 93 | 'category_name': self.class_name(category) 94 | } for bbox, category in zip(bboxes, categories)] 95 | 96 | def __getitem__(self, ind: int) -> Any: 97 | image_name = self._image_names[ind] 98 | 99 | return { 100 | 'image_name': os.path.join(self._image_dir, image_name), 101 | 'detections': self._detections[image_name] 102 | } 103 | 104 | def __len__(self) -> int: 105 | return len(self._img_ids) 106 | 107 | def get_num_classes(self) -> int: 108 | return len(self._cat_ids) 109 | -------------------------------------------------------------------------------- /src/data_preparation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | 5 | from typing import Any, List, Optional, Dict, Union 6 | 7 | 8 | class _UniqueIdsController: 9 | def __init__(self): 10 | self._ids = set() 11 | self._last_id = -1 12 | 13 | def has(self, object_id: int) -> bool: 14 | return object_id in self._ids 15 | 16 | def get_new_id(self) -> int: 17 | self._last_id += 1 18 | return self._last_id 19 | 20 | def force_add_id(self, object_id: int) -> bool: 21 | if object_id in self._ids: 22 | return False 23 | else: 24 | self._last_id = max(self._last_id, object_id) 25 | self._ids.add(object_id) 26 | return True 27 | 28 | 29 | class COCODetectionFactory: 30 | """ 31 | # Create dataset 32 | dataset = COCODetectionFactory() 33 | 34 | # set info to dataset 35 | dataset.set_info(description='my_dataset', url='http://localhost.com', 36 | version='0.0.1', year=2019, contributor='Avi2011class', 37 | date_created='20.12.2019') 38 | 39 | # add interesting licenses 40 | dataset.add_licence(name='GPLv3', url='https://en.wikipedia.org/wiki/GPL_License') 41 | dataset.add_licence(name='MIT', url='https://en.wikipedia.org/wiki/MIT_License') 42 | 43 | # add categories of objects 44 | dataset.add_category(category_id=10, name='man', supercategory='person') # with fixed id, 10 is returned 45 | dataset.add_category(name='woman', supercategory='person') # with auto selected id, 11 is returned 46 | dataset.add_category(category_id=5, name='child', supercategory='person') # with fixed id, 5 is returned 47 | # dataset.add_category(category_id=5, name='dog', supercategory='not person') will raises RuntimeError, 48 | # id=11 already exists 49 | 50 | # add images to dataset 51 | dataset.add_image(image_id=0, file_name='photo_01.jpg') # with fixed id, 0 is returned 52 | dataset.add_image(file_name='photo_02.jpg') # without fixed id, 1 is returned 53 | # dataset.add_image(image_id=1, file_name='photo_01.jpg') will raise RuntimeError, id is already exists 54 | 55 | # Adds annotations to dataset 56 | 57 | # add annotation to image with image_id=0 and category_id=0 58 | dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_id=11) 59 | 60 | # add annotation to image with auto found image_id=1 and auto found category_id=10 61 | dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, 62 | image_file_name='photo_02.jpg', category_name='man') 63 | 64 | # add annotation to image with image_id=0 and category_id=102, category=102 may be created in future 65 | dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_id=102) 66 | 67 | # raises RuntimeError because category_name doesn't exists 68 | # dataset.add_bbox(bbox_left=0, bbox_height=10, bbox_top=0, bbox_width=0, image_id=0, category_name='blabla') 69 | 70 | # save dataset to annotation file 71 | annotation_file_content = dataset.get_json() 72 | with open('annotation_file.json', 'w') as f: 73 | f.write(annotation_file_content) 74 | """ 75 | 76 | def __init__(self): 77 | self._output = { 78 | "info": {}, 79 | "licenses": [], 80 | "categories": [], 81 | "images": [], 82 | "annotations": [] 83 | } 84 | self._licence_ids = _UniqueIdsController() 85 | self._image_ids = _UniqueIdsController() 86 | self._category_ids = _UniqueIdsController() 87 | self._annotation_ids = _UniqueIdsController() 88 | 89 | self._category_names_to_idx = {} 90 | self._image_names_to_idx = {} 91 | self._annotation_names_to_idx = {} 92 | 93 | def set_info(self, 94 | description: str = "", 95 | url: str = "", 96 | version: str = "", 97 | year: Union[str, int] = "", 98 | contributor: str = "", 99 | date_created: str = ""): 100 | """Set information in mscoco format 101 | Args: 102 | description (str): dataset description 103 | url (str): dataset url 104 | version (str): dataset version 105 | year (Union[str, int]): dataset year 106 | contributor (str): contribution info 107 | date_created (str): date 108 | 109 | Return: 110 | COCODetectionFactory: reference to current COCODetectionFactory object 111 | """ 112 | 113 | self._output['info'] = { 114 | 'description': description, 115 | 'url': url, 116 | 'version': version, 117 | 'year': year, 118 | 'contributor': contributor, 119 | 'date_created': date_created 120 | } 121 | 122 | return self 123 | 124 | def add_licence(self, 125 | licence_id: Optional[int] = None, 126 | name: str = "", 127 | url: str = "") -> Any: 128 | """Adds license to dataset, dataset may contain more then one license, it will be stored as list 129 | 130 | Args: 131 | licence_id (optional[int]): id of license, must be unique, if None random unique value will be used 132 | name (str): name of license 133 | url(str): url to license 134 | 135 | Returns: 136 | int: id of license 137 | """ 138 | 139 | if licence_id is None: 140 | licence_id = self._licence_ids.get_new_id() 141 | elif self._licence_ids.has(licence_id): 142 | raise RuntimeError('License ids must be unique, but \"{}\" already exists'.format(licence_id)) 143 | 144 | self._output['licenses'].append({ 145 | 'id': licence_id, 146 | 'name': name, 147 | 'url': url 148 | }) 149 | return licence_id 150 | 151 | def add_category(self, 152 | category_id: Optional[int] = None, 153 | name: str = "", 154 | supercategory: str = ""): 155 | """ Adds category to dataset 156 | 157 | Args: 158 | category_id (int): id of category, must be unique, if None random unique value will be used 159 | name (str): name of category, must be unique for dataset 160 | supercategory (str): name of supercategory 161 | Returns: 162 | int: id of category 163 | """ 164 | if category_id is None: 165 | category_id = self._category_ids.get_new_id() 166 | elif self._category_ids.has(category_id): 167 | raise RuntimeError('Category ids must be unique, but \"{}\" already exists'.format(category_id)) 168 | 169 | if name in self._image_names_to_idx: 170 | raise RuntimeError('Category names must be unique, but \"{}\" already exists'.format(name)) 171 | self._category_names_to_idx[name] = category_id 172 | 173 | self._output['categories'].append({ 174 | 'id': category_id, 175 | 'name': name, 176 | 'supercategory': supercategory 177 | }) 178 | return category_id 179 | 180 | def add_image(self, 181 | image_id: Optional[int] = None, 182 | file_name: str = "", 183 | height: Optional[int] = None, 184 | width: Optional[int] = None) -> Any: 185 | """ Adds image to dataset 186 | 187 | Args: 188 | image_id (Optional[int]): id of image, must be unique, if None random unique value will be used 189 | file_name (str): filename where image stored, must be unique for dataset 190 | height (Optional[int]): height of image 191 | width (optional[int]): width of image 192 | Returns: 193 | int: id of image 194 | """ 195 | 196 | if image_id is None: 197 | image_id = self._image_ids.get_new_id() 198 | elif self._category_ids.has(image_id): 199 | raise RuntimeError('Image ids must be unique, but \"{}\" already exists'.format(category_id)) 200 | 201 | if file_name in self._image_names_to_idx: 202 | raise RuntimeError('Image file names must be unique, but \"{}\" already exists'.format(name)) 203 | self._image_names_to_idx[file_name] = image_id 204 | 205 | self._output['images'].append({ 206 | 'id': image_id, 207 | 'file_name': file_name, 208 | 'height': height if height is not None else -1, 209 | 'width': width if width is not None else -1 210 | }) 211 | 212 | return image_id 213 | 214 | def add_bbox(self, 215 | bbox_left: int = 0, 216 | bbox_top: int = 0, 217 | bbox_width: int = 0, 218 | bbox_height: int = 0, 219 | image_id: Optional[int] = None, 220 | image_file_name: Optional[str] = None, 221 | category_id: Optional[int] = None, 222 | category_name: Optional[str] = None, 223 | iscrowd: bool = False): 224 | """Adds bounding box to image in dataset 225 | 226 | One of image_id and image_file_name must be specified. If both image_id will be used 227 | One of category_id and category_name must be specified. If both category_id will be used 228 | 229 | Args: 230 | bbox_left (int): 231 | bbox_top (int): 232 | bbox_width (int): 233 | bbox_height (int): 234 | image_id (Optional[int]): if None may be computed from image_file_name. Image_id may be unknown 235 | image_file_name (Optional[str]): None or name of file added to dataset 236 | category_id (Optional[int]): if None may be computed from category_name. Category_id may be unknown 237 | category_name (Optional[str]): None or name of category added to dataset 238 | iscrowd (bool): 239 | Returns: 240 | int: id of bbox 241 | """ 242 | 243 | if image_id is None and image_file_name is None: 244 | raise RuntimeError("One of image_id and image_file_name must be specified") 245 | 246 | if image_id is None: 247 | if image_file_name in self._image_names_to_idx: 248 | image_id = self._image_names_to_idx[image_file_name] 249 | else: 250 | raise RuntimeError("Unknown image file name \"{}\"".format(image_file_name)) 251 | 252 | if category_id is None and category_name is None: 253 | raise RuntimeError("One of category_id and category_name must be specified") 254 | 255 | if category_id is None: 256 | if category_name in self._category_names_to_idx: 257 | category_id = self._category_names_to_idx[category_name] 258 | else: 259 | raise RuntimeError("Unknown category name \"{}\"".format(category_name)) 260 | 261 | new_id = self._annotation_ids.get_new_id() 262 | self._output['annotations'].append({ 263 | 'id': new_id, 264 | 'image_id': image_id, 265 | 'category_id': category_id, 266 | 'segmentation': [], 267 | 'area': 0, 268 | 'bbox': [bbox_left, bbox_top, bbox_width, bbox_height], 269 | 'iscrowd': iscrowd, 270 | }) 271 | return new_id 272 | 273 | def get_dict(self) -> Dict[str, Any]: 274 | """ 275 | Args: 276 | 277 | Returns: 278 | Dict[str, Any]: dict in mscoco format 279 | """ 280 | return self._output 281 | 282 | def get_json(self, **kwargs) -> str: 283 | """ 284 | Args: 285 | **kwargs: passed to json.dumps 286 | Returns: 287 | str: dataset in json format 288 | """ 289 | return json.dumps(self._output, **kwargs) 290 | -------------------------------------------------------------------------------- /src/dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Any, Tuple 2 | 3 | import cv2 4 | import math 5 | import torch 6 | from torch.utils.data import Dataset 7 | import numpy as np 8 | 9 | from .coco import DetectionMSCOCODataset 10 | from catalyst import utils 11 | 12 | cv2.setNumThreads(1) 13 | cv2.ocl.setUseOpenCL(False) 14 | 15 | 16 | def get_affine_transform( 17 | center, 18 | scale, 19 | rot, 20 | output_size, 21 | shift=np.array([0, 0], dtype=np.float32), 22 | inv=0 23 | ): 24 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 25 | scale = np.array([scale, scale], dtype=np.float32) 26 | 27 | scale_tmp = scale 28 | src_w = scale_tmp[0] 29 | dst_w = output_size[0] 30 | dst_h = output_size[1] 31 | 32 | rot_rad = np.pi * rot / 180 33 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 34 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 35 | 36 | src = np.zeros((3, 2), dtype=np.float32) 37 | dst = np.zeros((3, 2), dtype=np.float32) 38 | src[0, :] = center + scale_tmp * shift 39 | src[1, :] = center + src_dir + scale_tmp * shift 40 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 41 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 42 | 43 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 44 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 45 | 46 | if inv: 47 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 48 | else: 49 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 50 | 51 | return trans 52 | 53 | 54 | def affine_transform(point: np.array, transform_matrix: np.array) -> np.array: 55 | new_pt = np.array([point[0], point[1], 1.], dtype=np.float32).T 56 | new_pt = np.dot(transform_matrix, new_pt) 57 | return new_pt[:2] 58 | 59 | 60 | def get_3rd_point(a, b): 61 | direct = a - b 62 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 63 | 64 | 65 | def get_dir(src_point, rot_rad): 66 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 67 | 68 | src_result = [0, 0] 69 | src_result[0] = src_point[0] * cs - src_point[1] * sn 70 | src_result[1] = src_point[0] * sn + src_point[1] * cs 71 | 72 | return src_result 73 | 74 | 75 | def gaussian_radius(det_size, min_overlap=0.7): 76 | height, width = det_size 77 | 78 | a1 = 1 79 | b1 = (height + width) 80 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 81 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 82 | r1 = (b1 + sq1) / 2 83 | 84 | a2 = 4 85 | b2 = 2 * (height + width) 86 | c2 = (1 - min_overlap) * width * height 87 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 88 | r2 = (b2 + sq2) / 2 89 | 90 | a3 = 4 * min_overlap 91 | b3 = -2 * min_overlap * (height + width) 92 | c3 = (min_overlap - 1) * width * height 93 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 94 | r3 = (b3 + sq3) / 2 95 | return min(r1, r2, r3) 96 | 97 | 98 | def gaussian2D(shape, sigma=1): 99 | m, n = [(ss - 1.) / 2. for ss in shape] 100 | y, x = np.ogrid[-m:m + 1, -n:n + 1] 101 | 102 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 103 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 104 | return h 105 | 106 | 107 | def draw_umich_gaussian(heatmap, center, radius, k=1): 108 | diameter = 2 * radius + 1 109 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 110 | 111 | x, y = int(center[0]), int(center[1]) 112 | 113 | height, width = heatmap.shape[0:2] 114 | 115 | left, right = min(x, radius), min(width - x, radius + 1) 116 | top, bottom = min(y, radius), min(height - y, radius + 1) 117 | 118 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 119 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 120 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 121 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 122 | return heatmap 123 | 124 | 125 | class DetectionDataset(Dataset): 126 | def __init__(self, 127 | annotation_file: str, 128 | images_dir: str, 129 | down_ratio: int, 130 | max_objects: int, 131 | num_classes: Optional[int] = None, 132 | image_size: Tuple[int, int] = (224, 224), 133 | transform: Optional[Any] = None, 134 | **kwargs 135 | ): 136 | super(DetectionDataset, self).__init__() 137 | 138 | self._annotations_dataset = DetectionMSCOCODataset(annotation_file, images_dir) 139 | 140 | self._num_classes = num_classes 141 | if self._num_classes is None: 142 | self._num_classes = self._annotations_dataset.get_num_classes() 143 | 144 | self._down_ratio = down_ratio 145 | self._max_objects = max_objects 146 | 147 | assert image_size[0] == image_size[1], "Only square image are now supported" 148 | self.image_size = image_size[0] 149 | self.transform = transform 150 | 151 | def __len__(self) -> int: 152 | return len(self._annotations_dataset) 153 | 154 | def __getitem__(self, idx: int) -> Dict[str, Any]: 155 | annotation = self._annotations_dataset[idx] 156 | image_name = annotation['image_name'] 157 | detections = annotation['detections'] 158 | 159 | image = utils.imread(image_name) 160 | x_scale, y_scale = self.image_size / image.shape[1], self.image_size / image.shape[0] 161 | 162 | image = cv2.resize(image, (self.image_size, self.image_size), cv2.INTER_LINEAR) 163 | 164 | detections = [ 165 | { 166 | 'category_id': detection['category_id'], 167 | 'category_name': detection['category_name'], 168 | 'bbox': detection['bbox'].copy() 169 | } for detection in detections 170 | ] 171 | 172 | for detection in detections: 173 | detection['bbox'][0::2] *= x_scale 174 | detection['bbox'][1::2] *= y_scale 175 | 176 | bboxes = [] 177 | labels = [] 178 | for detection in detections: 179 | median_x = (detection['bbox'][0] + detection['bbox'][2]) // 2 180 | median_y = (detection['bbox'][1] + detection['bbox'][3]) // 2 181 | 182 | # CenterNet are VERY bad when center of detected objects not in the images 183 | # Let's delete this bboxes 184 | if not (0 <= median_x <= image.shape[1]) or not (0 <= median_y <= image.shape[0]): 185 | continue 186 | 187 | detection['bbox'][0::2] = np.clip(detection['bbox'][0::2], 0, image.shape[1]) 188 | detection['bbox'][1::2] = np.clip(detection['bbox'][1::2], 0, image.shape[0]) 189 | 190 | bboxes.append(detection['bbox']) 191 | labels.append(detection['category_id']) 192 | 193 | bboxes = np.array(bboxes) 194 | labels = np.array(labels) 195 | 196 | if self.transform is not None: 197 | result = self.transform( 198 | image=image, 199 | bboxes=bboxes, 200 | labels=labels, 201 | ) 202 | else: 203 | result = dict( 204 | image=image, 205 | bboxes=bboxes, 206 | labels=labels, 207 | ) 208 | 209 | image = result["image"].astype(np.uint8) 210 | bboxes = result["bboxes"] 211 | labels = result["labels"] 212 | 213 | input_height, input_width = image.shape[0], image.shape[1] 214 | 215 | # Normalization 216 | input = (image.astype(np.float32) / 255.) * 2. - 1. 217 | input = input.transpose(2, 0, 1) 218 | 219 | output_height = input_height // self._down_ratio 220 | output_width = input_width // self._down_ratio 221 | # trans_output = get_affine_transform(center, scale, 0, [output_width, output_height]) 222 | 223 | heatmap = np.zeros((self._num_classes, output_height, output_width), dtype=np.float32) 224 | width_height = np.zeros((self._max_objects, 2), dtype=np.float32) 225 | 226 | reg = np.zeros((self._max_objects, 2), dtype=np.float32) 227 | ind = np.zeros(self._max_objects, dtype=np.int64) 228 | reg_mask = np.zeros(self._max_objects, dtype=np.uint8) 229 | 230 | draw_gaussian = draw_umich_gaussian 231 | 232 | new_bboxes = [] 233 | num_objs = min(len(bboxes), self._max_objects) 234 | for i in range(num_objs): 235 | bbox = np.array(bboxes[i], dtype=np.float32) / self._down_ratio 236 | class_id = labels[i] 237 | 238 | bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_width - 1) 239 | bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_height - 1) 240 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 241 | new_bboxes.append(bbox) 242 | 243 | if h > 0 and w > 0: 244 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 245 | radius = max(0, int(radius)) 246 | _center = np.array( 247 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], 248 | dtype=np.float32 249 | ) 250 | _center_int = _center.astype(np.int32) 251 | draw_gaussian(heatmap[class_id], _center_int, radius) 252 | width_height[i] = 1. * w, 1. * h 253 | ind[i] = _center_int[1] * output_width + _center_int[0] 254 | reg[i] = _center - _center_int 255 | reg_mask[i] = 1 256 | 257 | result = { 258 | "filename": image_name, 259 | "input": torch.from_numpy(input), 260 | "hm": torch.from_numpy(heatmap), 261 | "reg_mask": torch.from_numpy(reg_mask), 262 | "ind": torch.from_numpy(ind), 263 | "wh": torch.from_numpy(width_height), 264 | "reg": torch.from_numpy(reg), 265 | "bboxes": np.array(bboxes), 266 | "labels": np.array(labels), 267 | } 268 | 269 | return result 270 | -------------------------------------------------------------------------------- /src/debug.py: -------------------------------------------------------------------------------- 1 | from catalyst.dl import Callback, RunnerState, CallbackOrder, CriterionCallback 2 | 3 | import torch 4 | from torch.nn import Module 5 | 6 | 7 | class MyDebugCallback(Callback): 8 | def __init__(self): 9 | super(MyDebugCallback, self).__init__(order=CallbackOrder.Metric + 1) 10 | 11 | def on_epoch_end(self, state: RunnerState) -> None: 12 | print('Input:') 13 | print(state.input.keys()) 14 | print('Output') 15 | print(state.output.keys()) 16 | print('-' * 40) 17 | 18 | 19 | class MyDebugCriterion(Module): 20 | def __init__(self): 21 | super(MyDebugCriterion, self).__init__() 22 | 23 | def forward(self, *args, **kwargs): 24 | print('Args:') 25 | print(', '.join(list(map(str, map(type, args))))) 26 | print('Kwargs:') 27 | print(', '.join(kwargs.keys())) 28 | print('*' * 40) 29 | return torch.zeros((1, ), dtype=torch.float32) 30 | -------------------------------------------------------------------------------- /src/experiment.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch 4 | from catalyst.data.collate_fn import FilteringCollateFn 5 | from catalyst.dl import ConfigExperiment 6 | 7 | from .dataset import DetectionDataset 8 | from .transforms import train_transform, valid_transform, infer_transform 9 | 10 | torch.multiprocessing.set_sharing_strategy("file_system") 11 | 12 | 13 | class Experiment(ConfigExperiment): 14 | def get_datasets( 15 | self, 16 | stage: str, 17 | **kwargs, 18 | ): 19 | def process_kwargs_by_default_values(parameter, default_parameter): 20 | if parameter not in kwargs: 21 | if default_parameter not in kwargs: 22 | raise ValueError('You must specify \"{}\" or default value(\"{}\") in config' 23 | .format(parameter, default_parameter)) 24 | else: 25 | kwargs[parameter] = kwargs[default_parameter] 26 | 27 | process_kwargs_by_default_values('train_annotation_file', 'annotation_file') 28 | process_kwargs_by_default_values('valid_annotation_file', 'annotation_file') 29 | process_kwargs_by_default_values('train_images_dir', 'images_dir') 30 | process_kwargs_by_default_values('valid_images_dir', 'images_dir') 31 | 32 | if kwargs['train_annotation_file'] == kwargs['valid_annotation_file']: 33 | warnings.warn("Valid is now equal to train, is it expected?", RuntimeWarning) 34 | 35 | train_dataset = DetectionDataset(annotation_file=kwargs['train_annotation_file'], 36 | images_dir=kwargs['train_images_dir'], 37 | down_ratio=kwargs['down_ratio'], 38 | max_objects=kwargs['max_objs'], 39 | num_classes=kwargs['num_classes'], 40 | image_size=kwargs['image_size'], 41 | transform=train_transform(kwargs['image_size'][0]) 42 | ) 43 | 44 | valid_dataset = DetectionDataset(annotation_file=kwargs['valid_annotation_file'], 45 | images_dir=kwargs['valid_images_dir'], 46 | down_ratio=kwargs['down_ratio'], 47 | max_objects=kwargs['max_objs'], 48 | num_classes=kwargs['num_classes'], 49 | image_size=kwargs['image_size'], 50 | transform=valid_transform(kwargs['image_size'][0]) 51 | ) 52 | 53 | return { 54 | 'train': { 55 | 'dataset': train_dataset, 56 | 'collate_fn': FilteringCollateFn('bboxes', 'labels') 57 | }, 58 | 'valid': { 59 | 'dataset': valid_dataset, 60 | 'collate_fn': FilteringCollateFn('bboxes', 'labels') 61 | }, 62 | } 63 | -------------------------------------------------------------------------------- /src/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .ctdet_loss import CenterNetDetectionLoss, \ 2 | RegL1Loss, MSEIndLoss, BCEIndLoss, FocalIndLoss, \ 3 | decode_centernet_predictions 4 | -------------------------------------------------------------------------------- /src/losses/ctdet_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def _neg_loss(outputs: torch.Tensor, targets: torch.Tensor): 7 | """ 8 | Modified focal loss. Exactly the same as CornerNet. 9 | Runs faster and costs a little bit more memory 10 | 11 | Arguments: 12 | outputs (torch.Tensor): BATCH x C x H x W 13 | targets (torch.Tensor): BATCH x C x H x W 14 | """ 15 | pos_inds = targets.eq(1).float() 16 | neg_inds = targets.lt(1).float() 17 | 18 | neg_weights = torch.pow(1 - targets, 4) 19 | 20 | loss = 0 21 | 22 | pos_loss = torch.log(outputs) * torch.pow(1 - outputs, 2) * pos_inds 23 | neg_loss = torch.log(1 - outputs) * torch.pow(outputs, 2) * neg_weights * neg_inds 24 | 25 | num_pos = pos_inds.float().sum() 26 | pos_loss = pos_loss.sum() 27 | neg_loss = neg_loss.sum() 28 | 29 | if num_pos == 0: 30 | loss = loss - neg_loss 31 | else: 32 | loss = loss - (pos_loss + neg_loss) / num_pos 33 | return loss 34 | 35 | 36 | def _sigmoid(x): 37 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) 38 | return y 39 | 40 | 41 | def _gather_feat(feat, ind, mask=None): 42 | dim = feat.size(2) 43 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 44 | feat = feat.gather(1, ind) 45 | if mask is not None: 46 | mask = mask.unsqueeze(2).expand_as(feat) 47 | feat = feat[mask] 48 | feat = feat.view(-1, dim) 49 | return feat 50 | 51 | 52 | def _tranpose_and_gather_feat(feat, ind): 53 | feat = feat.permute(0, 2, 3, 1).contiguous() 54 | feat = feat.view(feat.size(0), -1, feat.size(3)) 55 | feat = _gather_feat(feat, ind) 56 | return feat 57 | 58 | 59 | def _nms(heat, kernel=3): 60 | pad = (kernel - 1) // 2 61 | 62 | hmax = nn.functional.max_pool2d( 63 | heat, (kernel, kernel), stride=1, padding=pad) 64 | keep = (hmax == heat).float() 65 | return heat * keep 66 | 67 | 68 | def _topk(scores, K=40): 69 | batch, cat, height, width = scores.size() 70 | 71 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 72 | 73 | topk_inds = topk_inds % (height * width) 74 | topk_ys = (topk_inds / width).int().float() 75 | topk_xs = (topk_inds % width).int().float() 76 | 77 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 78 | topk_clses = (topk_ind / K).int() 79 | topk_inds = _gather_feat( 80 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 81 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 82 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 83 | 84 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 85 | 86 | 87 | def decode_centernet_predictions( 88 | heat, wh, reg=None, K=100 89 | ): 90 | with torch.no_grad(): 91 | batch, cat, height, width = heat.size() 92 | # mask = reg_mask.unsqueeze(2).expand_as(pred).float() 93 | 94 | heat = torch.sigmoid(heat) 95 | # perform nms on heatmaps 96 | heat = _nms(heat) 97 | 98 | scores, inds, clses, ys, xs = _topk(heat, K=K) 99 | if reg is not None: 100 | reg = _tranpose_and_gather_feat(reg, inds) 101 | reg = reg.view(batch, K, 2) 102 | xs = xs.view(batch, K, 1) + reg[:, :, 0:1] 103 | ys = ys.view(batch, K, 1) + reg[:, :, 1:2] 104 | else: 105 | xs = xs.view(batch, K, 1) + 0.5 106 | ys = ys.view(batch, K, 1) + 0.5 107 | wh = _tranpose_and_gather_feat(wh, inds).view(batch, K, 2) 108 | 109 | clses = clses.view(batch, K, 1).float() 110 | scores = scores.view(batch, K, 1) 111 | bboxes = torch.cat([xs - wh[..., 0:1] / 2, 112 | ys - wh[..., 1:2] / 2, 113 | xs + wh[..., 0:1] / 2, 114 | ys + wh[..., 1:2] / 2], dim=2) 115 | detections = torch.cat([bboxes, scores, clses], dim=2) 116 | 117 | return detections 118 | 119 | 120 | class FocalLoss(nn.Module): 121 | def __init__(self): 122 | super(FocalLoss, self).__init__() 123 | self.neg_loss = _neg_loss 124 | 125 | def forward(self, outputs, targets): 126 | return self.neg_loss(outputs, targets) 127 | 128 | 129 | class RegL1Loss(nn.Module): 130 | def __init__( 131 | self, 132 | key: str = "", 133 | mask_key: str = "reg_mask", 134 | ind_key: str = "ind", 135 | debug: bool = False 136 | ): 137 | super(RegL1Loss, self).__init__() 138 | self.key = key 139 | self.mask_key = mask_key 140 | self.ind_key = ind_key 141 | self.debug = debug 142 | 143 | # def forward(self, outputs, targets): 144 | # result = self._forward( 145 | # outputs[self.key], targets[self.mask_key], 146 | # targets[self.ind_key], targets[self.key] 147 | # ) 148 | # return result 149 | 150 | def forward(self, outputs_key, targets_mask_key, targets_ind_key, targets_key): 151 | result = self._forward( 152 | outputs_key, targets_mask_key, targets_ind_key, targets_key 153 | ) 154 | return result 155 | 156 | def _forward(self, output, mask, ind, target): 157 | pred = _tranpose_and_gather_feat(output, ind) 158 | mask = mask.unsqueeze(2).expand_as(pred).float() 159 | 160 | if self.debug: 161 | import ipdb; ipdb.set_trace() 162 | loss = F.l1_loss(pred * mask, target * mask) 163 | loss = loss / (mask.sum() + 1e-4) 164 | return loss 165 | 166 | 167 | class CenterNetDetectionLoss(nn.Module): 168 | def __init__(self): 169 | super(CenterNetDetectionLoss, self).__init__() 170 | self.focal = FocalLoss() 171 | 172 | def forward(self, outputs, targets): 173 | loss = self.focal(_sigmoid(outputs), targets) 174 | return loss 175 | 176 | 177 | class MSEIndLoss(nn.Module): 178 | def __init__( 179 | self, 180 | key: str, 181 | mask_key: str = "reg_mask", 182 | ind_key: str = "ind", 183 | debug: bool = False, 184 | reduction: str = "mean" 185 | ): 186 | super(MSEIndLoss, self).__init__() 187 | self.key = key 188 | self.mask_key = mask_key 189 | self.ind_key = ind_key 190 | self.debug = debug 191 | 192 | self.loss = nn.MSELoss(reduction=reduction) 193 | 194 | def forward(self, outputs, targets): 195 | result = self._forward( 196 | outputs[self.key], targets[self.mask_key], 197 | targets[self.ind_key], targets[self.key] 198 | ) 199 | 200 | return result 201 | 202 | def _forward(self, output, mask, ind, target): 203 | pred = _tranpose_and_gather_feat(output, ind) 204 | _mask = mask.unsqueeze(2).expand_as(pred).float() 205 | 206 | if self.debug: 207 | import ipdb; ipdb.set_trace() 208 | loss = self.loss(_sigmoid(pred) * _mask, target.unsqueeze(2) * _mask) 209 | # loss = loss / (_mask.sum() + 1e-4) 210 | return loss 211 | 212 | 213 | class BCEIndLoss(nn.Module): 214 | def __init__( 215 | self, 216 | key: str, 217 | mask_key: str = "reg_mask", 218 | ind_key: str = "ind", 219 | debug: bool = False 220 | ): 221 | super(BCEIndLoss, self).__init__() 222 | self.key = key 223 | self.mask_key = mask_key 224 | self.ind_key = ind_key 225 | self.loss = nn.BCELoss() 226 | self.debug = debug 227 | 228 | def forward(self, outputs, targets): 229 | result = self._forward( 230 | outputs[self.key], targets[self.mask_key], 231 | targets[self.ind_key], targets[self.key] 232 | ) 233 | 234 | return result 235 | 236 | def _forward(self, output, mask, ind, target): 237 | pred = _tranpose_and_gather_feat(output, ind) 238 | _mask = mask.unsqueeze(2).expand_as(pred).float() 239 | if self.debug: 240 | import ipdb; ipdb.set_trace() 241 | 242 | loss = self.loss(_sigmoid(pred) * _mask, target * _mask) 243 | # loss = loss / (mask.sum() + 1e-4) 244 | return loss 245 | 246 | 247 | class FocalIndLoss(nn.Module): 248 | def __init__( 249 | self, 250 | key: str, 251 | mask_key: str = "reg_mask", 252 | ind_key: str = "ind", 253 | debug: bool = False 254 | ): 255 | super(FocalIndLoss, self).__init__() 256 | self.key = key 257 | self.mask_key = mask_key 258 | self.ind_key = ind_key 259 | self.loss = FocalLoss() 260 | self.debug = debug 261 | 262 | def forward(self, outputs, targets): 263 | result = self._forward( 264 | outputs[self.key], targets[self.mask_key], 265 | targets[self.ind_key], targets[self.key] 266 | ) 267 | 268 | return result 269 | 270 | def _forward(self, output, mask, ind, target): 271 | pred = _tranpose_and_gather_feat(output, ind) 272 | _mask = mask.unsqueeze(2).expand_as(pred).float() 273 | if self.debug: 274 | import ipdb; ipdb.set_trace() 275 | 276 | loss = self.loss(_sigmoid(pred) * _mask, target * _mask) 277 | # loss = loss / (mask.sum() + 1e-4) 278 | return loss 279 | -------------------------------------------------------------------------------- /src/metrics.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List 2 | 3 | import numpy as np 4 | from sklearn.metrics import average_precision_score 5 | 6 | 7 | def construct_mAP_list_from_bboxes(predicted_bboxes, scores, gt_bboxes, iou_threshold=.9) -> List[Tuple[bool, float]]: 8 | """ 9 | Args: 10 | predicted_bboxes (np.array): predictions 11 | scores (np.array): model confidences 12 | gt_bboxes (np.array): ground truth bboxes 13 | iou_threshold (float): between 0 and 1, iou threshold to mAP metric 14 | 15 | Returns: 16 | List[Tuple[bool, float]]: mAP list 17 | """ 18 | 19 | ious_matrix = bbox_iou(predicted_bboxes, gt_bboxes) 20 | result = _construct_list_for_map(ious_matrix, scores, iou_thresh=iou_threshold) 21 | return result 22 | 23 | 24 | def _construct_list_for_map(ious_matrix, scores, iou_thresh=.9) -> List[Tuple[bool, float]]: 25 | """ 26 | Args: 27 | ious_matrix (np.array): array ious between predicted and ground-truth objects 28 | scores (np.array): array of shape (n) with model confidences for objects 29 | iou_thresh (float): between 0 and 1, iou threshold to mAP metric 30 | Returns: 31 | List[Tuple[bool, float]]: mAP list 32 | """ 33 | 34 | ious_thresholded = ious_matrix > iou_thresh 35 | correct_bboxes = np.where(ious_thresholded.sum(axis=1).astype(bool))[0] 36 | incorrect_bboxes = np.where(~ious_thresholded.sum(axis=1).astype(bool))[0] 37 | fn_bboxes = np.where(ious_thresholded.sum(axis=0) == 0)[0] 38 | 39 | result = [] 40 | result.extend([(True, scores[i]) for i in correct_bboxes]) 41 | result.extend([(False, scores[i]) for i in incorrect_bboxes]) 42 | result.extend([(True, 0) for _ in fn_bboxes]) 43 | return result 44 | 45 | 46 | def calculate_map(predictions: List[Tuple[bool, float]], use_false_negatives: bool = False) -> float: 47 | """Calculates average precision metric for list of predictions with confidences 48 | 49 | Args: 50 | predictions (List[Tuple[...]]): List of Tuples containing all predicted bboxes with scores and corrent/incorrect flag 51 | use_false_negatives (bool): Flag to use false negatives in metric 52 | Returns: 53 | float: average precision 54 | """ 55 | predictions = np.array(predictions) 56 | if not use_false_negatives: 57 | predictions = predictions[predictions[:, 1] > 0] 58 | true_labels = predictions[:, 0].astype(int) 59 | scores = predictions[:, 1] 60 | 61 | # Corner cases, critical for sklearn realisation 62 | if len(predictions) == 0: 63 | return 0 64 | if len(predictions) == 1: 65 | return 1 66 | 67 | result = average_precision_score(true_labels, scores) 68 | return result 69 | 70 | 71 | def bbox_iou(predicted, target) -> np.ndarray: 72 | p_xmin, p_ymin, p_xmax, p_ymax = np.hsplit(predicted, 4) 73 | t_xmin, t_ymin, t_xmax, t_ymax = np.hsplit(target, 4) 74 | 75 | int_xmin = np.maximum(p_xmin, t_xmin.T) 76 | int_xmax = np.minimum(p_xmax, t_xmax.T) 77 | int_ymin = np.maximum(p_ymin, t_ymin.T) 78 | int_ymax = np.minimum(p_ymax, t_ymax.T) 79 | 80 | int_area = np.maximum(int_ymax - int_ymin, 0) \ 81 | * np.maximum(int_xmax - int_xmin, 0) 82 | 83 | un_xmin = np.minimum(p_xmin, t_xmin.T) 84 | un_xmax = np.maximum(p_xmax, t_xmax.T) 85 | un_ymin = np.minimum(p_ymin, t_ymin.T) 86 | un_ymax = np.maximum(p_ymax, t_ymax.T) 87 | 88 | un_area = np.maximum(un_ymax - un_ymin, 0) \ 89 | * np.maximum(un_xmax - un_xmin, 0) 90 | 91 | return int_area / un_area 92 | 93 | 94 | def image_stats(pred_bboxes, scores, gt_bboxes, thresholds, iou_threshold=.5): 95 | ious = bbox_iou(pred_bboxes, gt_bboxes) 96 | 97 | true_positives, false_positives = \ 98 | image_positives_stats(ious, scores, thresholds, iou_threshold) 99 | 100 | false_negatives = image_false_negatives(ious, scores, thresholds, 101 | iou_threshold=iou_threshold) 102 | 103 | stats = np.hstack((true_positives, false_positives, false_negatives)) 104 | 105 | return stats 106 | 107 | 108 | def image_positives_stats( 109 | ious: np.ndarray, 110 | scores, 111 | thresholds, 112 | iou_threshold 113 | ) -> Tuple[np.ndarray, np.ndarray]: 114 | pred_bbox_max_iou = np.max(ious, axis=1, initial=0) 115 | 116 | potential_tp = pred_bbox_max_iou >= iou_threshold 117 | potential_fp = ~potential_tp 118 | 119 | mask: np.ndarray = thresholds[:, np.newaxis] <= scores[np.newaxis, :] 120 | true_positives = mask.compress(potential_tp, axis=1).sum(axis=1) 121 | false_positives = mask.compress(potential_fp, axis=1).sum(axis=1) 122 | 123 | return true_positives, false_positives 124 | 125 | 126 | def image_false_negatives( 127 | ious: np.ndarray, 128 | scores, 129 | thresholds, 130 | iou_threshold 131 | ): 132 | n_pred, n_gt = ious.shape 133 | 134 | if n_gt == 0: 135 | return np.zeros(thresholds.shape) 136 | 137 | if len(thresholds) == 0 or n_pred == 0: 138 | return np.full(thresholds.shape, n_gt) 139 | 140 | gt_max_iou_idx = ious.argmax(axis=0) 141 | 142 | always_fn = \ 143 | ious[gt_max_iou_idx, np.arange(len(gt_max_iou_idx))] < iou_threshold 144 | 145 | gt_bbox_max_iou_bbox_score = \ 146 | scores.take(gt_max_iou_idx.compress(~always_fn)) 147 | fn = (thresholds[:, np.newaxis] 148 | > gt_bbox_max_iou_bbox_score[np.newaxis, :]).sum(axis=1) 149 | 150 | return always_fn.sum() + fn 151 | 152 | 153 | def class_agnostic_mean_ap( 154 | pred_bboxes, pred_bbox_score, gt_bboxes, 155 | sort_scores=True, iou_threshold=0.9 156 | ): 157 | if len(pred_bboxes): 158 | pass 159 | 160 | thresholds = np.concatenate(pred_bbox_score) 161 | if sort_scores: 162 | thresholds = np.sort(thresholds)[::-1] 163 | 164 | per_item_stats = [ 165 | image_stats(img_pred_bboxes.reshape(-1, 4), img_scores, 166 | img_gt_bboxes.reshape(-1, 4), thresholds, iou_threshold) 167 | for img_pred_bboxes, img_scores, img_gt_bboxes 168 | in zip(pred_bboxes, pred_bbox_score, gt_bboxes) 169 | ] 170 | 171 | tp, fp, fn = np.hsplit(np.sum(per_item_stats, axis=0), 3) 172 | 173 | all_real_positives = tp + fn 174 | all_real_positives[all_real_positives == 0] = 1 175 | 176 | recall = tp / all_real_positives 177 | 178 | all_pred_positives = tp + fp 179 | all_pred_positives[all_pred_positives == 0] = 1 180 | 181 | precision = tp / all_pred_positives 182 | 183 | precisions = [] 184 | for recall_threshold in np.linspace(0, 1, 11): 185 | precisions.append( 186 | np.max(precision[recall <= recall_threshold], initial=0)) 187 | 188 | mAP = np.mean(precisions) if len(precisions) > 0 else 0 189 | 190 | return mAP 191 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResnetCenterNet 2 | -------------------------------------------------------------------------------- /src/models/centernet.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class CenterNet(nn.Module): 8 | def __init__( 9 | self, 10 | num_classes: int, 11 | model_fn: Callable, 12 | down_ratio: int = 1, 13 | embedding_dim: int = 128, 14 | model_params: dict = None, 15 | backbone_key: str = None 16 | ): 17 | super().__init__() 18 | self.num_classes = num_classes 19 | self.embedding_dim = embedding_dim 20 | 21 | model_params = model_params or {} 22 | self.backbone = model_fn(**model_params) 23 | self.backbone_key = backbone_key 24 | 25 | self.down_sampler = nn.Conv2d(embedding_dim, embedding_dim, kernel_size=(3, 3), 26 | padding=1, stride=down_ratio, bias=True) 27 | 28 | self.head_heatmap = nn.Conv2d(embedding_dim, self.num_classes, kernel_size=(3, 3), 29 | padding=1, bias=True) 30 | self.head_heatmap.bias.data.fill_(-4.) 31 | self.head_width_height = nn.Conv2d(embedding_dim, 2, kernel_size=(3, 3), padding=1, bias=True) 32 | self.head_offset_regularizer = nn.Conv2d(embedding_dim, 2, kernel_size=(3, 3), padding=1, bias=True) 33 | 34 | def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]: 35 | value = self.backbone(x) 36 | if self.backbone_key is not None: 37 | value = value[self.backbone_key] 38 | 39 | features = torch.relu_(self.down_sampler(torch.relu_(value))) 40 | 41 | value = { 42 | "hm": self.head_heatmap(features), 43 | "wh": self.head_width_height(features), 44 | "reg": self.head_offset_regularizer(features), 45 | } 46 | return value 47 | 48 | def predict(self, x: torch.Tensor): 49 | """ 50 | Method to trace 51 | """ 52 | value = self.forward(x) 53 | return value["hm"], value["wh"], value["reg"] 54 | 55 | 56 | __all__ = ["CenterNet"] 57 | -------------------------------------------------------------------------------- /src/models/resnet.py: -------------------------------------------------------------------------------- 1 | from catalyst.contrib.models import segmentation 2 | 3 | from .centernet import CenterNet 4 | 5 | 6 | class ResnetCenterNet(CenterNet): 7 | def __init__( 8 | self, 9 | num_classes: int, 10 | down_ratio: int = 1, 11 | embedding_dim: int = 128, 12 | arch: str = "ResnetFPNUnet", 13 | backbone_params: dict = None, 14 | ): 15 | 16 | model_fn = segmentation.__dict__[arch] 17 | backbone_params = backbone_params or {} 18 | model_params = {"num_classes": embedding_dim, **backbone_params} 19 | super().__init__( 20 | num_classes=num_classes, 21 | model_fn=model_fn, 22 | down_ratio=down_ratio, 23 | embedding_dim=embedding_dim, 24 | model_params=model_params 25 | ) 26 | -------------------------------------------------------------------------------- /src/transforms.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import albumentations as A 3 | 4 | 5 | BBOX_PARAMS = dict( 6 | format="pascal_voc", 7 | min_visibility=0.2, 8 | label_fields=["labels"], 9 | ) 10 | 11 | 12 | def pre_transform(image_size: int = 512): 13 | result = [ 14 | A.LongestMaxSize(image_size), 15 | A.PadIfNeeded( 16 | min_height=image_size, 17 | min_width=image_size, 18 | border_mode=cv2.BORDER_CONSTANT, 19 | value=(0, 0, 0)), 20 | ] 21 | 22 | return A.Compose(result, bbox_params=BBOX_PARAMS) 23 | 24 | 25 | def augmentations(image_size: int): 26 | channel_augs = [ 27 | A.HueSaturationValue(p=0.5), 28 | A.ChannelShuffle(p=0.5), 29 | ] 30 | 31 | result = [ 32 | # *pre_transform(image_size), 33 | A.OneOf([ 34 | A.IAAAdditiveGaussianNoise(), 35 | A.GaussNoise(), 36 | ], p=0.5), 37 | A.OneOf([ 38 | A.MotionBlur(blur_limit=3, p=0.7), 39 | A.MedianBlur(blur_limit=3, p=1.0), 40 | A.Blur(blur_limit=3, p=0.7), 41 | ], p=0.5), 42 | A.OneOf(channel_augs), 43 | A.OneOf([ 44 | A.CLAHE(clip_limit=2), 45 | A.IAASharpen(), 46 | A.IAAEmboss(), 47 | ], p=0.5), 48 | A.RandomBrightnessContrast( 49 | brightness_limit=0.5, 50 | contrast_limit=0.5, 51 | p=0.5 52 | ), 53 | A.RandomGamma(p=0.5), 54 | A.OneOf([ 55 | A.MedianBlur(p=0.5), 56 | A.MotionBlur(p=0.5) 57 | ]), 58 | A.RandomGamma(gamma_limit=(85, 115), p=0.5), 59 | ] 60 | return A.Compose(result, bbox_params=BBOX_PARAMS) 61 | 62 | 63 | def train_transform(image_size: int): 64 | result = A.Compose([ 65 | *pre_transform(image_size), 66 | *augmentations(image_size), 67 | ], bbox_params=BBOX_PARAMS) 68 | return result 69 | 70 | 71 | def valid_transform(image_size: int): 72 | result = A.Compose([ 73 | *pre_transform(image_size), 74 | ], bbox_params=BBOX_PARAMS) 75 | return result 76 | 77 | 78 | def infer_transform(image_size: int): 79 | result = A.Compose(pre_transform(image_size)) 80 | return result 81 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def detach(tensor: torch.Tensor) -> np.ndarray: 6 | return tensor.detach().cpu().numpy() 7 | --------------------------------------------------------------------------------