├── .gitignore ├── LICENSE ├── README.md ├── pipeline.png └── playground └── detection ├── coco ├── anchor.res50.fpn.coco.800size.3x_ms │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── center.res50.fpn.coco.800size.3x_ms │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── fcos.res50.fpn.coco.800size.3x_ms │ ├── README.md │ ├── config.py │ └── net.py ├── loss.res50.fpn.coco.800size.3x_ms │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.3x_ms.3dmf.aux │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.3x_ms.3dmf │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn.aux │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.3x_ms.argmax │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.3x_ms │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py ├── poto.res50.fpn.coco.800size.6x_ms │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py └── poto.res50.fpn.coco.800size.9x_ms │ ├── README.md │ ├── config.py │ ├── fcos.py │ └── net.py └── crowdhuman ├── atss.res50.fpn.crowdhuman.800size.30k ├── README.md ├── atss.py ├── config.py └── net.py ├── fcos.res50.fpn.crowdhuman.800size.30k ├── README.md ├── config.py ├── fcos.py └── net.py ├── poto.res50.fpn.crowdhuman.800size.30k.3dmf.aux ├── README.md ├── config.py ├── fcos.py └── net.py ├── poto.res50.fpn.crowdhuman.800size.30k.3dmf ├── README.md ├── config.py ├── fcos.py └── net.py └── poto.res50.fpn.crowdhuman.800size.30k ├── README.md ├── config.py ├── fcos.py └── net.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | log 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # End-to-End Object Detection with Fully Convolutional Network 2 | 3 | ![GitHub](https://img.shields.io/github/license/Megvii-BaseDetection/DeFCN) 4 | 5 | This project provides an implementation for "[End-to-End Object Detection with Fully Convolutional Network](https://arxiv.org/abs/2012.03544)" on PyTorch. 
6 | 7 | Experiments in the paper were conducted on the internal framework, thus we reimplement them on [cvpods](https://github.com/Megvii-BaseDetection/cvpods) and report details as below. 8 | 9 | ![](./pipeline.png) 10 | 11 | ## Requirements 12 | * [cvpods](https://github.com/Megvii-BaseDetection/cvpods) 13 | * scipy >= 1.5.4 14 | 15 | ## Get Started 16 | 17 | * install cvpods locally (requires cuda to compile) 18 | ```shell 19 | 20 | python3 -m pip install 'git+https://github.com/Megvii-BaseDetection/cvpods.git' 21 | # (add --user if you don't have permission) 22 | 23 | # Or, to install it from a local clone: 24 | git clone https://github.com/Megvii-BaseDetection/cvpods.git 25 | python3 -m pip install -e cvpods 26 | 27 | # Or, 28 | pip install -r requirements.txt 29 | python3 setup.py build develop 30 | ``` 31 | 32 | * prepare datasets 33 | ```shell 34 | cd /path/to/cvpods 35 | cd datasets 36 | ln -s /path/to/your/coco/dataset coco 37 | ``` 38 | 39 | * Train & Test 40 | ```shell 41 | git clone https://github.com/Megvii-BaseDetection/DeFCN.git 42 | cd DeFCN/playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms # for example 43 | 44 | # Train 45 | pods_train --num-gpus 8 46 | 47 | # Test 48 | pods_test --num-gpus 8 \ 49 | MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth # optional 50 | OUTPUT_DIR /path/to/your/save_dir # optional 51 | 52 | # Multi node training 53 | ## sudo apt install net-tools ifconfig 54 | pods_train --num-gpus 8 --num-machines N --machine-rank 0/1/.../N-1 --dist-url "tcp://MASTER_IP:port" 55 | 56 | ``` 57 | 58 | ## Results on COCO2017 val set 59 | 60 | | model | assignment | with NMS | lr sched. | mAP | mAR | download | 61 | |:------|:----------:|:--------:|:---------:|:---:|:---:|:--------:| 62 | | [FCOS](./playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms) | one-to-many | Yes | 3x + ms | 41.4 | 59.1 | [weight](https://drive.google.com/file/d/1j9FmyQQxB2g3J4M7F5DubBtW_7qXHiMv/view?usp=sharing) \| [log](https://drive.google.com/file/d/18RK2jZd7g198hAeAz80BsD_6cF8aY1mb/view?usp=sharing) | 63 | | [FCOS baseline](./playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness) | one-to-many | Yes | 3x + ms | 40.9 | 58.4 | [weight](https://drive.google.com/file/d/1diZQFuJQR6XzPXJsyh1zrRuFYjbqKZ9l/view?usp=sharing) \| [log](https://drive.google.com/file/d/1P1ouRHmSMB4-WZ_yu46lU3kVXlDQAkdE/view?usp=sharing) | 64 | | [Anchor](./playground/detection/coco/anchor.res50.fpn.coco.800size.3x_ms) | one-to-one | No | 3x + ms | 37.1 | 60.5 | [weight](https://drive.google.com/file/d/1ZVAZPoOlwtNVlxkaKEFWPrkH57nRpuKr/view?usp=sharing) \| [log](https://drive.google.com/file/d/1CVTcCJvLfPPCDN2rIhk8gX8vp98oQidM/view?usp=sharing) | 65 | | [Center](./playground/detection/coco/center.res50.fpn.coco.800size.3x_ms) | one-to-one | No | 3x + ms | 35.2 | 61.0 | [weight](https://drive.google.com/file/d/1TgNFHMs9uxjTrMMRTSXarwVWZOkX53av/view?usp=sharing) \| [log](https://drive.google.com/file/d/1zcnQTQaOXPLLoHy9lHwfFdESxhIkqD1R/view?usp=sharing) | 66 | | [Foreground Loss](./playground/detection/coco/loss.res50.fpn.coco.800size.3x_ms) | one-to-one | No | 3x + ms | 38.7 | 62.2 | [weight](https://drive.google.com/file/d/1rTsXbEC5Tj8kwXdjTuHYcfoap4TsnkXV/view?usp=sharing) \| [log](https://drive.google.com/file/d/1EAMPnK7s0TabKKzZhWjALsY1Hege4pFx/view?usp=sharing) | 67 | | [POTO](./playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms) | one-to-one | No | 3x + ms | 39.2 | 61.7 | [weight](https://drive.google.com/file/d/1mlk5dxc34PyXMajinlF_zWXxs84Z28MH/view?usp=sharing) \| 
[log](https://drive.google.com/file/d/1v4TBsbExylfgM7GfGh02vks8AnwSbPDI/view?usp=sharing) | 68 | | [POTO + 3DMF](./playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf) | one-to-one | No | 3x + ms | 40.6 | 61.6 | [weight](https://drive.google.com/file/d/1yUzhK_wtzr4_hqi_WT3YpDryGn_rrltU/view?usp=sharing) \| [log](https://drive.google.com/file/d/1ik5JnVLIzmuYlbCkq_MTEDrd2jWoNprV/view?usp=sharing) | 69 | | [POTO + 3DMF + Aux](./playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf.aux) | mixture\* | No | 3x + ms | 41.4 | 61.5 | [weight](https://drive.google.com/file/d/1bxpmTzVzCkV6BHzca_TVWo3pTOEZMAFS/view?usp=sharing) \| [log](https://drive.google.com/file/d/12LTwMJ3zuBYVa7K0OA0ZRTfC1kxianjW/view?usp=sharing) | 70 | 71 | \* We adopt a one-to-one assignment in POTO and a one-to-many assignment in the auxiliary loss, respectively. 72 | 73 | - `2x + ms` schedule is adopted in the paper, but we adopt `3x + ms` schedule here to achieve higher performance. 74 | - It's normal to observe ~0.3AP noise in POTO. 75 | 76 | ## Results on CrowdHuman val set 77 | 78 | | model | assignment | with NMS | lr sched. | AP50 | mMR | recall | download | 79 | |:------|:----------:|:--------:|:---------:|:----:|:---:|:------:|:--------:| 80 | | [FCOS](./playground/detection/crowdhuman/fcos.res50.fpn.crowdhuman.800size.30k) | one-to-many | Yes | 30k iters | 86.1 | 54.9 | 94.2 | [weight](https://drive.google.com/file/d/1qf34m13kniTK2fo2o8etjMfocezSyosQ/view?usp=sharing) \| [log](https://drive.google.com/file/d/1DgZbvawWGX7rBonS8WgcByIGn7nLNrmA/view?usp=sharing) | 81 | | [ATSS](./playground/detection/crowdhuman/atss.res50.fpn.crowdhuman.800size.30k) | one-to-many | Yes | 30k iters | 87.2 | 49.7 | 94.0 | [weight](https://drive.google.com/file/d/1J30DVItPgLVg9_ps-NdCXWYqaV0PvwAq/view?usp=sharing) \| [log](https://drive.google.com/file/d/1jdL2v_A_fhU6GjYBOzT80ps5CZEZBtx5/view?usp=sharing) | 82 | | [POTO](./playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k) | one-to-one | No | 30k iters | 88.5 | 52.2 | 96.3 | [weight](https://drive.google.com/file/d/1mbP0mmHpva30BcQIxY84XhEMsTGwi-ze/view?usp=sharing) \| [log](https://drive.google.com/file/d/1dmn2ENMkfNXaQUaruSR9Pu1QAAOAhlEC/view?usp=sharing) | 83 | | [POTO + 3DMF](./playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf) | one-to-one | No | 30k iters | 88.8 | 51.0 | 96.6 | [weight](https://drive.google.com/file/d/1d_Z6g54RTIVYHzaUrEogmL3gId2PTBSb/view?usp=sharing) \| [log](https://drive.google.com/file/d/12G-1nm34DjH2xJGRMsiV8OYIZzWooFkt/view?usp=sharing) | 84 | | [POTO + 3DMF + Aux](./playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf.aux) | mixture\* | No | 30k iters | 89.1 | 48.9 | 96.5 | [weight](https://drive.google.com/file/d/1P5uWt4kjQnm-P_WC0MzqLC5TWbIH62UY/view?usp=sharing) \| [log](https://drive.google.com/file/d/1sTcb5B0vjwSC6QJnwJlLRBJQlVcM2WDl/view?usp=sharing) | 85 | 86 | \* We adopt a one-to-one assignment in POTO and a one-to-many assignment in the auxiliary loss, respectively. 87 | 88 | - It's normal to observe ~0.3AP noise in POTO, and ~1.0mMR noise in all methods. 89 | 90 | ## Ablations on COCO2017 val set 91 | 92 | | model | assignment | with NMS | lr sched. 
| mAP | mAR | note | 93 | |:------|:----------:|:--------:|:---------:|:---:|:---:|:----:| 94 | | [POTO](./playground/detection/coco/poto.res50.fpn.coco.800size.6x_ms) | one-to-one | No | 6x + ms | 40.0 | 61.9 | | 95 | | [POTO](./playground/detection/coco/poto.res50.fpn.coco.800size.9x_ms) | one-to-one | No | 9x + ms | 40.2 | 62.3 | | 96 | | [POTO](./playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.argmax) | one-to-one | No | 3x + ms | 39.2 | 61.1 | replace the Hungarian algorithm with `argmax` | 97 | | [POTO + 3DMF](./playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn) | one-to-one | No | 3x + ms | 40.9 | 62.0 | remove GN in 3DMF | 98 | | [POTO + 3DMF + Aux](./playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn.aux) | mixture\* | No | 3x + ms | 41.5 | 61.5 | remove GN in 3DMF | 99 | 100 | \* We adopt a one-to-one assignment in POTO and a one-to-many assignment in the auxiliary loss, respectively. 101 | 102 | - For the `one-to-one` assignment, more training iterations lead to higher performance. 103 | - The `argmax` (also known as top-1) operation is an approximate solution to the bipartite matching problem in dense prediction methods; see the sketch at the end of this README. 104 | - It seems harmless to remove GN in 3DMF, which also leads to higher inference speed. 105 | 106 | ## Acknowledgement 107 | This repo is developed based on cvpods. Please check [cvpods](https://github.com/Megvii-BaseDetection/cvpods) for more details and features. 108 | 109 | ## License 110 | This repo is released under the Apache 2.0 license. Please see the LICENSE file for more information. 111 | 112 | ## Citing 113 | If you use this work in your research or wish to refer to the baseline results published here, please use the following BibTeX entry: 114 | ``` 115 | @article{wang2020end, 116 | title = {End-to-End Object Detection with Fully Convolutional Network}, 117 | author = {Wang, Jianfeng and Song, Lin and Li, Zeming and Sun, Hongbin and Sun, Jian and Zheng, Nanning}, 118 | journal = {arXiv preprint arXiv:2012.03544}, 119 | year = {2020} 120 | } 121 | ``` 122 | 123 | ## Contributing to the project 124 | Any pull requests or issues about the implementation are welcome. If you have any issues with the library (e.g. installation, environments), please refer to [cvpods](https://github.com/Megvii-BaseDetection/cvpods).
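
## Appendix: one-to-one assignment sketch

The ablation above compares the Hungarian algorithm with its `argmax` (top-1) approximation. The following standalone sketch is for illustration only and is not the code used in this repository: it assumes a precomputed `quality` matrix (a stand-in for the POTO matching quality between ground-truth boxes and dense prediction locations) and contrasts the two assignment strategies. The Hungarian branch uses `scipy.optimize.linear_sum_assignment`.

```python
import torch
from scipy.optimize import linear_sum_assignment


def one_to_one_assign(quality: torch.Tensor, use_hungarian: bool = True) -> torch.Tensor:
    """Toy one-to-one assignment over a (num_gt, num_candidates) quality matrix.

    Larger quality means a better match; assumes num_gt <= num_candidates.
    Returns a (num_gt,) LongTensor with the candidate index assigned to each
    ground-truth box.
    """
    if use_hungarian:
        # Global optimum of the bipartite matching problem: every candidate
        # location is used at most once.
        gt_idxs, cand_idxs = linear_sum_assignment(quality.cpu().numpy(), maximize=True)
        return torch.as_tensor(cand_idxs, dtype=torch.long)
    # Top-1 approximation: each ground-truth box independently takes its best
    # candidate, so two boxes may collide on the same location.
    return quality.argmax(dim=1)


if __name__ == "__main__":
    torch.manual_seed(0)
    quality = torch.rand(3, 10)  # 3 ground-truth boxes, 10 candidate locations
    print(one_to_one_assign(quality, use_hungarian=True))
    print(one_to_one_assign(quality, use_hungarian=False))
```

Note that in the paper the matching quality also incorporates the classification score and a spatial prior, so this sketch only illustrates the assignment step itself.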
125 | -------------------------------------------------------------------------------- /pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Megvii-BaseDetection/DeFCN/bd7e24d7408d63edc878d175d9cf798974095049/pipeline.png -------------------------------------------------------------------------------- /playground/detection/coco/anchor.res50.fpn.coco.800size.3x_ms/README.md: -------------------------------------------------------------------------------- 1 | # anchor.res50.fpn.coco.800size.3x_ms 2 | 3 | seed: 10266195 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.371 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.538 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.406 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.235 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.406 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.468 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.320 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.550 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.605 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.407 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.642 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.752 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 37.121 | 53.845 | 40.606 | 23.457 | 40.621 | 46.818 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 52.613 | bicycle | 27.819 | car | 41.466 | 30 | | motorcycle | 38.400 | airplane | 60.967 | bus | 62.903 | 31 | | train | 58.376 | truck | 30.933 | boat | 22.104 | 32 | | traffic light | 25.824 | fire hydrant | 64.486 | stop sign | 62.992 | 33 | | parking meter | 36.029 | bench | 19.416 | bird | 32.151 | 34 | | cat | 61.740 | dog | 53.929 | horse | 53.916 | 35 | | sheep | 47.808 | cow | 53.603 | elephant | 58.405 | 36 | | bear | 64.446 | zebra | 67.133 | giraffe | 64.894 | 37 | | backpack | 13.931 | umbrella | 35.645 | handbag | 13.484 | 38 | | tie | 29.557 | suitcase | 34.717 | frisbee | 60.803 | 39 | | skis | 18.115 | snowboard | 26.962 | sports ball | 47.191 | 40 | | kite | 41.637 | baseball bat | 21.392 | baseball glove | 32.027 | 41 | | skateboard | 45.930 | surfboard | 28.968 | tennis racket | 42.757 | 42 | | bottle | 34.439 | wine glass | 33.286 | cup | 38.131 | 43 | | fork | 26.456 | knife | 14.392 | spoon | 12.133 | 44 | | bowl | 36.752 | banana | 20.660 | apple | 18.448 | 45 | | sandwich | 28.772 | orange | 28.911 | broccoli | 21.992 | 46 | | carrot | 18.264 | hot dog | 28.677 | pizza | 47.652 | 47 | | donut | 40.860 | cake | 30.599 | chair | 24.919 | 48 | | couch | 40.305 | potted plant | 23.166 | bed | 34.359 | 49 | | dining table | 24.119 | toilet | 56.667 | tv | 51.158 | 50 | | laptop | 52.544 | mouse | 58.780 | remote | 25.107 | 51 | | keyboard | 47.750 | cell phone | 30.600 | microwave | 49.704 | 52 | | oven | 27.081 | toaster | 26.784 | sink | 32.348 | 53 | | refrigerator | 49.154 | book | 12.779 | clock | 47.372 | 54 | | vase | 32.067 | 
scissors | 22.869 | teddy bear | 39.427 | 55 | | hair drier | 5.461 | toothbrush | 19.293 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/anchor.res50.fpn.coco.800size.3x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | ANCHOR_GENERATOR=dict( 10 | SIZES=[[32], [64], [128], [256], [512]], 11 | ASPECT_RATIOS=[[1.0]], 12 | OFFSET=0.5, 13 | ), 14 | FCOS=dict( 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | IOU_TOPK=1, 24 | ), 25 | NMS_TYPE=None, 26 | ), 27 | DATASETS=dict( 28 | TRAIN=("coco_2017_train",), 29 | TEST=("coco_2017_val",), 30 | ), 31 | SOLVER=dict( 32 | CHECKPOINT_PERIOD=10000, 33 | LR_SCHEDULER=dict( 34 | MAX_ITER=270000, 35 | STEPS=(210000, 250000), 36 | ), 37 | OPTIMIZER=dict( 38 | BASE_LR=0.01, 39 | ), 40 | IMS_PER_BATCH=16, 41 | ), 42 | INPUT=dict( 43 | AUG=dict( 44 | TRAIN_PIPELINES=[ 45 | ("ResizeShortestEdge", 46 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 47 | ("RandomFlip", dict()), 48 | ], 49 | TEST_PIPELINES=[ 50 | ("ResizeShortestEdge", 51 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 52 | ], 53 | ) 54 | ), 55 | TEST=dict( 56 | EVAL_PEROID=10000, 57 | ), 58 | OUTPUT_DIR=osp.join( 59 | '/data/Outputs/model_logs/cvpods_playground', 60 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /playground/detection/coco/anchor.res50.fpn.coco.800size.3x_ms/fcos.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from typing import List 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | 9 | from cvpods.layers import ShapeSpec, cat, generalized_batched_nms 10 | from cvpods.modeling.box_regression import Box2BoxTransform 11 | from cvpods.modeling.losses import iou_loss, sigmoid_focal_loss_jit 12 | from cvpods.modeling.meta_arch.retinanet import ( 13 | permute_to_N_HWA_K, 14 | permute_all_cls_and_box_to_N_HWA_K_and_concat 15 | ) 16 | from cvpods.modeling.postprocessing import detector_postprocess 17 | from cvpods.structures import Boxes, ImageList, Instances, pairwise_iou 18 | from cvpods.utils import comm, get_event_storage, log_first_n 19 | 20 | 21 | class FCOS(nn.Module): 22 | """ 23 | Implement FCOS (https://arxiv.org/abs/1904.01355). 
24 | """ 25 | def __init__(self, cfg): 26 | super().__init__() 27 | 28 | self.device = torch.device(cfg.MODEL.DEVICE) 29 | 30 | # fmt: off 31 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 32 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 33 | # Loss parameters: 34 | self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA 35 | self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA 36 | self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE 37 | self.reg_weight = cfg.MODEL.FCOS.REG_WEIGHT 38 | # Inference parameters: 39 | self.score_threshold = cfg.MODEL.FCOS.SCORE_THRESH_TEST 40 | self.topk_candidates = cfg.MODEL.FCOS.TOPK_CANDIDATES_TEST 41 | self.nms_threshold = cfg.MODEL.FCOS.NMS_THRESH_TEST 42 | self.nms_type = cfg.MODEL.NMS_TYPE 43 | self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE 44 | # fmt: on 45 | 46 | self.backbone = cfg.build_backbone( 47 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 48 | 49 | backbone_shape = self.backbone.output_shape() 50 | feature_shapes = [backbone_shape[f] for f in self.in_features] 51 | self.head = FCOSHead(cfg, feature_shapes) 52 | self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes) 53 | 54 | # Matching and loss 55 | self.box2box_transform = Box2BoxTransform( 56 | weights=cfg.MODEL.FCOS.BBOX_REG_WEIGHTS) 57 | self.iou_topk = cfg.MODEL.POTO.IOU_TOPK 58 | 59 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 60 | 3, 1, 1) 61 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 62 | 3, 1, 1) 63 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 64 | self.to(self.device) 65 | 66 | def forward(self, batched_inputs): 67 | """ 68 | Args: 69 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 70 | Each item in the list contains the inputs for one image. 71 | For now, each item in the list is a dict that contains: 72 | 73 | * image: Tensor, image in (C, H, W) format. 74 | * instances: Instances 75 | 76 | Other information that's included in the original dicts, such as: 77 | 78 | * "height", "width" (int): the output resolution of the model, used in inference. 79 | See :meth:`postprocess` for details. 80 | Returns: 81 | dict[str: Tensor]: 82 | mapping from a named loss to a tensor storing the loss. Used during training only. 
83 | """ 84 | images = self.preprocess_image(batched_inputs) 85 | if "instances" in batched_inputs[0]: 86 | gt_instances = [ 87 | x["instances"].to(self.device) for x in batched_inputs 88 | ] 89 | elif "targets" in batched_inputs[0]: 90 | log_first_n( 91 | logging.WARN, 92 | "'targets' in the model inputs is now renamed to 'instances'!", 93 | n=10) 94 | gt_instances = [ 95 | x["targets"].to(self.device) for x in batched_inputs 96 | ] 97 | else: 98 | gt_instances = None 99 | 100 | features = self.backbone(images.tensor) 101 | features = [features[f] for f in self.in_features] 102 | box_cls, box_delta = self.head(features) 103 | anchors = self.anchor_generator(features) 104 | 105 | if self.training: 106 | gt_classes, gt_anchors_reg_deltas = self.get_ground_truth( 107 | anchors, gt_instances) 108 | return self.losses(gt_classes, gt_anchors_reg_deltas, box_cls, 109 | box_delta, anchors) 110 | else: 111 | results = self.inference(box_cls, box_delta, anchors, images) 112 | processed_results = [] 113 | for results_per_image, input_per_image, image_size in zip( 114 | results, batched_inputs, images.image_sizes): 115 | height = input_per_image.get("height", image_size[0]) 116 | width = input_per_image.get("width", image_size[1]) 117 | r = detector_postprocess(results_per_image, height, width) 118 | processed_results.append({"instances": r}) 119 | return processed_results 120 | 121 | def losses(self, gt_classes, gt_anchors_deltas, pred_class_logits, 122 | pred_anchor_deltas, anchors): 123 | """ 124 | Args: 125 | For `gt_classes` and `gt_anchors_deltas` parameters, see 126 | :meth:`FCOS.get_ground_truth`. 127 | Their shapes are (N, R) and (N, R, 4), respectively, where R is 128 | the total number of anchors across levels, i.e. sum(Hi x Wi) 129 | For `pred_class_logits` and `pred_anchor_deltas`, see 130 | :meth:`FCOSHead.forward`. 131 | 132 | Returns: 133 | dict[str: Tensor]: 134 | mapping from a named loss to a scalar tensor 135 | storing the loss. Used during training only. The dict keys are: 136 | "loss_cls" and "loss_box_reg" 137 | """ 138 | pred_class_logits, pred_anchor_deltas = \ 139 | permute_all_cls_and_box_to_N_HWA_K_and_concat( 140 | pred_class_logits, pred_anchor_deltas, self.num_classes 141 | ) # Shapes: (N x R, K) and (N x R, 4), respectively. 
142 | 143 | gt_classes = gt_classes.flatten() 144 | gt_anchors_deltas = gt_anchors_deltas.view(-1, 4) 145 | 146 | valid_idxs = gt_classes >= 0 147 | foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes) 148 | num_foreground = foreground_idxs.sum() 149 | 150 | gt_classes_target = torch.zeros_like(pred_class_logits) 151 | gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1 152 | 153 | num_foreground = comm.all_reduce(num_foreground) / float(comm.get_world_size()) 154 | 155 | # logits loss 156 | loss_cls = sigmoid_focal_loss_jit( 157 | pred_class_logits[valid_idxs], 158 | gt_classes_target[valid_idxs], 159 | alpha=self.focal_loss_alpha, 160 | gamma=self.focal_loss_gamma, 161 | reduction="sum", 162 | ) / max(1.0, num_foreground) 163 | 164 | anchors = Boxes.cat([Boxes.cat(anchors_i) for anchors_i in anchors]) 165 | pred_anchor_deltas = self.box2box_transform.apply_deltas( 166 | pred_anchor_deltas, anchors.tensor 167 | ) 168 | gt_anchors_deltas = self.box2box_transform.apply_deltas( 169 | gt_anchors_deltas, anchors.tensor 170 | ) 171 | 172 | # regression loss 173 | loss_box_reg = iou_loss( 174 | pred_anchor_deltas[foreground_idxs], 175 | gt_anchors_deltas[foreground_idxs], 176 | box_mode="xyxy", 177 | loss_type=self.iou_loss_type, 178 | reduction="sum", 179 | ) / max(1.0, num_foreground) * self.reg_weight 180 | 181 | return { 182 | "loss_cls": loss_cls, 183 | "loss_box_reg": loss_box_reg, 184 | } 185 | 186 | @torch.no_grad() 187 | def get_ground_truth(self, anchors, targets): 188 | """ 189 | Args: 190 | anchors (list[list[Boxes]]): a list of N=#image elements. Each is a 191 | list of #feature level Boxes. The Boxes contains anchors of 192 | this image on the specific feature level. 193 | targets (list[Instances]): a list of N `Instances`s. The i-th 194 | `Instances` contains the ground-truth per-instance annotations 195 | for the i-th input image. Specify `targets` during training only. 196 | 197 | Returns: 198 | gt_classes (Tensor): 199 | An integer tensor of shape (N, R) storing ground-truth 200 | labels for each anchor. 201 | R is the total number of anchors, i.e. the sum of Hi x Wi for all levels. 202 | Anchors with an IoU with some target higher than the foreground threshold 203 | are assigned their corresponding label in the [0, K-1] range. 204 | Anchors whose IoU are below the background threshold are assigned 205 | the label "K". Anchors whose IoU are between the foreground and background 206 | thresholds are assigned a label "-1", i.e. ignore. 207 | gt_anchors_deltas (Tensor): 208 | Shape (N, R, 4). 209 | The last dimension represents ground-truth box2box transform 210 | targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. 211 | The values in the tensor are meaningful only when the corresponding 212 | anchor is labeled as foreground. 
213 | """ 214 | gt_classes = [] 215 | gt_anchors_deltas = [] 216 | 217 | num_fg = 0 218 | num_gt = 0 219 | 220 | for anchors_per_image, targets_per_image in zip(anchors, targets): 221 | anchors_per_image = Boxes.cat(anchors_per_image) 222 | 223 | gt_boxes = targets_per_image.gt_boxes 224 | 225 | match_quality_matrix = pairwise_iou(gt_boxes, anchors_per_image) 226 | 227 | _, is_positive = match_quality_matrix.topk(self.iou_topk, dim=1) 228 | is_foreground = torch.zeros_like( 229 | match_quality_matrix, dtype=torch.bool 230 | ).scatter_(1, is_positive, True) 231 | 232 | match_quality_matrix[~is_foreground] = -1 233 | 234 | # if there are still more than one objects for a position, 235 | # we choose the one with maximum quality 236 | anchor_labels, gt_matched_idxs = match_quality_matrix.max(dim=0) 237 | 238 | num_fg += (anchor_labels != -1).sum().item() 239 | num_gt += len(targets_per_image) 240 | 241 | # ground truth box regression 242 | gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas( 243 | anchors_per_image.tensor, gt_boxes[gt_matched_idxs].tensor) 244 | 245 | # ground truth classes 246 | has_gt = len(targets_per_image) > 0 247 | if has_gt: 248 | gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] 249 | # Anchors with label -1 are treated as background. 250 | gt_classes_i[anchor_labels == -1] = self.num_classes 251 | else: 252 | gt_classes_i = torch.zeros_like( 253 | gt_matched_idxs) + self.num_classes 254 | 255 | gt_classes.append(gt_classes_i) 256 | gt_anchors_deltas.append(gt_anchors_reg_deltas_i) 257 | 258 | get_event_storage().put_scalar("num_fg_per_gt", num_fg / num_gt) 259 | 260 | return torch.stack(gt_classes), torch.stack(gt_anchors_deltas) 261 | 262 | def inference(self, box_cls, box_delta, anchors, images): 263 | """ 264 | Arguments: 265 | box_cls, box_delta: Same as the output of :meth:`FCOSHead.forward` 266 | anchors (list[list[Boxes]]): a list of #images elements. Each is a 267 | list of #feature level Boxes. The Boxes contain anchors of this 268 | image on the specific feature level. 269 | images (ImageList): the input images 270 | 271 | Returns: 272 | results (List[Instances]): a list of #images elements. 273 | """ 274 | assert len(anchors) == len(images) 275 | results = [] 276 | 277 | box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls] 278 | box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta] 279 | # list[Tensor], one per level, each has shape (N, Hi x Wi x A, K or 4) 280 | 281 | for img_idx, anchors_per_image in enumerate(anchors): 282 | image_size = images.image_sizes[img_idx] 283 | box_cls_per_image = [ 284 | box_cls_per_level[img_idx] for box_cls_per_level in box_cls 285 | ] 286 | box_reg_per_image = [ 287 | box_reg_per_level[img_idx] for box_reg_per_level in box_delta 288 | ] 289 | results_per_image = self.inference_single_image( 290 | box_cls_per_image, box_reg_per_image, anchors_per_image, 291 | tuple(image_size)) 292 | results.append(results_per_image) 293 | return results 294 | 295 | def inference_single_image(self, box_cls, box_delta, anchors, image_size): 296 | """ 297 | Single-image inference. Return bounding-box detection results by thresholding 298 | on scores and applying non-maximum suppression (NMS). 299 | 300 | Arguments: 301 | box_cls (list[Tensor]): list of #feature levels. Each entry contains 302 | tensor of size (H x W, K) 303 | box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. 304 | anchors (list[Boxes]): list of #feature levels. 
Each entry contains 305 | a Boxes object, which contains all the anchors for that 306 | image in that feature level. 307 | image_size (tuple(H, W)): a tuple of the image height and width. 308 | 309 | Returns: 310 | Same as `inference`, but for only one image. 311 | """ 312 | boxes_all = [] 313 | scores_all = [] 314 | class_idxs_all = [] 315 | 316 | # Iterate over every feature level 317 | for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): 318 | # (HxWxK,) 319 | box_cls_i = box_cls_i.sigmoid_().flatten() 320 | 321 | # Keep top k top scoring indices only. 322 | num_topk = min(self.topk_candidates, box_reg_i.size(0)) 323 | # torch.sort is actually faster than .topk (at least on GPUs) 324 | predicted_prob, topk_idxs = box_cls_i.sort(descending=True) 325 | predicted_prob = predicted_prob[:num_topk] 326 | topk_idxs = topk_idxs[:num_topk] 327 | 328 | # filter out the proposals with low confidence score 329 | keep_idxs = predicted_prob > self.score_threshold 330 | predicted_prob = predicted_prob[keep_idxs] 331 | topk_idxs = topk_idxs[keep_idxs] 332 | 333 | anchor_idxs = topk_idxs // self.num_classes 334 | classes_idxs = topk_idxs % self.num_classes 335 | 336 | box_reg_i = box_reg_i[anchor_idxs] 337 | anchors_i = anchors_i[anchor_idxs] 338 | # predict boxes 339 | predicted_boxes = self.box2box_transform.apply_deltas( 340 | box_reg_i, anchors_i.tensor) 341 | 342 | boxes_all.append(predicted_boxes) 343 | scores_all.append(predicted_prob) 344 | class_idxs_all.append(classes_idxs) 345 | 346 | boxes_all, scores_all, class_idxs_all = [ 347 | cat(x) for x in [boxes_all, scores_all, class_idxs_all] 348 | ] 349 | 350 | if self.nms_type is None: 351 | # strategies above (e.g. topk_candidates and score_threshold) are 352 | # useless for POTO, just keep them for debug and analysis 353 | keep = scores_all.argsort(descending=True) 354 | else: 355 | keep = generalized_batched_nms( 356 | boxes_all, scores_all, class_idxs_all, 357 | self.nms_threshold, nms_type=self.nms_type 358 | ) 359 | keep = keep[:self.max_detections_per_image] 360 | 361 | result = Instances(image_size) 362 | result.pred_boxes = Boxes(boxes_all[keep]) 363 | result.scores = scores_all[keep] 364 | result.pred_classes = class_idxs_all[keep] 365 | return result 366 | 367 | def preprocess_image(self, batched_inputs): 368 | """ 369 | Normalize, pad and batch the input images. 370 | """ 371 | images = [x["image"].to(self.device) for x in batched_inputs] 372 | images = [self.normalizer(x) for x in images] 373 | images = ImageList.from_tensors(images, 374 | self.backbone.size_divisibility) 375 | return images 376 | 377 | def _inference_for_ms_test(self, batched_inputs): 378 | """ 379 | function used for multiscale test, will be refactor in the future. 380 | The same input with `forward` function. 
381 | """ 382 | assert not self.training, "inference mode with training=True" 383 | assert len(batched_inputs) == 1, "inference image number > 1" 384 | images = self.preprocess_image(batched_inputs) 385 | 386 | features = self.backbone(images.tensor) 387 | features = [features[f] for f in self.in_features] 388 | box_cls, box_delta = self.head(features) 389 | anchors = self.anchor_generator(features) 390 | 391 | results = self.inference(box_cls, box_delta, anchors, images) 392 | for results_per_image, input_per_image, image_size in zip( 393 | results, batched_inputs, images.image_sizes 394 | ): 395 | height = input_per_image.get("height", image_size[0]) 396 | width = input_per_image.get("width", image_size[1]) 397 | processed_results = detector_postprocess(results_per_image, height, width) 398 | return processed_results 399 | 400 | 401 | class FCOSHead(nn.Module): 402 | """ 403 | The head used in FCOS for object classification and box regression. 404 | It has two subnets for the two tasks, with a common structure but separate parameters. 405 | """ 406 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 407 | super().__init__() 408 | # fmt: off 409 | in_channels = input_shape[0].channels 410 | num_classes = cfg.MODEL.FCOS.NUM_CLASSES 411 | num_convs = cfg.MODEL.FCOS.NUM_CONVS 412 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 413 | num_anchors = cfg.build_anchor_generator(cfg, input_shape).num_cell_anchors 414 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 415 | self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS 416 | # fmt: on 417 | assert len(set(num_anchors)) == 1, "using differenct num_anchors value is not supported" 418 | num_anchors = num_anchors[0] 419 | 420 | cls_subnet = [] 421 | bbox_subnet = [] 422 | for _ in range(num_convs): 423 | cls_subnet.append( 424 | nn.Conv2d(in_channels, 425 | in_channels, 426 | kernel_size=3, 427 | stride=1, 428 | padding=1)) 429 | cls_subnet.append(nn.GroupNorm(32, in_channels)) 430 | cls_subnet.append(nn.ReLU()) 431 | bbox_subnet.append( 432 | nn.Conv2d(in_channels, 433 | in_channels, 434 | kernel_size=3, 435 | stride=1, 436 | padding=1)) 437 | bbox_subnet.append(nn.GroupNorm(32, in_channels)) 438 | bbox_subnet.append(nn.ReLU()) 439 | 440 | self.cls_subnet = nn.Sequential(*cls_subnet) 441 | self.bbox_subnet = nn.Sequential(*bbox_subnet) 442 | self.cls_score = nn.Conv2d(in_channels, 443 | num_anchors * num_classes, 444 | kernel_size=3, 445 | stride=1, 446 | padding=1) 447 | self.bbox_pred = nn.Conv2d(in_channels, 448 | num_anchors * 4, 449 | kernel_size=3, 450 | stride=1, 451 | padding=1) 452 | 453 | # Initialization 454 | for modules in [ 455 | self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred 456 | ]: 457 | for layer in modules.modules(): 458 | if isinstance(layer, nn.Conv2d): 459 | torch.nn.init.normal_(layer.weight, mean=0, std=0.01) 460 | torch.nn.init.constant_(layer.bias, 0) 461 | if isinstance(layer, nn.GroupNorm): 462 | torch.nn.init.constant_(layer.weight, 1) 463 | torch.nn.init.constant_(layer.bias, 0) 464 | 465 | # Use prior in model initialization to improve stability 466 | bias_value = -math.log((1 - prior_prob) / prior_prob) 467 | torch.nn.init.constant_(self.cls_score.bias, bias_value) 468 | 469 | def forward(self, features): 470 | """ 471 | Arguments: 472 | features (list[Tensor]): FPN feature map tensors in high to low resolution. 473 | Each tensor in the list correspond to different feature levels. 474 | 475 | Returns: 476 | logits (list[Tensor]): #lvl tensors, each has shape (N, K, Hi, Wi). 
477 | The tensor predicts the classification probability 478 | at each spatial position for each of the K object classes. 479 | bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, 4, Hi, Wi). 480 | The tensor predicts 4-vector (dl,dt,dr,db) box 481 | regression values for every anchor. These values are the 482 | relative offset between the anchor and the ground truth box. 483 | """ 484 | logits = [] 485 | bbox_reg = [] 486 | for feature in features: 487 | cls_subnet = self.cls_subnet(feature) 488 | bbox_subnet = self.bbox_subnet(feature) 489 | 490 | logits.append(self.cls_score(cls_subnet)) 491 | bbox_reg.append(self.bbox_pred(bbox_subnet)) 492 | return logits, bbox_reg 493 | -------------------------------------------------------------------------------- /playground/detection/coco/anchor.res50.fpn.coco.800size.3x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import DefaultAnchorGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_anchor_generator(cfg, input_shape): 27 | 28 | return DefaultAnchorGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_anchor_generator = build_anchor_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/center.res50.fpn.coco.800size.3x_ms/README.md: -------------------------------------------------------------------------------- 1 | # center.res50.fpn.coco.800size.3x_ms 2 | 3 | seed: 23440541 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.352 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.491 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.388 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.212 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.391 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.436 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.323 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.549 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.610 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.401 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.646 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.775 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 35.184 | 49.112 | 38.833 | 21.240 | 39.081 | 43.624 | 24 | 25 | ### Per-category bbox 
AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 50.745 | bicycle | 24.736 | car | 40.273 | 30 | | motorcycle | 35.226 | airplane | 56.666 | bus | 58.531 | 31 | | train | 56.242 | truck | 30.254 | boat | 21.643 | 32 | | traffic light | 23.882 | fire hydrant | 59.002 | stop sign | 57.892 | 33 | | parking meter | 38.458 | bench | 16.190 | bird | 30.530 | 34 | | cat | 56.696 | dog | 49.404 | horse | 49.831 | 35 | | sheep | 44.098 | cow | 52.376 | elephant | 57.041 | 36 | | bear | 61.582 | zebra | 64.655 | giraffe | 63.143 | 37 | | backpack | 12.811 | umbrella | 32.360 | handbag | 11.444 | 38 | | tie | 27.783 | suitcase | 31.496 | frisbee | 57.670 | 39 | | skis | 17.429 | snowboard | 27.009 | sports ball | 44.850 | 40 | | kite | 41.692 | baseball bat | 22.479 | baseball glove | 30.474 | 41 | | skateboard | 45.098 | surfboard | 29.039 | tennis racket | 39.681 | 42 | | bottle | 33.861 | wine glass | 31.908 | cup | 34.875 | 43 | | fork | 23.307 | knife | 12.405 | spoon | 10.374 | 44 | | bowl | 35.602 | banana | 18.808 | apple | 14.727 | 45 | | sandwich | 27.692 | orange | 29.594 | broccoli | 17.581 | 46 | | carrot | 17.170 | hot dog | 26.330 | pizza | 44.958 | 47 | | donut | 39.596 | cake | 27.949 | chair | 22.646 | 48 | | couch | 37.705 | potted plant | 20.811 | bed | 35.757 | 49 | | dining table | 22.243 | toilet | 53.837 | tv | 48.742 | 50 | | laptop | 47.942 | mouse | 56.732 | remote | 23.299 | 51 | | keyboard | 43.587 | cell phone | 28.547 | microwave | 49.056 | 52 | | oven | 27.945 | toaster | 38.837 | sink | 31.613 | 53 | | refrigerator | 44.433 | book | 10.468 | clock | 44.014 | 54 | | vase | 34.515 | scissors | 21.610 | teddy bear | 36.052 | 55 | | hair drier | 3.383 | toothbrush | 13.791 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/center.res50.fpn.coco.800size.3x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | OBJECT_SIZES_OF_INTEREST=[ 24 | [-1, 64], 25 | [64, 128], 26 | [128, 256], 27 | [256, 512], 28 | [512, float("inf")], 29 | ], 30 | DISTANCE_TOPK=1, 31 | ), 32 | NMS_TYPE=None, 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | CHECKPOINT_PERIOD=10000, 40 | LR_SCHEDULER=dict( 41 | MAX_ITER=270000, 42 | STEPS=(210000, 250000), 43 | ), 44 | OPTIMIZER=dict( 45 | BASE_LR=0.01, 46 | ), 47 | IMS_PER_BATCH=16, 48 | ), 49 | INPUT=dict( 50 | AUG=dict( 51 | TRAIN_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 54 | ("RandomFlip", dict()), 55 | ], 56 | TEST_PIPELINES=[ 57 | ("ResizeShortestEdge", 58 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 59 | ], 60 | ) 61 | ), 62 | TEST=dict( 63 | EVAL_PEROID=10000, 64 | ), 
65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/cvpods_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class CustomFCOSConfig(FCOSConfig): 72 | def __init__(self): 73 | super(CustomFCOSConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = CustomFCOSConfig() 78 | -------------------------------------------------------------------------------- /playground/detection/coco/center.res50.fpn.coco.800size.3x_ms/fcos.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from typing import List 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | 9 | from cvpods.layers import ShapeSpec, cat, generalized_batched_nms 10 | from cvpods.modeling.box_regression import Shift2BoxTransform 11 | from cvpods.modeling.losses import iou_loss, sigmoid_focal_loss_jit 12 | from cvpods.modeling.meta_arch.fcos import Scale 13 | from cvpods.modeling.meta_arch.retinanet import ( 14 | permute_to_N_HWA_K, 15 | permute_all_cls_and_box_to_N_HWA_K_and_concat 16 | ) 17 | from cvpods.modeling.postprocessing import detector_postprocess 18 | from cvpods.structures import Boxes, ImageList, Instances 19 | from cvpods.utils import comm, get_event_storage, log_first_n 20 | 21 | 22 | class FCOS(nn.Module): 23 | """ 24 | Implement FCOS (https://arxiv.org/abs/1904.01355). 25 | """ 26 | def __init__(self, cfg): 27 | super().__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | # fmt: off 32 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 33 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 34 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 35 | # Loss parameters: 36 | self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA 37 | self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA 38 | self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE 39 | self.reg_weight = cfg.MODEL.FCOS.REG_WEIGHT 40 | # Inference parameters: 41 | self.score_threshold = cfg.MODEL.FCOS.SCORE_THRESH_TEST 42 | self.topk_candidates = cfg.MODEL.FCOS.TOPK_CANDIDATES_TEST 43 | self.nms_threshold = cfg.MODEL.FCOS.NMS_THRESH_TEST 44 | self.nms_type = cfg.MODEL.NMS_TYPE 45 | self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE 46 | # fmt: on 47 | 48 | self.backbone = cfg.build_backbone( 49 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 50 | 51 | backbone_shape = self.backbone.output_shape() 52 | feature_shapes = [backbone_shape[f] for f in self.in_features] 53 | self.head = FCOSHead(cfg, feature_shapes) 54 | self.shift_generator = cfg.build_shift_generator(cfg, feature_shapes) 55 | 56 | # Matching and loss 57 | self.shift2box_transform = Shift2BoxTransform( 58 | weights=cfg.MODEL.FCOS.BBOX_REG_WEIGHTS) 59 | self.object_sizes_of_interest = cfg.MODEL.POTO.OBJECT_SIZES_OF_INTEREST 60 | self.distance_topk = cfg.MODEL.POTO.DISTANCE_TOPK 61 | 62 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 63 | 3, 1, 1) 64 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 65 | 3, 1, 1) 66 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 67 | self.to(self.device) 68 | 69 | def forward(self, batched_inputs): 70 | """ 71 | Args: 72 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 73 | Each item in the list contains the inputs for one image. 74 | For now, each item in the list is a dict that contains: 75 | 76 | * image: Tensor, image in (C, H, W) format. 
77 | * instances: Instances 78 | 79 | Other information that's included in the original dicts, such as: 80 | 81 | * "height", "width" (int): the output resolution of the model, used in inference. 82 | See :meth:`postprocess` for details. 83 | Returns: 84 | dict[str: Tensor]: 85 | mapping from a named loss to a tensor storing the loss. Used during training only. 86 | """ 87 | images = self.preprocess_image(batched_inputs) 88 | if "instances" in batched_inputs[0]: 89 | gt_instances = [ 90 | x["instances"].to(self.device) for x in batched_inputs 91 | ] 92 | elif "targets" in batched_inputs[0]: 93 | log_first_n( 94 | logging.WARN, 95 | "'targets' in the model inputs is now renamed to 'instances'!", 96 | n=10) 97 | gt_instances = [ 98 | x["targets"].to(self.device) for x in batched_inputs 99 | ] 100 | else: 101 | gt_instances = None 102 | 103 | features = self.backbone(images.tensor) 104 | features = [features[f] for f in self.in_features] 105 | box_cls, box_delta = self.head(features) 106 | shifts = self.shift_generator(features) 107 | 108 | if self.training: 109 | gt_classes, gt_shifts_reg_deltas = self.get_ground_truth( 110 | shifts, gt_instances) 111 | return self.losses(gt_classes, gt_shifts_reg_deltas, box_cls, box_delta) 112 | else: 113 | results = self.inference(box_cls, box_delta, shifts, images) 114 | processed_results = [] 115 | for results_per_image, input_per_image, image_size in zip( 116 | results, batched_inputs, images.image_sizes): 117 | height = input_per_image.get("height", image_size[0]) 118 | width = input_per_image.get("width", image_size[1]) 119 | r = detector_postprocess(results_per_image, height, width) 120 | processed_results.append({"instances": r}) 121 | return processed_results 122 | 123 | def losses(self, gt_classes, gt_shifts_deltas, pred_class_logits, 124 | pred_shift_deltas): 125 | """ 126 | Args: 127 | For `gt_classes` and `gt_shifts_deltas` parameters, see 128 | :meth:`FCOS.get_ground_truth`. 129 | Their shapes are (N, R) and (N, R, 4), respectively, where R is 130 | the total number of shifts across levels, i.e. sum(Hi x Wi) 131 | For `pred_class_logits` and `pred_shift_deltas`, see 132 | :meth:`FCOSHead.forward`. 133 | 134 | Returns: 135 | dict[str: Tensor]: 136 | mapping from a named loss to a scalar tensor 137 | storing the loss. Used during training only. The dict keys are: 138 | "loss_cls" and "loss_box_reg" 139 | """ 140 | pred_class_logits, pred_shift_deltas = \ 141 | permute_all_cls_and_box_to_N_HWA_K_and_concat( 142 | pred_class_logits, pred_shift_deltas, self.num_classes 143 | ) # Shapes: (N x R, K) and (N x R, 4), respectively. 
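        # The targets are flattened below to match: classification targets become a
        # one-hot matrix over the K classes (background rows stay all zero), the focal
        # loss is summed over valid shifts and divided by the number of foreground
        # shifts averaged across GPUs, and the box loss is an IoU-style loss (GIoU in
        # this config) on foreground shifts only, scaled by REG_WEIGHT.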
144 | 145 | gt_classes = gt_classes.flatten() 146 | gt_shifts_deltas = gt_shifts_deltas.view(-1, 4) 147 | 148 | valid_idxs = gt_classes >= 0 149 | foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes) 150 | num_foreground = foreground_idxs.sum() 151 | 152 | gt_classes_target = torch.zeros_like(pred_class_logits) 153 | gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1 154 | 155 | num_foreground = comm.all_reduce(num_foreground) / float(comm.get_world_size()) 156 | 157 | # logits loss 158 | loss_cls = sigmoid_focal_loss_jit( 159 | pred_class_logits[valid_idxs], 160 | gt_classes_target[valid_idxs], 161 | alpha=self.focal_loss_alpha, 162 | gamma=self.focal_loss_gamma, 163 | reduction="sum", 164 | ) / max(1.0, num_foreground) 165 | 166 | # regression loss 167 | loss_box_reg = iou_loss( 168 | pred_shift_deltas[foreground_idxs], 169 | gt_shifts_deltas[foreground_idxs], 170 | box_mode="ltrb", 171 | loss_type=self.iou_loss_type, 172 | reduction="sum", 173 | ) / max(1.0, num_foreground) * self.reg_weight 174 | 175 | return { 176 | "loss_cls": loss_cls, 177 | "loss_box_reg": loss_box_reg, 178 | } 179 | 180 | @torch.no_grad() 181 | def get_ground_truth(self, shifts, targets): 182 | """ 183 | Args: 184 | shifts (list[list[Tensor]]): a list of N=#image elements. Each is a 185 | list of #feature level tensors. The tensors contains shifts of 186 | this image on the specific feature level. 187 | targets (list[Instances]): a list of N `Instances`s. The i-th 188 | `Instances` contains the ground-truth per-instance annotations 189 | for the i-th input image. Specify `targets` during training only. 190 | 191 | Returns: 192 | gt_classes (Tensor): 193 | An integer tensor of shape (N, R) storing ground-truth 194 | labels for each shift. 195 | R is the total number of shifts, i.e. the sum of Hi x Wi for all levels. 196 | Shifts in the valid boxes are assigned their corresponding label in the 197 | [0, K-1] range. Shifts in the background are assigned the label "K". 198 | Shifts in the ignore areas are assigned a label "-1", i.e. ignore. 199 | gt_shifts_deltas (Tensor): 200 | Shape (N, R, 4). 201 | The last dimension represents ground-truth shift2box transform 202 | targets (dl, dt, dr, db) that map each shift to its matched ground-truth box. 203 | The values in the tensor are meaningful only when the corresponding 204 | shift is labeled as foreground. 
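
        Note: in this "center" assignment, every ground-truth box first keeps its
        DISTANCE_TOPK closest shifts (by distance to the box center) on each feature
        level as candidates; candidates for which the box does not fit the level's
        object size range are dropped, and a shift claimed by several boxes is given
        to the box with the smallest area.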
205 | """ 206 | gt_classes = [] 207 | gt_shifts_deltas = [] 208 | 209 | num_fg = 0 210 | num_gt = 0 211 | 212 | for shifts_per_image, targets_per_image in zip(shifts, targets): 213 | object_sizes_of_interest = torch.cat([ 214 | shifts_i.new_tensor(size).unsqueeze(0).expand( 215 | shifts_i.size(0), -1) for shifts_i, size in zip( 216 | shifts_per_image, self.object_sizes_of_interest) 217 | ], dim=0) 218 | 219 | shifts_over_all_feature_maps = torch.cat(shifts_per_image, dim=0) 220 | 221 | gt_boxes = targets_per_image.gt_boxes 222 | 223 | max_deltas = self.shift2box_transform.get_deltas( 224 | shifts_over_all_feature_maps, gt_boxes.tensor.unsqueeze(1) 225 | ).max(dim=2).values 226 | # limit the regression range for each location 227 | is_cared_in_the_level = \ 228 | (max_deltas >= object_sizes_of_interest[None, :, 0]) & \ 229 | (max_deltas <= object_sizes_of_interest[None, :, 1]) 230 | 231 | candidate_idxs = [] 232 | base = 0 233 | for stride, shifts_i in zip(self.fpn_strides, shifts_per_image): 234 | distances = torch.cdist(gt_boxes.get_centers(), shifts_i) 235 | _, topk_idxs = distances.topk( 236 | self.distance_topk, dim=1, largest=False) 237 | candidate_idxs.append(base + topk_idxs) 238 | base += len(shifts_i) 239 | candidate_idxs = torch.cat(candidate_idxs, dim=1) 240 | 241 | is_foreground = torch.zeros_like( 242 | is_cared_in_the_level 243 | ).scatter_(1, candidate_idxs, True) 244 | 245 | gt_positions_area = gt_boxes.area().unsqueeze(1).repeat( 246 | 1, shifts_over_all_feature_maps.size(0)) 247 | gt_positions_area[~is_cared_in_the_level] = math.inf 248 | gt_positions_area[~is_foreground] = math.inf 249 | 250 | # if there are still more than one objects for a position, 251 | # we choose the one with minimal area 252 | positions_min_area, gt_matched_idxs = gt_positions_area.min(dim=0) 253 | 254 | num_fg += (positions_min_area != math.inf).sum().item() 255 | num_gt += len(targets_per_image) 256 | 257 | # ground truth box regression 258 | gt_shifts_reg_deltas_i = self.shift2box_transform.get_deltas( 259 | shifts_over_all_feature_maps, gt_boxes[gt_matched_idxs].tensor) 260 | 261 | # ground truth classes 262 | has_gt = len(targets_per_image) > 0 263 | if has_gt: 264 | gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] 265 | # Shifts with area inf are treated as background. 266 | gt_classes_i[positions_min_area == math.inf] = self.num_classes 267 | else: 268 | gt_classes_i = torch.zeros_like( 269 | gt_matched_idxs) + self.num_classes 270 | 271 | gt_classes.append(gt_classes_i) 272 | gt_shifts_deltas.append(gt_shifts_reg_deltas_i) 273 | 274 | get_event_storage().put_scalar("num_fg_per_gt", num_fg / num_gt) 275 | 276 | return torch.stack(gt_classes), torch.stack(gt_shifts_deltas) 277 | 278 | def inference(self, box_cls, box_delta, shifts, images): 279 | """ 280 | Arguments: 281 | box_cls, box_delta: Same as the output of :meth:`FCOSHead.forward` 282 | shifts (list[list[Tensor]): a list of #images elements. Each is a 283 | list of #feature level tensor. The tensor contain shifts of this 284 | image on the specific feature level. 285 | images (ImageList): the input images 286 | 287 | Returns: 288 | results (List[Instances]): a list of #images elements. 
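
        Note: because NMS_TYPE is None in this config, inference_single_image only
        sorts the concatenated per-level detections by score and truncates them to
        DETECTIONS_PER_IMAGE; no NMS is applied.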
289 | """ 290 | assert len(shifts) == len(images) 291 | results = [] 292 | 293 | box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls] 294 | box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta] 295 | # list[Tensor], one per level, each has shape (N, Hi x Wi x A, K or 4) 296 | 297 | for img_idx, shifts_per_image in enumerate(shifts): 298 | image_size = images.image_sizes[img_idx] 299 | box_cls_per_image = [ 300 | box_cls_per_level[img_idx] for box_cls_per_level in box_cls 301 | ] 302 | box_reg_per_image = [ 303 | box_reg_per_level[img_idx] for box_reg_per_level in box_delta 304 | ] 305 | results_per_image = self.inference_single_image( 306 | box_cls_per_image, box_reg_per_image, shifts_per_image, 307 | tuple(image_size)) 308 | results.append(results_per_image) 309 | return results 310 | 311 | def inference_single_image(self, box_cls, box_delta, shifts, image_size): 312 | """ 313 | Single-image inference. Return bounding-box detection results by thresholding 314 | on scores and applying non-maximum suppression (NMS). 315 | 316 | Arguments: 317 | box_cls (list[Tensor]): list of #feature levels. Each entry contains 318 | tensor of size (H x W, K) 319 | box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. 320 | shifts (list[Tensor]): list of #feature levels. Each entry contains 321 | a tensor, which contains all the shifts for that 322 | image in that feature level. 323 | image_size (tuple(H, W)): a tuple of the image height and width. 324 | 325 | Returns: 326 | Same as `inference`, but for only one image. 327 | """ 328 | boxes_all = [] 329 | scores_all = [] 330 | class_idxs_all = [] 331 | 332 | # Iterate over every feature level 333 | for box_cls_i, box_reg_i, shifts_i in zip(box_cls, box_delta, shifts): 334 | # (HxWxK,) 335 | box_cls_i = box_cls_i.sigmoid_().flatten() 336 | 337 | # Keep top k top scoring indices only. 338 | num_topk = min(self.topk_candidates, box_reg_i.size(0)) 339 | # torch.sort is actually faster than .topk (at least on GPUs) 340 | predicted_prob, topk_idxs = box_cls_i.sort(descending=True) 341 | predicted_prob = predicted_prob[:num_topk] 342 | topk_idxs = topk_idxs[:num_topk] 343 | 344 | # filter out the proposals with low confidence score 345 | keep_idxs = predicted_prob > self.score_threshold 346 | predicted_prob = predicted_prob[keep_idxs] 347 | topk_idxs = topk_idxs[keep_idxs] 348 | 349 | shift_idxs = topk_idxs // self.num_classes 350 | classes_idxs = topk_idxs % self.num_classes 351 | 352 | box_reg_i = box_reg_i[shift_idxs] 353 | shifts_i = shifts_i[shift_idxs] 354 | # predict boxes 355 | predicted_boxes = self.shift2box_transform.apply_deltas( 356 | box_reg_i, shifts_i) 357 | 358 | boxes_all.append(predicted_boxes) 359 | scores_all.append(predicted_prob) 360 | class_idxs_all.append(classes_idxs) 361 | 362 | boxes_all, scores_all, class_idxs_all = [ 363 | cat(x) for x in [boxes_all, scores_all, class_idxs_all] 364 | ] 365 | 366 | if self.nms_type is None: 367 | # strategies above (e.g. 
topk_candidates and score_threshold) are 368 | # useless for POTO, just keep them for debug and analysis 369 | keep = scores_all.argsort(descending=True) 370 | else: 371 | keep = generalized_batched_nms( 372 | boxes_all, scores_all, class_idxs_all, 373 | self.nms_threshold, nms_type=self.nms_type 374 | ) 375 | keep = keep[:self.max_detections_per_image] 376 | 377 | result = Instances(image_size) 378 | result.pred_boxes = Boxes(boxes_all[keep]) 379 | result.scores = scores_all[keep] 380 | result.pred_classes = class_idxs_all[keep] 381 | return result 382 | 383 | def preprocess_image(self, batched_inputs): 384 | """ 385 | Normalize, pad and batch the input images. 386 | """ 387 | images = [x["image"].to(self.device) for x in batched_inputs] 388 | images = [self.normalizer(x) for x in images] 389 | images = ImageList.from_tensors(images, 390 | self.backbone.size_divisibility) 391 | return images 392 | 393 | def _inference_for_ms_test(self, batched_inputs): 394 | """ 395 | function used for multiscale test, will be refactor in the future. 396 | The same input with `forward` function. 397 | """ 398 | assert not self.training, "inference mode with training=True" 399 | assert len(batched_inputs) == 1, "inference image number > 1" 400 | images = self.preprocess_image(batched_inputs) 401 | 402 | features = self.backbone(images.tensor) 403 | features = [features[f] for f in self.in_features] 404 | box_cls, box_delta = self.head(features) 405 | shifts = self.shift_generator(features) 406 | 407 | results = self.inference(box_cls, box_delta, shifts, images) 408 | for results_per_image, input_per_image, image_size in zip( 409 | results, batched_inputs, images.image_sizes 410 | ): 411 | height = input_per_image.get("height", image_size[0]) 412 | width = input_per_image.get("width", image_size[1]) 413 | processed_results = detector_postprocess(results_per_image, height, width) 414 | return processed_results 415 | 416 | 417 | class FCOSHead(nn.Module): 418 | """ 419 | The head used in FCOS for object classification and box regression. 420 | It has two subnets for the two tasks, with a common structure but separate parameters. 
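
    Each subnet stacks NUM_CONVS blocks of 3x3 conv, GroupNorm(32) and ReLU; the final
    classification and box predictors are 3x3 convs, and a learnable per-level Scale is
    applied to the regression output. One head instance is shared across all FPN levels.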
421 | """ 422 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 423 | super().__init__() 424 | # fmt: off 425 | in_channels = input_shape[0].channels 426 | num_classes = cfg.MODEL.FCOS.NUM_CLASSES 427 | num_convs = cfg.MODEL.FCOS.NUM_CONVS 428 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 429 | num_shifts = cfg.build_shift_generator(cfg, input_shape).num_cell_shifts 430 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 431 | self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS 432 | # fmt: on 433 | assert len(set(num_shifts)) == 1, "using differenct num_shifts value is not supported" 434 | num_shifts = num_shifts[0] 435 | 436 | cls_subnet = [] 437 | bbox_subnet = [] 438 | for _ in range(num_convs): 439 | cls_subnet.append( 440 | nn.Conv2d(in_channels, 441 | in_channels, 442 | kernel_size=3, 443 | stride=1, 444 | padding=1)) 445 | cls_subnet.append(nn.GroupNorm(32, in_channels)) 446 | cls_subnet.append(nn.ReLU()) 447 | bbox_subnet.append( 448 | nn.Conv2d(in_channels, 449 | in_channels, 450 | kernel_size=3, 451 | stride=1, 452 | padding=1)) 453 | bbox_subnet.append(nn.GroupNorm(32, in_channels)) 454 | bbox_subnet.append(nn.ReLU()) 455 | 456 | self.cls_subnet = nn.Sequential(*cls_subnet) 457 | self.bbox_subnet = nn.Sequential(*bbox_subnet) 458 | self.cls_score = nn.Conv2d(in_channels, 459 | num_shifts * num_classes, 460 | kernel_size=3, 461 | stride=1, 462 | padding=1) 463 | self.bbox_pred = nn.Conv2d(in_channels, 464 | num_shifts * 4, 465 | kernel_size=3, 466 | stride=1, 467 | padding=1) 468 | 469 | # Initialization 470 | for modules in [ 471 | self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred 472 | ]: 473 | for layer in modules.modules(): 474 | if isinstance(layer, nn.Conv2d): 475 | torch.nn.init.normal_(layer.weight, mean=0, std=0.01) 476 | torch.nn.init.constant_(layer.bias, 0) 477 | if isinstance(layer, nn.GroupNorm): 478 | torch.nn.init.constant_(layer.weight, 1) 479 | torch.nn.init.constant_(layer.bias, 0) 480 | 481 | # Use prior in model initialization to improve stability 482 | bias_value = -math.log((1 - prior_prob) / prior_prob) 483 | torch.nn.init.constant_(self.cls_score.bias, bias_value) 484 | 485 | self.scales = nn.ModuleList( 486 | [Scale(init_value=1.0) for _ in range(len(self.fpn_strides))]) 487 | 488 | def forward(self, features): 489 | """ 490 | Arguments: 491 | features (list[Tensor]): FPN feature map tensors in high to low resolution. 492 | Each tensor in the list correspond to different feature levels. 493 | 494 | Returns: 495 | logits (list[Tensor]): #lvl tensors, each has shape (N, K, Hi, Wi). 496 | The tensor predicts the classification probability 497 | at each spatial position for each of the K object classes. 498 | bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, 4, Hi, Wi). 499 | The tensor predicts 4-vector (dl,dt,dr,db) box 500 | regression values for every shift. These values are the 501 | relative offset between the shift and the ground truth box. 
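
            Note: when NORM_REG_TARGETS is enabled (as in this config), the regression
            output is passed through ReLU and multiplied by the FPN stride of its level;
            otherwise it is exponentiated.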
502 | """ 503 | logits = [] 504 | bbox_reg = [] 505 | for level, feature in enumerate(features): 506 | cls_subnet = self.cls_subnet(feature) 507 | bbox_subnet = self.bbox_subnet(feature) 508 | 509 | logits.append(self.cls_score(cls_subnet)) 510 | 511 | bbox_pred = self.scales[level](self.bbox_pred(bbox_subnet)) 512 | if self.norm_reg_targets: 513 | bbox_reg.append(F.relu(bbox_pred) * self.fpn_strides[level]) 514 | else: 515 | bbox_reg.append(torch.exp(bbox_pred)) 516 | return logits, bbox_reg 517 | -------------------------------------------------------------------------------- /playground/detection/coco/center.res50.fpn.coco.800size.3x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness/README.md: -------------------------------------------------------------------------------- 1 | # fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness 2 | 3 | seed: 47789800 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.409 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.602 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.441 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.241 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.452 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.524 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.333 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.548 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.584 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.382 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.623 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.731 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 40.944 | 60.167 | 44.113 | 24.072 | 45.182 | 52.421 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 54.384 | bicycle | 31.306 
| car | 44.309 | 30 | | motorcycle | 42.440 | airplane | 67.711 | bus | 66.455 | 31 | | train | 64.182 | truck | 35.696 | boat | 26.276 | 32 | | traffic light | 27.771 | fire hydrant | 67.507 | stop sign | 65.803 | 33 | | parking meter | 42.677 | bench | 22.744 | bird | 36.077 | 34 | | cat | 65.877 | dog | 61.872 | horse | 55.550 | 35 | | sheep | 53.451 | cow | 58.303 | elephant | 65.260 | 36 | | bear | 72.269 | zebra | 69.098 | giraffe | 66.824 | 37 | | backpack | 16.122 | umbrella | 38.972 | handbag | 15.176 | 38 | | tie | 33.290 | suitcase | 39.192 | frisbee | 65.786 | 39 | | skis | 21.490 | snowboard | 35.935 | sports ball | 47.188 | 40 | | kite | 44.031 | baseball bat | 28.208 | baseball glove | 36.072 | 41 | | skateboard | 52.649 | surfboard | 31.155 | tennis racket | 47.645 | 42 | | bottle | 38.152 | wine glass | 37.058 | cup | 41.280 | 43 | | fork | 33.254 | knife | 14.530 | spoon | 14.718 | 44 | | bowl | 37.720 | banana | 23.842 | apple | 19.079 | 45 | | sandwich | 33.285 | orange | 31.455 | broccoli | 23.420 | 46 | | carrot | 19.758 | hot dog | 32.770 | pizza | 51.021 | 47 | | donut | 45.210 | cake | 35.831 | chair | 27.455 | 48 | | couch | 43.319 | potted plant | 27.762 | bed | 40.798 | 49 | | dining table | 26.791 | toilet | 61.554 | tv | 55.279 | 50 | | laptop | 57.426 | mouse | 62.401 | remote | 31.136 | 51 | | keyboard | 47.982 | cell phone | 33.081 | microwave | 55.147 | 52 | | oven | 33.120 | toaster | 36.481 | sink | 38.436 | 53 | | refrigerator | 53.491 | book | 12.543 | clock | 48.801 | 54 | | vase | 37.689 | scissors | 26.647 | teddy bear | 44.404 | 55 | | hair drier | 6.932 | toothbrush | 17.679 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=0.6, 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | CENTER_SAMPLING_RADIUS=1.5, 21 | OBJECT_SIZES_OF_INTEREST=[ 22 | [-1, 64], 23 | [64, 128], 24 | [128, 256], 25 | [256, 512], 26 | [512, float("inf")], 27 | ], 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | CHECKPOINT_PERIOD=10000, 36 | LR_SCHEDULER=dict( 37 | MAX_ITER=270000, 38 | STEPS=(210000, 250000), 39 | ), 40 | OPTIMIZER=dict( 41 | BASE_LR=0.01, 42 | ), 43 | IMS_PER_BATCH=16, 44 | ), 45 | INPUT=dict( 46 | AUG=dict( 47 | TRAIN_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 50 | ("RandomFlip", dict()), 51 | ], 52 | TEST_PIPELINES=[ 53 | ("ResizeShortestEdge", 54 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | EVAL_PEROID=10000, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class CustomFCOSConfig(FCOSConfig): 68 | def __init__(self): 69 | super(CustomFCOSConfig, self).__init__() 
70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = CustomFCOSConfig() 74 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness/fcos.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from typing import List 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | 9 | from cvpods.layers import ShapeSpec, cat, generalized_batched_nms 10 | from cvpods.modeling.box_regression import Shift2BoxTransform 11 | from cvpods.modeling.losses import iou_loss, sigmoid_focal_loss_jit 12 | from cvpods.modeling.meta_arch.fcos import Scale 13 | from cvpods.modeling.meta_arch.retinanet import ( 14 | permute_to_N_HWA_K, 15 | permute_all_cls_and_box_to_N_HWA_K_and_concat 16 | ) 17 | from cvpods.modeling.postprocessing import detector_postprocess 18 | from cvpods.structures import Boxes, ImageList, Instances 19 | from cvpods.utils import comm, log_first_n 20 | 21 | 22 | class FCOS(nn.Module): 23 | """ 24 | Implement FCOS (https://arxiv.org/abs/1904.01355). 25 | """ 26 | def __init__(self, cfg): 27 | super().__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | # fmt: off 32 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 33 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 34 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 35 | # Loss parameters: 36 | self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA 37 | self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA 38 | self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE 39 | self.center_sampling_radius = cfg.MODEL.FCOS.CENTER_SAMPLING_RADIUS 40 | # Inference parameters: 41 | self.score_threshold = cfg.MODEL.FCOS.SCORE_THRESH_TEST 42 | self.topk_candidates = cfg.MODEL.FCOS.TOPK_CANDIDATES_TEST 43 | self.nms_threshold = cfg.MODEL.FCOS.NMS_THRESH_TEST 44 | self.nms_type = cfg.MODEL.NMS_TYPE 45 | self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE 46 | # fmt: on 47 | 48 | self.backbone = cfg.build_backbone( 49 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 50 | 51 | backbone_shape = self.backbone.output_shape() 52 | feature_shapes = [backbone_shape[f] for f in self.in_features] 53 | self.head = FCOSHead(cfg, feature_shapes) 54 | self.shift_generator = cfg.build_shift_generator(cfg, feature_shapes) 55 | 56 | # Matching and loss 57 | self.shift2box_transform = Shift2BoxTransform( 58 | weights=cfg.MODEL.FCOS.BBOX_REG_WEIGHTS) 59 | self.object_sizes_of_interest = cfg.MODEL.FCOS.OBJECT_SIZES_OF_INTEREST 60 | 61 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 62 | 3, 1, 1) 63 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 64 | 3, 1, 1) 65 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 66 | self.to(self.device) 67 | 68 | def forward(self, batched_inputs): 69 | """ 70 | Args: 71 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 72 | Each item in the list contains the inputs for one image. 73 | For now, each item in the list is a dict that contains: 74 | 75 | * image: Tensor, image in (C, H, W) format. 76 | * instances: Instances 77 | 78 | Other information that's included in the original dicts, such as: 79 | 80 | * "height", "width" (int): the output resolution of the model, used in inference. 81 | See :meth:`postprocess` for details. 
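
            At inference time, predictions are produced at the resized/padded input
            resolution and rescaled to this output resolution by
            :func:`detector_postprocess`.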
82 | Returns: 83 | dict[str: Tensor]: 84 | mapping from a named loss to a tensor storing the loss. Used during training only. 85 | """ 86 | images = self.preprocess_image(batched_inputs) 87 | if "instances" in batched_inputs[0]: 88 | gt_instances = [ 89 | x["instances"].to(self.device) for x in batched_inputs 90 | ] 91 | elif "targets" in batched_inputs[0]: 92 | log_first_n( 93 | logging.WARN, 94 | "'targets' in the model inputs is now renamed to 'instances'!", 95 | n=10) 96 | gt_instances = [ 97 | x["targets"].to(self.device) for x in batched_inputs 98 | ] 99 | else: 100 | gt_instances = None 101 | 102 | features = self.backbone(images.tensor) 103 | features = [features[f] for f in self.in_features] 104 | box_cls, box_delta = self.head(features) 105 | shifts = self.shift_generator(features) 106 | 107 | if self.training: 108 | gt_classes, gt_shifts_reg_deltas = self.get_ground_truth( 109 | shifts, gt_instances) 110 | return self.losses(gt_classes, gt_shifts_reg_deltas, box_cls, box_delta) 111 | else: 112 | results = self.inference(box_cls, box_delta, shifts, images) 113 | processed_results = [] 114 | for results_per_image, input_per_image, image_size in zip( 115 | results, batched_inputs, images.image_sizes): 116 | height = input_per_image.get("height", image_size[0]) 117 | width = input_per_image.get("width", image_size[1]) 118 | r = detector_postprocess(results_per_image, height, width) 119 | processed_results.append({"instances": r}) 120 | return processed_results 121 | 122 | def losses(self, gt_classes, gt_shifts_deltas, pred_class_logits, 123 | pred_shift_deltas): 124 | """ 125 | Args: 126 | For `gt_classes` and `gt_shifts_deltas` parameters, see 127 | :meth:`FCOS.get_ground_truth`. 128 | Their shapes are (N, R) and (N, R, 4), respectively, where R is 129 | the total number of shifts across levels, i.e. sum(Hi x Wi) 130 | For `pred_class_logits` and `pred_shift_deltas`, see 131 | :meth:`FCOSHead.forward`. 132 | 133 | Returns: 134 | dict[str: Tensor]: 135 | mapping from a named loss to a scalar tensor 136 | storing the loss. Used during training only. The dict keys are: 137 | "loss_cls" and "loss_box_reg" 138 | """ 139 | pred_class_logits, pred_shift_deltas = \ 140 | permute_all_cls_and_box_to_N_HWA_K_and_concat( 141 | pred_class_logits, pred_shift_deltas, self.num_classes 142 | ) # Shapes: (N x R, K) and (N x R, 4), respectively. 
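        # Compared with the reference FCOS loss, the centerness branch and its BCE loss
        # are absent in this "wo_ctrness" variant: only the focal classification loss and
        # the IoU-style (GIoU here) regression loss on foreground shifts remain, both
        # normalized by the number of foreground shifts averaged across GPUs.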
143 | 144 | gt_classes = gt_classes.flatten() 145 | gt_shifts_deltas = gt_shifts_deltas.view(-1, 4) 146 | 147 | valid_idxs = gt_classes >= 0 148 | foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes) 149 | num_foreground = foreground_idxs.sum() 150 | 151 | gt_classes_target = torch.zeros_like(pred_class_logits) 152 | gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1 153 | 154 | num_foreground = comm.all_reduce(num_foreground) / float(comm.get_world_size()) 155 | 156 | # logits loss 157 | loss_cls = sigmoid_focal_loss_jit( 158 | pred_class_logits[valid_idxs], 159 | gt_classes_target[valid_idxs], 160 | alpha=self.focal_loss_alpha, 161 | gamma=self.focal_loss_gamma, 162 | reduction="sum", 163 | ) / max(1.0, num_foreground) 164 | 165 | # regression loss 166 | loss_box_reg = iou_loss( 167 | pred_shift_deltas[foreground_idxs], 168 | gt_shifts_deltas[foreground_idxs], 169 | box_mode="ltrb", 170 | loss_type=self.iou_loss_type, 171 | reduction="sum", 172 | ) / max(1.0, num_foreground) 173 | 174 | return { 175 | "loss_cls": loss_cls, 176 | "loss_box_reg": loss_box_reg, 177 | } 178 | 179 | @torch.no_grad() 180 | def get_ground_truth(self, shifts, targets): 181 | """ 182 | Args: 183 | shifts (list[list[Tensor]]): a list of N=#image elements. Each is a 184 | list of #feature level tensors. The tensors contains shifts of 185 | this image on the specific feature level. 186 | targets (list[Instances]): a list of N `Instances`s. The i-th 187 | `Instances` contains the ground-truth per-instance annotations 188 | for the i-th input image. Specify `targets` during training only. 189 | 190 | Returns: 191 | gt_classes (Tensor): 192 | An integer tensor of shape (N, R) storing ground-truth 193 | labels for each shift. 194 | R is the total number of shifts, i.e. the sum of Hi x Wi for all levels. 195 | Shifts in the valid boxes are assigned their corresponding label in the 196 | [0, K-1] range. Shifts in the background are assigned the label "K". 197 | Shifts in the ignore areas are assigned a label "-1", i.e. ignore. 198 | gt_shifts_deltas (Tensor): 199 | Shape (N, R, 4). 200 | The last dimension represents ground-truth shift2box transform 201 | targets (dl, dt, dr, db) that map each shift to its matched ground-truth box. 202 | The values in the tensor are meaningful only when the corresponding 203 | shift is labeled as foreground. 
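
        Note: with CENTER_SAMPLING_RADIUS > 0 (1.5 in this config), a shift is a
        candidate for a box only if it lies within CENTER_SAMPLING_RADIUS * stride of
        the box center along both axes (clipped to the box); with radius 0 every shift
        inside the box qualifies. Candidates are then filtered by each level's object
        size range, and a shift matched to several boxes is assigned to the one with
        the smallest area.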
204 | """ 205 | gt_classes = [] 206 | gt_shifts_deltas = [] 207 | 208 | for shifts_per_image, targets_per_image in zip(shifts, targets): 209 | object_sizes_of_interest = torch.cat([ 210 | shifts_i.new_tensor(size).unsqueeze(0).expand( 211 | shifts_i.size(0), -1) for shifts_i, size in zip( 212 | shifts_per_image, self.object_sizes_of_interest) 213 | ], dim=0) 214 | 215 | shifts_over_all_feature_maps = torch.cat(shifts_per_image, dim=0) 216 | 217 | gt_boxes = targets_per_image.gt_boxes 218 | 219 | deltas = self.shift2box_transform.get_deltas( 220 | shifts_over_all_feature_maps, gt_boxes.tensor.unsqueeze(1)) 221 | 222 | if self.center_sampling_radius > 0: 223 | centers = gt_boxes.get_centers() 224 | is_in_boxes = [] 225 | for stride, shifts_i in zip(self.fpn_strides, shifts_per_image): 226 | radius = stride * self.center_sampling_radius 227 | center_boxes = torch.cat(( 228 | torch.max(centers - radius, gt_boxes.tensor[:, :2]), 229 | torch.min(centers + radius, gt_boxes.tensor[:, 2:]), 230 | ), dim=-1) 231 | center_deltas = self.shift2box_transform.get_deltas( 232 | shifts_i, center_boxes.unsqueeze(1)) 233 | is_in_boxes.append(center_deltas.min(dim=-1).values > 0) 234 | is_in_boxes = torch.cat(is_in_boxes, dim=1) 235 | else: 236 | # no center sampling, it will use all the locations within a ground-truth box 237 | is_in_boxes = deltas.min(dim=-1).values > 0 238 | 239 | max_deltas = deltas.max(dim=-1).values 240 | # limit the regression range for each location 241 | is_cared_in_the_level = \ 242 | (max_deltas >= object_sizes_of_interest[None, :, 0]) & \ 243 | (max_deltas <= object_sizes_of_interest[None, :, 1]) 244 | 245 | gt_positions_area = gt_boxes.area().unsqueeze(1).repeat( 246 | 1, shifts_over_all_feature_maps.size(0)) 247 | gt_positions_area[~is_in_boxes] = math.inf 248 | gt_positions_area[~is_cared_in_the_level] = math.inf 249 | 250 | # if there are still more than one objects for a position, 251 | # we choose the one with minimal area 252 | positions_min_area, gt_matched_idxs = gt_positions_area.min(dim=0) 253 | 254 | # ground truth box regression 255 | gt_shifts_reg_deltas_i = self.shift2box_transform.get_deltas( 256 | shifts_over_all_feature_maps, gt_boxes[gt_matched_idxs].tensor) 257 | 258 | # ground truth classes 259 | has_gt = len(targets_per_image) > 0 260 | if has_gt: 261 | gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] 262 | # Shifts with area inf are treated as background. 263 | gt_classes_i[positions_min_area == math.inf] = self.num_classes 264 | else: 265 | gt_classes_i = torch.zeros_like( 266 | gt_matched_idxs) + self.num_classes 267 | 268 | gt_classes.append(gt_classes_i) 269 | gt_shifts_deltas.append(gt_shifts_reg_deltas_i) 270 | 271 | return torch.stack(gt_classes), torch.stack(gt_shifts_deltas) 272 | 273 | def inference(self, box_cls, box_delta, shifts, images): 274 | """ 275 | Arguments: 276 | box_cls, box_delta: Same as the output of :meth:`FCOSHead.forward` 277 | shifts (list[list[Tensor]): a list of #images elements. Each is a 278 | list of #feature level tensor. The tensor contain shifts of this 279 | image on the specific feature level. 280 | images (ImageList): the input images 281 | 282 | Returns: 283 | results (List[Instances]): a list of #images elements. 
284 | """ 285 | assert len(shifts) == len(images) 286 | results = [] 287 | 288 | box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls] 289 | box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta] 290 | # list[Tensor], one per level, each has shape (N, Hi x Wi x A, K or 4) 291 | 292 | for img_idx, shifts_per_image in enumerate(shifts): 293 | image_size = images.image_sizes[img_idx] 294 | box_cls_per_image = [ 295 | box_cls_per_level[img_idx] for box_cls_per_level in box_cls 296 | ] 297 | box_reg_per_image = [ 298 | box_reg_per_level[img_idx] for box_reg_per_level in box_delta 299 | ] 300 | results_per_image = self.inference_single_image( 301 | box_cls_per_image, box_reg_per_image, shifts_per_image, 302 | tuple(image_size)) 303 | results.append(results_per_image) 304 | return results 305 | 306 | def inference_single_image(self, box_cls, box_delta, shifts, image_size): 307 | """ 308 | Single-image inference. Return bounding-box detection results by thresholding 309 | on scores and applying non-maximum suppression (NMS). 310 | 311 | Arguments: 312 | box_cls (list[Tensor]): list of #feature levels. Each entry contains 313 | tensor of size (H x W, K) 314 | box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. 315 | shifts (list[Tensor]): list of #feature levels. Each entry contains 316 | a tensor, which contains all the shifts for that 317 | image in that feature level. 318 | image_size (tuple(H, W)): a tuple of the image height and width. 319 | 320 | Returns: 321 | Same as `inference`, but for only one image. 322 | """ 323 | boxes_all = [] 324 | scores_all = [] 325 | class_idxs_all = [] 326 | 327 | # Iterate over every feature level 328 | for box_cls_i, box_reg_i, shifts_i in zip(box_cls, box_delta, shifts): 329 | # (HxWxK,) 330 | box_cls_i = box_cls_i.sigmoid_().flatten() 331 | 332 | # Keep top k top scoring indices only. 333 | num_topk = min(self.topk_candidates, box_reg_i.size(0)) 334 | # torch.sort is actually faster than .topk (at least on GPUs) 335 | predicted_prob, topk_idxs = box_cls_i.sort(descending=True) 336 | predicted_prob = predicted_prob[:num_topk] 337 | topk_idxs = topk_idxs[:num_topk] 338 | 339 | # filter out the proposals with low confidence score 340 | keep_idxs = predicted_prob > self.score_threshold 341 | predicted_prob = predicted_prob[keep_idxs] 342 | topk_idxs = topk_idxs[keep_idxs] 343 | 344 | shift_idxs = topk_idxs // self.num_classes 345 | classes_idxs = topk_idxs % self.num_classes 346 | 347 | box_reg_i = box_reg_i[shift_idxs] 348 | shifts_i = shifts_i[shift_idxs] 349 | # predict boxes 350 | predicted_boxes = self.shift2box_transform.apply_deltas( 351 | box_reg_i, shifts_i) 352 | 353 | boxes_all.append(predicted_boxes) 354 | scores_all.append(predicted_prob) 355 | class_idxs_all.append(classes_idxs) 356 | 357 | boxes_all, scores_all, class_idxs_all = [ 358 | cat(x) for x in [boxes_all, scores_all, class_idxs_all] 359 | ] 360 | 361 | keep = generalized_batched_nms( 362 | boxes_all, scores_all, class_idxs_all, 363 | self.nms_threshold, nms_type=self.nms_type 364 | ) 365 | keep = keep[:self.max_detections_per_image] 366 | 367 | result = Instances(image_size) 368 | result.pred_boxes = Boxes(boxes_all[keep]) 369 | result.scores = scores_all[keep] 370 | result.pred_classes = class_idxs_all[keep] 371 | return result 372 | 373 | def preprocess_image(self, batched_inputs): 374 | """ 375 | Normalize, pad and batch the input images. 
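
        Images are normalized with PIXEL_MEAN / PIXEL_STD and padded so that the
        batched tensor is divisible by the backbone's size_divisibility.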
376 | """ 377 | images = [x["image"].to(self.device) for x in batched_inputs] 378 | images = [self.normalizer(x) for x in images] 379 | images = ImageList.from_tensors(images, 380 | self.backbone.size_divisibility) 381 | return images 382 | 383 | def _inference_for_ms_test(self, batched_inputs): 384 | """ 385 | function used for multiscale test, will be refactor in the future. 386 | The same input with `forward` function. 387 | """ 388 | assert not self.training, "inference mode with training=True" 389 | assert len(batched_inputs) == 1, "inference image number > 1" 390 | images = self.preprocess_image(batched_inputs) 391 | 392 | features = self.backbone(images.tensor) 393 | features = [features[f] for f in self.in_features] 394 | box_cls, box_delta = self.head(features) 395 | shifts = self.shift_generator(features) 396 | 397 | results = self.inference(box_cls, box_delta, shifts, images) 398 | for results_per_image, input_per_image, image_size in zip( 399 | results, batched_inputs, images.image_sizes 400 | ): 401 | height = input_per_image.get("height", image_size[0]) 402 | width = input_per_image.get("width", image_size[1]) 403 | processed_results = detector_postprocess(results_per_image, height, width) 404 | return processed_results 405 | 406 | 407 | class FCOSHead(nn.Module): 408 | """ 409 | The head used in FCOS for object classification and box regression. 410 | It has two subnets for the two tasks, with a common structure but separate parameters. 411 | """ 412 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 413 | super().__init__() 414 | # fmt: off 415 | in_channels = input_shape[0].channels 416 | num_classes = cfg.MODEL.FCOS.NUM_CLASSES 417 | num_convs = cfg.MODEL.FCOS.NUM_CONVS 418 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 419 | num_shifts = cfg.build_shift_generator(cfg, input_shape).num_cell_shifts 420 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 421 | self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS 422 | # fmt: on 423 | assert len(set(num_shifts)) == 1, "using differenct num_shifts value is not supported" 424 | num_shifts = num_shifts[0] 425 | 426 | cls_subnet = [] 427 | bbox_subnet = [] 428 | for _ in range(num_convs): 429 | cls_subnet.append( 430 | nn.Conv2d(in_channels, 431 | in_channels, 432 | kernel_size=3, 433 | stride=1, 434 | padding=1)) 435 | cls_subnet.append(nn.GroupNorm(32, in_channels)) 436 | cls_subnet.append(nn.ReLU()) 437 | bbox_subnet.append( 438 | nn.Conv2d(in_channels, 439 | in_channels, 440 | kernel_size=3, 441 | stride=1, 442 | padding=1)) 443 | bbox_subnet.append(nn.GroupNorm(32, in_channels)) 444 | bbox_subnet.append(nn.ReLU()) 445 | 446 | self.cls_subnet = nn.Sequential(*cls_subnet) 447 | self.bbox_subnet = nn.Sequential(*bbox_subnet) 448 | self.cls_score = nn.Conv2d(in_channels, 449 | num_shifts * num_classes, 450 | kernel_size=3, 451 | stride=1, 452 | padding=1) 453 | self.bbox_pred = nn.Conv2d(in_channels, 454 | num_shifts * 4, 455 | kernel_size=3, 456 | stride=1, 457 | padding=1) 458 | 459 | # Initialization 460 | for modules in [ 461 | self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred 462 | ]: 463 | for layer in modules.modules(): 464 | if isinstance(layer, nn.Conv2d): 465 | torch.nn.init.normal_(layer.weight, mean=0, std=0.01) 466 | torch.nn.init.constant_(layer.bias, 0) 467 | if isinstance(layer, nn.GroupNorm): 468 | torch.nn.init.constant_(layer.weight, 1) 469 | torch.nn.init.constant_(layer.bias, 0) 470 | 471 | # Use prior in model initialization to improve stability 472 | bias_value = -math.log((1 - prior_prob) 
/ prior_prob) 473 | torch.nn.init.constant_(self.cls_score.bias, bias_value) 474 | 475 | self.scales = nn.ModuleList( 476 | [Scale(init_value=1.0) for _ in range(len(self.fpn_strides))]) 477 | 478 | def forward(self, features): 479 | """ 480 | Arguments: 481 | features (list[Tensor]): FPN feature map tensors in high to low resolution. 482 | Each tensor in the list correspond to different feature levels. 483 | 484 | Returns: 485 | logits (list[Tensor]): #lvl tensors, each has shape (N, K, Hi, Wi). 486 | The tensor predicts the classification probability 487 | at each spatial position for each of the K object classes. 488 | bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, 4, Hi, Wi). 489 | The tensor predicts 4-vector (dl,dt,dr,db) box 490 | regression values for every shift. These values are the 491 | relative offset between the shift and the ground truth box. 492 | """ 493 | logits = [] 494 | bbox_reg = [] 495 | for level, feature in enumerate(features): 496 | cls_subnet = self.cls_subnet(feature) 497 | bbox_subnet = self.bbox_subnet(feature) 498 | 499 | logits.append(self.cls_score(cls_subnet)) 500 | 501 | bbox_pred = self.scales[level](self.bbox_pred(bbox_subnet)) 502 | if self.norm_reg_targets: 503 | bbox_reg.append(F.relu(bbox_pred) * self.fpn_strides[level]) 504 | else: 505 | bbox_reg.append(torch.exp(bbox_pred)) 506 | return logits, bbox_reg 507 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms.wo_ctrness/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
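
    Note: in this playground entry the backbone choice is fixed; the function below
    always calls build_retinanet_resnet_fpn_p5_backbone, so the RetinaNet-style
    ResNet-FPN (p5) backbone is constructed regardless of cfg.MODEL.BACKBONE.NAME.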
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms/README.md: -------------------------------------------------------------------------------- 1 | # fcos.res50.fpn.coco.800size.3x_ms 2 | 3 | seed: 9476764 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.414 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.601 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.449 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.256 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.449 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.531 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.335 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.553 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.591 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.635 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.735 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 41.393 | 60.086 | 44.923 | 25.561 | 44.897 | 53.084 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 55.999 | bicycle | 32.520 | car | 45.318 | 30 | | motorcycle | 43.277 | airplane | 67.218 | bus | 66.594 | 31 | | train | 63.735 | truck | 37.657 | boat | 24.362 | 32 | | traffic light | 27.385 | fire hydrant | 67.430 | stop sign | 63.445 | 33 | | parking meter | 43.762 | bench | 22.987 | bird | 36.695 | 34 | | cat | 67.516 | dog | 62.411 | horse | 56.741 | 35 | | sheep | 53.373 | cow | 58.669 | elephant | 64.608 | 36 | | bear | 71.341 | zebra | 69.199 | giraffe | 68.521 | 37 | | backpack | 16.543 | umbrella | 38.757 | handbag | 15.861 | 38 | | tie | 32.415 | suitcase | 39.008 | frisbee | 68.187 | 39 | | skis | 20.592 | snowboard | 32.193 | sports ball | 47.290 | 40 | | kite | 42.626 | baseball bat | 28.741 | baseball glove | 36.490 | 41 | | skateboard | 54.258 | surfboard | 33.234 | tennis racket | 49.328 | 42 | | bottle | 39.079 | wine glass | 37.518 | cup | 42.291 | 43 | | fork | 31.993 | knife | 18.649 | spoon | 15.694 | 44 | | bowl | 41.004 | banana | 24.253 | apple | 19.303 | 45 | | sandwich | 31.717 | orange | 31.743 | broccoli | 23.667 | 46 | | carrot | 21.484 | hot dog | 31.344 | pizza | 52.775 | 47 | | donut | 46.693 | cake | 37.320 | chair | 28.833 | 48 | | couch | 
44.514 | potted plant | 28.510 | bed | 38.643 | 49 | | dining table | 26.747 | toilet | 59.289 | tv | 55.466 | 50 | | laptop | 57.641 | mouse | 62.759 | remote | 31.570 | 51 | | keyboard | 47.522 | cell phone | 35.813 | microwave | 52.229 | 52 | | oven | 32.445 | toaster | 41.552 | sink | 36.470 | 53 | | refrigerator | 53.942 | book | 13.845 | clock | 48.035 | 54 | | vase | 36.108 | scissors | 26.815 | teddy bear | 47.294 | 55 | | hair drier | 13.241 | toothbrush | 19.316 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | CENTERNESS_ON_REG=True, 15 | NORM_REG_TARGETS=True, 16 | NMS_THRESH_TEST=0.6, 17 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 18 | FOCAL_LOSS_GAMMA=2.0, 19 | FOCAL_LOSS_ALPHA=0.25, 20 | IOU_LOSS_TYPE="giou", 21 | CENTER_SAMPLING_RADIUS=1.5, 22 | OBJECT_SIZES_OF_INTEREST=[ 23 | [-1, 64], 24 | [64, 128], 25 | [128, 256], 26 | [256, 512], 27 | [512, float("inf")], 28 | ], 29 | ), 30 | ), 31 | DATASETS=dict( 32 | TRAIN=("coco_2017_train",), 33 | TEST=("coco_2017_val",), 34 | ), 35 | SOLVER=dict( 36 | CHECKPOINT_PERIOD=10000, 37 | LR_SCHEDULER=dict( 38 | MAX_ITER=270000, 39 | STEPS=(210000, 250000), 40 | ), 41 | OPTIMIZER=dict( 42 | BASE_LR=0.01, 43 | ), 44 | IMS_PER_BATCH=16, 45 | ), 46 | INPUT=dict( 47 | AUG=dict( 48 | TRAIN_PIPELINES=[ 49 | ("ResizeShortestEdge", 50 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 51 | ("RandomFlip", dict()), 52 | ], 53 | TEST_PIPELINES=[ 54 | ("ResizeShortestEdge", 55 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 56 | ], 57 | ) 58 | ), 59 | TEST=dict( 60 | EVAL_PEROID=10000, 61 | ), 62 | OUTPUT_DIR=osp.join( 63 | '/data/Outputs/model_logs/cvpods_playground', 64 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 65 | ) 66 | 67 | 68 | class CustomFCOSConfig(FCOSConfig): 69 | def __init__(self): 70 | super(CustomFCOSConfig, self).__init__() 71 | self._register_configuration(_config_dict) 72 | 73 | 74 | config = CustomFCOSConfig() 75 | -------------------------------------------------------------------------------- /playground/detection/coco/fcos.res50.fpn.coco.800size.3x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | from cvpods.modeling.meta_arch.fcos import FCOS 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | 27 | return ShiftGenerator(cfg, input_shape) 28 | 29 | 30 | def build_model(cfg): 31 | 32 | cfg.build_backbone = build_backbone 33 | cfg.build_shift_generator = build_shift_generator 34 | 35 | model = FCOS(cfg) 36 | logger = logging.getLogger(__name__) 37 | logger.info("Model:\n{}".format(model)) 38 | return model 39 | -------------------------------------------------------------------------------- /playground/detection/coco/loss.res50.fpn.coco.800size.3x_ms/README.md: -------------------------------------------------------------------------------- 1 | # loss.res50.fpn.coco.800size.3x_ms 2 | 3 | seed: 3751988 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.387 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.549 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.427 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.238 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.424 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.489 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.327 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.622 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.419 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.656 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.788 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 38.708 | 54.872 | 42.708 | 23.793 | 42.364 | 48.888 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 55.156 | bicycle | 28.500 | car | 44.210 | 30 | | motorcycle | 40.619 | airplane | 64.635 | bus | 62.010 | 31 | | train | 60.690 | truck | 31.329 | boat | 24.446 | 32 | | traffic light | 28.411 | fire hydrant | 63.136 | stop sign | 63.268 | 33 | | parking meter | 39.984 | bench | 22.074 | bird | 34.874 | 34 | | cat | 61.970 | dog | 57.768 | horse | 55.772 | 35 | | sheep | 51.466 | cow | 57.623 | elephant | 62.707 | 36 | | bear | 66.627 | zebra | 67.822 | giraffe | 67.217 | 37 | | backpack | 13.496 | umbrella | 37.257 | handbag | 13.215 | 38 | | tie | 30.037 | suitcase | 35.837 | frisbee | 63.655 | 39 | | skis | 20.689 | snowboard | 26.305 | sports ball | 48.801 | 40 | | kite | 42.445 | baseball bat | 22.402 | baseball glove | 33.640 | 41 | | skateboard | 48.489 | surfboard | 30.267 | tennis racket | 45.932 | 42 | | bottle | 37.132 | wine glass | 34.082 | cup | 39.278 | 43 | | fork | 26.000 | knife | 14.181 | spoon | 14.024 | 44 | | bowl | 37.208 | banana | 23.155 | apple | 18.371 | 45 | | sandwich | 31.738 | orange | 30.707 | broccoli | 23.113 | 46 | | carrot | 20.558 | hot dog | 31.242 | pizza | 46.054 | 47 | | donut | 45.652 | cake | 34.416 | chair | 25.191 | 48 | | couch | 
39.924 | potted plant | 24.988 | bed | 36.558 | 49 | | dining table | 26.308 | toilet | 56.805 | tv | 53.605 | 50 | | laptop | 51.866 | mouse | 58.877 | remote | 25.243 | 51 | | keyboard | 48.115 | cell phone | 30.290 | microwave | 55.947 | 52 | | oven | 31.628 | toaster | 28.743 | sink | 33.973 | 53 | | refrigerator | 48.122 | book | 12.652 | clock | 47.065 | 54 | | vase | 35.688 | scissors | 25.522 | teddy bear | 42.451 | 55 | | hair drier | 8.780 | toothbrush | 16.637 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/loss.res50.fpn.coco.800size.3x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | CENTER_SAMPLING_RADIUS=1.5, 24 | ), 25 | NMS_TYPE=None, 26 | ), 27 | DATASETS=dict( 28 | TRAIN=("coco_2017_train",), 29 | TEST=("coco_2017_val",), 30 | ), 31 | SOLVER=dict( 32 | CHECKPOINT_PERIOD=10000, 33 | LR_SCHEDULER=dict( 34 | MAX_ITER=270000, 35 | STEPS=(210000, 250000), 36 | ), 37 | OPTIMIZER=dict( 38 | BASE_LR=0.01, 39 | ), 40 | IMS_PER_BATCH=16, 41 | ), 42 | INPUT=dict( 43 | AUG=dict( 44 | TRAIN_PIPELINES=[ 45 | ("ResizeShortestEdge", 46 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 47 | ("RandomFlip", dict()), 48 | ], 49 | TEST_PIPELINES=[ 50 | ("ResizeShortestEdge", 51 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 52 | ], 53 | ) 54 | ), 55 | TEST=dict( 56 | EVAL_PEROID=10000, 57 | ), 58 | OUTPUT_DIR=osp.join( 59 | '/data/Outputs/model_logs/cvpods_playground', 60 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /playground/detection/coco/loss.res50.fpn.coco.800size.3x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf.aux/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.3x_ms.3dmf.aux 2 | 3 | seed: 9905538 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.414 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.595 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.456 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.261 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.449 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.520 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.331 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.615 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.423 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.648 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.759 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 41.443 | 59.520 | 45.650 | 26.075 | 44.914 | 52.025 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 56.121 | bicycle | 31.877 | car | 46.088 | 30 | | motorcycle | 44.040 | airplane | 64.505 | bus | 67.009 | 31 | | train | 64.725 | truck | 36.420 | boat | 26.455 | 32 | | traffic light | 28.310 | fire hydrant | 66.097 | stop sign | 66.077 | 33 | | parking meter | 41.362 | bench | 23.638 | bird | 36.667 | 34 | | cat | 65.117 | dog | 60.452 | horse | 56.984 | 35 | | sheep | 54.113 | cow | 59.500 | elephant | 66.082 | 36 | | bear | 72.626 | zebra | 69.089 | giraffe | 68.849 | 37 | | backpack | 16.112 | umbrella | 39.704 | handbag | 16.575 | 38 | | tie | 32.851 | suitcase | 39.014 | frisbee | 66.197 | 39 | | skis | 23.084 | snowboard | 32.775 | sports ball | 49.004 | 40 | | kite | 43.987 | baseball bat | 25.279 | baseball glove | 37.227 | 41 | | skateboard | 53.510 | surfboard | 33.053 | tennis racket | 47.605 | 42 | | bottle | 38.319 | wine glass | 36.563 | cup | 43.496 | 43 | | fork | 32.541 | knife | 19.178 | spoon | 15.184 | 44 | | bowl | 41.310 | banana | 25.229 | apple | 19.220 | 45 | | sandwich | 34.396 | orange | 30.466 | broccoli | 22.790 | 46 | | carrot | 22.295 | hot dog | 33.964 | pizza | 50.737 | 47 | | donut | 48.532 | cake | 36.915 | chair | 28.639 
| 48 | | couch | 42.555 | potted plant | 27.923 | bed | 41.984 | 49 | | dining table | 28.917 | toilet | 61.171 | tv | 55.547 | 50 | | laptop | 57.712 | mouse | 62.472 | remote | 31.074 | 51 | | keyboard | 46.964 | cell phone | 35.703 | microwave | 56.487 | 52 | | oven | 36.069 | toaster | 30.171 | sink | 35.591 | 53 | | refrigerator | 52.934 | book | 14.796 | clock | 52.402 | 54 | | vase | 40.550 | scissors | 23.294 | teddy bear | 45.177 | 55 | | hair drier | 8.282 | toothbrush | 19.696 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf.aux/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | AUX_TOPK=9, 26 | FILTER_KERNEL_SIZE=3, 27 | FILTER_TAU=2, 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | CHECKPOINT_PERIOD=10000, 36 | LR_SCHEDULER=dict( 37 | MAX_ITER=270000, 38 | STEPS=(210000, 250000), 39 | ), 40 | OPTIMIZER=dict( 41 | BASE_LR=0.01, 42 | ), 43 | IMS_PER_BATCH=16, 44 | ), 45 | INPUT=dict( 46 | AUG=dict( 47 | TRAIN_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 50 | ("RandomFlip", dict()), 51 | ], 52 | TEST_PIPELINES=[ 53 | ("ResizeShortestEdge", 54 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | EVAL_PEROID=10000, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class CustomFCOSConfig(FCOSConfig): 68 | def __init__(self): 69 | super(CustomFCOSConfig, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = CustomFCOSConfig() 74 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf.aux/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.3x_ms.3dmf 2 | 3 | seed: 47909290 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.406 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.580 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.447 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.261 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.442 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.508 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.330 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.565 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.616 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.432 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.772 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 40.575 | 57.958 | 44.748 | 26.136 | 44.203 | 50.830 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 56.011 | bicycle | 30.611 | car | 45.575 | 30 | | motorcycle | 44.060 | airplane | 64.475 | bus | 66.127 | 31 | | train | 61.872 | truck | 34.914 | boat | 26.082 | 32 | | traffic light | 27.353 | fire hydrant | 67.141 | stop sign | 62.605 | 33 | | parking meter | 37.636 | bench | 21.800 | bird | 36.636 | 34 | | cat | 63.188 | dog | 58.236 | horse | 56.319 | 35 | | sheep | 52.617 | cow | 59.001 | elephant | 63.909 | 36 | | bear | 71.724 | zebra | 69.096 | giraffe | 69.357 | 37 | | backpack | 15.925 | umbrella | 38.355 | handbag | 15.242 | 38 | | tie | 31.785 | suitcase | 38.321 | frisbee | 65.553 | 39 | | skis | 22.458 | snowboard | 29.646 | sports ball | 49.683 | 40 | | kite | 45.033 | baseball bat | 26.191 | baseball glove | 35.489 | 41 | | skateboard | 51.098 | surfboard | 33.133 | tennis racket | 47.758 | 42 | | bottle | 38.607 | wine glass | 35.685 | cup | 41.064 | 43 | | fork | 31.083 | knife | 16.317 | spoon | 16.181 | 44 | | bowl | 39.595 | banana | 23.849 | apple | 18.303 | 45 | | sandwich | 35.337 | orange | 31.568 | broccoli | 21.855 | 46 | | carrot | 22.811 | hot dog | 32.806 | pizza | 47.969 | 47 | | donut | 46.088 | cake | 36.073 | chair | 27.704 | 48 | 
| couch | 41.137 | potted plant | 29.213 | bed | 41.475 | 49 | | dining table | 28.768 | toilet | 58.593 | tv | 56.197 | 50 | | laptop | 55.163 | mouse | 62.155 | remote | 28.710 | 51 | | keyboard | 47.542 | cell phone | 33.312 | microwave | 57.314 | 52 | | oven | 36.153 | toaster | 30.023 | sink | 34.236 | 53 | | refrigerator | 52.814 | book | 14.329 | clock | 50.840 | 54 | | vase | 38.593 | scissors | 20.232 | teddy bear | 42.617 | 55 | | hair drier | 9.686 | toothbrush | 22.022 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | FILTER_KERNEL_SIZE=3, 26 | FILTER_TAU=2, 27 | ), 28 | NMS_TYPE=None, 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | CHECKPOINT_PERIOD=10000, 36 | LR_SCHEDULER=dict( 37 | MAX_ITER=270000, 38 | STEPS=(210000, 250000), 39 | ), 40 | OPTIMIZER=dict( 41 | BASE_LR=0.01, 42 | ), 43 | IMS_PER_BATCH=16, 44 | ), 45 | INPUT=dict( 46 | AUG=dict( 47 | TRAIN_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 50 | ("RandomFlip", dict()), 51 | ], 52 | TEST_PIPELINES=[ 53 | ("ResizeShortestEdge", 54 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | EVAL_PEROID=10000, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class CustomFCOSConfig(FCOSConfig): 68 | def __init__(self): 69 | super(CustomFCOSConfig, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = CustomFCOSConfig() 74 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn.aux/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn.aux 2 | 3 | seed: 48196309 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.415 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.596 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.455 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.264 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.447 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.528 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.334 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.566 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.615 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.426 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.645 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.780 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 41.537 | 59.599 | 45.482 | 26.398 | 44.711 | 52.790 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 56.022 | bicycle | 31.748 | car | 45.843 | 30 | | motorcycle | 43.949 | airplane | 64.532 | bus | 65.273 | 31 | | train | 64.257 | truck | 34.849 | boat | 26.335 | 32 | | traffic light | 28.817 | fire hydrant | 68.268 | stop sign | 64.870 | 33 | | parking meter | 45.673 | bench | 23.891 | bird | 36.187 | 34 | | cat | 65.462 | dog | 61.240 | horse | 57.789 | 35 | | sheep | 54.343 | cow | 59.442 | elephant | 64.308 | 36 | | bear | 71.876 | zebra | 68.594 | giraffe | 69.073 | 37 | | backpack | 15.893 | umbrella | 40.820 | handbag | 15.851 | 38 | | tie | 34.247 | suitcase | 39.213 | frisbee | 67.964 | 39 | | skis | 22.769 | snowboard | 32.849 | sports ball | 50.388 | 40 | | kite | 43.838 | baseball bat | 26.389 | baseball glove | 36.528 | 41 | | skateboard | 50.424 | surfboard | 33.322 | tennis racket | 47.540 | 42 | | bottle | 39.076 | wine glass | 36.604 | cup | 42.789 | 43 | | fork | 32.285 | knife | 19.467 | spoon | 16.573 | 44 | | bowl | 40.635 | banana | 25.418 | apple | 18.561 | 45 | | sandwich | 33.519 | orange | 32.752 | broccoli | 24.165 | 46 | | carrot | 21.626 | hot dog | 33.744 | pizza | 49.567 | 47 | | donut | 47.892 | cake | 37.923 | 
chair | 27.820 | 48 | | couch | 43.414 | potted plant | 27.061 | bed | 42.497 | 49 | | dining table | 29.552 | toilet | 60.603 | tv | 55.802 | 50 | | laptop | 57.497 | mouse | 62.402 | remote | 30.887 | 51 | | keyboard | 47.057 | cell phone | 34.426 | microwave | 59.429 | 52 | | oven | 34.282 | toaster | 26.914 | sink | 37.059 | 53 | | refrigerator | 56.400 | book | 14.374 | clock | 51.286 | 54 | | vase | 39.864 | scissors | 24.366 | teddy bear | 44.885 | 55 | | hair drier | 6.745 | toothbrush | 25.060 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn.aux/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | AUX_TOPK=9, 26 | FILTER_KERNEL_SIZE=3, 27 | FILTER_TAU=2, 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | CHECKPOINT_PERIOD=10000, 36 | LR_SCHEDULER=dict( 37 | MAX_ITER=270000, 38 | STEPS=(210000, 250000), 39 | ), 40 | OPTIMIZER=dict( 41 | BASE_LR=0.01, 42 | ), 43 | IMS_PER_BATCH=16, 44 | ), 45 | INPUT=dict( 46 | AUG=dict( 47 | TRAIN_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 50 | ("RandomFlip", dict()), 51 | ], 52 | TEST_PIPELINES=[ 53 | ("ResizeShortestEdge", 54 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | EVAL_PEROID=10000, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class CustomFCOSConfig(FCOSConfig): 68 | def __init__(self): 69 | super(CustomFCOSConfig, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = CustomFCOSConfig() 74 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn.aux/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn 2 | 3 | seed: 20416029 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.409 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.584 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.451 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.250 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.442 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.509 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.333 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.568 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.620 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.427 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.651 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.781 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 40.899 | 58.400 | 45.096 | 25.013 | 44.239 | 50.856 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 56.046 | bicycle | 30.989 | car | 45.579 | 30 | | motorcycle | 43.111 | airplane | 65.106 | bus | 65.405 | 31 | | train | 62.920 | truck | 34.734 | boat | 25.357 | 32 | | traffic light | 28.115 | fire hydrant | 66.877 | stop sign | 64.047 | 33 | | parking meter | 42.062 | bench | 22.379 | bird | 36.070 | 34 | | cat | 64.435 | dog | 59.563 | horse | 56.937 | 35 | | sheep | 53.341 | cow | 59.032 | elephant | 66.209 | 36 | | bear | 74.630 | zebra | 70.397 | giraffe | 68.291 | 37 | | backpack | 14.514 | umbrella | 40.210 | handbag | 16.376 | 38 | | tie | 33.069 | suitcase | 39.480 | frisbee | 65.160 | 39 | | skis | 24.172 | snowboard | 29.357 | sports ball | 49.727 | 40 | | kite | 44.820 | baseball bat | 28.152 | baseball glove | 35.731 | 41 | | skateboard | 52.100 | surfboard | 32.695 | tennis racket | 46.665 | 42 | | bottle | 37.841 | wine glass | 35.470 | cup | 40.993 | 43 | | fork | 30.267 | knife | 18.351 | spoon | 14.851 | 44 | | bowl | 39.647 | banana | 25.875 | apple | 18.507 | 45 | | sandwich | 32.898 | orange | 31.784 | broccoli | 22.977 | 46 | | carrot | 22.787 | hot dog | 34.321 | pizza | 49.750 | 47 | | donut | 46.930 | cake | 36.820 | chair | 
27.396 | 48 | | couch | 42.319 | potted plant | 26.744 | bed | 40.382 | 49 | | dining table | 27.526 | toilet | 61.973 | tv | 55.405 | 50 | | laptop | 55.293 | mouse | 62.157 | remote | 30.304 | 51 | | keyboard | 47.354 | cell phone | 32.881 | microwave | 52.690 | 52 | | oven | 35.054 | toaster | 35.407 | sink | 34.457 | 53 | | refrigerator | 52.187 | book | 14.121 | clock | 50.530 | 54 | | vase | 39.453 | scissors | 20.537 | teddy bear | 43.979 | 55 | | hair drier | 7.872 | toothbrush | 21.980 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | FILTER_KERNEL_SIZE=3, 26 | FILTER_TAU=2, 27 | ), 28 | NMS_TYPE=None, 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | CHECKPOINT_PERIOD=10000, 36 | LR_SCHEDULER=dict( 37 | MAX_ITER=270000, 38 | STEPS=(210000, 250000), 39 | ), 40 | OPTIMIZER=dict( 41 | BASE_LR=0.01, 42 | ), 43 | IMS_PER_BATCH=16, 44 | ), 45 | INPUT=dict( 46 | AUG=dict( 47 | TRAIN_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 50 | ("RandomFlip", dict()), 51 | ], 52 | TEST_PIPELINES=[ 53 | ("ResizeShortestEdge", 54 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | EVAL_PEROID=10000, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class CustomFCOSConfig(FCOSConfig): 68 | def __init__(self): 69 | super(CustomFCOSConfig, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = CustomFCOSConfig() 74 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.3dmf_wo_gn/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.argmax/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.3x_ms.argmax 2 | 3 | seed: 28371048 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.392 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.565 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.429 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.251 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.425 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.486 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.332 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.561 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.611 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.425 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.642 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.771 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 39.173 | 56.541 | 42.864 | 25.063 | 42.544 | 48.556 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 55.369 | bicycle | 29.024 | car | 44.949 | 30 | | motorcycle | 40.755 | airplane | 61.428 | bus | 63.706 | 31 | | train | 63.010 | truck | 33.339 | boat | 24.766 | 32 | | traffic light | 27.866 | fire hydrant | 65.189 | stop sign | 63.710 | 33 | | parking meter | 38.060 | bench | 21.041 | bird | 34.159 | 34 | | cat | 62.558 | dog | 58.592 | horse | 53.840 | 35 | | sheep | 51.898 | cow | 57.720 | elephant | 63.070 | 36 | | bear | 65.021 | zebra | 68.255 | giraffe | 66.300 | 37 | | backpack | 15.401 | umbrella | 36.794 | handbag | 13.792 | 38 | | tie | 32.077 | suitcase | 36.793 | frisbee | 63.497 | 39 | | skis | 20.846 | snowboard | 29.080 | sports ball | 49.068 | 40 | | kite | 45.513 | baseball bat | 23.079 | baseball glove | 35.174 | 41 | | skateboard | 48.845 | surfboard | 31.265 | tennis racket | 44.957 | 42 | | bottle | 37.024 | wine glass | 36.082 | cup | 39.056 | 43 | | fork | 28.172 | knife | 15.247 | spoon | 11.683 | 44 | | bowl | 38.294 | banana | 22.372 | apple | 17.241 | 45 | | sandwich | 32.986 | orange | 29.593 | broccoli | 22.183 | 46 | | carrot | 20.558 | hot dog | 30.988 | pizza | 46.334 | 47 | | donut | 45.065 | cake | 33.891 | chair | 25.763 | 
48 | | couch | 38.314 | potted plant | 24.476 | bed | 38.726 | 49 | | dining table | 26.741 | toilet | 59.266 | tv | 54.473 | 50 | | laptop | 52.434 | mouse | 60.845 | remote | 27.208 | 51 | | keyboard | 47.558 | cell phone | 31.518 | microwave | 54.648 | 52 | | oven | 33.369 | toaster | 37.296 | sink | 35.792 | 53 | | refrigerator | 50.387 | book | 13.957 | clock | 50.504 | 54 | | vase | 36.301 | scissors | 22.354 | teddy bear | 40.904 | 55 | | hair drier | 3.695 | toothbrush | 20.750 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.argmax/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | ), 26 | NMS_TYPE=None, 27 | ), 28 | DATASETS=dict( 29 | TRAIN=("coco_2017_train",), 30 | TEST=("coco_2017_val",), 31 | ), 32 | SOLVER=dict( 33 | CHECKPOINT_PERIOD=10000, 34 | LR_SCHEDULER=dict( 35 | MAX_ITER=270000, 36 | STEPS=(210000, 250000), 37 | ), 38 | OPTIMIZER=dict( 39 | BASE_LR=0.01, 40 | ), 41 | IMS_PER_BATCH=16, 42 | ), 43 | INPUT=dict( 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("ResizeShortestEdge", 47 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 48 | ("RandomFlip", dict()), 49 | ], 50 | TEST_PIPELINES=[ 51 | ("ResizeShortestEdge", 52 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 53 | ], 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PEROID=10000, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground', 61 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 62 | ) 63 | 64 | 65 | class CustomFCOSConfig(FCOSConfig): 66 | def __init__(self): 67 | super(CustomFCOSConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = CustomFCOSConfig() 72 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms.argmax/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.3x_ms 2 | 3 | seed: 46353074 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.392 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.565 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.427 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.246 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.428 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.494 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.331 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.564 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.617 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.430 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.645 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.769 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 39.177 | 56.537 | 42.744 | 24.604 | 42.761 | 49.431 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 55.077 | bicycle | 28.963 | car | 44.556 | 30 | | motorcycle | 40.411 | airplane | 62.600 | bus | 64.460 | 31 | | train | 61.501 | truck | 32.743 | boat | 24.498 | 32 | | traffic light | 27.148 | fire hydrant | 66.437 | stop sign | 66.019 | 33 | | parking meter | 41.497 | bench | 22.068 | bird | 32.931 | 34 | | cat | 63.692 | dog | 55.942 | horse | 53.431 | 35 | | sheep | 49.970 | cow | 57.115 | elephant | 61.551 | 36 | | bear | 68.866 | zebra | 69.122 | giraffe | 67.020 | 37 | | backpack | 13.829 | umbrella | 35.503 | handbag | 13.864 | 38 | | tie | 30.570 | suitcase | 35.649 | frisbee | 63.528 | 39 | | skis | 21.487 | snowboard | 29.169 | sports ball | 48.539 | 40 | | kite | 43.850 | baseball bat | 25.057 | baseball glove | 33.055 | 41 | | skateboard | 49.974 | surfboard | 32.123 | tennis racket | 45.815 | 42 | | bottle | 36.810 | wine glass | 33.975 | cup | 39.115 | 43 | | fork | 28.607 | knife | 15.567 | spoon | 13.952 | 44 | | bowl | 39.732 | banana | 22.992 | apple | 18.159 | 45 | | sandwich | 32.600 | orange | 30.111 | broccoli | 22.669 | 46 | | carrot | 20.644 | hot dog | 29.884 | pizza | 48.627 | 47 | | donut | 47.200 | cake | 33.947 | chair | 25.801 | 48 | | couch | 
39.867 | potted plant | 24.210 | bed | 38.148 | 49 | | dining table | 26.551 | toilet | 57.943 | tv | 53.389 | 50 | | laptop | 51.368 | mouse | 61.005 | remote | 27.733 | 51 | | keyboard | 49.490 | cell phone | 31.307 | microwave | 54.264 | 52 | | oven | 32.144 | toaster | 26.384 | sink | 34.782 | 53 | | refrigerator | 51.548 | book | 12.528 | clock | 49.467 | 54 | | vase | 36.095 | scissors | 22.305 | teddy bear | 43.729 | 55 | | hair drier | 8.966 | toothbrush | 20.880 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | ), 26 | NMS_TYPE=None, 27 | ), 28 | DATASETS=dict( 29 | TRAIN=("coco_2017_train",), 30 | TEST=("coco_2017_val",), 31 | ), 32 | SOLVER=dict( 33 | CHECKPOINT_PERIOD=10000, 34 | LR_SCHEDULER=dict( 35 | MAX_ITER=270000, 36 | STEPS=(210000, 250000), 37 | ), 38 | OPTIMIZER=dict( 39 | BASE_LR=0.01, 40 | ), 41 | IMS_PER_BATCH=16, 42 | ), 43 | INPUT=dict( 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("ResizeShortestEdge", 47 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 48 | ("RandomFlip", dict()), 49 | ], 50 | TEST_PIPELINES=[ 51 | ("ResizeShortestEdge", 52 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 53 | ], 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PEROID=10000, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground', 61 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 62 | ) 63 | 64 | 65 | class CustomFCOSConfig(FCOSConfig): 66 | def __init__(self): 67 | super(CustomFCOSConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = CustomFCOSConfig() 72 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.3x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
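    For orientation, a sketch of how cvpods wires this file together at training time
    (illustrative only; the ``pods_train`` runner normally does this for you, so treat
    the snippet as a hypothetical driver rather than part of the repo):

        from config import config     # CustomFCOSConfig from the sibling config.py
        from net import build_model

        model = build_model(config)   # attaches build_backbone / build_shift_generator
                                      # to the config and instantiates the local FCOS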
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.6x_ms/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.6x_ms 2 | 3 | seed: 36847828 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.400 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.573 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.438 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.256 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.432 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.506 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.334 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.568 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.619 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.433 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.778 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 40.043 | 57.341 | 43.808 | 25.557 | 43.185 | 50.584 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 56.090 | bicycle | 29.735 | car | 45.618 | 30 | | motorcycle | 43.291 | airplane | 62.310 | bus | 64.722 | 31 | | train | 62.188 | truck | 33.790 | boat | 24.878 | 32 | | traffic light | 27.185 | fire hydrant | 67.150 | stop sign | 63.966 | 33 | | parking meter | 37.816 | bench | 21.362 | bird | 35.355 | 34 | | cat | 64.725 | dog | 56.913 | horse | 56.220 | 35 | | sheep | 52.330 | cow | 57.441 | elephant | 63.153 | 36 | | bear | 69.075 | zebra | 68.448 | giraffe | 66.663 | 37 | | backpack | 15.567 | umbrella | 37.764 | handbag | 14.582 | 38 | | tie | 33.100 | suitcase | 37.147 | frisbee | 67.163 | 39 | | skis | 23.415 | snowboard | 27.146 | sports ball | 49.088 | 40 | | kite | 44.871 | baseball bat | 25.033 | baseball glove | 35.477 | 41 | | skateboard | 51.668 | surfboard | 32.272 | tennis racket | 46.320 | 42 | | bottle | 38.550 | wine glass | 36.127 | cup | 41.104 | 43 | | fork | 31.559 | knife | 18.517 | spoon | 14.022 | 44 | | bowl | 39.755 | banana | 23.037 | apple | 18.548 | 45 | | sandwich | 33.698 | orange | 30.408 | broccoli | 22.510 | 46 | | carrot | 20.917 | hot dog | 31.513 | pizza | 48.674 | 47 | | donut | 43.536 | cake | 35.415 | chair | 27.109 | 48 | | couch | 
41.040 | potted plant | 23.775 | bed | 41.336 | 49 | | dining table | 27.386 | toilet | 59.928 | tv | 54.313 | 50 | | laptop | 53.567 | mouse | 62.256 | remote | 28.094 | 51 | | keyboard | 47.250 | cell phone | 32.362 | microwave | 55.658 | 52 | | oven | 33.503 | toaster | 33.645 | sink | 36.360 | 53 | | refrigerator | 52.201 | book | 13.569 | clock | 51.146 | 54 | | vase | 37.907 | scissors | 20.398 | teddy bear | 41.900 | 55 | | hair drier | 8.710 | toothbrush | 21.119 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.6x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | ), 26 | NMS_TYPE=None, 27 | ), 28 | DATASETS=dict( 29 | TRAIN=("coco_2017_train",), 30 | TEST=("coco_2017_val",), 31 | ), 32 | SOLVER=dict( 33 | CHECKPOINT_PERIOD=10000, 34 | LR_SCHEDULER=dict( 35 | MAX_ITER=540000, 36 | STEPS=(480000, 520000), 37 | ), 38 | OPTIMIZER=dict( 39 | BASE_LR=0.01, 40 | ), 41 | IMS_PER_BATCH=16, 42 | ), 43 | INPUT=dict( 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("ResizeShortestEdge", 47 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 48 | ("RandomFlip", dict()), 49 | ], 50 | TEST_PIPELINES=[ 51 | ("ResizeShortestEdge", 52 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 53 | ], 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PEROID=10000, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground', 61 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 62 | ) 63 | 64 | 65 | class CustomFCOSConfig(FCOSConfig): 66 | def __init__(self): 67 | super(CustomFCOSConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = CustomFCOSConfig() 72 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.6x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.9x_ms/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.coco.800size.9x_ms 2 | 3 | seed: 55805791 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.402 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.576 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.440 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.255 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.433 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.497 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.335 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.572 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.623 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.429 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.652 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.785 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 40.211 | 57.556 | 43.974 | 25.479 | 43.254 | 49.724 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 56.461 | bicycle | 29.751 | car | 45.531 | 30 | | motorcycle | 41.378 | airplane | 66.387 | bus | 65.418 | 31 | | train | 63.702 | truck | 33.320 | boat | 25.286 | 32 | | traffic light | 27.916 | fire hydrant | 65.679 | stop sign | 65.608 | 33 | | parking meter | 43.400 | bench | 21.921 | bird | 35.702 | 34 | | cat | 64.798 | dog | 58.762 | horse | 55.727 | 35 | | sheep | 53.283 | cow | 59.048 | elephant | 63.088 | 36 | | bear | 69.722 | zebra | 68.877 | giraffe | 66.154 | 37 | | backpack | 14.874 | umbrella | 38.954 | handbag | 14.525 | 38 | | tie | 32.606 | suitcase | 38.137 | frisbee | 63.666 | 39 | | skis | 21.814 | snowboard | 29.888 | sports ball | 48.920 | 40 | | kite | 44.437 | baseball bat | 26.805 | baseball glove | 34.829 | 41 | | skateboard | 51.811 | surfboard | 32.650 | tennis racket | 46.308 | 42 | | bottle | 38.541 | wine glass | 34.170 | cup | 41.653 | 43 | | fork | 31.612 | knife | 16.877 | spoon | 14.055 | 44 | | bowl | 39.468 | banana | 23.297 | apple | 19.211 | 45 | | sandwich | 30.157 | orange | 29.881 | broccoli | 21.950 | 46 | | carrot | 21.754 | hot dog | 29.781 | pizza | 48.315 | 47 | | donut | 46.185 | cake | 35.278 | chair | 27.498 | 48 | | couch | 
40.332 | potted plant | 26.441 | bed | 38.089 | 49 | | dining table | 27.612 | toilet | 60.464 | tv | 54.461 | 50 | | laptop | 55.213 | mouse | 62.109 | remote | 29.338 | 51 | | keyboard | 47.787 | cell phone | 31.711 | microwave | 54.977 | 52 | | oven | 34.952 | toaster | 35.348 | sink | 34.886 | 53 | | refrigerator | 48.997 | book | 14.370 | clock | 51.591 | 54 | | vase | 40.057 | scissors | 21.971 | teddy bear | 43.441 | 55 | | hair drier | 5.964 | toothbrush | 19.921 | | | 56 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.9x_ms/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NORM_REG_TARGETS=True, 15 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 16 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 17 | FOCAL_LOSS_GAMMA=2.0, 18 | FOCAL_LOSS_ALPHA=0.25, 19 | IOU_LOSS_TYPE="giou", 20 | REG_WEIGHT=2.0, 21 | ), 22 | POTO=dict( 23 | ALPHA=0.8, 24 | CENTER_SAMPLING_RADIUS=1.5, 25 | ), 26 | NMS_TYPE=None, 27 | ), 28 | DATASETS=dict( 29 | TRAIN=("coco_2017_train",), 30 | TEST=("coco_2017_val",), 31 | ), 32 | SOLVER=dict( 33 | CHECKPOINT_PERIOD=10000, 34 | LR_SCHEDULER=dict( 35 | MAX_ITER=810000, 36 | STEPS=(750000, 790000), 37 | ), 38 | OPTIMIZER=dict( 39 | BASE_LR=0.01, 40 | ), 41 | IMS_PER_BATCH=16, 42 | ), 43 | INPUT=dict( 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("ResizeShortestEdge", 47 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style="choice")), 48 | ("RandomFlip", dict()), 49 | ], 50 | TEST_PIPELINES=[ 51 | ("ResizeShortestEdge", 52 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 53 | ], 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PEROID=10000, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground', 61 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 62 | ) 63 | 64 | 65 | class CustomFCOSConfig(FCOSConfig): 66 | def __init__(self): 67 | super(CustomFCOSConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = CustomFCOSConfig() 72 | -------------------------------------------------------------------------------- /playground/detection/coco/poto.res50.fpn.coco.800size.9x_ms/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/atss.res50.fpn.crowdhuman.800size.30k/README.md: -------------------------------------------------------------------------------- 1 | # atss.res50.fpn.crowdhuman.800size.30k 2 | 3 | | AP | mMR | Recall | 4 | |:-----:|:-----:|:--------:| 5 | | 0.872 | 0.497 | 0.940 | 6 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/atss.res50.fpn.crowdhuman.800size.30k/atss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) BaseDetection, Inc. and its affiliates. All Rights Reserved 4 | 5 | import logging 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | from torch import nn 10 | 11 | from cvpods.layers import ShapeSpec, cat, generalized_batched_nms 12 | from cvpods.modeling.box_regression import Shift2BoxTransform 13 | from cvpods.modeling.losses import iou_loss, sigmoid_focal_loss_jit 14 | from cvpods.modeling.meta_arch.fcos import FCOSHead, permute_all_cls_and_box_to_N_HWA_K_and_concat 15 | from cvpods.modeling.meta_arch.retinanet import permute_to_N_HWA_K 16 | from cvpods.modeling.postprocessing import detector_postprocess 17 | from cvpods.structures import Boxes, ImageList, Instances, pairwise_iou 18 | from cvpods.utils import comm, log_first_n 19 | 20 | 21 | class ATSS(nn.Module): 22 | """ 23 | Implement ATSS (https://arxiv.org/abs/1912.02424). 
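    In short, ATSS replaces hand-tuned IoU and center-sampling rules with an adaptive,
    per-ground-truth assignment: on every FPN level the top-k locations closest to the
    gt center are collected as candidates, their IoUs with the gt are computed, and the
    positive threshold is set to the mean plus the standard deviation of those IoUs;
    candidates above the threshold whose centers fall inside the gt box become positives.
    ``cfg.MODEL.ATSS.TOPK`` and ``cfg.MODEL.ATSS.ANCHOR_SCALE`` (read in ``__init__``
    below) expose the candidate count per level and the anchor scale used for this
    selection.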
24 | """ 25 | def __init__(self, cfg): 26 | super().__init__() 27 | 28 | self.device = torch.device(cfg.MODEL.DEVICE) 29 | 30 | # fmt: off 31 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 32 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 33 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 34 | # Loss parameters: 35 | self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA 36 | self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA 37 | self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE 38 | self.reg_weight = cfg.MODEL.FCOS.REG_WEIGHT 39 | # Inference parameters: 40 | self.score_threshold = cfg.MODEL.FCOS.SCORE_THRESH_TEST 41 | self.topk_candidates = cfg.MODEL.FCOS.TOPK_CANDIDATES_TEST 42 | self.nms_threshold = cfg.MODEL.FCOS.NMS_THRESH_TEST 43 | self.nms_type = cfg.MODEL.NMS_TYPE 44 | self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE 45 | # fmt: on 46 | 47 | self.backbone = cfg.build_backbone( 48 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 49 | 50 | backbone_shape = self.backbone.output_shape() 51 | feature_shapes = [backbone_shape[f] for f in self.in_features] 52 | self.head = FCOSHead(cfg, feature_shapes) 53 | self.shift_generator = cfg.build_shift_generator(cfg, feature_shapes) 54 | 55 | # Matching and loss 56 | self.shift2box_transform = Shift2BoxTransform( 57 | weights=cfg.MODEL.FCOS.BBOX_REG_WEIGHTS) 58 | self.anchor_scale = cfg.MODEL.ATSS.ANCHOR_SCALE 59 | self.atss_topk = cfg.MODEL.ATSS.TOPK 60 | 61 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 62 | 3, 1, 1) 63 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 64 | 3, 1, 1) 65 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 66 | self.to(self.device) 67 | 68 | def forward(self, batched_inputs): 69 | """ 70 | Args: 71 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 72 | Each item in the list contains the inputs for one image. 73 | For now, each item in the list is a dict that contains: 74 | 75 | * image: Tensor, image in (C, H, W) format. 76 | * instances: Instances 77 | 78 | Other information that's included in the original dicts, such as: 79 | 80 | * "height", "width" (int): the output resolution of the model, used in inference. 81 | See :meth:`postprocess` for details. 82 | Returns: 83 | dict[str: Tensor]: 84 | mapping from a named loss to a tensor storing the loss. Used during training only. 
85 | """ 86 | images = self.preprocess_image(batched_inputs) 87 | if "instances" in batched_inputs[0]: 88 | gt_instances = [ 89 | x["instances"].to(self.device) for x in batched_inputs 90 | ] 91 | elif "targets" in batched_inputs[0]: 92 | log_first_n( 93 | logging.WARN, 94 | "'targets' in the model inputs is now renamed to 'instances'!", 95 | n=10) 96 | gt_instances = [ 97 | x["targets"].to(self.device) for x in batched_inputs 98 | ] 99 | else: 100 | gt_instances = None 101 | 102 | features = self.backbone(images.tensor) 103 | features = [features[f] for f in self.in_features] 104 | box_cls, box_delta, box_center = self.head(features) 105 | shifts = self.shift_generator(features) 106 | 107 | if self.training: 108 | # remove gt_instances with ignore label 109 | gt_instances = [inst[inst.gt_classes >= 0] for inst in gt_instances] 110 | gt_classes, gt_shifts_reg_deltas, gt_centerness = self.get_ground_truth( 111 | shifts, gt_instances) 112 | return self.losses(gt_classes, gt_shifts_reg_deltas, gt_centerness, 113 | box_cls, box_delta, box_center) 114 | else: 115 | results = self.inference(box_cls, box_delta, box_center, shifts, 116 | images) 117 | processed_results = [] 118 | for results_per_image, input_per_image, image_size in zip( 119 | results, batched_inputs, images.image_sizes): 120 | height = input_per_image.get("height", image_size[0]) 121 | width = input_per_image.get("width", image_size[1]) 122 | r = detector_postprocess(results_per_image, height, width) 123 | processed_results.append({"instances": r}) 124 | return processed_results 125 | 126 | def losses(self, gt_classes, gt_shifts_deltas, gt_centerness, 127 | pred_class_logits, pred_shift_deltas, pred_centerness): 128 | """ 129 | Args: 130 | For `gt_classes`, `gt_shifts_deltas` and `gt_centerness` parameters, see 131 | :meth:`FCOS.get_ground_truth`. 132 | Their shapes are (N, R) and (N, R, 4), respectively, where R is 133 | the total number of shifts across levels, i.e. sum(Hi x Wi) 134 | For `pred_class_logits`, `pred_shift_deltas` and `pred_centerness`, see 135 | :meth:`FCOSHead.forward`. 136 | 137 | Returns: 138 | dict[str: Tensor]: 139 | mapping from a named loss to a scalar tensor 140 | storing the loss. Used during training only. The dict keys are: 141 | "loss_cls" and "loss_box_reg" 142 | """ 143 | pred_class_logits, pred_shift_deltas, pred_centerness = \ 144 | permute_all_cls_and_box_to_N_HWA_K_and_concat( 145 | pred_class_logits, pred_shift_deltas, pred_centerness, 146 | self.num_classes 147 | ) # Shapes: (N x R, K) and (N x R, 4), respectively. 
148 | 149 | gt_classes = gt_classes.flatten() 150 | gt_shifts_deltas = gt_shifts_deltas.view(-1, 4) 151 | gt_centerness = gt_centerness.view(-1, 1) 152 | 153 | valid_idxs = gt_classes >= 0 154 | foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes) 155 | num_foreground = foreground_idxs.sum() 156 | 157 | gt_classes_target = torch.zeros_like(pred_class_logits) 158 | gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1 159 | 160 | num_foreground = comm.all_reduce(num_foreground) / float(comm.get_world_size()) 161 | num_foreground_centerness = gt_centerness[foreground_idxs].sum() 162 | num_targets = comm.all_reduce(num_foreground_centerness) / float(comm.get_world_size()) 163 | 164 | # logits loss 165 | loss_cls = sigmoid_focal_loss_jit( 166 | pred_class_logits[valid_idxs], 167 | gt_classes_target[valid_idxs], 168 | alpha=self.focal_loss_alpha, 169 | gamma=self.focal_loss_gamma, 170 | reduction="sum", 171 | ) / max(1.0, num_foreground) 172 | 173 | # regression loss 174 | loss_box_reg = iou_loss( 175 | pred_shift_deltas[foreground_idxs], 176 | gt_shifts_deltas[foreground_idxs], 177 | gt_centerness[foreground_idxs], 178 | box_mode="ltrb", 179 | loss_type=self.iou_loss_type, 180 | reduction="sum", 181 | ) / max(1.0, num_targets) * self.reg_weight 182 | # ) / max(1.0, num_foreground) * self.reg_weight 183 | 184 | # centerness loss 185 | loss_centerness = F.binary_cross_entropy_with_logits( 186 | pred_centerness[foreground_idxs], 187 | gt_centerness[foreground_idxs], 188 | reduction="sum", 189 | ) / max(1, num_foreground) 190 | 191 | return { 192 | "loss_cls": loss_cls, 193 | "loss_box_reg": loss_box_reg, 194 | "loss_centerness": loss_centerness 195 | } 196 | 197 | @torch.no_grad() 198 | def get_ground_truth(self, shifts, targets): 199 | """ 200 | Args: 201 | shifts (list[list[Tensor]]): a list of N=#image elements. Each is a 202 | list of #feature level tensors. The tensors contains shifts of 203 | this image on the specific feature level. 204 | targets (list[Instances]): a list of N `Instances`s. The i-th 205 | `Instances` contains the ground-truth per-instance annotations 206 | for the i-th input image. Specify `targets` during training only. 207 | 208 | Returns: 209 | gt_classes (Tensor): 210 | An integer tensor of shape (N, R) storing ground-truth 211 | labels for each shift. 212 | R is the total number of shifts, i.e. the sum of Hi x Wi for all levels. 213 | Shifts in the valid boxes are assigned their corresponding label in the 214 | [0, K-1] range. Shifts in the background are assigned the label "K". 215 | Shifts in the ignore areas are assigned a label "-1", i.e. ignore. 216 | gt_shifts_deltas (Tensor): 217 | Shape (N, R, 4). 218 | The last dimension represents ground-truth shift2box transform 219 | targets (dl, dt, dr, db) that map each shift to its matched ground-truth box. 220 | The values in the tensor are meaningful only when the corresponding 221 | shift is labeled as foreground. 222 | gt_centerness (Tensor): 223 | An float tensor (0, 1) of shape (N, R) whose values in [0, 1] 224 | storing ground-truth centerness for each shift. 
225 | 226 | """ 227 | gt_classes = [] 228 | gt_shifts_deltas = [] 229 | gt_centerness = [] 230 | 231 | for shifts_per_image, targets_per_image in zip(shifts, targets): 232 | shifts_over_all_feature_maps = torch.cat(shifts_per_image, dim=0) 233 | 234 | gt_boxes = targets_per_image.gt_boxes 235 | 236 | is_in_boxes = self.shift2box_transform.get_deltas( 237 | shifts_over_all_feature_maps, gt_boxes.tensor.unsqueeze(1) 238 | ).min(dim=-1).values > 0 239 | 240 | gt_positions_iou = [] 241 | candidate_idxs = [] 242 | base = 0 243 | for stride, shifts_i in zip(self.fpn_strides, shifts_per_image): 244 | gt_positions_iou.append(pairwise_iou( 245 | gt_boxes, 246 | Boxes(torch.cat(( 247 | shifts_i - stride * self.anchor_scale / 2, 248 | shifts_i + stride * self.anchor_scale / 2, 249 | ), dim=1)) 250 | )) 251 | 252 | distances = ( 253 | gt_boxes.get_centers().unsqueeze(1) - shifts_i 254 | ).pow_(2).sum(dim=-1).sqrt_() 255 | _, topk_idxs = distances.topk( 256 | self.atss_topk, dim=1, largest=False) 257 | candidate_idxs.append(base + topk_idxs) 258 | base += len(shifts_i) 259 | gt_positions_iou = torch.cat(gt_positions_iou, dim=1) 260 | candidate_idxs = torch.cat(candidate_idxs, dim=1) 261 | 262 | candidate_ious = gt_positions_iou.gather(1, candidate_idxs) 263 | ious_thr = (candidate_ious.mean(dim=1, keepdim=True) 264 | + candidate_ious.std(dim=1, keepdim=True)) 265 | is_foreground = torch.zeros_like( 266 | is_in_boxes).scatter_(1, candidate_idxs, True) 267 | is_foreground &= gt_positions_iou >= ious_thr 268 | 269 | gt_positions_iou[~is_in_boxes] = -1 270 | gt_positions_iou[~is_foreground] = -1 271 | 272 | # if there are still more than one objects for a position, 273 | # we choose the one with maximum iou 274 | positions_max_iou, gt_matched_idxs = gt_positions_iou.max(dim=0) 275 | 276 | # ground truth box regression 277 | gt_shifts_reg_deltas_i = self.shift2box_transform.get_deltas( 278 | shifts_over_all_feature_maps, gt_boxes[gt_matched_idxs].tensor) 279 | 280 | # ground truth classes 281 | has_gt = len(targets_per_image) > 0 282 | if has_gt: 283 | gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] 284 | # Shifts with iou -1 are treated as background. 285 | gt_classes_i[positions_max_iou == -1] = self.num_classes 286 | else: 287 | gt_classes_i = torch.zeros_like( 288 | gt_matched_idxs) + self.num_classes 289 | 290 | # ground truth centerness 291 | left_right = gt_shifts_reg_deltas_i[:, [0, 2]] 292 | top_bottom = gt_shifts_reg_deltas_i[:, [1, 3]] 293 | gt_centerness_i = torch.sqrt( 294 | (left_right.min(dim=-1).values / left_right.max(dim=-1).values).clamp_(min=0) 295 | * (top_bottom.min(dim=-1).values / top_bottom.max(dim=-1).values).clamp_(min=0) 296 | ) 297 | 298 | gt_classes.append(gt_classes_i) 299 | gt_shifts_deltas.append(gt_shifts_reg_deltas_i) 300 | gt_centerness.append(gt_centerness_i) 301 | 302 | return torch.stack(gt_classes), torch.stack( 303 | gt_shifts_deltas), torch.stack(gt_centerness) 304 | 305 | def inference(self, box_cls, box_delta, box_center, shifts, images): 306 | """ 307 | Arguments: 308 | box_cls, box_delta, box_center: Same as the output of :meth:`FCOSHead.forward` 309 | shifts (list[list[Tensor]): a list of #images elements. Each is a 310 | list of #feature level tensor. The tensor contain shifts of this 311 | image on the specific feature level. 312 | images (ImageList): the input images 313 | 314 | Returns: 315 | results (List[Instances]): a list of #images elements. 
316 | """ 317 | assert len(shifts) == len(images) 318 | results = [] 319 | 320 | box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls] 321 | box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta] 322 | box_center = [permute_to_N_HWA_K(x, 1) for x in box_center] 323 | # list[Tensor], one per level, each has shape (N, Hi x Wi, K or 4) 324 | 325 | for img_idx, shifts_per_image in enumerate(shifts): 326 | image_size = images.image_sizes[img_idx] 327 | box_cls_per_image = [ 328 | box_cls_per_level[img_idx] for box_cls_per_level in box_cls 329 | ] 330 | box_reg_per_image = [ 331 | box_reg_per_level[img_idx] for box_reg_per_level in box_delta 332 | ] 333 | box_ctr_per_image = [ 334 | box_ctr_per_level[img_idx] for box_ctr_per_level in box_center 335 | ] 336 | results_per_image = self.inference_single_image( 337 | box_cls_per_image, box_reg_per_image, box_ctr_per_image, 338 | shifts_per_image, tuple(image_size)) 339 | results.append(results_per_image) 340 | return results 341 | 342 | def inference_single_image(self, box_cls, box_delta, box_center, shifts, 343 | image_size): 344 | """ 345 | Single-image inference. Return bounding-box detection results by thresholding 346 | on scores and applying non-maximum suppression (NMS). 347 | 348 | Arguments: 349 | box_cls (list[Tensor]): list of #feature levels. Each entry contains 350 | tensor of size (H x W, K) 351 | box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. 352 | box_center (list[Tensor]): Same shape as 'box_cls' except that K becomes 1. 353 | shifts (list[Tensor]): list of #feature levels. Each entry contains 354 | a tensor, which contains all the shifts for that 355 | image in that feature level. 356 | image_size (tuple(H, W)): a tuple of the image height and width. 357 | 358 | Returns: 359 | Same as `inference`, but for only one image. 360 | """ 361 | boxes_all = [] 362 | scores_all = [] 363 | class_idxs_all = [] 364 | 365 | # Iterate over every feature level 366 | for box_cls_i, box_reg_i, box_ctr_i, shifts_i in zip( 367 | box_cls, box_delta, box_center, shifts): 368 | # (HxWxK,) 369 | box_cls_i = box_cls_i.flatten().sigmoid_() 370 | 371 | # Keep top k top scoring indices only. 
372 | num_topk = min(self.topk_candidates, box_reg_i.size(0)) 373 | # torch.sort is actually faster than .topk (at least on GPUs) 374 | predicted_prob, topk_idxs = box_cls_i.sort(descending=True) 375 | predicted_prob = predicted_prob[:num_topk] 376 | topk_idxs = topk_idxs[:num_topk] 377 | 378 | # filter out the proposals with low confidence score 379 | keep_idxs = predicted_prob > self.score_threshold 380 | predicted_prob = predicted_prob[keep_idxs] 381 | topk_idxs = topk_idxs[keep_idxs] 382 | 383 | shift_idxs = topk_idxs // self.num_classes 384 | classes_idxs = topk_idxs % self.num_classes 385 | 386 | box_reg_i = box_reg_i[shift_idxs] 387 | shifts_i = shifts_i[shift_idxs] 388 | # predict boxes 389 | predicted_boxes = self.shift2box_transform.apply_deltas( 390 | box_reg_i, shifts_i) 391 | 392 | box_ctr_i = box_ctr_i.flatten().sigmoid_()[shift_idxs] 393 | predicted_prob = torch.sqrt(predicted_prob * box_ctr_i) 394 | 395 | boxes_all.append(predicted_boxes) 396 | scores_all.append(predicted_prob) 397 | class_idxs_all.append(classes_idxs) 398 | 399 | boxes_all, scores_all, class_idxs_all = [ 400 | cat(x) for x in [boxes_all, scores_all, class_idxs_all] 401 | ] 402 | 403 | keep = generalized_batched_nms( 404 | boxes_all, scores_all, class_idxs_all, 405 | self.nms_threshold, nms_type=self.nms_type 406 | ) 407 | keep = keep[:self.max_detections_per_image] 408 | 409 | result = Instances(image_size) 410 | result.pred_boxes = Boxes(boxes_all[keep]) 411 | result.scores = scores_all[keep] 412 | result.pred_classes = class_idxs_all[keep] 413 | return result 414 | 415 | def preprocess_image(self, batched_inputs): 416 | """ 417 | Normalize, pad and batch the input images. 418 | """ 419 | images = [x["image"].to(self.device) for x in batched_inputs] 420 | images = [self.normalizer(x) for x in images] 421 | images = ImageList.from_tensors(images, 422 | self.backbone.size_divisibility) 423 | return images 424 | 425 | def _inference_for_ms_test(self, batched_inputs): 426 | """ 427 | function used for multiscale test, will be refactor in the future. 428 | The same input with `forward` function. 
429 | """ 430 | assert not self.training, "inference mode with training=True" 431 | assert len(batched_inputs) == 1, "inference image number > 1" 432 | images = self.preprocess_image(batched_inputs) 433 | 434 | features = self.backbone(images.tensor) 435 | features = [features[f] for f in self.in_features] 436 | box_cls, box_delta, box_center = self.head(features) 437 | shifts = self.shift_generator(features) 438 | 439 | results = self.inference(box_cls, box_delta, box_center, shifts, images) 440 | for results_per_image, input_per_image, image_size in zip( 441 | results, batched_inputs, images.image_sizes 442 | ): 443 | height = input_per_image.get("height", image_size[0]) 444 | width = input_per_image.get("width", image_size[1]) 445 | processed_results = detector_postprocess(results_per_image, height, width) 446 | return processed_results 447 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/atss.res50.fpn.crowdhuman.800size.30k/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NUM_CLASSES=1, 15 | CENTERNESS_ON_REG=True, 16 | NORM_REG_TARGETS=True, 17 | NMS_THRESH_TEST=0.6, 18 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 19 | FOCAL_LOSS_GAMMA=2.0, 20 | FOCAL_LOSS_ALPHA=0.25, 21 | IOU_LOSS_TYPE="giou", 22 | REG_WEIGHT=2.0, 23 | ), 24 | ATSS=dict( 25 | ANCHOR_SCALE=8, 26 | TOPK=9, 27 | ), 28 | ), 29 | DATASETS=dict( 30 | TRAIN=("crowdhuman_train",), 31 | TEST=("crowdhuman_val",), 32 | ), 33 | SOLVER=dict( 34 | CHECKPOINT_PERIOD=5000, 35 | LR_SCHEDULER=dict( 36 | MAX_ITER=30000, 37 | STEPS=(20000, 25000), 38 | ), 39 | OPTIMIZER=dict( 40 | BASE_LR=0.01, 41 | ), 42 | IMS_PER_BATCH=16, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", dict(short_edge_length=(800,), max_size=1400, sample_style="choice")), 48 | ("RandomFlip", dict()), 49 | ], 50 | TEST_PIPELINES=[ 51 | ("ResizeShortestEdge", dict(short_edge_length=800, max_size=1400, sample_style="choice")), 52 | ], 53 | ) 54 | ), 55 | TEST=dict( 56 | DETECTIONS_PER_IMAGE=500, 57 | EVAL_PEROID=5000, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground', 61 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 62 | ) 63 | 64 | 65 | class CustomFCOSConfig(FCOSConfig): 66 | def __init__(self): 67 | super(CustomFCOSConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = CustomFCOSConfig() 72 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/atss.res50.fpn.crowdhuman.800size.30k/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | from atss import ATSS 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | 27 | return ShiftGenerator(cfg, input_shape) 28 | 29 | 30 | def build_model(cfg): 31 | 32 | cfg.build_backbone = build_backbone 33 | cfg.build_shift_generator = build_shift_generator 34 | 35 | model = ATSS(cfg) 36 | logger = logging.getLogger(__name__) 37 | logger.info("Model:\n{}".format(model)) 38 | return model 39 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/fcos.res50.fpn.crowdhuman.800size.30k/README.md: -------------------------------------------------------------------------------- 1 | # fcos.res50.fpn.crowdhuman.800size.30k 2 | 3 | | AP | mMR | Recall | 4 | |:-----:|:-----:|:--------:| 5 | | 0.861 | 0.549 | 0.942 | 6 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/fcos.res50.fpn.crowdhuman.800size.30k/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NUM_CLASSES=1, 15 | CENTERNESS_ON_REG=True, 16 | NORM_REG_TARGETS=True, 17 | NMS_THRESH_TEST=0.6, 18 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 19 | FOCAL_LOSS_GAMMA=2.0, 20 | FOCAL_LOSS_ALPHA=0.25, 21 | IOU_LOSS_TYPE="giou", 22 | CENTER_SAMPLING_RADIUS=1.5, 23 | OBJECT_SIZES_OF_INTEREST=[ 24 | [-1, 64], 25 | [64, 128], 26 | [128, 256], 27 | [256, 512], 28 | [512, float("inf")], 29 | ], 30 | ), 31 | ), 32 | DATASETS=dict( 33 | TRAIN=("crowdhuman_train",), 34 | TEST=("crowdhuman_val",), 35 | ), 36 | SOLVER=dict( 37 | CHECKPOINT_PERIOD=5000, 38 | LR_SCHEDULER=dict( 39 | MAX_ITER=30000, 40 | STEPS=(20000, 25000), 41 | ), 42 | OPTIMIZER=dict( 43 | BASE_LR=0.01, 44 | ), 45 | IMS_PER_BATCH=16, 46 | ), 47 | INPUT=dict( 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("ResizeShortestEdge", dict(short_edge_length=(800,), max_size=1400, sample_style="choice")), 51 | ("RandomFlip", dict()), 52 | ], 53 | TEST_PIPELINES=[ 54 | ("ResizeShortestEdge", dict(short_edge_length=800, max_size=1400, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | DETECTIONS_PER_IMAGE=500, 60 | EVAL_PEROID=5000, 61 | ), 62 | OUTPUT_DIR=osp.join( 63 | '/data/Outputs/model_logs/cvpods_playground', 64 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 65 | ) 66 | 67 | 68 | class CustomFCOSConfig(FCOSConfig): 69 | def __init__(self): 70 | super(CustomFCOSConfig, self).__init__() 71 | self._register_configuration(_config_dict) 72 | 73 | 74 | config = CustomFCOSConfig() 75 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/fcos.res50.fpn.crowdhuman.800size.30k/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from 
cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | from fcos import FCOS 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | 27 | return ShiftGenerator(cfg, input_shape) 28 | 29 | 30 | def build_model(cfg): 31 | 32 | cfg.build_backbone = build_backbone 33 | cfg.build_shift_generator = build_shift_generator 34 | 35 | model = FCOS(cfg) 36 | logger = logging.getLogger(__name__) 37 | logger.info("Model:\n{}".format(model)) 38 | return model 39 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf.aux/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.crowdhuman.800size.30k.3dmf.aux 2 | 3 | | AP | mMR | Recall | 4 | |:-----:|:-----:|:--------:| 5 | | 0.891 | 0.489 | 0.965 | 6 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf.aux/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NUM_CLASSES=1, 15 | NORM_REG_TARGETS=True, 16 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 17 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 18 | FOCAL_LOSS_GAMMA=2.0, 19 | FOCAL_LOSS_ALPHA=0.25, 20 | IOU_LOSS_TYPE="giou", 21 | REG_WEIGHT=2.0, 22 | ), 23 | POTO=dict( 24 | ALPHA=0.8, 25 | CENTER_SAMPLING_RADIUS=0.0, # inside gt box 26 | AUX_TOPK=9, 27 | FILTER_KERNEL_SIZE=3, 28 | FILTER_TAU=2, 29 | ), 30 | NMS_TYPE=None, 31 | ), 32 | DATASETS=dict( 33 | TRAIN=("crowdhuman_train",), 34 | TEST=("crowdhuman_val",), 35 | ), 36 | SOLVER=dict( 37 | CHECKPOINT_PERIOD=5000, 38 | LR_SCHEDULER=dict( 39 | MAX_ITER=30000, 40 | STEPS=(20000, 25000), 41 | ), 42 | OPTIMIZER=dict( 43 | BASE_LR=0.01, 44 | ), 45 | IMS_PER_BATCH=16, 46 | ), 47 | INPUT=dict( 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("ResizeShortestEdge", dict(short_edge_length=(800,), max_size=1400, sample_style="choice")), 51 | ("RandomFlip", dict()), 52 | ], 53 | TEST_PIPELINES=[ 54 | ("ResizeShortestEdge", dict(short_edge_length=800, max_size=1400, sample_style="choice")), 55 | ], 56 | ) 57 | ), 58 | TEST=dict( 59 | DETECTIONS_PER_IMAGE=500, 60 | EVAL_PEROID=5000, 61 | ), 62 | OUTPUT_DIR=osp.join( 63 | '/data/Outputs/model_logs/cvpods_playground', 64 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 65 | ) 66 | 67 | 68 | class CustomFCOSConfig(FCOSConfig): 69 | def __init__(self): 70 | super(CustomFCOSConfig, self).__init__() 71 | self._register_configuration(_config_dict) 72 | 73 | 74 | config = CustomFCOSConfig() 75 | -------------------------------------------------------------------------------- 
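Each playground folder in this dump pairs a config.py, which builds and exports a `config` object, with a net.py, which exports `build_model(cfg)` and wires in the backbone and shift-generator builders. The snippet below is a minimal, hypothetical driver showing how those two files fit together when run from inside one of these folders; the actual cvpods train/test entry points are not part of this excerpt, so everything other than the `config` and `build_model` imports (the CPU override, the dummy image) is an illustrative assumption.

```python
# Minimal sketch, assuming it is executed from inside one of the playground
# folders above (so config.py, net.py and the local fcos.py/atss.py are
# importable). Only `config` and `build_model` come from the files shown;
# the device override and the dummy input are assumptions for illustration.
import torch

from config import config        # CustomFCOSConfig instance defined in config.py
from net import build_model      # attaches build_backbone / build_shift_generator

config.MODEL.DEVICE = "cpu"      # assumption: avoid requiring a GPU for this demo
model = build_model(config)      # FCOS (or ATSS) meta-architecture
model.eval()

# batched_inputs follows the forward() docstring: one dict per image with a
# (C, H, W) tensor under "image" and the desired output resolution.
dummy = {"image": torch.rand(3, 800, 1280) * 255, "height": 800, "width": 1280}
with torch.no_grad():
    outputs = model([dummy])
print(outputs[0]["instances"])   # Instances with pred_boxes, scores, pred_classes
```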
/playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf.aux/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.crowdhuman.800size.30k.3dmf 2 | 3 | | AP | mMR | Recall | 4 | |:-----:|:-----:|:--------:| 5 | | 0.888 | 0.510 | 0.966 | 6 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NUM_CLASSES=1, 15 | NORM_REG_TARGETS=True, 16 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 17 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 18 | FOCAL_LOSS_GAMMA=2.0, 19 | FOCAL_LOSS_ALPHA=0.25, 20 | IOU_LOSS_TYPE="giou", 21 | REG_WEIGHT=2.0, 22 | ), 23 | POTO=dict( 24 | ALPHA=0.8, 25 | CENTER_SAMPLING_RADIUS=0.0, # inside gt box 26 | FILTER_KERNEL_SIZE=3, 27 | FILTER_TAU=2, 28 | ), 29 | NMS_TYPE=None, 30 | ), 31 | DATASETS=dict( 32 | TRAIN=("crowdhuman_train",), 33 | TEST=("crowdhuman_val",), 34 | ), 35 | SOLVER=dict( 36 | CHECKPOINT_PERIOD=5000, 37 | LR_SCHEDULER=dict( 38 | MAX_ITER=30000, 39 | STEPS=(20000, 25000), 40 | ), 41 | OPTIMIZER=dict( 42 | BASE_LR=0.01, 43 | ), 44 | IMS_PER_BATCH=16, 45 | ), 46 | INPUT=dict( 47 | AUG=dict( 48 | TRAIN_PIPELINES=[ 49 | ("ResizeShortestEdge", dict(short_edge_length=(800,), max_size=1400, sample_style="choice")), 50 | ("RandomFlip", dict()), 51 | ], 52 | TEST_PIPELINES=[ 53 | ("ResizeShortestEdge", dict(short_edge_length=800, max_size=1400, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | TEST=dict( 58 | DETECTIONS_PER_IMAGE=500, 59 | EVAL_PEROID=5000, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class 
CustomFCOSConfig(FCOSConfig): 68 | def __init__(self): 69 | super(CustomFCOSConfig, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = CustomFCOSConfig() 74 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k.3dmf/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k/README.md: -------------------------------------------------------------------------------- 1 | # poto.res50.fpn.crowdhuman.800size.30k 2 | 3 | | AP | mMR | Recall | 4 | |:-----:|:-----:|:--------:| 5 | | 0.885 | 0.522 | 0.963 | 6 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | SHIFT_GENERATOR=dict( 10 | NUM_SHIFTS=1, 11 | OFFSET=0.5, 12 | ), 13 | FCOS=dict( 14 | NUM_CLASSES=1, 15 | NORM_REG_TARGETS=True, 16 | NMS_THRESH_TEST=1.0, # disable NMS when NMS threshold is 1.0 17 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 18 | FOCAL_LOSS_GAMMA=2.0, 19 | FOCAL_LOSS_ALPHA=0.25, 20 | IOU_LOSS_TYPE="giou", 21 | REG_WEIGHT=2.0, 22 | ), 23 | POTO=dict( 24 | ALPHA=0.8, 25 | CENTER_SAMPLING_RADIUS=0.0, # inside gt box 26 | ), 27 | NMS_TYPE=None, 28 | ), 29 | DATASETS=dict( 30 | TRAIN=("crowdhuman_train",), 31 | TEST=("crowdhuman_val",), 32 | ), 33 | SOLVER=dict( 34 | CHECKPOINT_PERIOD=5000, 35 | LR_SCHEDULER=dict( 36 | MAX_ITER=30000, 37 | STEPS=(20000, 25000), 38 | ), 39 | OPTIMIZER=dict( 40 | BASE_LR=0.01, 41 | ), 42 | IMS_PER_BATCH=16, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", dict(short_edge_length=(800,), max_size=1400, sample_style="choice")), 48 | ("RandomFlip", dict()), 49 | ], 50 | TEST_PIPELINES=[ 51 | ("ResizeShortestEdge", dict(short_edge_length=800, max_size=1400, sample_style="choice")), 52 | ], 53 | ) 54 | ), 55 | TEST=dict( 56 | DETECTIONS_PER_IMAGE=500, 57 | 
EVAL_PEROID=5000, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground', 61 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 62 | ) 63 | 64 | 65 | class CustomFCOSConfig(FCOSConfig): 66 | def __init__(self): 67 | super(CustomFCOSConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = CustomFCOSConfig() 72 | -------------------------------------------------------------------------------- /playground/detection/crowdhuman/poto.res50.fpn.crowdhuman.800size.30k/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_p5_backbone 7 | 8 | from fcos import FCOS 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_retinanet_resnet_fpn_p5_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_shift_generator(cfg, input_shape): 27 | 28 | return ShiftGenerator(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | 33 | cfg.build_backbone = build_backbone 34 | cfg.build_shift_generator = build_shift_generator 35 | 36 | model = FCOS(cfg) 37 | logger = logging.getLogger(__name__) 38 | logger.info("Model:\n{}".format(model)) 39 | return model 40 | --------------------------------------------------------------------------------
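The core of the ATSS assignment in atss.py above is the adaptive IoU threshold: for every ground-truth box, the top-k closest shift locations per FPN level are taken as candidates, and a candidate becomes a positive only if its IoU with that box exceeds the mean plus the standard deviation of the candidate IoUs (and its location falls inside the box). The toy snippet below isolates just that thresholding step with invented numbers; it is a didactic sketch, not code from the repository.

```python
# Didactic sketch of ATSS's adaptive IoU threshold (mean + std over candidates).
# The IoU values are invented; in atss.py they come from pairwise_iou between
# ground-truth boxes and stride-scaled anchors centered on the top-k shifts.
import torch

# candidate_ious: (num_gt, num_candidates) IoUs of the closest candidate shifts
candidate_ious = torch.tensor([
    [0.10, 0.35, 0.62, 0.55, 0.20],   # gt box 0
    [0.05, 0.15, 0.12, 0.40, 0.33],   # gt box 1
])

ious_thr = (candidate_ious.mean(dim=1, keepdim=True)
            + candidate_ious.std(dim=1, keepdim=True))
is_foreground = candidate_ious >= ious_thr   # per-object adaptive positive mask

print(ious_thr.squeeze(1))   # roughly tensor([0.586, 0.358]); threshold adapts per object
print(is_foreground)         # gt 0 keeps only its 0.62 candidate, gt 1 only its 0.40
```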
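The POTO configs in this excerpt only expose the assignment hyper-parameters (MODEL.POTO.ALPHA, CENTER_SAMPLING_RADIUS, and NMS disabled via NMS_THRESH_TEST=1.0 and NMS_TYPE=None); the fcos.py that implements the matcher is not included above. As a rough, hedged illustration of how an ALPHA-weighted one-to-one assignment can be set up, following the POTO paper's quality of cls^(1-alpha) * IoU^alpha with a Hungarian matcher rather than this repository's exact code, consider the sketch below.

```python
# Hedged sketch of a POTO-style one-to-one assignment. Shapes, toy inputs and
# the helper name are assumptions; only ALPHA=0.8 and the "positives must lie
# inside the gt box" prior (CENTER_SAMPLING_RADIUS=0.0) come from the configs.
import torch
from scipy.optimize import linear_sum_assignment

def poto_match(cls_prob, iou, inside_box, alpha=0.8):
    """cls_prob, iou, inside_box: (num_gt, num_shifts) tensors."""
    quality = cls_prob.pow(1 - alpha) * iou.pow(alpha)
    quality = quality * inside_box.float()               # spatial prior: inside the gt box
    gt_idx, shift_idx = linear_sum_assignment(-quality.numpy())  # maximize total quality
    return gt_idx, shift_idx                             # exactly one shift per gt box

cls_prob = torch.rand(2, 6)
iou = torch.rand(2, 6)
inside_box = torch.ones(2, 6, dtype=torch.bool)
print(poto_match(cls_prob, iou, inside_box))
```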