├── .gitignore
├── .travis.yml
├── LICENSE
├── NOTICE
├── README.md
├── data
│   └── .gitignore
├── exp
│   └── .gitignore
├── experiments
│   ├── ctdet_coco_dla_1x.sh
│   ├── ctdet_coco_dla_2x.sh
│   ├── ctdet_coco_hg.sh
│   ├── ctdet_coco_resdcn101.sh
│   ├── ctdet_coco_resdcn18.sh
│   ├── ctdet_pascal_dla_384.sh
│   ├── ctdet_pascal_dla_512.sh
│   ├── ctdet_pascal_resdcn101_384.sh
│   ├── ctdet_pascal_resdcn101_512.sh
│   ├── ctdet_pascal_resdcn18_384.sh
│   ├── ctdet_pascal_resdcn18_512.sh
│   ├── ddd_3dop.sh
│   ├── ddd_sub.sh
│   ├── exdet_coco_dla.sh
│   ├── exdet_coco_hg.sh
│   ├── multi_pose_dla_1x.sh
│   ├── multi_pose_dla_3x.sh
│   ├── multi_pose_hg_1x.sh
│   └── multi_pose_hg_3x.sh
├── images
│   ├── 16004479832_a748d55f21_k.jpg
│   ├── 17790319373_bd19b24cfc_k.jpg
│   ├── 18124840932_e42b3e377c_k.jpg
│   ├── 19064748793_bb942deea1_k.jpg
│   ├── 24274813513_0cfd2ce6d0_k.jpg
│   ├── 33823288584_1d21cf0a26_k.jpg
│   ├── 33887522274_eebd074106_k.jpg
│   ├── 34501842524_3c858b3080_k.jpg
│   └── NOTICE
├── models
│   └── .gitignore
├── readme
│   ├── DATA.md
│   ├── DEVELOP.md
│   ├── GETTING_STARTED.md
│   ├── INSTALL.md
│   ├── MODEL_ZOO.md
│   ├── det1.png
│   ├── det2.png
│   ├── fig2.png
│   ├── pose1.png
│   ├── pose2.png
│   └── pose3.png
├── requirements.txt
└── src
    ├── _init_paths.py
    ├── demo.py
    ├── lib
    │   ├── datasets
    │   │   ├── dataset
    │   │   │   ├── coco.py
    │   │   │   ├── coco_hp.py
    │   │   │   ├── kitti.py
    │   │   │   └── pascal.py
    │   │   ├── dataset_factory.py
    │   │   └── sample
    │   │       ├── ctdet.py
    │   │       ├── ddd.py
    │   │       ├── exdet.py
    │   │       └── multi_pose.py
    │   ├── detectors
    │   │   ├── base_detector.py
    │   │   ├── ctdet.py
    │   │   ├── ddd.py
    │   │   ├── detector_factory.py
    │   │   ├── exdet.py
    │   │   └── multi_pose.py
    │   ├── external
    │   │   ├── .gitignore
    │   │   ├── Makefile
    │   │   ├── __init__.py
    │   │   ├── nms.pyx
    │   │   └── setup.py
    │   ├── logger.py
    │   ├── models
    │   │   ├── data_parallel.py
    │   │   ├── decode.py
    │   │   ├── losses.py
    │   │   ├── model.py
    │   │   ├── networks
    │   │   │   ├── DCNv2
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── LICENSE
    │   │   │   │   ├── README.md
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── build.py
    │   │   │   │   ├── build_double.py
    │   │   │   │   ├── dcn_v2.py
    │   │   │   │   ├── dcn_v2_func.py
    │   │   │   │   ├── make.sh
    │   │   │   │   ├── src
    │   │   │   │   │   ├── cuda
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda.cu
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda.h
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda_double.cu
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda_double.h
    │   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.cu
    │   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.h
    │   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda_double.cu
    │   │   │   │   │   │   └── dcn_v2_psroi_pooling_cuda_double.h
    │   │   │   │   │   ├── dcn_v2.c
    │   │   │   │   │   ├── dcn_v2.h
    │   │   │   │   │   ├── dcn_v2_cuda.c
    │   │   │   │   │   ├── dcn_v2_cuda.h
    │   │   │   │   │   ├── dcn_v2_cuda_double.c
    │   │   │   │   │   ├── dcn_v2_cuda_double.h
    │   │   │   │   │   ├── dcn_v2_double.c
    │   │   │   │   │   └── dcn_v2_double.h
    │   │   │   │   └── test.py
    │   │   │   ├── dlav0.py
    │   │   │   ├── large_hourglass.py
    │   │   │   ├── msra_resnet.py
    │   │   │   ├── pose_dla_dcn.py
    │   │   │   └── resnet_dcn.py
    │   │   ├── scatter_gather.py
    │   │   └── utils.py
    │   ├── opts.py
    │   ├── trains
    │   │   ├── base_trainer.py
    │   │   ├── ctdet.py
    │   │   ├── ddd.py
    │   │   ├── exdet.py
    │   │   ├── multi_pose.py
    │   │   └── train_factory.py
    │   └── utils
    │       ├── __init__.py
    │       ├── ddd_utils.py
    │       ├── debugger.py
    │       ├── image.py
    │       ├── oracle_utils.py
    │       ├── post_process.py
    │       └── utils.py
    ├── main.py
    ├── test.py
    └── tools
        ├── _init_paths.py
        ├── calc_coco_overlap.py
        ├── convert_hourglass_weight.py
        ├── convert_kitti_to_coco.py
        ├── eval_coco.py
        ├── eval_coco_hp.py
        ├── get_kitti.sh
        ├── get_pascal_voc.sh
        ├── kitti_eval
        │   ├── README.md
        │   ├── evaluate_object_3d.cpp
        │   ├── evaluate_object_3d_offline
        │   ├── evaluate_object_3d_offline.cpp
        │   └── mail.h
        ├── merge_pascal_json.py
        ├── reval.py
        ├── vis_pred.py
        └── voc_eval_lib
            ├── LICENSE
            ├── Makefile
            ├── __init__.py
            ├── datasets
            │   ├── __init__.py
            │   ├── bbox.pyx
            │   ├── ds_utils.py
            │   ├── imdb.py
            │   ├── pascal_voc.py
            │   └── voc_eval.py
            ├── model
            │   ├── __init__.py
            │   ├── bbox_transform.py
            │   ├── config.py
            │   ├── nms_wrapper.py
            │   └── test.py
            ├── nms
            │   ├── .gitignore
            │   ├── __init__.py
            │   ├── cpu_nms.c
            │   ├── cpu_nms.pyx
            │   ├── gpu_nms.cpp
            │   ├── gpu_nms.hpp
            │   ├── gpu_nms.pyx
            │   ├── nms_kernel.cu
            │   └── py_cpu_nms.py
            ├── setup.py
            └── utils
                ├── .gitignore
                ├── __init__.py
                ├── bbox.pyx
                ├── blob.py
                ├── timer.py
                └── visualization.py
/.gitignore:
--------------------------------------------------------------------------------
1 | legacy/*
2 | .DS_Store
3 | debug/*
4 | *.DS_Store
5 | *.json
6 | *.mat
7 | src/.vscode/*
8 | preds/*
9 | *.h5
10 | *.pth
11 | *.checkpoint
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | env/
23 | build/
24 | develop-eggs/
25 | dist/
26 | downloads/
27 | eggs/
28 | .eggs/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | *.egg-info/
35 | .installed.cfg
36 | *.egg
37 |
38 | # PyInstaller
39 | # Usually these files are written by a python script from a template
40 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
41 | *.manifest
42 | *.spec
43 |
44 | # Installer logs
45 | pip-log.txt
46 | pip-delete-this-directory.txt
47 |
48 | # Unit test / coverage reports
49 | htmlcov/
50 | .tox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | .hypothesis/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # pyenv
84 | .python-version
85 |
86 | # celery beat schedule file
87 | celerybeat-schedule
88 |
89 | # SageMath parsed files
90 | *.sage.py
91 |
92 | # dotenv
93 | .env
94 |
95 | # virtualenv
96 | .venv
97 | venv/
98 | ENV/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | group: travis_latest
2 | dist: xenial # ubuntu-16.04
3 | language: python
4 | cache: pip
5 | python:
6 | - 3.6
7 | - 3.7
8 | install:
9 | - pip install flake8
10 | - pip install -r requirements.txt
11 | before_script:
12 | # stop the build if there are Python syntax errors or undefined names
13 | - flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics
14 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
15 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
16 | script:
17 | - true # add other tests here
18 | notifications:
19 | on_success: change
20 | on_failure: change # `always` will be the setting once code changes slow down
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Xingyi Zhou
4 | All rights reserved.
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 |
--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/exp/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/experiments/ctdet_coco_dla_1x.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id coco_dla_1x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16
4 | # test
5 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume
6 | # flip test
7 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test
8 | # multi scale test
9 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
10 | cd ..
11 |
--------------------------------------------------------------------------------
/experiments/ctdet_coco_dla_2x.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 230 --lr_step 180,210
4 | # or use the following command if you have coco_dla_1x trained
5 | # python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/ctdet/coco_dla_1x/model_90.pth --resume
6 | # test
7 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume
8 | # flip test
9 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test
10 | # multi scale test
11 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
12 | cd ..
13 |
--------------------------------------------------------------------------------
/experiments/ctdet_coco_hg.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4
4 | # test
5 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume
6 | # flip test
7 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test
8 | # multi scale test
9 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
10 | cd ..
--------------------------------------------------------------------------------
/experiments/ctdet_coco_resdcn101.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id coco_resdcn101 --arch resdcn_101 --batch_size 96 --master_batch 5 --lr 3.75e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16
4 | # test
5 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume
6 | # flip test
7 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test
8 | # multi scale test
9 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
10 | cd ..
11 |
--------------------------------------------------------------------------------
/experiments/ctdet_coco_resdcn18.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --batch_size 114 --master_batch 18 --lr 5e-4 --gpus 0,1,2,3 --num_workers 16
4 | # test
5 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume
6 | # flip test
7 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test
8 | # multi scale test
9 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
10 | cd ..
11 |
--------------------------------------------------------------------------------
/experiments/ctdet_pascal_dla_384.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id pascal_dla_384 --dataset pascal --num_epochs 70 --lr_step 45,60
4 | # test
5 | python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume
6 | # flip test
7 | python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/ctdet_pascal_dla_512.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1
4 | # test
5 | python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume
6 | # flip test
7 | python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/ctdet_pascal_resdcn101_384.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --num_epochs 70 --lr_step 45,60 --gpus 0,1
4 | # test
5 | python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume
6 | # flip test
7 | python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/ctdet_pascal_resdcn101_512.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1,2,3
4 | # test
5 | python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume
6 | # flip test
7 | python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/ctdet_pascal_resdcn18_384.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --num_epochs 70 --lr_step 45,60
4 | # test
5 | python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume
6 | # flip test
7 | python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/ctdet_pascal_resdcn18_512.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60
4 | # test
5 | python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume
6 | # flip test
7 | python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/ddd_3dop.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1
4 | # test
5 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --resume
6 | cd ..
7 |
--------------------------------------------------------------------------------
/experiments/ddd_sub.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1
4 | # test
5 | python test.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --resume
6 | cd ..
7 |
--------------------------------------------------------------------------------
/experiments/exdet_coco_dla.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py exdet --exp_id coco_dla --batch_size 64 --master_batch 1 --lr 2.5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 8
4 | # test
5 | python test.py exdet --exp_id coco_dla --keep_res --resume
6 | # flip test
7 | python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test
8 | # multi scale test
9 | python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
10 | cd ..
11 |
--------------------------------------------------------------------------------
/experiments/exdet_coco_hg.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py exdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4
4 | # test
5 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume
6 | # flip test
7 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test
8 | # multi scale test
9 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
10 | cd ..
11 |
--------------------------------------------------------------------------------
/experiments/multi_pose_dla_1x.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py multi_pose --exp_id dla_1x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16
4 | # test
5 | python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume
6 | # flip test
7 | python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/multi_pose_dla_3x.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 320 --lr_step 270,300
4 | # or use the following command if you have dla_1x trained
5 | # python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/multi_pose/dla_1x/model_90.pth --resume
6 | # test
7 | python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume
8 | # flip test
9 | python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume --flip_test
10 | cd ..
11 |
--------------------------------------------------------------------------------
/experiments/multi_pose_hg_1x.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 50 --lr_step 40
4 | # test
5 | python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume
6 | # flip test
7 | python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test
8 | cd ..
9 |
--------------------------------------------------------------------------------
/experiments/multi_pose_hg_3x.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130
4 | # or use the following command if you have hg_1x trained
5 | # python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130 --load_model ../exp/multi_pose/hg_1x/model_40.pth --resume
6 | # test
7 | python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume
8 | # flip test
9 | python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test
10 | cd ..
11 |
--------------------------------------------------------------------------------
/images/16004479832_a748d55f21_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/16004479832_a748d55f21_k.jpg
--------------------------------------------------------------------------------
/images/17790319373_bd19b24cfc_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/17790319373_bd19b24cfc_k.jpg
--------------------------------------------------------------------------------
/images/18124840932_e42b3e377c_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/18124840932_e42b3e377c_k.jpg
--------------------------------------------------------------------------------
/images/19064748793_bb942deea1_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/19064748793_bb942deea1_k.jpg
--------------------------------------------------------------------------------
/images/24274813513_0cfd2ce6d0_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/24274813513_0cfd2ce6d0_k.jpg
--------------------------------------------------------------------------------
/images/33823288584_1d21cf0a26_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/33823288584_1d21cf0a26_k.jpg
--------------------------------------------------------------------------------
/images/33887522274_eebd074106_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/33887522274_eebd074106_k.jpg
--------------------------------------------------------------------------------
/images/34501842524_3c858b3080_k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/34501842524_3c858b3080_k.jpg
--------------------------------------------------------------------------------
/images/NOTICE:
--------------------------------------------------------------------------------
1 | The demo images are licensed as United States government work:
2 | https://www.usa.gov/government-works
3 |
4 | The image files were obtained on Jan 13, 2018 from the following
5 | URLs.
6 |
7 | 16004479832_a748d55f21_k.jpg
8 | https://www.flickr.com/photos/archivesnews/16004479832
9 |
10 | 18124840932_e42b3e377c_k.jpg
11 | https://www.flickr.com/photos/usnavy/18124840932
12 |
13 | 33887522274_eebd074106_k.jpg
14 | https://www.flickr.com/photos/usaid_pakistan/33887522274
15 |
16 | 15673749081_767a7fa63a_k.jpg
17 | https://www.flickr.com/photos/usnavy/15673749081
18 |
19 | 34501842524_3c858b3080_k.jpg
20 | https://www.flickr.com/photos/departmentofenergy/34501842524
21 |
22 | 24274813513_0cfd2ce6d0_k.jpg
23 | https://www.flickr.com/photos/dhsgov/24274813513
24 |
25 | 19064748793_bb942deea1_k.jpg
26 | https://www.flickr.com/photos/statephotos/19064748793
27 |
28 | 33823288584_1d21cf0a26_k.jpg
29 | https://www.flickr.com/photos/cbpphotos/33823288584
30 |
31 | 17790319373_bd19b24cfc_k.jpg
32 | https://www.flickr.com/photos/secdef/17790319373
33 |
--------------------------------------------------------------------------------
/models/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/readme/DATA.md:
--------------------------------------------------------------------------------
1 | # Dataset preparation
2 |
3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to set up the datasets.
4 |
5 |
6 | ### COCO
7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download).
8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download).
9 | - Place the data (or create symlinks) to make the data folder like:
10 |
11 | ~~~
12 | ${CenterNet_ROOT}
13 | |-- data
14 | `-- |-- coco
15 | `-- |-- annotations
16 | | |-- instances_train2017.json
17 | | |-- instances_val2017.json
18 | | |-- person_keypoints_train2017.json
19 | | |-- person_keypoints_val2017.json
20 | | |-- image_info_test-dev2017.json
21 | |---|-- train2017
22 | |---|-- val2017
23 | `---|-- test2017
24 | ~~~
25 |
26 | - [Optional] If you want to train ExtremeNet, generate extreme point annotation from segmentation:
27 |
28 | ~~~
29 | cd $CenterNet_ROOT/tools/
30 | python gen_coco_extreme_points.py
31 | ~~~
32 | It generates `instances_extreme_train2017.json` and `instances_extreme_val2017.json` in `data/coco/annotations/`.
33 |
34 | ### Pascal VOC
35 |
36 | - Run
37 |
38 | ~~~
39 | cd $CenterNet_ROOT/tools/
40 | bash get_pascal_voc.sh
41 | ~~~
42 | - The above script will:
43 |   - Download, unzip, and move the Pascal VOC images from the [VOC website](http://host.robots.ox.ac.uk/pascal/VOC/).
44 |   - [Download](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) the Pascal VOC annotations in COCO format (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/detectron/datasets/data)).
45 |   - Combine the train/val 2007/2012 annotation files into a single json.
46 |
47 |
48 | - Move the created `voc` folder to `data` (or create symlinks) to make the data folder like:
49 |
50 | ~~~
51 | ${CenterNet_ROOT}
52 | |-- data
53 | `-- |-- voc
54 | `-- |-- annotations
55 | | |-- pascal_trainval0712.json
56 | | |-- pascal_test2017.json
57 | |-- images
58 | | |-- 000001.jpg
59 | | ......
60 | `-- VOCdevkit
61 |
62 | ~~~
63 | The `VOCdevkit` folder is needed to run the evaluation script from [faster rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/tools/reval.py).
64 |
65 | ### KITTI
66 |
67 | - Download [images](http://www.cvlibs.net/download.php?file=data_object_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_object_label_2.zip), and [calibrations](http://www.cvlibs.net/download.php?file=data_object_calib.zip) from [KITTI website](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) and unzip.
68 |
69 | - Download the train-val split of [3DOP](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and [SubCNN](https://github.com/tanshen/SubCNN/tree/master/fast-rcnn/data/KITTI) and place the data as below
70 |
71 | ~~~
72 | ${CenterNet_ROOT}
73 | |-- data
74 | `-- |-- kitti
75 | `-- |-- training
76 | | |-- image_2
77 | | |-- label_2
78 | | |-- calib
79 | |-- ImageSets_3dop
80 | | |-- test.txt
81 | | |-- train.txt
82 | | |-- val.txt
83 | | |-- trainval.txt
84 | `-- ImageSets_subcnn
85 | |-- test.txt
86 | |-- train.txt
87 | |-- val.txt
88 | |-- trainval.txt
89 | ~~~
90 |
91 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. You can set `DEBUG=True` in `line 5` to visualize the annotation.
92 |
93 | - Link image folder
94 |
95 | ~~~
96 | cd ${CenterNet_ROOT}/data/kitti/
97 | mkdir images
98 | ln -s training/image_2 images/trainval
99 | ~~~
100 |
101 | - The data structure should look like:
102 |
103 | ~~~
104 | ${CenterNet_ROOT}
105 | |-- data
106 | `-- |-- kitti
107 | `-- |-- annotations
108 | | |-- kitti_3dop_train.json
109 | | |-- kitti_3dop_val.json
110 | | |-- kitti_subcnn_train.json
111 | | |-- kitti_subcnn_val.json
112 | `-- images
113 | |-- trainval
114 | |-- test
115 | ~~~
116 |
--------------------------------------------------------------------------------
/readme/DEVELOP.md:
--------------------------------------------------------------------------------
1 | # Develop
2 |
3 | This document provides tutorials for developing CenterNet. `src/lib/opts.py` lists a few more options that the current version supports.
4 |
5 | ## New dataset
6 | Basically there are three steps:
7 |
8 | - Convert the dataset annotations to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example of converting KITTI-format annotations to COCO format.
9 | - Create a dataset initialization file in `src/lib/datasets/dataset`. In most cases you can simply copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information and annotation path (see the sketch below).
10 | - Import your dataset in `src/lib/datasets/dataset_factory.py`.
11 |
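Below is a minimal, illustrative sketch of such an initialization file, following the pattern of `coco.py`. The dataset name `mydataset`, its folder layout, and the category names are assumptions for illustration; the augmentation statistics (`_data_rng`, `_eig_val`, `_eig_vec`) and the evaluation hooks (`convert_eval_format`, `save_results`, `run_eval`) that you would keep when copying `coco.py` are omitted for brevity.

~~~
import os
import numpy as np
import pycocotools.coco as coco
import torch.utils.data as data

class MyDataset(data.Dataset):   # hypothetical dataset, for illustration only
  num_classes = 3                # number of categories in your dataset
  default_resolution = [512, 512]
  # image statistics; reuse the COCO values or compute your own
  mean = np.array([0.40789654, 0.44719302, 0.47026115],
                  dtype=np.float32).reshape(1, 1, 3)
  std = np.array([0.28863828, 0.27408164, 0.27809835],
                 dtype=np.float32).reshape(1, 1, 3)

  def __init__(self, opt, split):
    super(MyDataset, self).__init__()
    self.data_dir = os.path.join(opt.data_dir, 'mydataset')
    self.img_dir = os.path.join(self.data_dir, 'images')
    self.annot_path = os.path.join(
      self.data_dir, 'annotations', 'mydataset_{}.json'.format(split))
    self.max_objs = 128
    self.class_name = ['__background__', 'cat_a', 'cat_b', 'cat_c']
    self._valid_ids = np.arange(1, self.num_classes + 1, dtype=np.int32)
    self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
    self.split = split
    self.opt = opt
    self.coco = coco.COCO(self.annot_path)   # annotations are in COCO format
    self.images = self.coco.getImgIds()
    self.num_samples = len(self.images)

  def __len__(self):
    return self.num_samples
~~~

The new class is then registered by adding an entry such as `'mydataset': MyDataset` to `dataset_factory` in `src/lib/datasets/dataset_factory.py`.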
12 | ## New task
13 |
14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/trains/`, and `src/lib/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively.
15 |
16 | ## New architecture
17 |
18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list with one element per stage (a single-stage model should return a list containing a single element). Each element of the list is a dict containing the same keys as `heads` (see the sketch below).
19 | - Add your model in `model_factory` of `src/lib/models/model.py`.
20 |
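The following is a rough sketch of this interface. `TinyNet` and its layer sizes are made up for illustration and are not one of the provided backbones; only the `heads` handling and the return format follow the convention described above.

~~~
import torch.nn as nn

class TinyNet(nn.Module):   # hypothetical toy backbone, for illustration only
  def __init__(self, heads, head_conv=64):
    super(TinyNet, self).__init__()
    self.heads = heads
    # stand-in for a real backbone (DLA, Hourglass, ResNet, ...)
    self.base = nn.Sequential(
      nn.Conv2d(3, 64, kernel_size=7, stride=4, padding=3),
      nn.ReLU(inplace=True))
    # one small output head per entry in `heads`
    for head, channels in self.heads.items():
      fc = nn.Sequential(
        nn.Conv2d(64, head_conv, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(head_conv, channels, kernel_size=1))
      self.__setattr__(head, fc)

  def forward(self, x):
    x = self.base(x)
    out = {head: self.__getattr__(head)(x) for head in self.heads}
    return [out]   # one dict per stage; a single stage here

# e.g. net = TinyNet(heads={'hm': 80, 'wh': 2, 'reg': 2})
~~~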
--------------------------------------------------------------------------------
/readme/GETTING_STARTED.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md).
4 |
5 | ## Benchmark evaluation
6 |
7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`.
8 |
9 | ### COCO
10 |
11 | To evaluate COCO object detection with DLA
12 | run
13 |
14 | ~~~
15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth
16 | ~~~
17 |
18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution. Without `--keep_res` the images are resized to `512 x 512`. You can add `--flip_test` and `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command for flip test and multi-scale test, respectively. The expected APs are `39.2` and `41.7`, respectively.
19 |
20 | To test with hourglass net, run
21 |
22 | ~~~
23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth
24 | ~~~
25 |
26 | Similarly, to evaluate human pose estimation, run the following command for dla
27 |
28 | ~~~
29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test
30 | ~~~
31 |
32 | and the following for hourglass
33 |
34 | ~~~
35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_hg_3x.pth --flip_test
36 | ~~~
37 |
38 | The expected results can be found in the model zoo.
39 |
40 | ### Pascal
41 |
42 | To evaluate object detection on Pascal VOC (test2007), run
43 |
44 | ~~~
45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test
46 | ~~~
47 |
48 | Note that we fix the resolution during testing.
49 | You can switch to other network architectures and resolutions by specifying `--arch` and `--input_res 512`.
50 |
51 | ### KITTI
52 |
53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)):
54 |
55 | ~~~
56 | cd CenterNet_ROOT/src/tools/kitti_eval
57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3
58 | ~~~
59 |
60 | Then run the evaluation with pretrained model:
61 |
62 | ~~~
63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth
64 | ~~~
65 |
66 | to evaluate the 3DOP split. For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models.
67 | Note that test time augmentation is not trivially applicable for 3D orientation.
68 |
69 | ## Training
70 |
71 | We have packed all the training scripts in the [experiments](../experiments) folder.
72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md).
73 | The number of GPUs for each experiment can be found in the scripts and the model zoo.
74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) and scale the learning rate with the batch size.
75 | For example, to train COCO object detection with dla on 2 GPUs, run
76 |
77 | ~~~
78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1
79 | ~~~
80 |
81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs).
82 | By default, pytorch splits the total batch size evenly across all GPUs.
83 | `--master_batch` allows using a different batch size on the master GPU, which usually consumes more memory than the other GPUs.
84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine.
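For reference, a small sketch of the linear scaling rule (assuming the 1x DLA schedule, i.e. a base learning rate of `5e-4` at batch size `128`; swap in the base values of the experiment you are reproducing):

~~~
# linear learning-rate scaling (sketch, not an official script)
def scale_lr(batch_size, base_lr=5e-4, base_batch=128):
  return base_lr * batch_size / base_batch

print(scale_lr(32))  # 0.000125, i.e. 1.25e-4, matching the 2-GPU example above
~~~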
85 |
86 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`.
87 |
88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1JMbHgN4uLkP9MAyJU5EeHrgxwe101hwO) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)).
89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
90 |
--------------------------------------------------------------------------------
/readme/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 |
4 | The code was tested on Ubuntu 16.04 with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v0.4.1. NVIDIA GPUs are needed for both training and testing.
5 | After installing Anaconda:
6 |
7 | 0. [Optional but recommended] create a new conda environment.
8 |
9 | ~~~
10 | conda create --name CenterNet python=3.6
11 | ~~~
12 | And activate the environment.
13 |
14 | ~~~
15 | conda activate CenterNet
16 | ~~~
17 |
18 | 1. Install PyTorch 0.4.1:
19 |
20 | ~~~
21 | conda install pytorch=0.4.1 torchvision -c pytorch
22 | ~~~
23 |
24 | And disable cudnn batch normalization (due to [this issue](https://github.com/xingyizhou/pytorch-pose-hg-3d/issues/16)).
25 |
26 | ~~~
27 | # PYTORCH=/path/to/pytorch # usually ~/anaconda3/envs/CenterNet/lib/python3.6/site-packages/
28 | # for pytorch v0.4.0
29 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py
30 | # for pytorch v0.4.1
31 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py
32 | ~~~
33 |
34 | For other PyTorch versions, you can manually open `torch/nn/functional.py`, find the line containing `torch.batch_norm`, and replace `torch.backends.cudnn.enabled` with `False`. We observed slightly worse training results without doing so.
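If you prefer not to hard-code a line number, the following small sketch (a hypothetical helper, not part of the repo; back up `functional.py` first) patches the file by searching for the `torch.batch_norm` line instead:

~~~
# patch_functional.py -- hypothetical helper
# usage: python patch_functional.py /path/to/torch/nn/functional.py
import sys

path = sys.argv[1]
with open(path) as f:
    lines = f.readlines()
for i, line in enumerate(lines):
    # the batch_norm call passes torch.backends.cudnn.enabled as its last argument
    if 'torch.batch_norm' in line and 'torch.backends.cudnn.enabled' in line:
        lines[i] = line.replace('torch.backends.cudnn.enabled', 'False')
with open(path, 'w') as f:
    f.writelines(lines)
~~~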
35 |
36 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi):
37 |
38 | ~~~
39 | # COCOAPI=/path/to/clone/cocoapi
40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
41 | cd $COCOAPI/PythonAPI
42 | make
43 | python setup.py install --user
44 | ~~~
45 |
46 | 3. Clone this repo:
47 |
48 | ~~~
49 | CenterNet_ROOT=/path/to/clone/CenterNet
50 | git clone https://github.com/xingyizhou/CenterNet $CenterNet_ROOT
51 | ~~~
52 |
53 |
54 | 4. Install the requirements
55 |
56 | ~~~
57 | pip install -r requirements.txt
58 | ~~~
59 |
60 |
61 | 5. Compile the deformable convolution layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)).
62 |
63 | ~~~
64 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2
65 | ./make.sh
66 | ~~~
67 | 6. [Optional] Compile NMS if you want to use multi-scale testing or to test ExtremeNet.
68 |
69 | ~~~
70 | cd $CenterNet_ROOT/src/lib/external
71 | make
72 | ~~~
73 |
74 | 7. Download pretrained models for [detection]() or [pose estimation]() and move them to `$CenterNet_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md).
75 |
--------------------------------------------------------------------------------
/readme/det1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/det1.png
--------------------------------------------------------------------------------
/readme/det2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/det2.png
--------------------------------------------------------------------------------
/readme/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/fig2.png
--------------------------------------------------------------------------------
/readme/pose1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose1.png
--------------------------------------------------------------------------------
/readme/pose2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose2.png
--------------------------------------------------------------------------------
/readme/pose3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose3.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | Cython
3 | numba
4 | progress
5 | matplotlib
6 | easydict
7 | scipy
8 |
--------------------------------------------------------------------------------
/src/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 | def add_path(path):
5 | if path not in sys.path:
6 | sys.path.insert(0, path)
7 |
8 | this_dir = osp.dirname(__file__)
9 |
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 |
--------------------------------------------------------------------------------
/src/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 |
7 | import os
8 | import cv2
9 |
10 | from opts import opts
11 | from detectors.detector_factory import detector_factory
12 |
13 | image_ext = ['jpg', 'jpeg', 'png', 'webp']
14 | video_ext = ['mp4', 'mov', 'avi', 'mkv']
15 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
16 |
17 | def demo(opt):
18 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
19 | opt.debug = max(opt.debug, 1)
20 | Detector = detector_factory[opt.task]
21 | detector = Detector(opt)
22 |
23 | if opt.demo == 'webcam' or \
24 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext:
25 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
26 | detector.pause = False
27 | while True:
28 | _, img = cam.read()
29 | cv2.imshow('input', img)
30 | ret = detector.run(img)
31 | time_str = ''
32 | for stat in time_stats:
33 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
34 | print(time_str)
35 | if cv2.waitKey(1) == 27:
36 | return # esc to quit
37 | else:
38 | if os.path.isdir(opt.demo):
39 | image_names = []
40 | ls = os.listdir(opt.demo)
41 | for file_name in sorted(ls):
42 | ext = file_name[file_name.rfind('.') + 1:].lower()
43 | if ext in image_ext:
44 | image_names.append(os.path.join(opt.demo, file_name))
45 | else:
46 | image_names = [opt.demo]
47 |
48 | for (image_name) in image_names:
49 | ret = detector.run(image_name)
50 | time_str = ''
51 | for stat in time_stats:
52 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
53 | print(time_str)
54 | if __name__ == '__main__':
55 | opt = opts().init()
56 | demo(opt)
57 |
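# Example invocation (illustrative; the model path is an assumption -- point it at
# whichever pretrained model you downloaded):
#   python demo.py ctdet --demo ../images --load_model ../models/ctdet_coco_dla_2x.pth
# `--demo` accepts a single image, a directory of images, a video file, or 'webcam'.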
--------------------------------------------------------------------------------
/src/lib/datasets/dataset/coco.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | import torch.utils.data as data
12 |
13 | class COCO(data.Dataset):
14 | num_classes = 80
15 | default_resolution = [512, 512]
16 | mean = np.array([0.40789654, 0.44719302, 0.47026115],
17 | dtype=np.float32).reshape(1, 1, 3)
18 | std = np.array([0.28863828, 0.27408164, 0.27809835],
19 | dtype=np.float32).reshape(1, 1, 3)
20 |
21 | def __init__(self, opt, split):
22 | super(COCO, self).__init__()
23 | self.data_dir = os.path.join(opt.data_dir, 'coco')
24 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
25 | if split == 'test':
26 | self.annot_path = os.path.join(
27 | self.data_dir, 'annotations',
28 | 'image_info_test-dev2017.json').format(split)
29 | else:
30 | if opt.task == 'exdet':
31 | self.annot_path = os.path.join(
32 | self.data_dir, 'annotations',
33 | 'instances_extreme_{}2017.json').format(split)
34 | else:
35 | self.annot_path = os.path.join(
36 | self.data_dir, 'annotations',
37 | 'instances_{}2017.json').format(split)
38 | self.max_objs = 128
39 | self.class_name = [
40 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
41 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
42 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
43 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
44 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
45 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
46 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
47 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
48 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
49 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
50 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
51 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
52 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
53 | self._valid_ids = [
54 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
55 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
56 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
57 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
58 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
59 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
60 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
61 | 82, 84, 85, 86, 87, 88, 89, 90]
62 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
63 | self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \
64 | for v in range(1, self.num_classes + 1)]
65 | self._data_rng = np.random.RandomState(123)
66 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
67 | dtype=np.float32)
68 | self._eig_vec = np.array([
69 | [-0.58752847, -0.69563484, 0.41340352],
70 | [-0.5832747, 0.00994535, -0.81221408],
71 | [-0.56089297, 0.71832671, 0.41158938]
72 | ], dtype=np.float32)
73 | # self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
74 | # self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
75 |
76 | self.split = split
77 | self.opt = opt
78 |
79 | print('==> initializing coco 2017 {} data.'.format(split))
80 | self.coco = coco.COCO(self.annot_path)
81 | self.images = self.coco.getImgIds()
82 | self.num_samples = len(self.images)
83 |
84 | print('Loaded {} {} samples'.format(split, self.num_samples))
85 |
86 | def _to_float(self, x):
87 | return float("{:.2f}".format(x))
88 |
89 | def convert_eval_format(self, all_bboxes):
90 | # import pdb; pdb.set_trace()
91 | detections = []
92 | for image_id in all_bboxes:
93 | for cls_ind in all_bboxes[image_id]:
94 | category_id = self._valid_ids[cls_ind - 1]
95 | for bbox in all_bboxes[image_id][cls_ind]:
96 | bbox[2] -= bbox[0]
97 | bbox[3] -= bbox[1]
98 | score = bbox[4]
99 | bbox_out = list(map(self._to_float, bbox[0:4]))
100 |
101 | detection = {
102 | "image_id": int(image_id),
103 | "category_id": int(category_id),
104 | "bbox": bbox_out,
105 | "score": float("{:.2f}".format(score))
106 | }
107 | if len(bbox) > 5:
108 | extreme_points = list(map(self._to_float, bbox[5:13]))
109 | detection["extreme_points"] = extreme_points
110 | detections.append(detection)
111 | return detections
112 |
113 | def __len__(self):
114 | return self.num_samples
115 |
116 | def save_results(self, results, save_dir):
117 | json.dump(self.convert_eval_format(results),
118 | open('{}/results.json'.format(save_dir), 'w'))
119 |
120 | def run_eval(self, results, save_dir):
121 | # result_json = os.path.join(save_dir, "results.json")
122 | # detections = self.convert_eval_format(results)
123 | # json.dump(detections, open(result_json, "w"))
124 | self.save_results(results, save_dir)
125 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
126 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
127 | coco_eval.evaluate()
128 | coco_eval.accumulate()
129 | coco_eval.summarize()
130 |
--------------------------------------------------------------------------------
/src/lib/datasets/dataset/coco_hp.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | import torch.utils.data as data
12 |
13 | class COCOHP(data.Dataset):
14 | num_classes = 1
15 | num_joints = 17
16 | default_resolution = [512, 512]
17 | mean = np.array([0.40789654, 0.44719302, 0.47026115],
18 | dtype=np.float32).reshape(1, 1, 3)
19 | std = np.array([0.28863828, 0.27408164, 0.27809835],
20 | dtype=np.float32).reshape(1, 1, 3)
21 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
22 | [11, 12], [13, 14], [15, 16]]
23 | def __init__(self, opt, split):
24 | super(COCOHP, self).__init__()
25 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
26 | [4, 6], [3, 5], [5, 6],
27 | [5, 7], [7, 9], [6, 8], [8, 10],
28 | [6, 12], [5, 11], [11, 12],
29 | [12, 14], [14, 16], [11, 13], [13, 15]]
30 |
31 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
32 | self.data_dir = os.path.join(opt.data_dir, 'coco')
33 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
34 | if split == 'test':
35 | self.annot_path = os.path.join(
36 | self.data_dir, 'annotations',
37 | 'image_info_test-dev2017.json').format(split)
38 | else:
39 | self.annot_path = os.path.join(
40 | self.data_dir, 'annotations',
41 | 'person_keypoints_{}2017.json').format(split)
42 | self.max_objs = 32
43 | self._data_rng = np.random.RandomState(123)
44 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
45 | dtype=np.float32)
46 | self._eig_vec = np.array([
47 | [-0.58752847, -0.69563484, 0.41340352],
48 | [-0.5832747, 0.00994535, -0.81221408],
49 | [-0.56089297, 0.71832671, 0.41158938]
50 | ], dtype=np.float32)
51 | self.split = split
52 | self.opt = opt
53 |
54 | print('==> initializing coco 2017 {} data.'.format(split))
55 | self.coco = coco.COCO(self.annot_path)
56 | image_ids = self.coco.getImgIds()
57 |
58 | if split == 'train':
59 | self.images = []
60 | for img_id in image_ids:
61 | idxs = self.coco.getAnnIds(imgIds=[img_id])
62 | if len(idxs) > 0:
63 | self.images.append(img_id)
64 | else:
65 | self.images = image_ids
66 | self.num_samples = len(self.images)
67 | print('Loaded {} {} samples'.format(split, self.num_samples))
68 |
69 | def _to_float(self, x):
70 | return float("{:.2f}".format(x))
71 |
72 | def convert_eval_format(self, all_bboxes):
73 | # import pdb; pdb.set_trace()
74 | detections = []
75 | for image_id in all_bboxes:
76 | for cls_ind in all_bboxes[image_id]:
77 | category_id = 1
78 | for dets in all_bboxes[image_id][cls_ind]:
79 | bbox = dets[:4]
80 | bbox[2] -= bbox[0]
81 | bbox[3] -= bbox[1]
82 | score = dets[4]
83 | bbox_out = list(map(self._to_float, bbox))
84 | keypoints = np.concatenate([
85 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2),
86 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist()
87 | keypoints = list(map(self._to_float, keypoints))
88 |
89 | detection = {
90 | "image_id": int(image_id),
91 | "category_id": int(category_id),
92 | "bbox": bbox_out,
93 | "score": float("{:.2f}".format(score)),
94 | "keypoints": keypoints
95 | }
96 | detections.append(detection)
97 | return detections
98 |
99 | def __len__(self):
100 | return self.num_samples
101 |
102 | def save_results(self, results, save_dir):
103 | json.dump(self.convert_eval_format(results),
104 | open('{}/results.json'.format(save_dir), 'w'))
105 |
106 |
107 | def run_eval(self, results, save_dir):
108 | # result_json = os.path.join(opt.save_dir, "results.json")
109 | # detections = convert_eval_format(all_boxes)
110 | # json.dump(detections, open(result_json, "w"))
111 | self.save_results(results, save_dir)
112 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
113 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
114 | coco_eval.evaluate()
115 | coco_eval.accumulate()
116 | coco_eval.summarize()
117 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
118 | coco_eval.evaluate()
119 | coco_eval.accumulate()
120 | coco_eval.summarize()
--------------------------------------------------------------------------------
/src/lib/datasets/dataset/kitti.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 | import pycocotools.coco as coco
7 | import numpy as np
8 | import torch
9 | import json
10 | import cv2
11 | import os
12 | import math
13 |
14 | import torch.utils.data as data
15 |
16 |
17 | class KITTI(data.Dataset):
18 | num_classes = 3
19 | default_resolution = [384, 1280]
20 | mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
21 | std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
22 |
23 | def __init__(self, opt, split):
24 | super(KITTI, self).__init__()
25 | self.data_dir = os.path.join(opt.data_dir, 'kitti')
26 | self.img_dir = os.path.join(self.data_dir, 'images', 'trainval')
27 | if opt.trainval:
28 | split = 'trainval' if split == 'train' else 'test'
29 | self.img_dir = os.path.join(self.data_dir, 'images', split)
30 | self.annot_path = os.path.join(
31 | self.data_dir, 'annotations', 'kitti_{}.json').format(split)
32 | else:
33 | self.annot_path = os.path.join(self.data_dir,
34 | 'annotations', 'kitti_{}_{}.json').format(opt.kitti_split, split)
35 | self.max_objs = 50
36 | self.class_name = [
37 | '__background__', 'Pedestrian', 'Car', 'Cyclist']
38 | self.cat_ids = {1:0, 2:1, 3:2, 4:-3, 5:-3, 6:-2, 7:-99, 8:-99, 9:-1}
39 |
40 | self._data_rng = np.random.RandomState(123)
41 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
42 | dtype=np.float32)
43 | self._eig_vec = np.array([
44 | [-0.58752847, -0.69563484, 0.41340352],
45 | [-0.5832747, 0.00994535, -0.81221408],
46 | [-0.56089297, 0.71832671, 0.41158938]
47 | ], dtype=np.float32)
48 | self.split = split
49 | self.opt = opt
50 | self.alpha_in_degree = False
51 |
52 | print('==> initializing kitti {}, {} data.'.format(opt.kitti_split, split))
53 | self.coco = coco.COCO(self.annot_path)
54 | self.images = self.coco.getImgIds()
55 | self.num_samples = len(self.images)
56 |
57 | print('Loaded {} {} samples'.format(split, self.num_samples))
58 |
59 | def __len__(self):
60 | return self.num_samples
61 |
62 | def _to_float(self, x):
63 | return float("{:.2f}".format(x))
64 |
65 | def convert_eval_format(self, all_bboxes):
66 | pass
67 |
68 | def save_results(self, results, save_dir):
69 | results_dir = os.path.join(save_dir, 'results')
70 | if not os.path.exists(results_dir):
71 | os.mkdir(results_dir)
72 | for img_id in results.keys():
73 | out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id))
74 | f = open(out_path, 'w')
75 | for cls_ind in results[img_id]:
76 | for j in range(len(results[img_id][cls_ind])):
77 | class_name = self.class_name[cls_ind]
78 | f.write('{} 0.0 0'.format(class_name))
79 | for i in range(len(results[img_id][cls_ind][j])):
80 | f.write(' {:.2f}'.format(results[img_id][cls_ind][j][i]))
81 | f.write('\n')
82 | f.close()
83 |
84 | def run_eval(self, results, save_dir):
85 | self.save_results(results, save_dir)
86 | os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \
87 | '../data/kitti/training/label_val ' + \
88 | '{}/results/'.format(save_dir))
89 |
90 |
--------------------------------------------------------------------------------
/src/lib/datasets/dataset/pascal.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | import numpy as np
7 | import torch
8 | import json
9 | import os
10 |
11 | import torch.utils.data as data
12 |
13 | class PascalVOC(data.Dataset):
14 | num_classes = 20
15 | default_resolution = [384, 384]
16 | mean = np.array([0.485, 0.456, 0.406],
17 | dtype=np.float32).reshape(1, 1, 3)
18 | std = np.array([0.229, 0.224, 0.225],
19 | dtype=np.float32).reshape(1, 1, 3)
20 |
21 | def __init__(self, opt, split):
22 | super(PascalVOC, self).__init__()
23 | self.data_dir = os.path.join(opt.data_dir, 'voc')
24 | self.img_dir = os.path.join(self.data_dir, 'images')
25 | _ann_name = {'train': 'trainval0712', 'val': 'test2007'}
26 | self.annot_path = os.path.join(
27 | self.data_dir, 'annotations',
28 | 'pascal_{}.json').format(_ann_name[split])
29 | self.max_objs = 50
30 | self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat",
31 | "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
32 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
33 | "train", "tvmonitor"]
34 | self._valid_ids = np.arange(1, 21, dtype=np.int32)
35 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
36 | self._data_rng = np.random.RandomState(123)
37 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
38 | dtype=np.float32)
39 | self._eig_vec = np.array([
40 | [-0.58752847, -0.69563484, 0.41340352],
41 | [-0.5832747, 0.00994535, -0.81221408],
42 | [-0.56089297, 0.71832671, 0.41158938]
43 | ], dtype=np.float32)
44 | self.split = split
45 | self.opt = opt
46 |
47 | print('==> initializing pascal {} data.'.format(_ann_name[split]))
48 | self.coco = coco.COCO(self.annot_path)
49 | self.images = sorted(self.coco.getImgIds())
50 | self.num_samples = len(self.images)
51 |
52 | print('Loaded {} {} samples'.format(split, self.num_samples))
53 |
54 | def _to_float(self, x):
55 | return float("{:.2f}".format(x))
56 |
57 | def convert_eval_format(self, all_bboxes):
58 | detections = [[[] for __ in range(self.num_samples)] \
59 | for _ in range(self.num_classes + 1)]
60 | for i in range(self.num_samples):
61 | img_id = self.images[i]
62 | for j in range(1, self.num_classes + 1):
63 | if isinstance(all_bboxes[img_id][j], np.ndarray):
64 | detections[j][i] = all_bboxes[img_id][j].tolist()
65 | else:
66 | detections[j][i] = all_bboxes[img_id][j]
67 | return detections
68 |
69 | def __len__(self):
70 | return self.num_samples
71 |
72 | def save_results(self, results, save_dir):
73 | json.dump(self.convert_eval_format(results),
74 | open('{}/results.json'.format(save_dir), 'w'))
75 |
76 | def run_eval(self, results, save_dir):
77 | # result_json = os.path.join(save_dir, "results.json")
78 | # detections = self.convert_eval_format(results)
79 | # json.dump(detections, open(result_json, "w"))
80 | self.save_results(results, save_dir)
81 | os.system('python tools/reval.py ' + \
82 | '{}/results.json'.format(save_dir))
83 |
--------------------------------------------------------------------------------
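`convert_eval_format` above reshuffles the per-image results dict into the `detections[class][image_index]` layout expected by `tools/reval.py`. A small self-contained sketch of that reshuffle, using made-up boxes and a two-class toy setup:

```python
import numpy as np

# Hypothetical {img_id: {class_id: Nx5 [x1, y1, x2, y2, score]}} results dict.
images = [101, 102]
results = {
    101: {1: np.array([[10, 10, 50, 60, 0.9]], np.float32), 2: np.zeros((0, 5), np.float32)},
    102: {1: np.zeros((0, 5), np.float32), 2: np.array([[5, 5, 20, 30, 0.7]], np.float32)},
}
num_classes = 2

# Same reshuffle as convert_eval_format: detections[class][image_index] is a list of boxes.
detections = [[[] for _ in images] for _ in range(num_classes + 1)]
for i, img_id in enumerate(images):
    for j in range(1, num_classes + 1):
        detections[j][i] = results[img_id][j].tolist()
print(len(detections), len(detections[1]))  # 3 (index 0 is the unused background slot), 2 images
```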
/src/lib/datasets/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from .sample.ddd import DddDataset
6 | from .sample.exdet import EXDetDataset
7 | from .sample.ctdet import CTDetDataset
8 | from .sample.multi_pose import MultiPoseDataset
9 |
10 | from .dataset.coco import COCO
11 | from .dataset.pascal import PascalVOC
12 | from .dataset.kitti import KITTI
13 | from .dataset.coco_hp import COCOHP
14 |
15 |
16 | dataset_factory = {
17 | 'coco': COCO,
18 | 'pascal': PascalVOC,
19 | 'kitti': KITTI,
20 | 'coco_hp': COCOHP
21 | }
22 |
23 | _sample_factory = {
24 | 'exdet': EXDetDataset,
25 | 'ctdet': CTDetDataset,
26 | 'ddd': DddDataset,
27 | 'multi_pose': MultiPoseDataset
28 | }
29 |
30 |
31 | def get_dataset(dataset, task):
32 | class Dataset(dataset_factory[dataset], _sample_factory[task]):
33 | pass
34 | return Dataset
35 |
36 |
--------------------------------------------------------------------------------
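`get_dataset` composes a dataset class (annotations and metadata) with a task-specific sample class (the `__getitem__` that builds training targets) through multiple inheritance. A minimal sketch of how the factory is used, assuming `src/lib` is on the Python path:

```python
from datasets.dataset_factory import get_dataset

# Combine the PASCAL VOC metadata with the CenterNet detection sampler.
Dataset = get_dataset('pascal', 'ctdet')
print(Dataset.__mro__[:3])   # (Dataset, PascalVOC, CTDetDataset)

# The combined class is then instantiated with the parsed options, e.g.
#   dataset = Dataset(opt, 'train')
# where `opt` is produced by src/lib/opts.py (not shown in this excerpt).
```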
/src/lib/detectors/base_detector.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import numpy as np
7 | from progress.bar import Bar
8 | import time
9 | import torch
10 |
11 | from models.model import create_model, load_model
12 | from utils.image import get_affine_transform
13 | from utils.debugger import Debugger
14 |
15 |
16 | class BaseDetector(object):
17 | def __init__(self, opt):
18 | if opt.gpus[0] >= 0:
19 | opt.device = torch.device('cuda')
20 | else:
21 | opt.device = torch.device('cpu')
22 |
23 | print('Creating model...')
24 | self.model = create_model(opt.arch, opt.heads, opt.head_conv)
25 | self.model = load_model(self.model, opt.load_model)
26 | self.model = self.model.to(opt.device)
27 | self.model.eval()
28 |
29 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
30 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)
31 | self.max_per_image = 100
32 | self.num_classes = opt.num_classes
33 | self.scales = opt.test_scales
34 | self.opt = opt
35 | self.pause = True
36 |
37 | def pre_process(self, image, scale, meta=None):
38 | height, width = image.shape[0:2]
39 | new_height = int(height * scale)
40 | new_width = int(width * scale)
41 | if self.opt.fix_res:
42 | inp_height, inp_width = self.opt.input_h, self.opt.input_w
43 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
44 | s = max(height, width) * 1.0
45 | else:
46 | inp_height = (new_height | self.opt.pad) + 1
47 | inp_width = (new_width | self.opt.pad) + 1
48 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
49 | s = np.array([inp_width, inp_height], dtype=np.float32)
50 |
51 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
52 | resized_image = cv2.resize(image, (new_width, new_height))
53 | inp_image = cv2.warpAffine(
54 | resized_image, trans_input, (inp_width, inp_height),
55 | flags=cv2.INTER_LINEAR)
56 | inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32)
57 |
58 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width)
59 | if self.opt.flip_test:
60 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
61 | images = torch.from_numpy(images)
62 | meta = {'c': c, 's': s,
63 | 'out_height': inp_height // self.opt.down_ratio,
64 | 'out_width': inp_width // self.opt.down_ratio}
65 | return images, meta
66 |
67 | def process(self, images, return_time=False):
68 | raise NotImplementedError
69 |
70 | def post_process(self, dets, meta, scale=1):
71 | raise NotImplementedError
72 |
73 | def merge_outputs(self, detections):
74 | raise NotImplementedError
75 |
76 | def debug(self, debugger, images, dets, output, scale=1):
77 | raise NotImplementedError
78 |
79 | def show_results(self, debugger, image, results):
80 | raise NotImplementedError
81 |
82 | def run(self, image_or_path_or_tensor, meta=None):
83 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
84 | merge_time, tot_time = 0, 0
85 | debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
86 | theme=self.opt.debugger_theme)
87 | start_time = time.time()
88 | pre_processed = False
89 | if isinstance(image_or_path_or_tensor, np.ndarray):
90 | image = image_or_path_or_tensor
91 | elif isinstance(image_or_path_or_tensor, str):
92 | image = cv2.imread(image_or_path_or_tensor)
93 | else:
94 | image = image_or_path_or_tensor['image'][0].numpy()
95 | pre_processed_images = image_or_path_or_tensor
96 | pre_processed = True
97 |
98 | loaded_time = time.time()
99 | load_time += (loaded_time - start_time)
100 |
101 | detections = []
102 | for scale in self.scales:
103 | scale_start_time = time.time()
104 | if not pre_processed:
105 | images, meta = self.pre_process(image, scale, meta)
106 | else:
107 | # import pdb; pdb.set_trace()
108 | images = pre_processed_images['images'][scale][0]
109 | meta = pre_processed_images['meta'][scale]
110 | meta = {k: v.numpy()[0] for k, v in meta.items()}
111 | images = images.to(self.opt.device)
112 | torch.cuda.synchronize()
113 | pre_process_time = time.time()
114 | pre_time += pre_process_time - scale_start_time
115 |
116 | output, dets, forward_time = self.process(images, return_time=True)
117 |
118 | torch.cuda.synchronize()
119 | net_time += forward_time - pre_process_time
120 | decode_time = time.time()
121 | dec_time += decode_time - forward_time
122 |
123 | if self.opt.debug >= 2:
124 | self.debug(debugger, images, dets, output, scale)
125 |
126 | dets = self.post_process(dets, meta, scale)
127 | torch.cuda.synchronize()
128 | post_process_time = time.time()
129 | post_time += post_process_time - decode_time
130 |
131 | detections.append(dets)
132 |
133 | results = self.merge_outputs(detections)
134 | torch.cuda.synchronize()
135 | end_time = time.time()
136 | merge_time += end_time - post_process_time
137 | tot_time += end_time - start_time
138 |
139 | if self.opt.debug >= 1:
140 | self.show_results(debugger, image, results)
141 |
142 | return {'results': results, 'tot': tot_time, 'load': load_time,
143 | 'pre': pre_time, 'net': net_time, 'dec': dec_time,
144 | 'post': post_time, 'merge': merge_time}
--------------------------------------------------------------------------------
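`run` ties the test-time pipeline together: pre_process, the subclass's process/decode step, post_process, and merge_outputs, returning both the detections and a timing breakdown. Below is a minimal sketch of driving it from a script, along the lines of `demo.py`; the checkpoint path is a placeholder and option parsing is assumed to follow `src/lib/opts.py`.

```python
from opts import opts
from detectors.detector_factory import detector_factory

# Placeholder checkpoint; any image from the images/ folder works as input.
opt = opts().init(['ctdet', '--load_model', '../models/ctdet_coco_dla_2x.pth'])
detector = detector_factory[opt.task](opt)          # CtdetDetector for 'ctdet'
ret = detector.run('../images/17790319373_bd19b24cfc_k.jpg')
# Everything except 'results' is a timing in seconds (load/pre/net/dec/post/merge/tot).
print({k: round(v, 3) for k, v in ret.items() if k != 'results'})
```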
/src/lib/detectors/ctdet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import numpy as np
7 | from progress.bar import Bar
8 | import time
9 | import torch
10 |
11 | try:
12 | from external.nms import soft_nms
13 | except:
14 | print('NMS not imported! If you need it,'
15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make')
16 | from models.decode import ctdet_decode
17 | from models.utils import flip_tensor
18 | from utils.image import get_affine_transform
19 | from utils.post_process import ctdet_post_process
20 | from utils.debugger import Debugger
21 |
22 | from .base_detector import BaseDetector
23 |
24 | class CtdetDetector(BaseDetector):
25 | def __init__(self, opt):
26 | super(CtdetDetector, self).__init__(opt)
27 |
28 | def process(self, images, return_time=False):
29 | with torch.no_grad():
30 | output = self.model(images)[-1]
31 | hm = output['hm'].sigmoid_()
32 | wh = output['wh']
33 | reg = output['reg'] if self.opt.reg_offset else None
34 | if self.opt.flip_test:
35 | hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
36 | wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
37 | reg = reg[0:1] if reg is not None else None
38 | torch.cuda.synchronize()
39 | forward_time = time.time()
40 | dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
41 |
42 | if return_time:
43 | return output, dets, forward_time
44 | else:
45 | return output, dets
46 |
47 | def post_process(self, dets, meta, scale=1):
48 | dets = dets.detach().cpu().numpy()
49 | dets = dets.reshape(1, -1, dets.shape[2])
50 | dets = ctdet_post_process(
51 | dets.copy(), [meta['c']], [meta['s']],
52 | meta['out_height'], meta['out_width'], self.opt.num_classes)
53 | for j in range(1, self.num_classes + 1):
54 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
55 | dets[0][j][:, :4] /= scale
56 | return dets[0]
57 |
58 | def merge_outputs(self, detections):
59 | results = {}
60 | for j in range(1, self.num_classes + 1):
61 | results[j] = np.concatenate(
62 | [detection[j] for detection in detections], axis=0).astype(np.float32)
63 | if len(self.scales) > 1 or self.opt.nms:
64 | soft_nms(results[j], Nt=0.5, method=2)
65 | scores = np.hstack(
66 | [results[j][:, 4] for j in range(1, self.num_classes + 1)])
67 | if len(scores) > self.max_per_image:
68 | kth = len(scores) - self.max_per_image
69 | thresh = np.partition(scores, kth)[kth]
70 | for j in range(1, self.num_classes + 1):
71 | keep_inds = (results[j][:, 4] >= thresh)
72 | results[j] = results[j][keep_inds]
73 | return results
74 |
75 | def debug(self, debugger, images, dets, output, scale=1):
76 | detection = dets.detach().cpu().numpy().copy()
77 | detection[:, :, :4] *= self.opt.down_ratio
78 | for i in range(1):
79 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
80 | img = ((img * self.std + self.mean) * 255).astype(np.uint8)
81 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
82 | debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale))
83 | debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale))
84 | for k in range(len(dets[i])):
85 | if detection[i, k, 4] > self.opt.center_thresh:
86 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
87 | detection[i, k, 4],
88 | img_id='out_pred_{:.1f}'.format(scale))
89 |
90 | def show_results(self, debugger, image, results):
91 | debugger.add_img(image, img_id='ctdet')
92 | for j in range(1, self.num_classes + 1):
93 | for bbox in results[j]:
94 | if bbox[4] > self.opt.vis_thresh:
95 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet')
96 | debugger.show_all_imgs(pause=self.pause)
97 |
--------------------------------------------------------------------------------
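The only non-obvious step in `merge_outputs` above is the per-image cap on detections: after concatenating all scales, it finds the score of the `max_per_image`-th best box with `np.partition` and drops everything below it. A standalone sketch of that thresholding with made-up boxes:

```python
import numpy as np

max_per_image = 3
results = {
    1: np.array([[0, 0, 10, 10, 0.9], [0, 0, 10, 10, 0.2]], np.float32),
    2: np.array([[5, 5, 20, 20, 0.8], [5, 5, 20, 20, 0.6], [5, 5, 20, 20, 0.1]], np.float32),
}
scores = np.hstack([results[j][:, 4] for j in results])
if len(scores) > max_per_image:
    kth = len(scores) - max_per_image
    thresh = np.partition(scores, kth)[kth]          # the max_per_image-th highest score
    for j in results:
        results[j] = results[j][results[j][:, 4] >= thresh]
print({j: len(results[j]) for j in results})         # {1: 1, 2: 2} -> 3 boxes kept
```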
/src/lib/detectors/ddd.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import numpy as np
7 | from progress.bar import Bar
8 | import time
9 | import torch
10 |
11 |
12 | from models.decode import ddd_decode
13 | from models.utils import flip_tensor
14 | from utils.image import get_affine_transform
15 | from utils.post_process import ddd_post_process
16 | from utils.debugger import Debugger
17 | from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y
18 | from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d
19 |
20 | from .base_detector import BaseDetector
21 |
22 | class DddDetector(BaseDetector):
23 | def __init__(self, opt):
24 | super(DddDetector, self).__init__(opt)
25 | self.calib = np.array([[707.0493, 0, 604.0814, 45.75831],
26 | [0, 707.0493, 180.5066, -0.3454157],
27 | [0, 0, 1., 0.004981016]], dtype=np.float32)
28 |
29 |
30 | def pre_process(self, image, scale, calib=None):
31 | height, width = image.shape[0:2]
32 |
33 | inp_height, inp_width = self.opt.input_h, self.opt.input_w
34 | c = np.array([width / 2, height / 2], dtype=np.float32)
35 | if self.opt.keep_res:
36 | s = np.array([inp_width, inp_height], dtype=np.int32)
37 | else:
38 | s = np.array([width, height], dtype=np.int32)
39 |
40 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
41 | resized_image = image #cv2.resize(image, (width, height))
42 | inp_image = cv2.warpAffine(
43 | resized_image, trans_input, (inp_width, inp_height),
44 | flags=cv2.INTER_LINEAR)
45 | inp_image = (inp_image.astype(np.float32) / 255.)
46 | inp_image = (inp_image - self.mean) / self.std
47 | images = inp_image.transpose(2, 0, 1)[np.newaxis, ...]
48 | calib = np.array(calib, dtype=np.float32) if calib is not None \
49 | else self.calib
50 | images = torch.from_numpy(images)
51 | meta = {'c': c, 's': s,
52 | 'out_height': inp_height // self.opt.down_ratio,
53 | 'out_width': inp_width // self.opt.down_ratio,
54 | 'calib': calib}
55 | return images, meta
56 |
57 | def process(self, images, return_time=False):
58 | with torch.no_grad():
59 | torch.cuda.synchronize()
60 | output = self.model(images)[-1]
61 | output['hm'] = output['hm'].sigmoid_()
62 | output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
63 | wh = output['wh'] if self.opt.reg_bbox else None
64 | reg = output['reg'] if self.opt.reg_offset else None
65 | torch.cuda.synchronize()
66 | forward_time = time.time()
67 |
68 | dets = ddd_decode(output['hm'], output['rot'], output['dep'],
69 | output['dim'], wh=wh, reg=reg, K=self.opt.K)
70 | if return_time:
71 | return output, dets, forward_time
72 | else:
73 | return output, dets
74 |
75 | def post_process(self, dets, meta, scale=1):
76 | dets = dets.detach().cpu().numpy()
77 | detections = ddd_post_process(
78 | dets.copy(), [meta['c']], [meta['s']], [meta['calib']], self.opt)
79 | self.this_calib = meta['calib']
80 | return detections[0]
81 |
82 | def merge_outputs(self, detections):
83 | results = detections[0]
84 | for j in range(1, self.num_classes + 1):
85 | if len(results[j]) > 0:
86 | keep_inds = (results[j][:, -1] > self.opt.peak_thresh)
87 | results[j] = results[j][keep_inds]
88 | return results
89 |
90 | def debug(self, debugger, images, dets, output, scale=1):
91 | dets = dets.detach().cpu().numpy()
92 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
93 | img = ((img * self.std + self.mean) * 255).astype(np.uint8)
94 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
95 | debugger.add_blend_img(img, pred, 'pred_hm')
96 | debugger.add_ct_detection(
97 | img, dets[0], show_box=self.opt.reg_bbox,
98 | center_thresh=self.opt.vis_thresh, img_id='det_pred')
99 |
100 | def show_results(self, debugger, image, results):
101 | debugger.add_3d_detection(
102 | image, results, self.this_calib,
103 | center_thresh=self.opt.vis_thresh, img_id='add_pred')
104 | debugger.add_bird_view(
105 | results, center_thresh=self.opt.vis_thresh, img_id='bird_pred')
106 | debugger.show_all_imgs(pause=self.pause)
--------------------------------------------------------------------------------
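The `1. / (output['dep'].sigmoid() + 1e-6) - 1.` line in `process` maps the unbounded depth-head output to a positive depth through an inverse sigmoid (the 1e-6 avoids division by zero). A tiny sketch of the transform and its inverse as a sanity check:

```python
import torch

raw = torch.tensor([-2.0, 0.0, 3.0])               # hypothetical 'dep' head outputs
depth = 1. / (raw.sigmoid() + 1e-6) - 1.           # approx [7.39, 1.00, 0.05]
p = 1. / (depth + 1.)                              # undo the transform (up to the epsilon)
raw_back = torch.log(p / (1. - p))
print(depth, raw_back)                             # raw_back ~ raw
```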
/src/lib/detectors/detector_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from .exdet import ExdetDetector
6 | from .ddd import DddDetector
7 | from .ctdet import CtdetDetector
8 | from .multi_pose import MultiPoseDetector
9 |
10 | detector_factory = {
11 | 'exdet': ExdetDetector,
12 | 'ddd': DddDetector,
13 | 'ctdet': CtdetDetector,
14 | 'multi_pose': MultiPoseDetector,
15 | }
16 |
--------------------------------------------------------------------------------
/src/lib/detectors/exdet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 |
7 | import os
8 |
9 | import cv2
10 | import numpy as np
11 | from progress.bar import Bar
12 | import time
13 | import torch
14 |
15 | from models.decode import exct_decode, agnex_ct_decode
16 | from models.utils import flip_tensor
17 | from utils.image import get_affine_transform, transform_preds
18 | from utils.post_process import ctdet_post_process
19 | from utils.debugger import Debugger
20 |
21 | from .base_detector import BaseDetector
22 |
23 | class ExdetDetector(BaseDetector):
24 | def __init__(self, opt):
25 | super(ExdetDetector, self).__init__(opt)
26 | self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode
27 |
28 | def process(self, images, return_time=False):
29 | with torch.no_grad():
30 | torch.cuda.synchronize()
31 | output = self.model(images)[-1]
32 | t_heat = output['hm_t'].sigmoid_()
33 | l_heat = output['hm_l'].sigmoid_()
34 | b_heat = output['hm_b'].sigmoid_()
35 | r_heat = output['hm_r'].sigmoid_()
36 | c_heat = output['hm_c'].sigmoid_()
37 | torch.cuda.synchronize()
38 | forward_time = time.time()
39 | if self.opt.reg_offset:
40 | dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat,
41 | output['reg_t'], output['reg_l'],
42 | output['reg_b'], output['reg_r'],
43 | K=self.opt.K,
44 | scores_thresh=self.opt.scores_thresh,
45 | center_thresh=self.opt.center_thresh,
46 | aggr_weight=self.opt.aggr_weight)
47 | else:
48 | dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, K=self.opt.K,
49 | scores_thresh=self.opt.scores_thresh,
50 | center_thresh=self.opt.center_thresh,
51 | aggr_weight=self.opt.aggr_weight)
52 | if return_time:
53 | return output, dets, forward_time
54 | else:
55 | return output, dets
56 |
57 | def debug(self, debugger, images, dets, output, scale=1):
58 | detection = dets.detach().cpu().numpy().copy()
59 | detection[:, :, :4] *= self.opt.down_ratio
60 | for i in range(1):
61 | inp_height, inp_width = images.shape[2], images.shape[3]
62 | pred_hm = np.zeros((inp_height, inp_width, 3), dtype=np.uint8)
63 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
64 | img = ((img * self.std + self.mean) * 255).astype(np.uint8)
65 | parts = ['t', 'l', 'b', 'r', 'c']
66 | for p in parts:
67 | tag = 'hm_{}'.format(p)
68 | pred = debugger.gen_colormap(
69 | output[tag][i].detach().cpu().numpy(), (inp_height, inp_width))
70 | if p != 'c':
71 | pred_hm = np.maximum(pred_hm, pred)
72 | else:
73 | debugger.add_blend_img(
74 | img, pred, 'pred_{}_{:.1f}'.format(p, scale))
75 | debugger.add_blend_img(img, pred_hm, 'pred_{:.1f}'.format(scale))
76 | debugger.add_img(img, img_id='out_{:.1f}'.format(scale))
77 | for k in range(len(detection[i])):
78 | # print('detection', detection[i, k, 4], detection[i, k])
79 | if detection[i, k, 4] > 0.01:
80 | # print('detection', detection[i, k, 4], detection[i, k])
81 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
82 | detection[i, k, 4],
83 | img_id='out_{:.1f}'.format(scale))
84 |
85 | def post_process(self, dets, meta, scale=1):
86 | out_width, out_height = meta['out_width'], meta['out_height']
87 | dets = dets.detach().cpu().numpy().reshape(2, -1, 14)
88 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
89 | dets = dets.reshape(1, -1, 14)
90 | dets[0, :, 0:2] = transform_preds(
91 | dets[0, :, 0:2], meta['c'], meta['s'], (out_width, out_height))
92 | dets[0, :, 2:4] = transform_preds(
93 | dets[0, :, 2:4], meta['c'], meta['s'], (out_width, out_height))
94 | dets[:, :, 0:4] /= scale
95 | return dets[0]
96 |
97 | def merge_outputs(self, detections):
98 | detections = np.concatenate(
99 | [detection for detection in detections], axis=0).astype(np.float32)
100 | classes = detections[..., -1]
101 | keep_inds = (detections[:, 4] > 0)
102 | detections = detections[keep_inds]
103 | classes = classes[keep_inds]
104 |
105 | results = {}
106 | for j in range(self.num_classes):
107 | keep_inds = (classes == j)
108 | results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
109 | soft_nms(results[j + 1], Nt=0.5, method=2)  # NOTE: soft_nms is not imported in this file; add "from external.nms import soft_nms" as in ctdet.py
110 | results[j + 1] = results[j + 1][:, 0:5]
111 |
112 | scores = np.hstack([
113 | results[j][:, -1]
114 | for j in range(1, self.num_classes + 1)
115 | ])
116 | if len(scores) > self.max_per_image:
117 | kth = len(scores) - self.max_per_image
118 | thresh = np.partition(scores, kth)[kth]
119 | for j in range(1, self.num_classes + 1):
120 | keep_inds = (results[j][:, -1] >= thresh)
121 | results[j] = results[j][keep_inds]
122 | return results
123 |
124 |
125 | def show_results(self, debugger, image, results):
126 | debugger.add_img(image, img_id='exdet')
127 | for j in range(1, self.num_classes + 1):
128 | for bbox in results[j]:
129 | if bbox[4] > self.opt.vis_thresh:
130 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='exdet')
131 | debugger.show_all_imgs(pause=self.pause)
132 |
--------------------------------------------------------------------------------
/src/lib/detectors/multi_pose.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import numpy as np
7 | from progress.bar import Bar
8 | import time
9 | import torch
10 |
11 | try:
12 | from external.nms import soft_nms_39
13 | except:
14 | print('NMS not imported! If you need it,'
15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make')
16 | from models.decode import multi_pose_decode
17 | from models.utils import flip_tensor, flip_lr_off, flip_lr
18 | from utils.image import get_affine_transform
19 | from utils.post_process import multi_pose_post_process
20 | from utils.debugger import Debugger
21 |
22 | from .base_detector import BaseDetector
23 |
24 | class MultiPoseDetector(BaseDetector):
25 | def __init__(self, opt):
26 | super(MultiPoseDetector, self).__init__(opt)
27 | self.flip_idx = opt.flip_idx
28 |
29 | def process(self, images, return_time=False):
30 | with torch.no_grad():
31 | torch.cuda.synchronize()
32 | output = self.model(images)[-1]
33 | output['hm'] = output['hm'].sigmoid_()
34 | if self.opt.hm_hp and not self.opt.mse_loss:
35 | output['hm_hp'] = output['hm_hp'].sigmoid_()
36 |
37 | reg = output['reg'] if self.opt.reg_offset else None
38 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None
39 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None
40 | torch.cuda.synchronize()
41 | forward_time = time.time()
42 |
43 | if self.opt.flip_test:
44 | output['hm'] = (output['hm'][0:1] + flip_tensor(output['hm'][1:2])) / 2
45 | output['wh'] = (output['wh'][0:1] + flip_tensor(output['wh'][1:2])) / 2
46 | output['hps'] = (output['hps'][0:1] +
47 | flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2
48 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \
49 | if hm_hp is not None else None
50 | reg = reg[0:1] if reg is not None else None
51 | hp_offset = hp_offset[0:1] if hp_offset is not None else None
52 |
53 | dets = multi_pose_decode(
54 | output['hm'], output['wh'], output['hps'],
55 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K)
56 |
57 | if return_time:
58 | return output, dets, forward_time
59 | else:
60 | return output, dets
61 |
62 | def post_process(self, dets, meta, scale=1):
63 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
64 | dets = multi_pose_post_process(
65 | dets.copy(), [meta['c']], [meta['s']],
66 | meta['out_height'], meta['out_width'])
67 | for j in range(1, self.num_classes + 1):
68 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 39)
69 | # import pdb; pdb.set_trace()
70 | dets[0][j][:, :4] /= scale
71 | dets[0][j][:, 5:] /= scale
72 | return dets[0]
73 |
74 | def merge_outputs(self, detections):
75 | results = {}
76 | results[1] = np.concatenate(
77 | [detection[1] for detection in detections], axis=0).astype(np.float32)
78 | if self.opt.nms or len(self.opt.test_scales) > 1:
79 | soft_nms_39(results[1], Nt=0.5, method=2)
80 | results[1] = results[1].tolist()
81 | return results
82 |
83 | def debug(self, debugger, images, dets, output, scale=1):
84 | dets = dets.detach().cpu().numpy().copy()
85 | dets[:, :, :4] *= self.opt.down_ratio
86 | dets[:, :, 5:39] *= self.opt.down_ratio
87 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
88 | img = np.clip(((
89 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
90 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
91 | debugger.add_blend_img(img, pred, 'pred_hm')
92 | if self.opt.hm_hp:
93 | pred = debugger.gen_colormap_hp(
94 | output['hm_hp'][0].detach().cpu().numpy())
95 | debugger.add_blend_img(img, pred, 'pred_hmhp')
96 |
97 | def show_results(self, debugger, image, results):
98 | debugger.add_img(image, img_id='multi_pose')
99 | for bbox in results[1]:
100 | if bbox[4] > self.opt.vis_thresh:
101 | debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose')
102 | debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
103 | debugger.show_all_imgs(pause=self.pause)
--------------------------------------------------------------------------------
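Each row of `results[1]` produced above packs 39 floats: 4 bounding-box coordinates, 1 score, and 17 COCO keypoints as (x, y) pairs, which is exactly how `show_results` slices it. A small sketch of unpacking one row with placeholder values:

```python
import numpy as np

det = np.arange(39, dtype=np.float32)          # placeholder detection row
bbox, score, kps = det[:4], det[4], det[5:39].reshape(17, 2)
print(bbox.shape, float(score), kps.shape)     # (4,) 4.0 (17, 2)
```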
/src/lib/external/.gitignore:
--------------------------------------------------------------------------------
1 | bbox.c
2 | bbox.cpython-35m-x86_64-linux-gnu.so
3 | bbox.cpython-36m-x86_64-linux-gnu.so
4 |
5 | nms.c
6 | nms.cpython-35m-x86_64-linux-gnu.so
7 | nms.cpython-36m-x86_64-linux-gnu.so
8 |
--------------------------------------------------------------------------------
/src/lib/external/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 |
--------------------------------------------------------------------------------
/src/lib/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/external/__init__.py
--------------------------------------------------------------------------------
/src/lib/external/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from distutils.core import setup
3 | from distutils.extension import Extension
4 | from Cython.Build import cythonize
5 |
6 | extensions = [
7 | Extension(
8 | "nms",
9 | ["nms.pyx"],
10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
11 | )
12 | ]
13 |
14 | setup(
15 | name="coco",
16 | ext_modules=cythonize(extensions),
17 | include_dirs=[numpy.get_include()]
18 | )
19 |
--------------------------------------------------------------------------------
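This setup script is what the `Makefile` above invokes (`python setup.py build_ext --inplace`) to produce the `nms.*.so` extension that the ctdet and multi_pose detectors try to import. A minimal sketch of calling the compiled `soft_nms` afterwards, assuming the conventional soft-NMS behaviour of `nms.pyx` (boxes are float32 rows of `[x1, y1, x2, y2, score]`, rescored in place):

```python
import numpy as np
from external.nms import soft_nms   # available once `make` has been run in src/lib/external

boxes = np.array([[ 10,  10,  60,  60, 0.9],
                  [ 12,  12,  62,  62, 0.8],
                  [200, 200, 250, 250, 0.7]], dtype=np.float32)
soft_nms(boxes, Nt=0.5, method=2)   # same call as CtdetDetector.merge_outputs
print(boxes[:, 4])                  # scores of heavily overlapping boxes are decayed
```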
/src/lib/logger.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
6 | import os
7 | import time
8 | import sys
9 | import torch
10 | USE_TENSORBOARD = True
11 | try:
12 | import tensorboardX
13 | print('Using tensorboardX')
14 | except:
15 | USE_TENSORBOARD = False
16 |
17 | class Logger(object):
18 | def __init__(self, opt):
19 | """Create a summary writer logging to log_dir."""
20 | if not os.path.exists(opt.save_dir):
21 | os.makedirs(opt.save_dir)
22 | if not os.path.exists(opt.debug_dir):
23 | os.makedirs(opt.debug_dir)
24 |
25 | time_str = time.strftime('%Y-%m-%d-%H-%M')
26 |
27 | args = dict((name, getattr(opt, name)) for name in dir(opt)
28 | if not name.startswith('_'))
29 | file_name = os.path.join(opt.save_dir, 'opt.txt')
30 | with open(file_name, 'wt') as opt_file:
31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__))
32 | opt_file.write('==> cudnn version: {}\n'.format(
33 | torch.backends.cudnn.version()))
34 | opt_file.write('==> Cmd:\n')
35 | opt_file.write(str(sys.argv))
36 | opt_file.write('\n==> Opt:\n')
37 | for k, v in sorted(args.items()):
38 | opt_file.write(' %s: %s\n' % (str(k), str(v)))
39 |
40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str)
41 | if USE_TENSORBOARD:
42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
43 | else:
44 | if not os.path.exists(os.path.dirname(log_dir)):
45 | os.mkdir(os.path.dirname(log_dir))
46 | if not os.path.exists(log_dir):
47 | os.mkdir(log_dir)
48 | self.log = open(log_dir + '/log.txt', 'w')
49 | try:
50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir))
51 | except:
52 | pass
53 | self.start_line = True
54 |
55 | def write(self, txt):
56 | if self.start_line:
57 | time_str = time.strftime('%Y-%m-%d-%H-%M')
58 | self.log.write('{}: {}'.format(time_str, txt))
59 | else:
60 | self.log.write(txt)
61 | self.start_line = False
62 | if '\n' in txt:
63 | self.start_line = True
64 | self.log.flush()
65 |
66 | def close(self):
67 | self.log.close()
68 |
69 | def scalar_summary(self, tag, value, step):
70 | """Log a scalar variable."""
71 | if USE_TENSORBOARD:
72 | self.writer.add_scalar(tag, value, step)
73 |
--------------------------------------------------------------------------------
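A minimal sketch of how the Logger is driven from a training loop, along the lines of `main.py`; option parsing is assumed to follow `src/lib/opts.py` and the metric values are placeholders:

```python
from opts import opts
from logger import Logger

opt = opts().parse(['ctdet'])                  # assumed to set opt.save_dir / opt.debug_dir
logger = Logger(opt)                           # dumps opt.txt and opens the text log
for epoch in range(1, 3):
    loss = 1.0 / epoch                         # placeholder metric
    logger.write('epoch: {} |'.format(epoch))
    logger.scalar_summary('train_loss', loss, epoch)   # tensorboardX scalar, if available
    logger.write('loss {:8f} | \n'.format(loss))
logger.close()
```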
/src/lib/models/data_parallel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 |
7 |
8 | from .scatter_gather import scatter_kwargs
9 |
10 | class _DataParallel(Module):
11 | r"""Implements data parallelism at the module level.
12 |
13 | This container parallelizes the application of the given module by
14 | splitting the input across the specified devices by chunking in the batch
15 | dimension. In the forward pass, the module is replicated on each device,
16 | and each replica handles a portion of the input. During the backwards
17 | pass, gradients from each replica are summed into the original module.
18 |
19 | The batch size should be larger than the number of GPUs used. It should
20 | also be an integer multiple of the number of GPUs so that each chunk is the
21 | same size (so that each GPU processes the same number of samples).
22 |
23 | See also: :ref:`cuda-nn-dataparallel-instead`
24 |
25 | Arbitrary positional and keyword inputs are allowed to be passed into
26 | DataParallel EXCEPT Tensors. All variables will be scattered on the dim
27 | specified (default 0). Primitive types will be broadcast, but all
28 | other types will be a shallow copy and can be corrupted if written to in
29 | the model's forward pass.
30 |
31 | Args:
32 | module: module to be parallelized
33 | device_ids: CUDA devices (default: all devices)
34 | output_device: device location of output (default: device_ids[0])
35 |
36 | Example::
37 |
38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
39 | >>> output = net(input_var)
40 | """
41 |
42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
43 |
44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
45 | super(_DataParallel, self).__init__()
46 |
47 | if not torch.cuda.is_available():
48 | self.module = module
49 | self.device_ids = []
50 | return
51 |
52 | if device_ids is None:
53 | device_ids = list(range(torch.cuda.device_count()))
54 | if output_device is None:
55 | output_device = device_ids[0]
56 | self.dim = dim
57 | self.module = module
58 | self.device_ids = device_ids
59 | self.chunk_sizes = chunk_sizes
60 | self.output_device = output_device
61 | if len(self.device_ids) == 1:
62 | self.module.cuda(device_ids[0])
63 |
64 | def forward(self, *inputs, **kwargs):
65 | if not self.device_ids:
66 | return self.module(*inputs, **kwargs)
67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
68 | if len(self.device_ids) == 1:
69 | return self.module(*inputs[0], **kwargs[0])
70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
71 | outputs = self.parallel_apply(replicas, inputs, kwargs)
72 | return self.gather(outputs, self.output_device)
73 |
74 | def replicate(self, module, device_ids):
75 | return replicate(module, device_ids)
76 |
77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
79 |
80 | def parallel_apply(self, replicas, inputs, kwargs):
81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
82 |
83 | def gather(self, outputs, output_device):
84 | return gather(outputs, output_device, dim=self.dim)
85 |
86 |
87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
89 |
90 | This is the functional version of the DataParallel module.
91 |
92 | Args:
93 | module: the module to evaluate in parallel
94 | inputs: inputs to the module
95 | device_ids: GPU ids on which to replicate module
96 | output_device: GPU location of the output. Use -1 to indicate the CPU.
97 | (default: device_ids[0])
98 | Returns:
99 | a Variable containing the result of module(input) located on
100 | output_device
101 | """
102 | if not isinstance(inputs, tuple):
103 | inputs = (inputs,)
104 |
105 | if device_ids is None:
106 | device_ids = list(range(torch.cuda.device_count()))
107 |
108 | if output_device is None:
109 | output_device = device_ids[0]
110 |
111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
112 | if len(device_ids) == 1:
113 | return module(*inputs[0], **module_kwargs[0])
114 | used_device_ids = device_ids[:len(inputs)]
115 | replicas = replicate(module, used_device_ids)
116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
117 | return gather(outputs, output_device, dim)
118 |
119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
120 | if chunk_sizes is None:
121 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
122 | standard_size = True
123 | for i in range(1, len(chunk_sizes)):
124 | if chunk_sizes[i] != chunk_sizes[0]:
125 | standard_size = False
126 | if standard_size:
127 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes)
--------------------------------------------------------------------------------
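The `DataParallel` wrapper at the bottom only falls back to the custom `_DataParallel` when the requested chunks are uneven; that is the case CenterNet cares about, where the master GPU gets a smaller slice of the batch so it has headroom for the loss and gradients. A sketch of that use, assuming two visible GPUs (the uneven scatter itself lives in `scatter_gather.py`, outside this excerpt):

```python
import torch
import torch.nn as nn
from models.data_parallel import DataParallel

model = nn.Conv2d(3, 8, 3, padding=1)
# Batch of 16 split as 4 on GPU 0 (master) and 12 on GPU 1.
model = DataParallel(model, device_ids=[0, 1], chunk_sizes=[4, 12]).cuda()
out = model(torch.randn(16, 3, 64, 64).cuda())
print(out.shape)   # torch.Size([16, 8, 64, 64]), gathered back on GPU 0
# With chunk_sizes=None, or all chunks equal, this degenerates to torch.nn.DataParallel.
```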
/src/lib/models/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torchvision.models as models
6 | import torch
7 | import torch.nn as nn
8 | import os
9 |
10 | from .networks.msra_resnet import get_pose_net
11 | from .networks.dlav0 import get_pose_net as get_dlav0
12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
14 | from .networks.large_hourglass import get_large_hourglass_net
15 |
16 | _model_factory = {
17 | 'res': get_pose_net, # default Resnet with deconv
18 | 'dlav0': get_dlav0, # default DLAup
19 | 'dla': get_dla_dcn,
20 | 'resdcn': get_pose_net_dcn,
21 | 'hourglass': get_large_hourglass_net,
22 | }
23 |
24 | def create_model(arch, heads, head_conv):
25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
26 | arch = arch[:arch.find('_')] if '_' in arch else arch
27 | get_model = _model_factory[arch]
28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv)
29 | return model
30 |
31 | def load_model(model, model_path, optimizer=None, resume=False,
32 | lr=None, lr_step=None):
33 | start_epoch = 0
34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
36 | state_dict_ = checkpoint['state_dict']
37 | state_dict = {}
38 |
39 | # strip the 'module.' prefix that DataParallel adds to parameter names
40 | for k in state_dict_:
41 | if k.startswith('module') and not k.startswith('module_list'):
42 | state_dict[k[7:]] = state_dict_[k]
43 | else:
44 | state_dict[k] = state_dict_[k]
45 | model_state_dict = model.state_dict()
46 |
47 | # check loaded parameters and created model parameters
48 | msg = 'If you see this, your model does not fully load the ' + \
49 | 'pre-trained weights. Please make sure ' + \
50 | 'you have correctly specified --arch xxx ' + \
51 | 'or set the correct --num_classes for your own dataset.'
52 | for k in state_dict:
53 | if k in model_state_dict:
54 | if state_dict[k].shape != model_state_dict[k].shape:
55 | print('Skip loading parameter {}, required shape {}, '\
56 | 'loaded shape {}. {}'.format(
57 | k, model_state_dict[k].shape, state_dict[k].shape, msg))
58 | state_dict[k] = model_state_dict[k]
59 | else:
60 | print('Drop parameter {}.'.format(k) + msg)
61 | for k in model_state_dict:
62 | if not (k in state_dict):
63 | print('No param {}.'.format(k) + msg)
64 | state_dict[k] = model_state_dict[k]
65 | model.load_state_dict(state_dict, strict=False)
66 |
67 | # resume optimizer parameters
68 | if optimizer is not None and resume:
69 | if 'optimizer' in checkpoint:
70 | optimizer.load_state_dict(checkpoint['optimizer'])
71 | start_epoch = checkpoint['epoch']
72 | start_lr = lr
73 | for step in lr_step:
74 | if start_epoch >= step:
75 | start_lr *= 0.1
76 | for param_group in optimizer.param_groups:
77 | param_group['lr'] = start_lr
78 | print('Resumed optimizer with start lr', start_lr)
79 | else:
80 | print('No optimizer parameters in checkpoint.')
81 | if optimizer is not None:
82 | return model, optimizer, start_epoch
83 | else:
84 | return model
85 |
86 | def save_model(path, epoch, model, optimizer=None):
87 | if isinstance(model, torch.nn.DataParallel):
88 | state_dict = model.module.state_dict()
89 | else:
90 | state_dict = model.state_dict()
91 | data = {'epoch': epoch,
92 | 'state_dict': state_dict}
93 | if not (optimizer is None):
94 | data['optimizer'] = optimizer.state_dict()
95 | torch.save(data, path)
96 |
97 |
--------------------------------------------------------------------------------
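`create_model` splits the architecture string at the first underscore: the prefix selects the backbone from `_model_factory` and the suffix is the layer count (so `res_18`, `dla_34`, `hourglass`, ...). Below is a small sketch of the create/save/load round trip; the heads dict mirrors a COCO ctdet configuration (80-class heatmap plus 2-channel size and offset heads) and the file name is a placeholder.

```python
from models.model import create_model, load_model, save_model

heads = {'hm': 80, 'wh': 2, 'reg': 2}                  # ctdet heads for COCO
model = create_model('res_18', heads, head_conv=64)    # msra_resnet backbone, 18 layers
save_model('model_last.pth', 0, model)                 # stores epoch + state_dict
model = load_model(model, 'model_last.pth')            # mismatched shapes are skipped with a warning
```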
/src/lib/models/networks/DCNv2/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .idea
3 | *.so
4 | *.o
5 | *pyc
6 | _ext
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Charles Shang
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/README.md:
--------------------------------------------------------------------------------
1 | ## Deformable Convolutional Networks V2 with PyTorch
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python test.py # run examples and gradient check
7 | ```
8 |
9 | ### An Example
10 | - deformable conv
11 | ```python
12 | from dcn_v2 import DCN
13 | input = torch.randn(2, 64, 128, 128).cuda()
14 | # wrap all things (offset and mask) in DCN
15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 | output = dcn(input)
17 | print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 | from dcn_v2 import DCNPooling
22 | input = torch.randn(2, 32, 64, 64).cuda()
23 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 | x = torch.randint(256, (20, 1)).cuda().float()
25 | y = torch.randint(256, (20, 1)).cuda().float()
26 | w = torch.randint(64, (20, 1)).cuda().float()
27 | h = torch.randint(64, (20, 1)).cuda().float()
28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 |
30 | # modulated deformable pooling (V2)
31 | # wrap all things (offset and mask) in DCNPooling
32 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 | pooled_size=7,
34 | output_dim=32,
35 | no_trans=False,
36 | group_size=1,
37 | trans_std=0.1).cuda()
38 |
39 | dout = dpooling(input, rois)
40 | ```
41 |
42 | ### Known Issues:
43 |
44 | - [x] Gradient check w.r.t offset (solved)
45 | - [ ] Backward is not reentrant (minor)
46 |
47 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
48 |
49 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
50 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it because of some
51 | non-differentiable points?
52 |
53 | Update: all gradient check passes with double precision.
54 |
55 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
56 | float, `<1e-15` for double),
57 | so it may not be a serious problem.
58 |
59 | Please post an issue or PR if you have any comments.
60 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/models/networks/DCNv2/__init__.py
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/dcn_v2.c']
7 | headers = ['src/dcn_v2.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | extra_objects = []
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/dcn_v2_cuda.c']
15 | headers += ['src/dcn_v2_cuda.h']
16 | defines += [('WITH_CUDA', None)]
17 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o']
18 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o']
19 | with_cuda = True
20 | else:
21 | raise ValueError('CUDA is not available')
22 |
23 | extra_compile_args = ['-fopenmp', '-std=c99']
24 |
25 | this_file = os.path.dirname(os.path.realpath(__file__))
26 | print(this_file)
27 | sources = [os.path.join(this_file, fname) for fname in sources]
28 | headers = [os.path.join(this_file, fname) for fname in headers]
29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
30 |
31 | ffi = create_extension(
32 | '_ext.dcn_v2',
33 | headers=headers,
34 | sources=sources,
35 | define_macros=defines,
36 | relative_to=__file__,
37 | with_cuda=with_cuda,
38 | extra_objects=extra_objects,
39 | extra_compile_args=extra_compile_args
40 | )
41 |
42 | if __name__ == '__main__':
43 | ffi.build()
44 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/build_double.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/dcn_v2_double.c']
7 | headers = ['src/dcn_v2_double.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | extra_objects = []
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/dcn_v2_cuda_double.c']
15 | headers += ['src/dcn_v2_cuda_double.h']
16 | defines += [('WITH_CUDA', None)]
17 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o']
18 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o']
19 | with_cuda = True
20 | else:
21 | raise ValueError('CUDA is not available')
22 |
23 | extra_compile_args = ['-fopenmp', '-std=c99']
24 |
25 | this_file = os.path.dirname(os.path.realpath(__file__))
26 | print(this_file)
27 | sources = [os.path.join(this_file, fname) for fname in sources]
28 | headers = [os.path.join(this_file, fname) for fname in headers]
29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
30 |
31 | ffi = create_extension(
32 | '_ext.dcn_v2_double',
33 | headers=headers,
34 | sources=sources,
35 | define_macros=defines,
36 | relative_to=__file__,
37 | with_cuda=with_cuda,
38 | extra_objects=extra_objects,
39 | extra_compile_args=extra_compile_args
40 | )
41 |
42 | if __name__ == '__main__':
43 | ffi.build()
44 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd src/cuda
3 |
4 | # compile dcn
5 | nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC
6 | nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC
7 |
8 | # compile dcn-roi-pooling
9 | nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC
10 | nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC
11 |
12 | cd -
13 | python build.py
14 | python build_double.py
15 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
3 | *
4 | * COPYRIGHT
5 | *
6 | * All contributions by the University of California:
7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
8 | * All rights reserved.
9 | *
10 | * All other contributions:
11 | * Copyright (c) 2014-2017, the respective contributors
12 | * All rights reserved.
13 | *
14 | * Caffe uses a shared copyright model: each contributor holds copyright over
15 | * their contributions to Caffe. The project versioning records all such
16 | * contribution and copyright details. If a contributor wants to further mark
17 | * their specific copyright on a particular contribution, they should indicate
18 | * their copyright solely in the commit message of the change when it is
19 | * committed.
20 | *
21 | * LICENSE
22 | *
23 | * Redistribution and use in source and binary forms, with or without
24 | * modification, are permitted provided that the following conditions are met:
25 | *
26 | * 1. Redistributions of source code must retain the above copyright notice, this
27 | * list of conditions and the following disclaimer.
28 | * 2. Redistributions in binary form must reproduce the above copyright notice,
29 | * this list of conditions and the following disclaimer in the documentation
30 | * and/or other materials provided with the distribution.
31 | *
32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | *
43 | * CONTRIBUTION AGREEMENT
44 | *
45 | * By contributing to the BVLC/caffe repository through pull-request, comment,
46 | * or otherwise, the contributor releases their content to the
47 | * license and copyright terms herein.
48 | *
49 | ***************** END Caffe Copyright Notice and Disclaimer ********************
50 | *
51 | * Copyright (c) 2018 Microsoft
52 | * Licensed under The MIT License [see LICENSE for details]
53 | * \file modulated_deformable_im2col.h
54 | * \brief Function definitions of converting an image to
55 | * column matrix based on kernel, padding, dilation, and offset.
56 | * These functions are mainly used in deformable convolution operators.
57 | * \ref: https://arxiv.org/abs/1811.11168
58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
59 | */
60 |
61 | /***************** Adapted by Charles Shang *********************/
62 |
63 | #ifndef DCN_V2_IM2COL_CUDA
64 | #define DCN_V2_IM2COL_CUDA
65 |
66 | #ifdef __cplusplus
67 | extern "C"
68 | {
69 | #endif
70 |
71 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
72 | const float *data_im, const float *data_offset, const float *data_mask,
73 | const int batch_size, const int channels, const int height_im, const int width_im,
74 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
76 | const int dilation_h, const int dilation_w,
77 | const int deformable_group, float *data_col);
78 |
79 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
80 | const float *data_col, const float *data_offset, const float *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, float *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
89 | const int batch_size, const int channels, const int height_im, const int width_im,
90 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
92 | const int dilation_h, const int dilation_w,
93 | const int deformable_group,
94 | float *grad_offset, float *grad_mask);
95 |
96 | #ifdef __cplusplus
97 | }
98 | #endif
99 |
100 | #endif
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h:
--------------------------------------------------------------------------------
1 | /*!
2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
3 | *
4 | * COPYRIGHT
5 | *
6 | * All contributions by the University of California:
7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
8 | * All rights reserved.
9 | *
10 | * All other contributions:
11 | * Copyright (c) 2014-2017, the respective contributors
12 | * All rights reserved.
13 | *
14 | * Caffe uses a shared copyright model: each contributor holds copyright over
15 | * their contributions to Caffe. The project versioning records all such
16 | * contribution and copyright details. If a contributor wants to further mark
17 | * their specific copyright on a particular contribution, they should indicate
18 | * their copyright solely in the commit message of the change when it is
19 | * committed.
20 | *
21 | * LICENSE
22 | *
23 | * Redistribution and use in source and binary forms, with or without
24 | * modification, are permitted provided that the following conditions are met:
25 | *
26 | * 1. Redistributions of source code must retain the above copyright notice, this
27 | * list of conditions and the following disclaimer.
28 | * 2. Redistributions in binary form must reproduce the above copyright notice,
29 | * this list of conditions and the following disclaimer in the documentation
30 | * and/or other materials provided with the distribution.
31 | *
32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | *
43 | * CONTRIBUTION AGREEMENT
44 | *
45 | * By contributing to the BVLC/caffe repository through pull-request, comment,
46 | * or otherwise, the contributor releases their content to the
47 | * license and copyright terms herein.
48 | *
49 | ***************** END Caffe Copyright Notice and Disclaimer ********************
50 | *
51 | * Copyright (c) 2018 Microsoft
52 | * Licensed under The MIT License [see LICENSE for details]
53 | * \file modulated_deformable_im2col.h
54 | * \brief Function definitions of converting an image to
55 | * column matrix based on kernel, padding, dilation, and offset.
56 | * These functions are mainly used in deformable convolution operators.
57 | * \ref: https://arxiv.org/abs/1811.11168
58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
59 | */
60 |
61 | /***************** Adapted by Charles Shang *********************/
62 |
63 | #ifndef DCN_V2_IM2COL_CUDA_DOUBLE
64 | #define DCN_V2_IM2COL_CUDA_DOUBLE
65 |
66 | #ifdef __cplusplus
67 | extern "C"
68 | {
69 | #endif
70 |
71 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
72 | const double *data_im, const double *data_offset, const double *data_mask,
73 | const int batch_size, const int channels, const int height_im, const int width_im,
74 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
76 | const int dilation_h, const int dilation_w,
77 | const int deformable_group, double *data_col);
78 |
79 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
80 | const double *data_col, const double *data_offset, const double *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, double *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
88 | const double *data_col, const double *data_im, const double *data_offset, const double *data_mask,
89 | const int batch_size, const int channels, const int height_im, const int width_im,
90 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
92 | const int dilation_h, const int dilation_w,
93 | const int deformable_group,
94 | double *grad_offset, double *grad_mask);
95 |
96 | #ifdef __cplusplus
97 | }
98 | #endif
99 |
100 | #endif
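
Note (editorial): in the backward pass the two col2im routines declared above split the work: modulated_deformable_col2im_coord_cuda accumulates gradients for the learned offsets and modulation masks, while modulated_deformable_col2im_cuda scatters the column gradients back onto the input image. The sketch below is a hedged illustration of how a caller might chain them; it is not repository code, the "d_" buffer names are assumptions, and d_grad_col is assumed to already contain the per-column gradients (e.g. the weight matrix applied to the output gradient).

/* Illustrative only: chain the two col2im routines declared above to obtain
 * gradients w.r.t. the offsets/masks and the input image. */
#include "dcn_v2_im2col_cuda_double.h"

void backward_example(cudaStream_t stream,
                      const double *d_grad_col,
                      const double *d_im, const double *d_offset, const double *d_mask,
                      double *d_grad_offset, double *d_grad_mask, double *d_grad_im,
                      int batch, int channels, int height_im, int width_im,
                      int height_col, int width_col,
                      int kernel_h, int kernel_w, int pad_h, int pad_w,
                      int stride_h, int stride_w, int dilation_h, int dilation_w,
                      int deformable_group)
{
    /* Gradients for the deformable offsets and modulation masks. */
    modulated_deformable_col2im_coord_cuda(stream,
        d_grad_col, d_im, d_offset, d_mask,
        batch, channels, height_im, width_im,
        height_col, width_col, kernel_h, kernel_w,
        pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group,
        d_grad_offset, d_grad_mask);

    /* Gradient w.r.t. the input image itself. */
    modulated_deformable_col2im_cuda(stream,
        d_grad_col, d_offset, d_mask,
        batch, channels, height_im, width_im,
        height_col, width_col, kernel_h, kernel_w,
        pad_h, pad_w, stride_h, stride_w,
        dilation_h, dilation_w, deformable_group,
        d_grad_im);
}
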
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2017 Microsoft
3 | * Licensed under The MIT License [see LICENSE for details]
4 | * \file deformable_psroi_pooling.cu
5 | * \brief
6 | * \author Yi Li, Guodong Zhang, Jifeng Dai
7 | */
8 | /***************** Adapted by Charles Shang *********************/
9 |
10 | #ifndef DCN_V2_PSROI_POOLING_CUDA
11 | #define DCN_V2_PSROI_POOLING_CUDA
12 |
13 | #ifdef __cplusplus
14 | extern "C"
15 | {
16 | #endif
17 |
18 | void DeformablePSROIPoolForward(cudaStream_t stream,
19 | const float *data,
20 | const float *bbox,
21 | const float *trans,
22 | float *out,
23 | float *top_count,
24 | const int batch,
25 | const int channels,
26 | const int height,
27 | const int width,
28 | const int num_bbox,
29 | const int channels_trans,
30 | const int no_trans,
31 | const float spatial_scale,
32 | const int output_dim,
33 | const int group_size,
34 | const int pooled_size,
35 | const int part_size,
36 | const int sample_per_part,
37 | const float trans_std);
38 |
39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
40 | const float *out_grad,
41 | const float *data,
42 | const float *bbox,
43 | const float *trans,
44 | const float *top_count,
45 | float *in_grad,
46 | float *trans_grad,
47 | const int batch,
48 | const int channels,
49 | const int height,
50 | const int width,
51 | const int num_bbox,
52 | const int channels_trans,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 |
66 | #endif
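
Note (editorial): this header exposes the deformable PS-ROI pooling operator: data is the feature map, bbox the regions of interest, trans the learned part offsets, and out/top_count receive the pooled output and the per-bin sample counts used by the backward pass. The following forward-call sketch is illustrative only; the tensor shapes, the 5-value ROI layout, and every numeric hyperparameter are assumptions based on the usual PS-ROI convention, not guarantees made by this header.

/* Illustrative only: one forward call of the deformable PS-ROI pooling kernel.
 * Assumed conventions (not guaranteed by this header):
 *   d_data:               [batch, channels, height, width] feature map (device)
 *   d_bbox:               [num_bbox, 5] ROIs as (batch_index, x1, y1, x2, y2)
 *   d_trans:              learned part offsets; ignored when no_trans is non-zero
 *   d_out / d_top_count:  [num_bbox, output_dim, pooled_size, pooled_size]
 * All "d_" pointer names are hypothetical. */
#include "dcn_v2_psroi_pooling_cuda.h"

void psroi_forward_example(cudaStream_t stream,
                           const float *d_data, const float *d_bbox, const float *d_trans,
                           float *d_out, float *d_top_count,
                           int batch, int channels, int height, int width,
                           int num_bbox, int channels_trans)
{
    const int   no_trans        = 0;            /* use the deformable (trans) branch */
    const float spatial_scale   = 1.0f / 16.0f; /* assumed feature-map stride        */
    const int   output_dim      = 256;          /* assumed pooled channel count      */
    const int   group_size      = 1;
    const int   pooled_size     = 7;
    const int   part_size       = 7;
    const int   sample_per_part = 4;
    const float trans_std       = 0.1f;

    DeformablePSROIPoolForward(stream,
        d_data, d_bbox, d_trans,
        d_out, d_top_count,
        batch, channels, height, width,
        num_bbox, channels_trans, no_trans,
        spatial_scale, output_dim, group_size,
        pooled_size, part_size, sample_per_part, trans_std);
}
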
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2017 Microsoft
3 | * Licensed under The MIT License [see LICENSE for details]
4 | * \file deformable_psroi_pooling.cu
5 | * \brief
6 | * \author Yi Li, Guodong Zhang, Jifeng Dai
7 | */
8 | /***************** Adapted by Charles Shang *********************/
9 |
10 | #ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE
11 | #define DCN_V2_PSROI_POOLING_CUDA_DOUBLE
12 |
13 | #ifdef __cplusplus
14 | extern "C"
15 | {
16 | #endif
17 |
18 | void DeformablePSROIPoolForward(cudaStream_t stream,
19 | const double *data,
20 | const double *bbox,
21 | const double *trans,
22 | double *out,
23 | double *top_count,
24 | const int batch,
25 | const int channels,
26 | const int height,
27 | const int width,
28 | const int num_bbox,
29 | const int channels_trans,
30 | const int no_trans,
31 | const double spatial_scale,
32 | const int output_dim,
33 | const int group_size,
34 | const int pooled_size,
35 | const int part_size,
36 | const int sample_per_part,
37 | const double trans_std);
38 |
39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
40 | const double *out_grad,
41 | const double *data,
42 | const double *bbox,
43 | const double *trans,
44 | const double *top_count,
45 | double *in_grad,
46 | double *trans_grad,
47 | const int batch,
48 | const int channels,
49 | const int height,
50 | const int width,
51 | const int num_bbox,
52 | const int channels_trans,
53 | const int no_trans,
54 | const double spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const double trans_std);
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 |
66 | #endif
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/dcn_v2.c:
--------------------------------------------------------------------------------
1 | #include