├── .gitignore ├── .travis.yml ├── LICENSE ├── NOTICE ├── README.md ├── data └── .gitignore ├── exp └── .gitignore ├── experiments ├── ctdet_coco_dla_1x.sh ├── ctdet_coco_dla_2x.sh ├── ctdet_coco_hg.sh ├── ctdet_coco_resdcn101.sh ├── ctdet_coco_resdcn18.sh ├── ctdet_pascal_dla_384.sh ├── ctdet_pascal_dla_512.sh ├── ctdet_pascal_resdcn101_384.sh ├── ctdet_pascal_resdcn101_512.sh ├── ctdet_pascal_resdcn18_384.sh ├── ctdet_pascal_resdcn18_512.sh ├── ddd_3dop.sh ├── ddd_sub.sh ├── exdet_coco_dla.sh ├── exdet_coco_hg.sh ├── multi_pose_dla_1x.sh ├── multi_pose_dla_3x.sh ├── multi_pose_hg_1x.sh └── multi_pose_hg_3x.sh ├── images ├── 16004479832_a748d55f21_k.jpg ├── 17790319373_bd19b24cfc_k.jpg ├── 18124840932_e42b3e377c_k.jpg ├── 19064748793_bb942deea1_k.jpg ├── 24274813513_0cfd2ce6d0_k.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── 33887522274_eebd074106_k.jpg ├── 34501842524_3c858b3080_k.jpg └── NOTICE ├── models └── .gitignore ├── readme ├── DATA.md ├── DEVELOP.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── det1.png ├── det2.png ├── fig2.png ├── pose1.png ├── pose2.png └── pose3.png ├── requirements.txt └── src ├── _init_paths.py ├── demo.py ├── lib ├── datasets │ ├── dataset │ │ ├── coco.py │ │ ├── coco_hp.py │ │ ├── kitti.py │ │ └── pascal.py │ ├── dataset_factory.py │ └── sample │ │ ├── ctdet.py │ │ ├── ddd.py │ │ ├── exdet.py │ │ └── multi_pose.py ├── detectors │ ├── base_detector.py │ ├── ctdet.py │ ├── ddd.py │ ├── detector_factory.py │ ├── exdet.py │ └── multi_pose.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── DCNv2 │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── build_double.py │ │ │ ├── dcn_v2.py │ │ │ ├── dcn_v2_func.py │ │ │ ├── make.sh │ │ │ ├── src │ │ │ │ ├── cuda │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda_double.cu │ │ │ │ │ └── dcn_v2_psroi_pooling_cuda_double.h │ │ │ │ ├── dcn_v2.c │ │ │ │ ├── dcn_v2.h │ │ │ │ ├── dcn_v2_cuda.c │ │ │ │ ├── dcn_v2_cuda.h │ │ │ │ ├── dcn_v2_cuda_double.c │ │ │ │ ├── dcn_v2_cuda_double.h │ │ │ │ ├── dcn_v2_double.c │ │ │ │ └── dcn_v2_double.h │ │ │ └── test.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ └── resnet_dcn.py │ ├── scatter_gather.py │ └── utils.py ├── opts.py ├── trains │ ├── base_trainer.py │ ├── ctdet.py │ ├── ddd.py │ ├── exdet.py │ ├── multi_pose.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── ddd_utils.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── main.py ├── test.py └── tools ├── _init_paths.py ├── calc_coco_overlap.py ├── convert_hourglass_weight.py ├── convert_kitti_to_coco.py ├── eval_coco.py ├── eval_coco_hp.py ├── get_kitti.sh ├── get_pascal_voc.sh ├── kitti_eval ├── README.md ├── evaluate_object_3d.cpp ├── evaluate_object_3d_offline ├── evaluate_object_3d_offline.cpp └── mail.h ├── merge_pascal_json.py ├── reval.py ├── vis_pred.py └── voc_eval_lib ├── LICENSE ├── Makefile ├── __init__.py ├── datasets ├── __init__.py ├── bbox.pyx ├── ds_utils.py ├── imdb.py ├── pascal_voc.py └── voc_eval.py ├── model ├── __init__.py ├── bbox_transform.py ├── 
config.py ├── nms_wrapper.py └── test.py ├── nms ├── .gitignore ├── __init__.py ├── cpu_nms.c ├── cpu_nms.pyx ├── gpu_nms.cpp ├── gpu_nms.hpp ├── gpu_nms.pyx ├── nms_kernel.cu └── py_cpu_nms.py ├── setup.py └── utils ├── .gitignore ├── __init__.py ├── bbox.pyx ├── blob.py ├── timer.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | legacy/* 2 | .DS_Store 3 | debug/* 4 | *.DS_Store 5 | *.json 6 | *.mat 7 | src/.vscode/* 8 | preds/* 9 | *.h5 10 | *.pth 11 | *.checkpoint 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # dotenv 93 | .env 94 | 95 | # virtualenv 96 | .venv 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | dist: xenial # ubuntu-16.04 3 | language: python 4 | cache: pip 5 | python: 6 | - 3.6 7 | - 3.7 8 | install: 9 | - pip install flake8 10 | - pip install -r requirements.txt 11 | before_script: 12 | # stop the build if there are Python syntax errors or undefined names 13 | - flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics 14 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 15 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 16 | script: 17 | - true # add other tests here 18 | notifications: 19 | on_success: change 20 | on_failure: change # `always` will be the setting once code changes slow down 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Xingyi Zhou 4 | All rights reserved. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_dla_1x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_dla_1x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 4 | # test 5 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_dla_2x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 230 lr_step 180,210 4 | # or use the following command if your have coco_s2_dla_1x trained 5 | # python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/ctdet/coco_dla_1x/model_90.pth --resume 6 | # test 7 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume 8 | # flip test 9 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test 10 | # multi scale test 11 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 12 | cd .. 
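The `--batch_size 128 --master_batch 9` pair above controls how the global batch is divided over the 8 GPUs: the master GPU keeps a smaller share (it also gathers the other GPUs' outputs, so it needs extra memory headroom) and the remainder is split evenly across the rest. A minimal sketch of that split, as an illustration only (the repository computes its own chunk sizes in `src/lib/opts.py` / `src/lib/models/data_parallel.py`):

~~~
# Sketch: split a global batch across GPUs with a smaller share on the master GPU.
# Illustrative approximation, not the repository's exact implementation.

def chunk_sizes(batch_size, master_batch, num_gpus):
    rest = batch_size - master_batch              # images left for the non-master GPUs
    per_gpu = rest // (num_gpus - 1)              # even share per remaining GPU
    sizes = [master_batch] + [per_gpu] * (num_gpus - 1)
    sizes[-1] += rest - per_gpu * (num_gpus - 1)  # last GPU absorbs any remainder
    return sizes

print(chunk_sizes(128, 9, 8))  # -> [9, 17, 17, 17, 17, 17, 17, 17]
~~~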
13 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_hg.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4 4 | # test 5 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. -------------------------------------------------------------------------------- /experiments/ctdet_coco_resdcn101.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_resdcn101 --arch resdcn_101 --batch_size 96 --master_batch 5 --lr 3.75e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 4 | # test 5 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_resdcn18.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --batch_size 114 --master_batch 18 --lr 5e-4 --gpus 0,1,2,3 --num_workers 16 4 | # test 5 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_dla_384.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_dla_384 --dataset pascal --num_epochs 70 --lr_step 45,60 4 | # test 5 | python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_dla_512.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume --flip_test 8 | cd .. 
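The Pascal experiments above train for 70 epochs with `--lr_step 45,60`, i.e. the learning rate is dropped in steps at those epochs. A small sketch of such a step schedule, assuming a 10x drop at each milestone (verify the exact factor in `src/main.py` before relying on it):

~~~
# Sketch: step learning-rate schedule driven by --lr and --lr_step.
# Assumes each milestone multiplies the rate by 0.1.

def lr_at_epoch(base_lr, lr_step, epoch):
    drops = sum(1 for e in lr_step if epoch >= e)  # milestones already passed
    return base_lr * (0.1 ** drops)

base_lr, lr_step = 1.25e-4, [45, 60]  # 1.25e-4 is the default reported in GETTING_STARTED.md
for epoch in (1, 45, 60, 70):
    print(epoch, lr_at_epoch(base_lr, lr_step, epoch))
~~~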
9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn101_384.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn101_512.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1,2,3 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn18_384.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --num_epochs 70 --lr_step 45,60 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn18_512.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ddd_3dop.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --resume 6 | cd .. 7 | -------------------------------------------------------------------------------- /experiments/ddd_sub.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --resume 6 | cd .. 
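The `ddd` test commands above end by writing detections as plain-text KITTI result files, which is what `tools/kitti_eval/evaluate_object_3d_offline` consumes (see `save_results` in `src/lib/datasets/dataset/kitti.py` further down in this listing). A hedged sketch of emitting one such line; the field order follows the standard KITTI label layout, and the per-detection value layout used here is an assumption for illustration:

~~~
# Sketch: write one detection as a KITTI result line
# (type, truncated, occluded, alpha, 2D bbox, 3D dims, 3D location, rotation_y, score).

det = {
    'type': 'Car', 'alpha': -0.20,
    'bbox': [712.4, 143.0, 810.7, 307.9],  # x1, y1, x2, y2 in pixels
    'dim': [1.89, 0.48, 1.20],             # height, width, length in meters
    'loc': [1.84, 1.47, 8.41],             # x, y, z in camera coordinates
    'rot_y': 0.01, 'score': 0.98,
}

fields = [0.0, 0, det['alpha'], *det['bbox'], *det['dim'], *det['loc'], det['rot_y'], det['score']]
line = det['type'] + ''.join(' {:.2f}'.format(v) for v in fields)
print(line)  # -> "Car 0.00 0 -0.20 712.40 143.00 ..."
~~~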
7 | -------------------------------------------------------------------------------- /experiments/exdet_coco_dla.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py exdet --exp_id coco_dla --batch_size 64 --master_batch 1 --lr 2.5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 8 4 | # test 5 | python test.py exdet --exp_id coco_dla --keep_res --resume 6 | # flip test 7 | python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/exdet_coco_hg.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py exdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4 4 | # test 5 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume 6 | # flip test 7 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/multi_pose_dla_1x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id dla_1x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16 4 | # test 5 | python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume 6 | # flip test 7 | python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/multi_pose_dla_3x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 320 lr_step 270,300 4 | # or use the following command if your have dla_1x trained 5 | # python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/multi_pose/dla_1x/model_90.pth --resume 6 | # test 7 | python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume 8 | # flip test 9 | python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume --flip_test 10 | cd .. 
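The pose runs above warm-start from the detection checkpoint `ctdet_coco_dla_2x.pth` via `--load_model`, even though the `multi_pose` model has extra output heads, so loading has to copy only the parameters whose names and shapes match and leave the new heads at their fresh initialization. A minimal sketch of that kind of partial loading, assuming plain `state_dict`-style checkpoints (the repository's own logic lives in `src/lib/models/model.py`):

~~~
import torch

# Sketch: copy only weights whose name and shape match the target model, so a
# detection checkpoint can warm-start a pose network that has additional heads.
# Illustration of the idea, not the repository's exact load_model().

def partial_load(model, checkpoint_path):
    state = torch.load(checkpoint_path, map_location='cpu')
    if isinstance(state, dict) and 'state_dict' in state:
        state = state['state_dict']                 # tolerate wrapped checkpoints
    own = model.state_dict()
    matched = {k: v for k, v in state.items()
               if k in own and own[k].shape == v.shape}
    own.update(matched)
    model.load_state_dict(own)
    print('loaded {} tensors, skipped {}'.format(len(matched), len(state) - len(matched)))
    return model
~~~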
11 | -------------------------------------------------------------------------------- /experiments/multi_pose_hg_1x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 50 --lr_step 40 4 | # test 5 | python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume 6 | # flip test 7 | python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/multi_pose_hg_3x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 -load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130 4 | # or use the following command if your have dla_1x trained 5 | # python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130 --load_model ../exp/multi_pose/hg_1x/model_40.pth --resume 6 | # test 7 | python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume 8 | # flip test 9 | python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test 10 | cd .. 11 | -------------------------------------------------------------------------------- /images/16004479832_a748d55f21_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/16004479832_a748d55f21_k.jpg -------------------------------------------------------------------------------- /images/17790319373_bd19b24cfc_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/17790319373_bd19b24cfc_k.jpg -------------------------------------------------------------------------------- /images/18124840932_e42b3e377c_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/18124840932_e42b3e377c_k.jpg -------------------------------------------------------------------------------- /images/19064748793_bb942deea1_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/19064748793_bb942deea1_k.jpg -------------------------------------------------------------------------------- /images/24274813513_0cfd2ce6d0_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/24274813513_0cfd2ce6d0_k.jpg -------------------------------------------------------------------------------- /images/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/33823288584_1d21cf0a26_k.jpg -------------------------------------------------------------------------------- /images/33887522274_eebd074106_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/33887522274_eebd074106_k.jpg -------------------------------------------------------------------------------- /images/34501842524_3c858b3080_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/34501842524_3c858b3080_k.jpg -------------------------------------------------------------------------------- /images/NOTICE: -------------------------------------------------------------------------------- 1 | The demo images are licensed as United States government work: 2 | https://www.usa.gov/government-works 3 | 4 | The image files were obtained on Jan 13, 2018 from the following 5 | URLs. 6 | 7 | 16004479832_a748d55f21_k.jpg 8 | https://www.flickr.com/photos/archivesnews/16004479832 9 | 10 | 18124840932_e42b3e377c_k.jpg 11 | https://www.flickr.com/photos/usnavy/18124840932 12 | 13 | 33887522274_eebd074106_k.jpg 14 | https://www.flickr.com/photos/usaid_pakistan/33887522274 15 | 16 | 15673749081_767a7fa63a_k.jpg 17 | https://www.flickr.com/photos/usnavy/15673749081 18 | 19 | 34501842524_3c858b3080_k.jpg 20 | https://www.flickr.com/photos/departmentofenergy/34501842524 21 | 22 | 24274813513_0cfd2ce6d0_k.jpg 23 | https://www.flickr.com/photos/dhsgov/24274813513 24 | 25 | 19064748793_bb942deea1_k.jpg 26 | https://www.flickr.com/photos/statephotos/19064748793 27 | 28 | 33823288584_1d21cf0a26_k.jpg 29 | https://www.flickr.com/photos/cbpphotos/33823288584 30 | 31 | 17790319373_bd19b24cfc_k.jpg 32 | https://www.flickr.com/photos/secdef/17790319373 33 | -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset. 4 | 5 | 6 | ### COCO 7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 
9 | - Place the data (or create symlinks) to make the data folder like: 10 | 11 | ~~~ 12 | ${CenterNet_ROOT} 13 | |-- data 14 | `-- |-- coco 15 | `-- |-- annotations 16 | | |-- instances_train2017.json 17 | | |-- instances_val2017.json 18 | | |-- person_keypoints_train2017.json 19 | | |-- person_keypoints_val2017.json 20 | | |-- image_info_test-dev2017.json 21 | |---|-- train2017 22 | |---|-- val2017 23 | `---|-- test2017 24 | ~~~ 25 | 26 | - [Optional] If you want to train ExtremeNet, generate extreme point annotation from segmentation: 27 | 28 | ~~~ 29 | cd $CenterNet_ROOT/tools/ 30 | python gen_coco_extreme_points.py 31 | ~~~ 32 | It generates `instances_extreme_train2017.json` and `instances_extreme_val2017.json` in `data/coco/annotations/`. 33 | 34 | ### Pascal VOC 35 | 36 | - Run 37 | 38 | ~~~ 39 | cd $CenterNet_ROOT/tools/ 40 | bash get_pascal_voc.sh 41 | ~~~ 42 | - The above script includes: 43 | - Download, unzip, and move Pascal VOC images from the [VOC website](http://host.robots.ox.ac.uk/pascal/VOC/). 44 | - [Download](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) Pascal VOC annotation in COCO format (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/detectron/datasets/data)). 45 | - Combine train/val 2007/2012 annotation files into a single json. 46 | 47 | 48 | - Move the created `voc` folder to `data` (or create symlinks) to make the data folder like: 49 | 50 | ~~~ 51 | ${CenterNet_ROOT} 52 | |-- data 53 | `-- |-- voc 54 | `-- |-- annotations 55 | | |-- pascal_trainval0712.json 56 | | |-- pascal_test2017.json 57 | |-- images 58 | | |-- 000001.jpg 59 | | ...... 60 | `-- VOCdevkit 61 | 62 | ~~~ 63 | The `VOCdevkit` folder is needed to run the evaluation script from [faster rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/tools/reval.py). 64 | 65 | ### KITTI 66 | 67 | - Download [images](http://www.cvlibs.net/download.php?file=data_object_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_object_label_2.zip), and [calibrations](http://www.cvlibs.net/download.php?file=data_object_calib.zip) from [KITTI website](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) and unzip. 68 | 69 | - Download the train-val split of [3DOP](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and [SubCNN](https://github.com/tanshen/SubCNN/tree/master/fast-rcnn/data/KITTI) and place the data as below 70 | 71 | ~~~ 72 | ${CenterNet_ROOT} 73 | |-- data 74 | `-- |-- kitti 75 | `-- |-- training 76 | | |-- image_2 77 | | |-- label_2 78 | | |-- calib 79 | |-- ImageSets_3dop 80 | | |-- test.txt 81 | | |-- train.txt 82 | | |-- val.txt 83 | | |-- trainval.txt 84 | `-- ImageSets_subcnn 85 | |-- test.txt 86 | |-- train.txt 87 | |-- val.txt 88 | |-- trainval.txt 89 | ~~~ 90 | 91 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. You can set `DEBUG=True` in `line 5` to visualize the annotation. 
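For reference, each line of a KITTI `label_2` file describes one object as `type truncated occluded alpha bbox(4) dimensions(3) location(3) rotation_y`, and the conversion script turns these into COCO-style annotation dicts. A rough sketch of parsing one line; the exact fields emitted by `convert_kitti_to_coco.py` are not shown here, so the output keys and category ids below are illustrative (the 1/2/3 ids match the `Pedestrian`/`Car`/`Cyclist` ordering used by `src/lib/datasets/dataset/kitti.py`):

~~~
# Sketch: parse one KITTI label line into a COCO-style annotation dict.
# Output keys beyond 'bbox'/'category_id' are illustrative assumptions.

CAT_IDS = {'Pedestrian': 1, 'Car': 2, 'Cyclist': 3}

def kitti_line_to_coco(line, image_id, ann_id):
    f = line.strip().split(' ')
    if f[0] not in CAT_IDS:
        return None                           # e.g. skip 'DontCare' regions
    x1, y1, x2, y2 = map(float, f[4:8])       # 2D box in pixels
    return {
        'id': ann_id,
        'image_id': image_id,
        'category_id': CAT_IDS[f[0]],
        'bbox': [x1, y1, x2 - x1, y2 - y1],   # COCO uses [x, y, w, h]
        'alpha': float(f[3]),                 # observation angle
        'dim': [float(v) for v in f[8:11]],   # height, width, length (meters)
        'location': [float(v) for v in f[11:14]],
        'rotation_y': float(f[14]),
    }
~~~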
92 | 93 | - Link image folder 94 | 95 | ~~~ 96 | cd ${CenterNet_ROOT}/data/kitti/ 97 | mkdir images 98 | ln -s training/image_2 images/trainval 99 | ~~~ 100 | 101 | - The data structure should look like: 102 | 103 | ~~~ 104 | ${CenterNet_ROOT} 105 | |-- data 106 | `-- |-- kitti 107 | `-- |-- annotations 108 | | |-- kitti_3dop_train.json 109 | | |-- kitti_3dop_val.json 110 | | |-- kitti_subcnn_train.json 111 | | |-- kitti_subcnn_val.json 112 | `-- images 113 | |-- trainval 114 | |-- test 115 | ~~~ 116 | -------------------------------------------------------------------------------- /readme/DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Develop 2 | 3 | This document provides tutorials for developing CenterNet. `src/lib/opts.py` lists a few more options that the current version supports. 4 | 5 | ## New dataset 6 | Basically there are three steps: 7 | 8 | - Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example of converting KITTI format to COCO format. 9 | - Create a dataset initialization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information and the annotation path. 10 | - Import your dataset at `src/lib/datasets/dataset_factory`. 11 | 12 | ## New task 13 | 14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/trains/`, and `src/lib/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively. 15 | 16 | ## New architecture 17 | 18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list with one element per stage (a single-stage model should return a list containing a single element). Each element of the list is a dict containing the same keys as `heads`. 19 | - Add your model in `model_factory` of `src/lib/models/model.py`. 20 | -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 8 | 9 | ### COCO 10 | 11 | To evaluate COCO object detection with DLA, 12 | run 13 | 14 | ~~~ 15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth 16 | ~~~ 17 | 18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution. Without `--keep_res` it will resize the images to `512 x 512`. You can add `--flip_test` and `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command for flip test and multi-scale test, respectively. The expected APs are `39.2` and `41.7`, respectively.
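For intuition on the flip test: `pre_process` in `src/lib/detectors/base_detector.py` (shown later in this listing) stacks a horizontally mirrored copy of the input into the batch, and the per-task detector then fuses the two predictions before decoding. A toy sketch of the heatmap half of that fusion, assuming simple averaging and a model that returns a dict with an `hm` output of shape `[N, C, H, W]` (both assumptions; the real fusion lives in the detectors under `src/lib/detectors/`):

~~~
import torch

# Toy sketch of flip-test fusion for a class heatmap: average the prediction on
# the original image with the mirrored-back prediction on the flipped image.

def flip_test_heatmap(model, images):
    with torch.no_grad():
        hm = torch.sigmoid(model(images)['hm'])
        hm_flip = torch.sigmoid(model(torch.flip(images, dims=[3]))['hm'])
        hm_flip = torch.flip(hm_flip, dims=[3])   # mirror back to the original frame
    return 0.5 * (hm + hm_flip)
~~~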
19 | 20 | To test with hourglass net, run 21 | 22 | ~~~ 23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth 24 | ~~~ 25 | 26 | Similarly, to evaluate human pose estimation, run the following command for DLA 27 | 28 | ~~~ 29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 30 | ~~~ 31 | 32 | and the following for hourglass 33 | 34 | ~~~ 35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_hg_3x.pth --flip_test 36 | ~~~ 37 | 38 | The expected results can be found in the model zoo. 39 | 40 | ### Pascal 41 | 42 | To evaluate object detection on Pascal VOC (test2007), run 43 | 44 | ~~~ 45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test 46 | ~~~ 47 | 48 | Note that we fix the resolution during testing. 49 | You can switch to other network architectures and resolutions by specifying `--arch` and `--input_res 512`. 50 | 51 | ### KITTI 52 | 53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)): 54 | 55 | ~~~ 56 | cd CenterNet_ROOT/src/tools/kitti_eval 57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3 58 | ~~~ 59 | 60 | Then run the evaluation with a pretrained model: 61 | 62 | ~~~ 63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth 64 | ~~~ 65 | 66 | to evaluate the 3DOP split. For the SubCNN split, change `--kitti_split` to `subcnn` and load the corresponding models. 67 | Note that test-time augmentation is not trivially applicable to 3D orientation. 68 | 69 | ## Training 70 | 71 | We have packed all the training scripts in the [experiments](../experiments) folder. 72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md). 73 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate with the batch size. 75 | For example, to train COCO object detection with DLA on 2 GPUs, run 76 | 77 | ~~~ 78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1 79 | ~~~ 80 | 81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs). 82 | By default, PyTorch evenly splits the total batch size across the GPUs. 83 | `--master_batch` allows using a different batch size on the master GPU, which usually costs more memory than the other GPUs. 84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine. 85 | 86 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will load the latest model with the same `exp_id`. 87 | 88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1JMbHgN4uLkP9MAyJU5EeHrgxwe101hwO) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)). 89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
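As a quick helper for the linear learning-rate rule mentioned above, the rate can simply be scaled in proportion to the batch size relative to the documented `1.25e-4` at batch size `32`; a minimal sketch:

~~~
# Sketch: linear learning-rate scaling relative to the documented reference
# of lr = 1.25e-4 at batch size 32.

def scaled_lr(batch_size, base_lr=1.25e-4, base_batch=32):
    return base_lr * batch_size / base_batch

for bs in (32, 64, 128):
    print(bs, scaled_lr(bs))  # 128 -> 5e-4, matching the 8-GPU COCO DLA scripts
~~~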
90 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v0.4.1. NVIDIA GPUs are needed for both training and testing. 5 | After installing Anaconda: 6 | 7 | 0. [Optional but recommended] Create a new conda environment. 8 | 9 | ~~~ 10 | conda create --name CenterNet python=3.6 11 | ~~~ 12 | And activate the environment. 13 | 14 | ~~~ 15 | conda activate CenterNet 16 | ~~~ 17 | 18 | 1. Install PyTorch 0.4.1: 19 | 20 | ~~~ 21 | conda install pytorch=0.4.1 torchvision -c pytorch 22 | ~~~ 23 | 24 | Then disable cuDNN batch normalization (due to [this issue](https://github.com/xingyizhou/pytorch-pose-hg-3d/issues/16)). 25 | 26 | ~~~ 27 | # PYTORCH=/path/to/pytorch # usually ~/anaconda3/envs/CenterNet/lib/python3.6/site-packages/ 28 | # for pytorch v0.4.0 29 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 30 | # for pytorch v0.4.1 31 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 32 | ~~~ 33 | 34 | For other PyTorch versions, you can manually open `torch/nn/functional.py`, find the line with `torch.batch_norm`, and replace `torch.backends.cudnn.enabled` with `False`. We observed slightly worse training results without doing so. 35 | 36 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 37 | 38 | ~~~ 39 | # COCOAPI=/path/to/clone/cocoapi 40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 41 | cd $COCOAPI/PythonAPI 42 | make 43 | python setup.py install --user 44 | ~~~ 45 | 46 | 3. Clone this repo: 47 | 48 | ~~~ 49 | CenterNet_ROOT=/path/to/clone/CenterNet 50 | git clone https://github.com/xingyizhou/CenterNet $CenterNet_ROOT 51 | ~~~ 52 | 53 | 54 | 4. Install the requirements 55 | 56 | ~~~ 57 | pip install -r requirements.txt 58 | ~~~ 59 | 60 | 61 | 5. Compile the deformable convolution ops (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)). 62 | 63 | ~~~ 64 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2 65 | ./make.sh 66 | ~~~ 67 | 6. [Optional] Compile NMS if you want to use multi-scale testing or to test ExtremeNet. 68 | 69 | ~~~ 70 | cd $CenterNet_ROOT/src/lib/external 71 | make 72 | ~~~ 73 | 74 | 7. Download pretrained models for [detection]() or [pose estimation]() and move them to `$CenterNet_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md).
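Once the steps above are done, a quick sanity check of the environment can save debugging time later. A hedged sketch; the DCNv2 import path is an assumption based on the directory layout above and may need adjusting for your `PYTHONPATH`:

~~~
import torch

# Quick environment sanity check for the setup described above.
print('PyTorch:', torch.__version__)          # expected: 0.4.1
print('CUDA available:', torch.cuda.is_available())

try:
    # Assumed import path for the compiled deformable convolution extension
    # (run from CenterNet_ROOT/src with src/lib on sys.path, as _init_paths.py does).
    from models.networks.DCNv2.dcn_v2 import DCN
    print('DCNv2 import: ok')
except ImportError as e:
    print('DCNv2 import failed (did ./make.sh succeed?):', e)
~~~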
75 | -------------------------------------------------------------------------------- /readme/det1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/det1.png -------------------------------------------------------------------------------- /readme/det2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/det2.png -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/fig2.png -------------------------------------------------------------------------------- /readme/pose1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose1.png -------------------------------------------------------------------------------- /readme/pose2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose2.png -------------------------------------------------------------------------------- /readme/pose3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import cv2 9 | 10 | from opts import opts 11 | from detectors.detector_factory import detector_factory 12 | 13 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 14 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 15 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 16 | 17 | def demo(opt): 18 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 19 | opt.debug = max(opt.debug, 1) 20 | Detector = detector_factory[opt.task] 21 | detector = Detector(opt) 22 | 23 | if opt.demo == 'webcam' or \ 24 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 25 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 26 | detector.pause = False 27 | while True: 28 | _, img = cam.read() 29 | 
cv2.imshow('input', img) 30 | ret = detector.run(img) 31 | time_str = '' 32 | for stat in time_stats: 33 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 34 | print(time_str) 35 | if cv2.waitKey(1) == 27: 36 | return # esc to quit 37 | else: 38 | if os.path.isdir(opt.demo): 39 | image_names = [] 40 | ls = os.listdir(opt.demo) 41 | for file_name in sorted(ls): 42 | ext = file_name[file_name.rfind('.') + 1:].lower() 43 | if ext in image_ext: 44 | image_names.append(os.path.join(opt.demo, file_name)) 45 | else: 46 | image_names = [opt.demo] 47 | 48 | for (image_name) in image_names: 49 | ret = detector.run(image_name) 50 | time_str = '' 51 | for stat in time_stats: 52 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 53 | print(time_str) 54 | if __name__ == '__main__': 55 | opt = opts().init() 56 | demo(opt) 57 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class COCO(data.Dataset): 14 | num_classes = 80 15 | default_resolution = [512, 512] 16 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 17 | dtype=np.float32).reshape(1, 1, 3) 18 | std = np.array([0.28863828, 0.27408164, 0.27809835], 19 | dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(COCO, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'coco') 24 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 25 | if split == 'test': 26 | self.annot_path = os.path.join( 27 | self.data_dir, 'annotations', 28 | 'image_info_test-dev2017.json').format(split) 29 | else: 30 | if opt.task == 'exdet': 31 | self.annot_path = os.path.join( 32 | self.data_dir, 'annotations', 33 | 'instances_extreme_{}2017.json').format(split) 34 | else: 35 | self.annot_path = os.path.join( 36 | self.data_dir, 'annotations', 37 | 'instances_{}2017.json').format(split) 38 | self.max_objs = 128 39 | self.class_name = [ 40 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 41 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 42 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 43 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 44 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 45 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 46 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 47 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 48 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 49 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 50 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 51 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 52 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 53 | self._valid_ids = [ 54 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 55 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 56 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 57 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 58 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59 | 58, 59, 60, 61, 62, 63, 
64, 65, 67, 70, 60 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 61 | 82, 84, 85, 86, 87, 88, 89, 90] 62 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 63 | self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \ 64 | for v in range(1, self.num_classes + 1)] 65 | self._data_rng = np.random.RandomState(123) 66 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 67 | dtype=np.float32) 68 | self._eig_vec = np.array([ 69 | [-0.58752847, -0.69563484, 0.41340352], 70 | [-0.5832747, 0.00994535, -0.81221408], 71 | [-0.56089297, 0.71832671, 0.41158938] 72 | ], dtype=np.float32) 73 | # self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3) 74 | # self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3) 75 | 76 | self.split = split 77 | self.opt = opt 78 | 79 | print('==> initializing coco 2017 {} data.'.format(split)) 80 | self.coco = coco.COCO(self.annot_path) 81 | self.images = self.coco.getImgIds() 82 | self.num_samples = len(self.images) 83 | 84 | print('Loaded {} {} samples'.format(split, self.num_samples)) 85 | 86 | def _to_float(self, x): 87 | return float("{:.2f}".format(x)) 88 | 89 | def convert_eval_format(self, all_bboxes): 90 | # import pdb; pdb.set_trace() 91 | detections = [] 92 | for image_id in all_bboxes: 93 | for cls_ind in all_bboxes[image_id]: 94 | category_id = self._valid_ids[cls_ind - 1] 95 | for bbox in all_bboxes[image_id][cls_ind]: 96 | bbox[2] -= bbox[0] 97 | bbox[3] -= bbox[1] 98 | score = bbox[4] 99 | bbox_out = list(map(self._to_float, bbox[0:4])) 100 | 101 | detection = { 102 | "image_id": int(image_id), 103 | "category_id": int(category_id), 104 | "bbox": bbox_out, 105 | "score": float("{:.2f}".format(score)) 106 | } 107 | if len(bbox) > 5: 108 | extreme_points = list(map(self._to_float, bbox[5:13])) 109 | detection["extreme_points"] = extreme_points 110 | detections.append(detection) 111 | return detections 112 | 113 | def __len__(self): 114 | return self.num_samples 115 | 116 | def save_results(self, results, save_dir): 117 | json.dump(self.convert_eval_format(results), 118 | open('{}/results.json'.format(save_dir), 'w')) 119 | 120 | def run_eval(self, results, save_dir): 121 | # result_json = os.path.join(save_dir, "results.json") 122 | # detections = self.convert_eval_format(results) 123 | # json.dump(detections, open(result_json, "w")) 124 | self.save_results(results, save_dir) 125 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 126 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 127 | coco_eval.evaluate() 128 | coco_eval.accumulate() 129 | coco_eval.summarize() 130 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class COCOHP(data.Dataset): 14 | num_classes = 1 15 | num_joints = 17 16 | default_resolution = [512, 512] 17 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 18 | dtype=np.float32).reshape(1, 1, 3) 19 | std = np.array([0.28863828, 0.27408164, 0.27809835], 20 | dtype=np.float32).reshape(1, 1, 3) 21 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 22 | [11, 12], [13, 14], [15, 16]] 23 | def 
__init__(self, opt, split): 24 | super(COCOHP, self).__init__() 25 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 26 | [4, 6], [3, 5], [5, 6], 27 | [5, 7], [7, 9], [6, 8], [8, 10], 28 | [6, 12], [5, 11], [11, 12], 29 | [12, 14], [14, 16], [11, 13], [13, 15]] 30 | 31 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 32 | self.data_dir = os.path.join(opt.data_dir, 'coco') 33 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 34 | if split == 'test': 35 | self.annot_path = os.path.join( 36 | self.data_dir, 'annotations', 37 | 'image_info_test-dev2017.json').format(split) 38 | else: 39 | self.annot_path = os.path.join( 40 | self.data_dir, 'annotations', 41 | 'person_keypoints_{}2017.json').format(split) 42 | self.max_objs = 32 43 | self._data_rng = np.random.RandomState(123) 44 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 45 | dtype=np.float32) 46 | self._eig_vec = np.array([ 47 | [-0.58752847, -0.69563484, 0.41340352], 48 | [-0.5832747, 0.00994535, -0.81221408], 49 | [-0.56089297, 0.71832671, 0.41158938] 50 | ], dtype=np.float32) 51 | self.split = split 52 | self.opt = opt 53 | 54 | print('==> initializing coco 2017 {} data.'.format(split)) 55 | self.coco = coco.COCO(self.annot_path) 56 | image_ids = self.coco.getImgIds() 57 | 58 | if split == 'train': 59 | self.images = [] 60 | for img_id in image_ids: 61 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 62 | if len(idxs) > 0: 63 | self.images.append(img_id) 64 | else: 65 | self.images = image_ids 66 | self.num_samples = len(self.images) 67 | print('Loaded {} {} samples'.format(split, self.num_samples)) 68 | 69 | def _to_float(self, x): 70 | return float("{:.2f}".format(x)) 71 | 72 | def convert_eval_format(self, all_bboxes): 73 | # import pdb; pdb.set_trace() 74 | detections = [] 75 | for image_id in all_bboxes: 76 | for cls_ind in all_bboxes[image_id]: 77 | category_id = 1 78 | for dets in all_bboxes[image_id][cls_ind]: 79 | bbox = dets[:4] 80 | bbox[2] -= bbox[0] 81 | bbox[3] -= bbox[1] 82 | score = dets[4] 83 | bbox_out = list(map(self._to_float, bbox)) 84 | keypoints = np.concatenate([ 85 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2), 86 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 87 | keypoints = list(map(self._to_float, keypoints)) 88 | 89 | detection = { 90 | "image_id": int(image_id), 91 | "category_id": int(category_id), 92 | "bbox": bbox_out, 93 | "score": float("{:.2f}".format(score)), 94 | "keypoints": keypoints 95 | } 96 | detections.append(detection) 97 | return detections 98 | 99 | def __len__(self): 100 | return self.num_samples 101 | 102 | def save_results(self, results, save_dir): 103 | json.dump(self.convert_eval_format(results), 104 | open('{}/results.json'.format(save_dir), 'w')) 105 | 106 | 107 | def run_eval(self, results, save_dir): 108 | # result_json = os.path.join(opt.save_dir, "results.json") 109 | # detections = convert_eval_format(all_boxes) 110 | # json.dump(detections, open(result_json, "w")) 111 | self.save_results(results, save_dir) 112 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 113 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 114 | coco_eval.evaluate() 115 | coco_eval.accumulate() 116 | coco_eval.summarize() 117 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 118 | coco_eval.evaluate() 119 | coco_eval.accumulate() 120 | coco_eval.summarize() -------------------------------------------------------------------------------- /src/lib/datasets/dataset/kitti.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import pycocotools.coco as coco 7 | import numpy as np 8 | import torch 9 | import json 10 | import cv2 11 | import os 12 | import math 13 | 14 | import torch.utils.data as data 15 | 16 | 17 | class KITTI(data.Dataset): 18 | num_classes = 3 19 | default_resolution = [384, 1280] 20 | mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3) 21 | std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3) 22 | 23 | def __init__(self, opt, split): 24 | super(KITTI, self).__init__() 25 | self.data_dir = os.path.join(opt.data_dir, 'kitti') 26 | self.img_dir = os.path.join(self.data_dir, 'images', 'trainval') 27 | if opt.trainval: 28 | split = 'trainval' if split == 'train' else 'test' 29 | self.img_dir = os.path.join(self.data_dir, 'images', split) 30 | self.annot_path = os.path.join( 31 | self.data_dir, 'annotations', 'kitti_{}.json').format(split) 32 | else: 33 | self.annot_path = os.path.join(self.data_dir, 34 | 'annotations', 'kitti_{}_{}.json').format(opt.kitti_split, split) 35 | self.max_objs = 50 36 | self.class_name = [ 37 | '__background__', 'Pedestrian', 'Car', 'Cyclist'] 38 | self.cat_ids = {1:0, 2:1, 3:2, 4:-3, 5:-3, 6:-2, 7:-99, 8:-99, 9:-1} 39 | 40 | self._data_rng = np.random.RandomState(123) 41 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 42 | dtype=np.float32) 43 | self._eig_vec = np.array([ 44 | [-0.58752847, -0.69563484, 0.41340352], 45 | [-0.5832747, 0.00994535, -0.81221408], 46 | [-0.56089297, 0.71832671, 0.41158938] 47 | ], dtype=np.float32) 48 | self.split = split 49 | self.opt = opt 50 | self.alpha_in_degree = False 51 | 52 | print('==> initializing kitti {}, {} data.'.format(opt.kitti_split, split)) 53 | self.coco = coco.COCO(self.annot_path) 54 | self.images = self.coco.getImgIds() 55 | self.num_samples = len(self.images) 56 | 57 | print('Loaded {} {} samples'.format(split, self.num_samples)) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | 62 | def _to_float(self, x): 63 | return float("{:.2f}".format(x)) 64 | 65 | def convert_eval_format(self, all_bboxes): 66 | pass 67 | 68 | def save_results(self, results, save_dir): 69 | results_dir = os.path.join(save_dir, 'results') 70 | if not os.path.exists(results_dir): 71 | os.mkdir(results_dir) 72 | for img_id in results.keys(): 73 | out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id)) 74 | f = open(out_path, 'w') 75 | for cls_ind in results[img_id]: 76 | for j in range(len(results[img_id][cls_ind])): 77 | class_name = self.class_name[cls_ind] 78 | f.write('{} 0.0 0'.format(class_name)) 79 | for i in range(len(results[img_id][cls_ind][j])): 80 | f.write(' {:.2f}'.format(results[img_id][cls_ind][j][i])) 81 | f.write('\n') 82 | f.close() 83 | 84 | def run_eval(self, results, save_dir): 85 | self.save_results(results, save_dir) 86 | os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \ 87 | '../data/kitti/training/label_val ' + \ 88 | '{}/results/'.format(save_dir)) 89 | 90 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/pascal.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import 
pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class PascalVOC(data.Dataset): 14 | num_classes = 20 15 | default_resolution = [384, 384] 16 | mean = np.array([0.485, 0.456, 0.406], 17 | dtype=np.float32).reshape(1, 1, 3) 18 | std = np.array([0.229, 0.224, 0.225], 19 | dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(PascalVOC, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'voc') 24 | self.img_dir = os.path.join(self.data_dir, 'images') 25 | _ann_name = {'train': 'trainval0712', 'val': 'test2007'} 26 | self.annot_path = os.path.join( 27 | self.data_dir, 'annotations', 28 | 'pascal_{}.json').format(_ann_name[split]) 29 | self.max_objs = 50 30 | self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat", 31 | "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", 32 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", 33 | "train", "tvmonitor"] 34 | self._valid_ids = np.arange(1, 21, dtype=np.int32) 35 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 36 | self._data_rng = np.random.RandomState(123) 37 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 38 | dtype=np.float32) 39 | self._eig_vec = np.array([ 40 | [-0.58752847, -0.69563484, 0.41340352], 41 | [-0.5832747, 0.00994535, -0.81221408], 42 | [-0.56089297, 0.71832671, 0.41158938] 43 | ], dtype=np.float32) 44 | self.split = split 45 | self.opt = opt 46 | 47 | print('==> initializing pascal {} data.'.format(_ann_name[split])) 48 | self.coco = coco.COCO(self.annot_path) 49 | self.images = sorted(self.coco.getImgIds()) 50 | self.num_samples = len(self.images) 51 | 52 | print('Loaded {} {} samples'.format(split, self.num_samples)) 53 | 54 | def _to_float(self, x): 55 | return float("{:.2f}".format(x)) 56 | 57 | def convert_eval_format(self, all_bboxes): 58 | detections = [[[] for __ in range(self.num_samples)] \ 59 | for _ in range(self.num_classes + 1)] 60 | for i in range(self.num_samples): 61 | img_id = self.images[i] 62 | for j in range(1, self.num_classes + 1): 63 | if isinstance(all_bboxes[img_id][j], np.ndarray): 64 | detections[j][i] = all_bboxes[img_id][j].tolist() 65 | else: 66 | detections[j][i] = all_bboxes[img_id][j] 67 | return detections 68 | 69 | def __len__(self): 70 | return self.num_samples 71 | 72 | def save_results(self, results, save_dir): 73 | json.dump(self.convert_eval_format(results), 74 | open('{}/results.json'.format(save_dir), 'w')) 75 | 76 | def run_eval(self, results, save_dir): 77 | # result_json = os.path.join(save_dir, "results.json") 78 | # detections = self.convert_eval_format(results) 79 | # json.dump(detections, open(result_json, "w")) 80 | self.save_results(results, save_dir) 81 | os.system('python tools/reval.py ' + \ 82 | '{}/results.json'.format(save_dir)) 83 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .sample.ddd import DddDataset 6 | from .sample.exdet import EXDetDataset 7 | from .sample.ctdet import CTDetDataset 8 | from .sample.multi_pose import MultiPoseDataset 9 | 10 | from .dataset.coco import COCO 11 | from .dataset.pascal import PascalVOC 12 | from .dataset.kitti import KITTI 13 | 
from .dataset.coco_hp import COCOHP 14 | 15 | 16 | dataset_factory = { 17 | 'coco': COCO, 18 | 'pascal': PascalVOC, 19 | 'kitti': KITTI, 20 | 'coco_hp': COCOHP 21 | } 22 | 23 | _sample_factory = { 24 | 'exdet': EXDetDataset, 25 | 'ctdet': CTDetDataset, 26 | 'ddd': DddDataset, 27 | 'multi_pose': MultiPoseDataset 28 | } 29 | 30 | 31 | def get_dataset(dataset, task): 32 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 33 | pass 34 | return Dataset 35 | 36 | -------------------------------------------------------------------------------- /src/lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | from models.model import create_model, load_model 12 | from utils.image import get_affine_transform 13 | from utils.debugger import Debugger 14 | 15 | 16 | class BaseDetector(object): 17 | def __init__(self, opt): 18 | if opt.gpus[0] >= 0: 19 | opt.device = torch.device('cuda') 20 | else: 21 | opt.device = torch.device('cpu') 22 | 23 | print('Creating model...') 24 | self.model = create_model(opt.arch, opt.heads, opt.head_conv) 25 | self.model = load_model(self.model, opt.load_model) 26 | self.model = self.model.to(opt.device) 27 | self.model.eval() 28 | 29 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) 30 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) 31 | self.max_per_image = 100 32 | self.num_classes = opt.num_classes 33 | self.scales = opt.test_scales 34 | self.opt = opt 35 | self.pause = True 36 | 37 | def pre_process(self, image, scale, meta=None): 38 | height, width = image.shape[0:2] 39 | new_height = int(height * scale) 40 | new_width = int(width * scale) 41 | if self.opt.fix_res: 42 | inp_height, inp_width = self.opt.input_h, self.opt.input_w 43 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 44 | s = max(height, width) * 1.0 45 | else: 46 | inp_height = (new_height | self.opt.pad) + 1 47 | inp_width = (new_width | self.opt.pad) + 1 48 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 49 | s = np.array([inp_width, inp_height], dtype=np.float32) 50 | 51 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 52 | resized_image = cv2.resize(image, (new_width, new_height)) 53 | inp_image = cv2.warpAffine( 54 | resized_image, trans_input, (inp_width, inp_height), 55 | flags=cv2.INTER_LINEAR) 56 | inp_image = ((inp_image / 255. 
- self.mean) / self.std).astype(np.float32) 57 | 58 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) 59 | if self.opt.flip_test: 60 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 61 | images = torch.from_numpy(images) 62 | meta = {'c': c, 's': s, 63 | 'out_height': inp_height // self.opt.down_ratio, 64 | 'out_width': inp_width // self.opt.down_ratio} 65 | return images, meta 66 | 67 | def process(self, images, return_time=False): 68 | raise NotImplementedError 69 | 70 | def post_process(self, dets, meta, scale=1): 71 | raise NotImplementedError 72 | 73 | def merge_outputs(self, detections): 74 | raise NotImplementedError 75 | 76 | def debug(self, debugger, images, dets, output, scale=1): 77 | raise NotImplementedError 78 | 79 | def show_results(self, debugger, image, results): 80 | raise NotImplementedError 81 | 82 | def run(self, image_or_path_or_tensor, meta=None): 83 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 84 | merge_time, tot_time = 0, 0 85 | debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3), 86 | theme=self.opt.debugger_theme) 87 | start_time = time.time() 88 | pre_processed = False 89 | if isinstance(image_or_path_or_tensor, np.ndarray): 90 | image = image_or_path_or_tensor 91 | elif type(image_or_path_or_tensor) == type (''): 92 | image = cv2.imread(image_or_path_or_tensor) 93 | else: 94 | image = image_or_path_or_tensor['image'][0].numpy() 95 | pre_processed_images = image_or_path_or_tensor 96 | pre_processed = True 97 | 98 | loaded_time = time.time() 99 | load_time += (loaded_time - start_time) 100 | 101 | detections = [] 102 | for scale in self.scales: 103 | scale_start_time = time.time() 104 | if not pre_processed: 105 | images, meta = self.pre_process(image, scale, meta) 106 | else: 107 | # import pdb; pdb.set_trace() 108 | images = pre_processed_images['images'][scale][0] 109 | meta = pre_processed_images['meta'][scale] 110 | meta = {k: v.numpy()[0] for k, v in meta.items()} 111 | images = images.to(self.opt.device) 112 | torch.cuda.synchronize() 113 | pre_process_time = time.time() 114 | pre_time += pre_process_time - scale_start_time 115 | 116 | output, dets, forward_time = self.process(images, return_time=True) 117 | 118 | torch.cuda.synchronize() 119 | net_time += forward_time - pre_process_time 120 | decode_time = time.time() 121 | dec_time += decode_time - forward_time 122 | 123 | if self.opt.debug >= 2: 124 | self.debug(debugger, images, dets, output, scale) 125 | 126 | dets = self.post_process(dets, meta, scale) 127 | torch.cuda.synchronize() 128 | post_process_time = time.time() 129 | post_time += post_process_time - decode_time 130 | 131 | detections.append(dets) 132 | 133 | results = self.merge_outputs(detections) 134 | torch.cuda.synchronize() 135 | end_time = time.time() 136 | merge_time += end_time - post_process_time 137 | tot_time += end_time - start_time 138 | 139 | if self.opt.debug >= 1: 140 | self.show_results(debugger, image, results) 141 | 142 | return {'results': results, 'tot': tot_time, 'load': load_time, 143 | 'pre': pre_time, 'net': net_time, 'dec': dec_time, 144 | 'post': post_time, 'merge': merge_time} -------------------------------------------------------------------------------- /src/lib/detectors/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 
| from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms 13 | except: 14 | print('NMS not imported! If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import ctdet_decode 17 | from models.utils import flip_tensor 18 | from utils.image import get_affine_transform 19 | from utils.post_process import ctdet_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | class CtdetDetector(BaseDetector): 25 | def __init__(self, opt): 26 | super(CtdetDetector, self).__init__(opt) 27 | 28 | def process(self, images, return_time=False): 29 | with torch.no_grad(): 30 | output = self.model(images)[-1] 31 | hm = output['hm'].sigmoid_() 32 | wh = output['wh'] 33 | reg = output['reg'] if self.opt.reg_offset else None 34 | if self.opt.flip_test: 35 | hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2 36 | wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2 37 | reg = reg[0:1] if reg is not None else None 38 | torch.cuda.synchronize() 39 | forward_time = time.time() 40 | dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 41 | 42 | if return_time: 43 | return output, dets, forward_time 44 | else: 45 | return output, dets 46 | 47 | def post_process(self, dets, meta, scale=1): 48 | dets = dets.detach().cpu().numpy() 49 | dets = dets.reshape(1, -1, dets.shape[2]) 50 | dets = ctdet_post_process( 51 | dets.copy(), [meta['c']], [meta['s']], 52 | meta['out_height'], meta['out_width'], self.opt.num_classes) 53 | for j in range(1, self.num_classes + 1): 54 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 55 | dets[0][j][:, :4] /= scale 56 | return dets[0] 57 | 58 | def merge_outputs(self, detections): 59 | results = {} 60 | for j in range(1, self.num_classes + 1): 61 | results[j] = np.concatenate( 62 | [detection[j] for detection in detections], axis=0).astype(np.float32) 63 | if len(self.scales) > 1 or self.opt.nms: 64 | soft_nms(results[j], Nt=0.5, method=2) 65 | scores = np.hstack( 66 | [results[j][:, 4] for j in range(1, self.num_classes + 1)]) 67 | if len(scores) > self.max_per_image: 68 | kth = len(scores) - self.max_per_image 69 | thresh = np.partition(scores, kth)[kth] 70 | for j in range(1, self.num_classes + 1): 71 | keep_inds = (results[j][:, 4] >= thresh) 72 | results[j] = results[j][keep_inds] 73 | return results 74 | 75 | def debug(self, debugger, images, dets, output, scale=1): 76 | detection = dets.detach().cpu().numpy().copy() 77 | detection[:, :, :4] *= self.opt.down_ratio 78 | for i in range(1): 79 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0) 80 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 81 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 82 | debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale)) 83 | debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale)) 84 | for k in range(len(dets[i])): 85 | if detection[i, k, 4] > self.opt.center_thresh: 86 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1], 87 | detection[i, k, 4], 88 | img_id='out_pred_{:.1f}'.format(scale)) 89 | 90 | def show_results(self, debugger, image, results): 91 | debugger.add_img(image, img_id='ctdet') 92 | for j in range(1, self.num_classes + 1): 93 | for bbox in results[j]: 94 | if bbox[4] > self.opt.vis_thresh: 95 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet') 96 | debugger.show_all_imgs(pause=self.pause) 97 | 
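Taken together with BaseDetector above, CtdetDetector is driven through `run()`: each test scale is pre-processed, `process()` runs the network and decodes the center/size/offset heads, `post_process()` maps boxes back to image coordinates, and `merge_outputs()` fuses scales and keeps the top 100 detections. The sketch below shows one way to call it directly. It is a minimal, hypothetical example rather than the repository's demo entry point: the `opt` namespace, checkpoint path, image path, and the rounded COCO mean/std values are assumptions standing in for the options object the project builds from command-line flags.

```python
# Minimal usage sketch. Assumptions: src/lib is on sys.path, a CUDA GPU is available,
# DCNv2 is compiled, and a ctdet DLA-34 checkpoint exists at the path below.
import sys
from types import SimpleNamespace

sys.path.insert(0, 'src/lib')                      # hypothetical checkout layout
from detectors.detector_factory import detector_factory

opt = SimpleNamespace(
    task='ctdet', dataset='coco', arch='dla_34',
    load_model='models/ctdet_coco_dla_2x.pth',     # hypothetical checkpoint path
    gpus=[0], heads={'hm': 80, 'wh': 2, 'reg': 2}, head_conv=256,
    mean=[0.408, 0.447, 0.470], std=[0.289, 0.274, 0.278],   # rounded COCO stats
    num_classes=80, test_scales=[1.0], fix_res=True, input_h=512, input_w=512,
    pad=31, down_ratio=4, flip_test=False, reg_offset=True, cat_spec_wh=False,
    K=100, nms=False, vis_thresh=0.3, center_thresh=0.1,
    debug=0, debugger_theme='white')

detector = detector_factory[opt.task](opt)         # -> CtdetDetector
ret = detector.run('path/to/image.jpg')            # hypothetical image path
print('detections per class:', {j: len(v) for j, v in ret['results'].items()})
print('total time: {:.3f}s'.format(ret['tot']))
```

With `debug=0` and a single test scale, `run()` skips the debugger windows and the optional soft-NMS call, so the external NMS extension is not required on this path.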
-------------------------------------------------------------------------------- /src/lib/detectors/ddd.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | 12 | from models.decode import ddd_decode 13 | from models.utils import flip_tensor 14 | from utils.image import get_affine_transform 15 | from utils.post_process import ddd_post_process 16 | from utils.debugger import Debugger 17 | from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y 18 | from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d 19 | 20 | from .base_detector import BaseDetector 21 | 22 | class DddDetector(BaseDetector): 23 | def __init__(self, opt): 24 | super(DddDetector, self).__init__(opt) 25 | self.calib = np.array([[707.0493, 0, 604.0814, 45.75831], 26 | [0, 707.0493, 180.5066, -0.3454157], 27 | [0, 0, 1., 0.004981016]], dtype=np.float32) 28 | 29 | 30 | def pre_process(self, image, scale, calib=None): 31 | height, width = image.shape[0:2] 32 | 33 | inp_height, inp_width = self.opt.input_h, self.opt.input_w 34 | c = np.array([width / 2, height / 2], dtype=np.float32) 35 | if self.opt.keep_res: 36 | s = np.array([inp_width, inp_height], dtype=np.int32) 37 | else: 38 | s = np.array([width, height], dtype=np.int32) 39 | 40 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 41 | resized_image = image #cv2.resize(image, (width, height)) 42 | inp_image = cv2.warpAffine( 43 | resized_image, trans_input, (inp_width, inp_height), 44 | flags=cv2.INTER_LINEAR) 45 | inp_image = (inp_image.astype(np.float32) / 255.) 46 | inp_image = (inp_image - self.mean) / self.std 47 | images = inp_image.transpose(2, 0, 1)[np.newaxis, ...] 48 | calib = np.array(calib, dtype=np.float32) if calib is not None \ 49 | else self.calib 50 | images = torch.from_numpy(images) 51 | meta = {'c': c, 's': s, 52 | 'out_height': inp_height // self.opt.down_ratio, 53 | 'out_width': inp_width // self.opt.down_ratio, 54 | 'calib': calib} 55 | return images, meta 56 | 57 | def process(self, images, return_time=False): 58 | with torch.no_grad(): 59 | torch.cuda.synchronize() 60 | output = self.model(images)[-1] 61 | output['hm'] = output['hm'].sigmoid_() 62 | output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1. 
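      # invert the sigmoid-based depth parameterisation: 1 / sigmoid(x) - 1 maps the
      # unconstrained 'dep' head output to a positive depth value, with 1e-6 guarding
      # against division by zero.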
63 | wh = output['wh'] if self.opt.reg_bbox else None 64 | reg = output['reg'] if self.opt.reg_offset else None 65 | torch.cuda.synchronize() 66 | forward_time = time.time() 67 | 68 | dets = ddd_decode(output['hm'], output['rot'], output['dep'], 69 | output['dim'], wh=wh, reg=reg, K=self.opt.K) 70 | if return_time: 71 | return output, dets, forward_time 72 | else: 73 | return output, dets 74 | 75 | def post_process(self, dets, meta, scale=1): 76 | dets = dets.detach().cpu().numpy() 77 | detections = ddd_post_process( 78 | dets.copy(), [meta['c']], [meta['s']], [meta['calib']], self.opt) 79 | self.this_calib = meta['calib'] 80 | return detections[0] 81 | 82 | def merge_outputs(self, detections): 83 | results = detections[0] 84 | for j in range(1, self.num_classes + 1): 85 | if len(results[j] > 0): 86 | keep_inds = (results[j][:, -1] > self.opt.peak_thresh) 87 | results[j] = results[j][keep_inds] 88 | return results 89 | 90 | def debug(self, debugger, images, dets, output, scale=1): 91 | dets = dets.detach().cpu().numpy() 92 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 93 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 94 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 95 | debugger.add_blend_img(img, pred, 'pred_hm') 96 | debugger.add_ct_detection( 97 | img, dets[0], show_box=self.opt.reg_bbox, 98 | center_thresh=self.opt.vis_thresh, img_id='det_pred') 99 | 100 | def show_results(self, debugger, image, results): 101 | debugger.add_3d_detection( 102 | image, results, self.this_calib, 103 | center_thresh=self.opt.vis_thresh, img_id='add_pred') 104 | debugger.add_bird_view( 105 | results, center_thresh=self.opt.vis_thresh, img_id='bird_pred') 106 | debugger.show_all_imgs(pause=self.pause) -------------------------------------------------------------------------------- /src/lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .exdet import ExdetDetector 6 | from .ddd import DddDetector 7 | from .ctdet import CtdetDetector 8 | from .multi_pose import MultiPoseDetector 9 | 10 | detector_factory = { 11 | 'exdet': ExdetDetector, 12 | 'ddd': DddDetector, 13 | 'ctdet': CtdetDetector, 14 | 'multi_pose': MultiPoseDetector, 15 | } 16 | -------------------------------------------------------------------------------- /src/lib/detectors/exdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | from progress.bar import Bar 12 | import time 13 | import torch 14 | 15 | from models.decode import exct_decode, agnex_ct_decode 16 | from models.utils import flip_tensor 17 | from utils.image import get_affine_transform, transform_preds 18 | from utils.post_process import ctdet_post_process 19 | from utils.debugger import Debugger 20 | 21 | from .base_detector import BaseDetector 22 | 23 | class ExdetDetector(BaseDetector): 24 | def __init__(self, opt): 25 | super(ExdetDetector, self).__init__(opt) 26 | self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode 27 | 28 | def process(self, images, return_time=False): 29 | with torch.no_grad(): 30 | torch.cuda.synchronize() 31 | output = self.model(images)[-1] 32 | t_heat = 
output['hm_t'].sigmoid_() 33 | l_heat = output['hm_l'].sigmoid_() 34 | b_heat = output['hm_b'].sigmoid_() 35 | r_heat = output['hm_r'].sigmoid_() 36 | c_heat = output['hm_c'].sigmoid_() 37 | torch.cuda.synchronize() 38 | forward_time = time.time() 39 | if self.opt.reg_offset: 40 | dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, 41 | output['reg_t'], output['reg_l'], 42 | output['reg_b'], output['reg_r'], 43 | K=self.opt.K, 44 | scores_thresh=self.opt.scores_thresh, 45 | center_thresh=self.opt.center_thresh, 46 | aggr_weight=self.opt.aggr_weight) 47 | else: 48 | dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, K=self.opt.K, 49 | scores_thresh=self.opt.scores_thresh, 50 | center_thresh=self.opt.center_thresh, 51 | aggr_weight=self.opt.aggr_weight) 52 | if return_time: 53 | return output, dets, forward_time 54 | else: 55 | return output, dets 56 | 57 | def debug(self, debugger, images, dets, output, scale=1): 58 | detection = dets.detach().cpu().numpy().copy() 59 | detection[:, :, :4] *= self.opt.down_ratio 60 | for i in range(1): 61 | inp_height, inp_width = images.shape[2], images.shape[3] 62 | pred_hm = np.zeros((inp_height, inp_width, 3), dtype=np.uint8) 63 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0) 64 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 65 | parts = ['t', 'l', 'b', 'r', 'c'] 66 | for p in parts: 67 | tag = 'hm_{}'.format(p) 68 | pred = debugger.gen_colormap( 69 | output[tag][i].detach().cpu().numpy(), (inp_height, inp_width)) 70 | if p != 'c': 71 | pred_hm = np.maximum(pred_hm, pred) 72 | else: 73 | debugger.add_blend_img( 74 | img, pred, 'pred_{}_{:.1f}'.format(p, scale)) 75 | debugger.add_blend_img(img, pred_hm, 'pred_{:.1f}'.format(scale)) 76 | debugger.add_img(img, img_id='out_{:.1f}'.format(scale)) 77 | for k in range(len(detection[i])): 78 | # print('detection', detection[i, k, 4], detection[i, k]) 79 | if detection[i, k, 4] > 0.01: 80 | # print('detection', detection[i, k, 4], detection[i, k]) 81 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1], 82 | detection[i, k, 4], 83 | img_id='out_{:.1f}'.format(scale)) 84 | 85 | def post_process(self, dets, meta, scale=1): 86 | out_width, out_height = meta['out_width'], meta['out_height'] 87 | dets = dets.detach().cpu().numpy().reshape(2, -1, 14) 88 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] 89 | dets = dets.reshape(1, -1, 14) 90 | dets[0, :, 0:2] = transform_preds( 91 | dets[0, :, 0:2], meta['c'], meta['s'], (out_width, out_height)) 92 | dets[0, :, 2:4] = transform_preds( 93 | dets[0, :, 2:4], meta['c'], meta['s'], (out_width, out_height)) 94 | dets[:, :, 0:4] /= scale 95 | return dets[0] 96 | 97 | def merge_outputs(self, detections): 98 | detections = np.concatenate( 99 | [detection for detection in detections], axis=0).astype(np.float32) 100 | classes = detections[..., -1] 101 | keep_inds = (detections[:, 4] > 0) 102 | detections = detections[keep_inds] 103 | classes = classes[keep_inds] 104 | 105 | results = {} 106 | for j in range(self.num_classes): 107 | keep_inds = (classes == j) 108 | results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32) 109 | soft_nms(results[j + 1], Nt=0.5, method=2) 110 | results[j + 1] = results[j + 1][:, 0:5] 111 | 112 | scores = np.hstack([ 113 | results[j][:, -1] 114 | for j in range(1, self.num_classes + 1) 115 | ]) 116 | if len(scores) > self.max_per_image: 117 | kth = len(scores) - self.max_per_image 118 | thresh = np.partition(scores, kth)[kth] 119 | for j in range(1, self.num_classes + 1): 120 | 
keep_inds = (results[j][:, -1] >= thresh) 121 | results[j] = results[j][keep_inds] 122 | return results 123 | 124 | 125 | def show_results(self, debugger, image, results): 126 | debugger.add_img(image, img_id='exdet') 127 | for j in range(1, self.num_classes + 1): 128 | for bbox in results[j]: 129 | if bbox[4] > self.opt.vis_thresh: 130 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='exdet') 131 | debugger.show_all_imgs(pause=self.pause) 132 | -------------------------------------------------------------------------------- /src/lib/detectors/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms_39 13 | except: 14 | print('NMS not imported! If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import multi_pose_decode 17 | from models.utils import flip_tensor, flip_lr_off, flip_lr 18 | from utils.image import get_affine_transform 19 | from utils.post_process import multi_pose_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | class MultiPoseDetector(BaseDetector): 25 | def __init__(self, opt): 26 | super(MultiPoseDetector, self).__init__(opt) 27 | self.flip_idx = opt.flip_idx 28 | 29 | def process(self, images, return_time=False): 30 | with torch.no_grad(): 31 | torch.cuda.synchronize() 32 | output = self.model(images)[-1] 33 | output['hm'] = output['hm'].sigmoid_() 34 | if self.opt.hm_hp and not self.opt.mse_loss: 35 | output['hm_hp'] = output['hm_hp'].sigmoid_() 36 | 37 | reg = output['reg'] if self.opt.reg_offset else None 38 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 39 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 40 | torch.cuda.synchronize() 41 | forward_time = time.time() 42 | 43 | if self.opt.flip_test: 44 | output['hm'] = (output['hm'][0:1] + flip_tensor(output['hm'][1:2])) / 2 45 | output['wh'] = (output['wh'][0:1] + flip_tensor(output['wh'][1:2])) / 2 46 | output['hps'] = (output['hps'][0:1] + 47 | flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2 48 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \ 49 | if hm_hp is not None else None 50 | reg = reg[0:1] if reg is not None else None 51 | hp_offset = hp_offset[0:1] if hp_offset is not None else None 52 | 53 | dets = multi_pose_decode( 54 | output['hm'], output['wh'], output['hps'], 55 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 56 | 57 | if return_time: 58 | return output, dets, forward_time 59 | else: 60 | return output, dets 61 | 62 | def post_process(self, dets, meta, scale=1): 63 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 64 | dets = multi_pose_post_process( 65 | dets.copy(), [meta['c']], [meta['s']], 66 | meta['out_height'], meta['out_width']) 67 | for j in range(1, self.num_classes + 1): 68 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 39) 69 | # import pdb; pdb.set_trace() 70 | dets[0][j][:, :4] /= scale 71 | dets[0][j][:, 5:] /= scale 72 | return dets[0] 73 | 74 | def merge_outputs(self, detections): 75 | results = {} 76 | results[1] = np.concatenate( 77 | [detection[1] for detection in detections], axis=0).astype(np.float32) 78 | if self.opt.nms or len(self.opt.test_scales) 
> 1: 79 | soft_nms_39(results[1], Nt=0.5, method=2) 80 | results[1] = results[1].tolist() 81 | return results 82 | 83 | def debug(self, debugger, images, dets, output, scale=1): 84 | dets = dets.detach().cpu().numpy().copy() 85 | dets[:, :, :4] *= self.opt.down_ratio 86 | dets[:, :, 5:39] *= self.opt.down_ratio 87 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 88 | img = np.clip((( 89 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 90 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 91 | debugger.add_blend_img(img, pred, 'pred_hm') 92 | if self.opt.hm_hp: 93 | pred = debugger.gen_colormap_hp( 94 | output['hm_hp'][0].detach().cpu().numpy()) 95 | debugger.add_blend_img(img, pred, 'pred_hmhp') 96 | 97 | def show_results(self, debugger, image, results): 98 | debugger.add_img(image, img_id='multi_pose') 99 | for bbox in results[1]: 100 | if bbox[4] > self.opt.vis_thresh: 101 | debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose') 102 | debugger.add_coco_hp(bbox[5:39], img_id='multi_pose') 103 | debugger.show_all_imgs(pause=self.pause) -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = 
time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /src/lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.msra_resnet import get_pose_net 11 | from .networks.dlav0 import get_pose_net as get_dlav0 12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | 'res': get_pose_net, # default Resnet with deconv 18 | 'dlav0': get_dlav0, # default DLAup 19 | 'dla': get_dla_dcn, 20 | 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | def create_model(arch, heads, head_conv): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | get_model = _model_factory[arch] 28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv) 29 | return model 30 | 31 | def load_model(model, model_path, optimizer=None, resume=False, 32 | lr=None, lr_step=None): 33 | start_epoch = 0 34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 36 | state_dict_ = checkpoint['state_dict'] 37 | state_dict = {} 38 | 39 | # convert data_parallal to model 40 | for k in state_dict_: 41 | if k.startswith('module') and not k.startswith('module_list'): 42 | state_dict[k[7:]] = state_dict_[k] 43 | else: 44 | state_dict[k] = state_dict_[k] 45 | model_state_dict = model.state_dict() 46 | 47 | # check loaded parameters and created model parameters 48 | msg = 'If you see this, your model does not fully load the ' + \ 49 | 'pre-trained weight. Please make sure ' + \ 50 | 'you have correctly specified --arch xxx ' + \ 51 | 'or set the correct --num_classes for your own dataset.' 
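  # keep only checkpoint tensors whose shapes match the freshly created model;
  # mismatched or missing parameters fall back to the model's own initialisation
  # and print the warning message above instead of raising an error.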
52 | for k in state_dict: 53 | if k in model_state_dict: 54 | if state_dict[k].shape != model_state_dict[k].shape: 55 | print('Skip loading parameter {}, required shape{}, '\ 56 | 'loaded shape{}. {}'.format( 57 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 58 | state_dict[k] = model_state_dict[k] 59 | else: 60 | print('Drop parameter {}.'.format(k) + msg) 61 | for k in model_state_dict: 62 | if not (k in state_dict): 63 | print('No param {}.'.format(k) + msg) 64 | state_dict[k] = model_state_dict[k] 65 | model.load_state_dict(state_dict, strict=False) 66 | 67 | # resume optimizer parameters 68 | if optimizer is not None and resume: 69 | if 'optimizer' in checkpoint: 70 | optimizer.load_state_dict(checkpoint['optimizer']) 71 | start_epoch = checkpoint['epoch'] 72 | start_lr = lr 73 | for step in lr_step: 74 | if start_epoch >= step: 75 | start_lr *= 0.1 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = start_lr 78 | print('Resumed optimizer with start lr', start_lr) 79 | else: 80 | print('No optimizer parameters in checkpoint.') 81 | if optimizer is not None: 82 | return model, optimizer, start_epoch 83 | else: 84 | return model 85 | 86 | def save_model(path, epoch, model, optimizer=None): 87 | if isinstance(model, torch.nn.DataParallel): 88 | state_dict = model.module.state_dict() 89 | else: 90 | state_dict = model.state_dict() 91 | data = {'epoch': epoch, 92 | 'state_dict': state_dict} 93 | if not (optimizer is None): 94 | data['optimizer'] = optimizer.state_dict() 95 | torch.save(data, path) 96 | 97 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # mdformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | 42 | ### Known Issues: 43 | 44 | - [x] Gradient check w.r.t offset (solved) 45 | - [ ] Backward is not reentrant (minor) 46 | 47 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 48 | 49 | I have ran the gradient check for many times with DOUBLE type. Every tensor **except offset** passes. 50 | However, when I set the offset to 0.5, it passes. I'm still wondering what cause this problem. Is it because some 51 | non-differential points? 52 | 53 | Update: all gradient check passes with double precision. 54 | 55 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 56 | float `<1e-15` for double), 57 | so it may not be a serious problem (?) 58 | 59 | Please post an issue or PR if you have any comments. 
60 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/models/networks/DCNv2/__init__.py -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/dcn_v2.c'] 7 | headers = ['src/dcn_v2.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/dcn_v2_cuda.c'] 15 | headers += ['src/dcn_v2_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o'] 18 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o'] 19 | with_cuda = True 20 | else: 21 | raise ValueError('CUDA is not available') 22 | 23 | extra_compile_args = ['-fopenmp', '-std=c99'] 24 | 25 | this_file = os.path.dirname(os.path.realpath(__file__)) 26 | print(this_file) 27 | sources = [os.path.join(this_file, fname) for fname in sources] 28 | headers = [os.path.join(this_file, fname) for fname in headers] 29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | 31 | ffi = create_extension( 32 | '_ext.dcn_v2', 33 | headers=headers, 34 | sources=sources, 35 | define_macros=defines, 36 | relative_to=__file__, 37 | with_cuda=with_cuda, 38 | extra_objects=extra_objects, 39 | extra_compile_args=extra_compile_args 40 | ) 41 | 42 | if __name__ == '__main__': 43 | ffi.build() 44 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build_double.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/dcn_v2_double.c'] 7 | headers = ['src/dcn_v2_double.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/dcn_v2_cuda_double.c'] 15 | headers += ['src/dcn_v2_cuda_double.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o'] 18 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o'] 19 | with_cuda = True 20 | else: 21 | raise ValueError('CUDA is not available') 22 | 23 | extra_compile_args = ['-fopenmp', '-std=c99'] 24 | 25 | this_file = os.path.dirname(os.path.realpath(__file__)) 26 | print(this_file) 27 | sources = [os.path.join(this_file, fname) for fname in sources] 28 | headers = [os.path.join(this_file, fname) for fname in headers] 29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | 31 | ffi = create_extension( 32 | '_ext.dcn_v2_double', 33 | headers=headers, 34 | sources=sources, 35 | define_macros=defines, 36 | relative_to=__file__, 37 | with_cuda=with_cuda, 38 | extra_objects=extra_objects, 39 | extra_compile_args=extra_compile_args 40 | ) 41 | 42 | if __name__ == '__main__': 43 | ffi.build() 44 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/make.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src/cuda 3 | 4 | # compile dcn 5 | nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC 6 | nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC 7 | 8 | # compile dcn-roi-pooling 9 | nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC 10 | nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC 11 | 12 | cd - 13 | python build.py 14 | python build_double.py 15 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 
48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA 64 | #define DCN_V2_IM2COL_CUDA 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const float *data_im, const float *data_offset, const float *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, float *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | float *grad_offset, float *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 
20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 
57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA_DOUBLE 64 | #define DCN_V2_IM2COL_CUDA_DOUBLE 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const double *data_im, const double *data_offset, const double *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, double *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const double *data_col, const double *data_offset, const double *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, double *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const double *data_col, const double *data_im, const double *data_offset, const double *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | double *grad_offset, double *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA 11 | #define DCN_V2_PSROI_POOLING_CUDA 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const float *data, 20 | const float *bbox, 21 | const float *trans, 22 | float *out, 23 | float *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const float spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const float trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const float *out_grad, 41 | const float *data, 42 | const float *bbox, 43 | const float *trans, 44 | const float *top_count, 45 | float *in_grad, 46 | float *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE 11 | #define DCN_V2_PSROI_POOLING_CUDA_DOUBLE 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const double *data, 20 | const double *bbox, 21 | const double *trans, 22 | double *out, 23 | double *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const double spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const double trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const double *out_grad, 41 | const double *data, 42 | const double *bbox, 43 | const double *trans, 44 | const double *top_count, 45 | double *in_grad, 46 | double *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const double spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const double trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 6 | THFloatTensor *bias, THFloatTensor *ones, 7 | THFloatTensor *offset, THFloatTensor *mask, 8 | THFloatTensor *output, THFloatTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 17 | THFloatTensor *bias, THFloatTensor *ones, 18 | THFloatTensor *offset, THFloatTensor *mask, 19 | THFloatTensor *output, THFloatTensor *columns, 20 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 21 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 22 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 2 | THFloatTensor *bias, THFloatTensor *ones, 3 | THFloatTensor *offset, THFloatTensor *mask, 4 | THFloatTensor *output, THFloatTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | const int 
deformable_group); 9 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 10 | THFloatTensor *bias, THFloatTensor *ones, 11 | THFloatTensor *offset, THFloatTensor *mask, 12 | THFloatTensor *output, THFloatTensor *columns, 13 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 14 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 15 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaTensor *input, THCudaTensor *weight, 10 | THCudaTensor *bias, THCudaTensor *ones, 11 | THCudaTensor *offset, THCudaTensor *mask, 12 | THCudaTensor *output, THCudaTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaTensor *input, THCudaTensor *weight, 19 | THCudaTensor *bias, THCudaTensor *ones, 20 | THCudaTensor *offset, THCudaTensor *mask, 21 | THCudaTensor *columns, 22 | THCudaTensor *grad_input, THCudaTensor *grad_weight, 23 | THCudaTensor *grad_bias, THCudaTensor *grad_offset, 24 | THCudaTensor *grad_mask, THCudaTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, 32 | THCudaTensor * trans, 33 | THCudaTensor * out, THCudaTensor * top_count, 34 | const int no_trans, 35 | const float spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const float trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaTensor * out_grad, 44 | THCudaTensor * input, THCudaTensor * bbox, 45 | THCudaTensor * trans, THCudaTensor * top_count, 46 | THCudaTensor * input_grad, THCudaTensor * trans_grad, 47 | const int no_trans, 48 | const float spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const float trans_std); 55 | 56 | // #ifdef __cplusplus 57 | // } 58 | // #endif 59 | 60 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda_double.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 10 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 11 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 12 | THCudaDoubleTensor *output, THCudaDoubleTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int 
dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 19 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 20 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 21 | THCudaDoubleTensor *columns, 22 | THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_weight, 23 | THCudaDoubleTensor *grad_bias, THCudaDoubleTensor *grad_offset, 24 | THCudaDoubleTensor *grad_mask, THCudaDoubleTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 32 | THCudaDoubleTensor * trans, 33 | THCudaDoubleTensor * out, THCudaDoubleTensor * top_count, 34 | const int no_trans, 35 | const double spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const double trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaDoubleTensor * out_grad, 44 | THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 45 | THCudaDoubleTensor * trans, THCudaDoubleTensor * top_count, 46 | THCudaDoubleTensor * input_grad, THCudaDoubleTensor * trans_grad, 47 | const int no_trans, 48 | const double spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const double trans_std); 55 | 56 | 57 | // #ifdef __cplusplus 58 | // } 59 | // #endif 60 | 61 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 6 | THDoubleTensor *bias, THDoubleTensor *ones, 7 | THDoubleTensor *offset, THDoubleTensor *mask, 8 | THDoubleTensor *output, THDoubleTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 17 | THDoubleTensor *bias, THDoubleTensor *ones, 18 | THDoubleTensor *offset, THDoubleTensor *mask, 19 | THDoubleTensor *output, THDoubleTensor *columns, 20 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 21 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 22 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 2 | THDoubleTensor *bias, THDoubleTensor *ones, 3 | THDoubleTensor *offset, THDoubleTensor *mask, 4 | THDoubleTensor *output, THDoubleTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | 
const int deformable_group); 9 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 10 | THDoubleTensor *bias, THDoubleTensor *ones, 11 | THDoubleTensor *offset, THDoubleTensor *mask, 12 | THDoubleTensor *output, THDoubleTensor *columns, 13 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 14 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 15 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _transpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., 
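# _gather_feat / _transpose_and_gather_feat above pick, for every object, the
# C-dimensional vector stored at that object's flattened center index in a
# (B, C, H, W) head output. A minimal, self-contained sketch of the same
# permute/view/gather steps (all values below are illustrative):
import torch

B, C, H, W = 1, 2, 4, 4
feat = torch.arange(B * C * H * W, dtype=torch.float32).view(B, C, H, W)
y, x = 2, 3
ind = torch.tensor([[y * W + x]])                      # (B, max_objs), index = y * W + x
flat = feat.permute(0, 2, 3, 1).contiguous().view(B, -1, C)
picked = flat.gather(1, ind.unsqueeze(2).expand(B, ind.size(1), C))
assert torch.equal(picked[0, 0], feat[0, :, y, x])     # per-object feature vector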
::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /src/lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from models.data_parallel import DataParallel 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class ModelWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModelWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | class BaseTrainer(object): 24 | def __init__( 25 | self, opt, model, optimizer=None): 26 | self.opt = opt 27 | self.optimizer = optimizer 28 | self.loss_stats, self.loss = self._get_losses(opt) 29 | self.model_with_loss = ModelWithLoss(model, self.loss) 30 | 31 | def set_device(self, gpus, chunk_sizes, device): 32 | if len(gpus) > 1: 33 | self.model_with_loss = DataParallel( 34 | self.model_with_loss, device_ids=gpus, 35 | chunk_sizes=chunk_sizes).to(device) 36 | else: 37 | self.model_with_loss = self.model_with_loss.to(device) 38 | 39 | for state in self.optimizer.state.values(): 40 | for k, v in state.items(): 41 | if isinstance(v, torch.Tensor): 42 | state[k] = v.to(device=device, non_blocking=True) 43 | 44 | def run_epoch(self, phase, epoch, data_loader): 45 | model_with_loss = self.model_with_loss 46 | if phase == 'train': 47 | model_with_loss.train() 48 | else: 49 | if len(self.opt.gpus) > 1: 50 | model_with_loss = self.model_with_loss.module 51 | model_with_loss.eval() 52 | torch.cuda.empty_cache() 53 | 54 | opt = self.opt 55 | results = {} 56 | data_time, batch_time = AverageMeter(), AverageMeter() 57 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 58 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 59 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 60 | end = time.time() 61 | for iter_id, batch in enumerate(data_loader): 62 | if iter_id >= num_iters: 63 | break 64 | data_time.update(time.time() - end) 65 | 66 | for k in batch: 67 | if k != 'meta': 68 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 69 | output, loss, loss_stats = model_with_loss(batch) 70 | loss = loss.mean() 71 | if phase == 'train': 72 | self.optimizer.zero_grad() 73 | loss.backward() 74 | self.optimizer.step() 75 | batch_time.update(time.time() - end) 76 | end = time.time() 77 | 78 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 79 | epoch, iter_id, num_iters, phase=phase, 80 | total=bar.elapsed_td, eta=bar.eta_td) 81 | for l in avg_loss_stats: 82 | 
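# flip_lr above mirrors a keypoint heatmap along the width axis and then swaps
# the channels of left/right joint pairs so each map stays semantically correct
# after the flip. A quick check with the usual 17-joint COCO pairing (the exact
# flip_idx lives in the dataset definition, e.g. coco_hp.py; the import assumes
# src/lib is on sys.path, as _init_paths.py arranges):
import torch
from models.utils import flip_lr

flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8],
            [9, 10], [11, 12], [13, 14], [15, 16]]     # left/right channel pairs
hm = torch.rand(1, 17, 8, 8)
flipped = flip_lr(hm, flip_idx)
# channel 5 of the flipped map equals the width-reversed channel 6 of the original
assert torch.allclose(flipped[0, 5], torch.flip(hm[0, 6], [1]))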
avg_loss_stats[l].update( 83 | loss_stats[l].mean().item(), batch['input'].size(0)) 84 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 85 | if not opt.hide_data_time: 86 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 87 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 88 | if opt.print_iter > 0: 89 | if iter_id % opt.print_iter == 0: 90 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 91 | else: 92 | bar.next() 93 | 94 | if opt.debug > 0: 95 | self.debug(batch, output, iter_id) 96 | 97 | if opt.test: 98 | self.save_result(output, batch, results) 99 | del output, loss, loss_stats 100 | 101 | bar.finish() 102 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 103 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 104 | return ret, results 105 | 106 | def debug(self, batch, output, iter_id): 107 | raise NotImplementedError 108 | 109 | def save_result(self, output, batch, results): 110 | raise NotImplementedError 111 | 112 | def _get_losses(self, opt): 113 | raise NotImplementedError 114 | 115 | def val(self, epoch, data_loader): 116 | return self.run_epoch('val', epoch, data_loader) 117 | 118 | def train(self, epoch, data_loader): 119 | return self.run_epoch('train', epoch, data_loader) -------------------------------------------------------------------------------- /src/lib/trains/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss 9 | from models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss 10 | from models.decode import ctdet_decode 11 | from models.utils import _sigmoid 12 | from utils.debugger import Debugger 13 | from utils.post_process import ctdet_post_process 14 | from utils.oracle_utils import gen_oracle_map 15 | from .base_trainer import BaseTrainer 16 | 17 | class CtdetLoss(torch.nn.Module): 18 | def __init__(self, opt): 19 | super(CtdetLoss, self).__init__() 20 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 21 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 22 | RegLoss() if opt.reg_loss == 'sl1' else None 23 | self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \ 24 | NormRegL1Loss() if opt.norm_wh else \ 25 | RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg 26 | self.opt = opt 27 | 28 | def forward(self, outputs, batch): 29 | opt = self.opt 30 | hm_loss, wh_loss, off_loss = 0, 0, 0 31 | for s in range(opt.num_stacks): 32 | output = outputs[s] 33 | if not opt.mse_loss: 34 | output['hm'] = _sigmoid(output['hm']) 35 | 36 | if opt.eval_oracle_hm: 37 | output['hm'] = batch['hm'] 38 | if opt.eval_oracle_wh: 39 | output['wh'] = torch.from_numpy(gen_oracle_map( 40 | batch['wh'].detach().cpu().numpy(), 41 | batch['ind'].detach().cpu().numpy(), 42 | output['wh'].shape[3], output['wh'].shape[2])).to(opt.device) 43 | if opt.eval_oracle_offset: 44 | output['reg'] = torch.from_numpy(gen_oracle_map( 45 | batch['reg'].detach().cpu().numpy(), 46 | batch['ind'].detach().cpu().numpy(), 47 | output['reg'].shape[3], output['reg'].shape[2])).to(opt.device) 48 | 49 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 50 | if opt.wh_weight > 0: 51 | if opt.dense_wh: 52 | mask_weight = batch['dense_wh_mask'].sum() + 1e-4 53 | wh_loss += ( 54 | self.crit_wh(output['wh'] * 
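# self.crit = FocalLoss() comes from models/losses.py, which is not part of this
# excerpt. A condensed, independent sketch of the penalty-reduced pixel-wise
# focal loss in the CornerNet/CenterNet formulation it follows (the _sigmoid
# clamp above keeps pred strictly inside (0, 1) so the logs stay finite):
import torch

def heatmap_focal_loss(pred, gt):
    """pred: sigmoid heatmap in (0, 1); gt: Gaussian-splatted ground truth."""
    pos = gt.eq(1).float()
    neg = gt.lt(1).float()
    neg_weights = torch.pow(1 - gt, 4)        # down-weight pixels near a center
    pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos
    neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg
    num_pos = pos.sum()
    if num_pos == 0:
        return -neg_loss.sum()
    return -(pos_loss.sum() + neg_loss.sum()) / num_pos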
batch['dense_wh_mask'], 55 | batch['dense_wh'] * batch['dense_wh_mask']) / 56 | mask_weight) / opt.num_stacks 57 | elif opt.cat_spec_wh: 58 | wh_loss += self.crit_wh( 59 | output['wh'], batch['cat_spec_mask'], 60 | batch['ind'], batch['cat_spec_wh']) / opt.num_stacks 61 | else: 62 | wh_loss += self.crit_reg( 63 | output['wh'], batch['reg_mask'], 64 | batch['ind'], batch['wh']) / opt.num_stacks 65 | 66 | if opt.reg_offset and opt.off_weight > 0: 67 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'], 68 | batch['ind'], batch['reg']) / opt.num_stacks 69 | 70 | loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \ 71 | opt.off_weight * off_loss 72 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 73 | 'wh_loss': wh_loss, 'off_loss': off_loss} 74 | return loss, loss_stats 75 | 76 | class CtdetTrainer(BaseTrainer): 77 | def __init__(self, opt, model, optimizer=None): 78 | super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer) 79 | 80 | def _get_losses(self, opt): 81 | loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss'] 82 | loss = CtdetLoss(opt) 83 | return loss_states, loss 84 | 85 | def debug(self, batch, output, iter_id): 86 | opt = self.opt 87 | reg = output['reg'] if opt.reg_offset else None 88 | dets = ctdet_decode( 89 | output['hm'], output['wh'], reg=reg, 90 | cat_spec_wh=opt.cat_spec_wh, K=opt.K) 91 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 92 | dets[:, :, :4] *= opt.down_ratio 93 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 94 | dets_gt[:, :, :4] *= opt.down_ratio 95 | for i in range(1): 96 | debugger = Debugger( 97 | dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme) 98 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 99 | img = np.clip((( 100 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 101 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 102 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 103 | debugger.add_blend_img(img, pred, 'pred_hm') 104 | debugger.add_blend_img(img, gt, 'gt_hm') 105 | debugger.add_img(img, img_id='out_pred') 106 | for k in range(len(dets[i])): 107 | if dets[i, k, 4] > opt.center_thresh: 108 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 109 | dets[i, k, 4], img_id='out_pred') 110 | 111 | debugger.add_img(img, img_id='out_gt') 112 | for k in range(len(dets_gt[i])): 113 | if dets_gt[i, k, 4] > opt.center_thresh: 114 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 115 | dets_gt[i, k, 4], img_id='out_gt') 116 | 117 | if opt.debug == 4: 118 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 119 | else: 120 | debugger.show_all_imgs(pause=True) 121 | 122 | def save_result(self, output, batch, results): 123 | reg = output['reg'] if self.opt.reg_offset else None 124 | dets = ctdet_decode( 125 | output['hm'], output['wh'], reg=reg, 126 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 127 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 128 | dets_out = ctdet_post_process( 129 | dets.copy(), batch['meta']['c'].cpu().numpy(), 130 | batch['meta']['s'].cpu().numpy(), 131 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1]) 132 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] -------------------------------------------------------------------------------- /src/lib/trains/exdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
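# The indexing in debug()/save_result() above implies that each row returned by
# ctdet_decode (models/decode.py, not shown in this excerpt) is laid out as
# [x1, y1, x2, y2, score, class] in output-map coordinates, which is why the
# first four columns are multiplied by opt.down_ratio (4 by default) before
# drawing. An illustrative unpack with a stand-in tensor:
import torch

dets = torch.tensor([[[10., 12., 30., 40., 0.83, 2.]]])   # (batch, K, 6) stand-in
x1, y1, x2, y2 = dets[0, 0, :4] * 4        # heatmap coords -> network-input coords
score = dets[0, 0, 4].item()               # compared against opt.center_thresh
cls_id = int(dets[0, 0, -1])               # 0-based class index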
absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | import cv2 8 | import sys 9 | import time 10 | from utils.debugger import Debugger 11 | from models.data_parallel import DataParallel 12 | from models.losses import FocalLoss, RegL1Loss 13 | from models.decode import agnex_ct_decode, exct_decode 14 | from models.utils import _sigmoid 15 | from .base_trainer import BaseTrainer 16 | 17 | class ExdetLoss(torch.nn.Module): 18 | def __init__(self, opt): 19 | super(ExdetLoss, self).__init__() 20 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 21 | self.crit_reg = RegL1Loss() 22 | self.opt = opt 23 | self.parts = ['t', 'l', 'b', 'r', 'c'] 24 | 25 | def forward(self, outputs, batch): 26 | opt = self.opt 27 | hm_loss, reg_loss = 0, 0 28 | for s in range(opt.num_stacks): 29 | output = outputs[s] 30 | for p in self.parts: 31 | tag = 'hm_{}'.format(p) 32 | output[tag] = _sigmoid(output[tag]) 33 | hm_loss += self.crit(output[tag], batch[tag]) / opt.num_stacks 34 | if p != 'c' and opt.reg_offset and opt.off_weight > 0: 35 | reg_loss += self.crit_reg(output['reg_{}'.format(p)], 36 | batch['reg_mask'], 37 | batch['ind_{}'.format(p)], 38 | batch['reg_{}'.format(p)]) / opt.num_stacks 39 | loss = opt.hm_weight * hm_loss + opt.off_weight * reg_loss 40 | loss_stats = {'loss': loss, 'off_loss': reg_loss, 'hm_loss': hm_loss} 41 | return loss, loss_stats 42 | 43 | class ExdetTrainer(BaseTrainer): 44 | def __init__(self, opt, model, optimizer=None): 45 | super(ExdetTrainer, self).__init__(opt, model, optimizer=optimizer) 46 | self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode 47 | 48 | def _get_losses(self, opt): 49 | loss_states = ['loss', 'hm_loss', 'off_loss'] 50 | loss = ExdetLoss(opt) 51 | return loss_states, loss 52 | 53 | def debug(self, batch, output, iter_id): 54 | opt = self.opt 55 | detections = self.decode(output['hm_t'], output['hm_l'], 56 | output['hm_b'], output['hm_r'], 57 | output['hm_c']).detach().cpu().numpy() 58 | detections[:, :, :4] *= opt.input_res / opt.output_res 59 | for i in range(1): 60 | debugger = Debugger( 61 | dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme) 62 | pred_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8) 63 | gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8) 64 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 65 | img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8) 66 | for p in self.parts: 67 | tag = 'hm_{}'.format(p) 68 | pred = debugger.gen_colormap(output[tag][i].detach().cpu().numpy()) 69 | gt = debugger.gen_colormap(batch[tag][i].detach().cpu().numpy()) 70 | if p != 'c': 71 | pred_hm = np.maximum(pred_hm, pred) 72 | gt_hm = np.maximum(gt_hm, gt) 73 | if p == 'c' or opt.debug > 2: 74 | debugger.add_blend_img(img, pred, 'pred_{}'.format(p)) 75 | debugger.add_blend_img(img, gt, 'gt_{}'.format(p)) 76 | debugger.add_blend_img(img, pred_hm, 'pred') 77 | debugger.add_blend_img(img, gt_hm, 'gt') 78 | debugger.add_img(img, img_id='out') 79 | for k in range(len(detections[i])): 80 | if detections[i, k, 4] > 0.1: 81 | debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1], 82 | detections[i, k, 4], img_id='out') 83 | if opt.debug == 4: 84 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 85 | else: 86 | debugger.show_all_imgs(pause=True) -------------------------------------------------------------------------------- 
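# ExdetLoss above expects one heatmap per extreme point plus the center, and an
# offset map for each of the four extremes. A sketch of the per-stack output
# dictionary those loops iterate over (80 classes and a 128x128 output map are
# assumed purely for illustration):
import torch

parts = ['t', 'l', 'b', 'r', 'c']          # top, left, bottom, right, center
output = {'hm_{}'.format(p): torch.zeros(1, 80, 128, 128) for p in parts}
output.update({'reg_{}'.format(p): torch.zeros(1, 2, 128, 128)
               for p in parts if p != 'c'})
print(sorted(output.keys()))
# hm_b, hm_c, hm_l, hm_r, hm_t, reg_b, reg_l, reg_r, reg_t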
/src/lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .ctdet import CtdetTrainer 6 | from .ddd import DddTrainer 7 | from .exdet import ExdetTrainer 8 | from .multi_pose import MultiPoseTrainer 9 | 10 | train_factory = { 11 | 'exdet': ExdetTrainer, 12 | 'ddd': DddTrainer, 13 | 'ctdet': CtdetTrainer, 14 | 'multi_pose': MultiPoseTrainer, 15 | } 16 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def compute_box_3d(dim, location, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners) 22 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1) 23 | return corners_3d.transpose(1, 0) 24 | 25 | def project_to_image(pts_3d, P): 26 | # pts_3d: n x 3 27 | # P: 3 x 4 28 | # return: n x 2 29 | pts_3d_homo = np.concatenate( 30 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 31 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 32 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 33 | # import pdb; pdb.set_trace() 34 | return pts_2d 35 | 36 | def compute_orientation_3d(dim, location, rotation_y): 37 | # dim: 3 38 | # location: 3 39 | # rotation_y: 1 40 | # return: 2 x 3 41 | c, s = np.cos(rotation_y), np.sin(rotation_y) 42 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 43 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 44 | orientation_3d = np.dot(R, orientation_3d) 45 | orientation_3d = orientation_3d + \ 46 | np.array(location, dtype=np.float32).reshape(3, 1) 47 | return orientation_3d.transpose(1, 0) 48 | 49 | def draw_box_3d(image, corners, c=(0, 0, 255)): 50 | face_idx = [[0,1,5,4], 51 | [1,2,6, 5], 52 | [2,3,7,6], 53 | [3,0,4,7]] 54 | for ind_f in range(3, -1, -1): 55 | f = face_idx[ind_f] 56 | for j in range(4): 57 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 58 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), c, 2, lineType=cv2.LINE_AA) 59 | if ind_f == 0: 60 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 61 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 62 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 63 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 64 | return image 65 | 66 | def unproject_2d_to_3d(pt_2d, depth, P): 67 | # pts_2d: 2 68 | # depth: 1 
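# A quick use of compute_box_3d / project_to_image above: build the 8 corners
# of a KITTI-style box (dim = [h, w, l] in meters, location = bottom-center in
# camera coordinates, rotation_y = yaw) and project them with a 3x4 camera
# matrix. The numbers are illustrative (a rounded KITTI P2), and the import
# assumes src/lib is on sys.path as _init_paths.py arranges:
import numpy as np
from utils.ddd_utils import compute_box_3d, project_to_image

P = np.array([[707.0, 0.0, 604.0, 45.75],
              [0.0, 707.0, 180.5, -0.345],
              [0.0, 0.0, 1.0, 0.005]], dtype=np.float32)
dim, location, rotation_y = [1.5, 1.6, 3.9], [0.0, 1.6, 12.0], 0.1
corners_3d = compute_box_3d(dim, location, rotation_y)   # (8, 3), camera frame
corners_2d = project_to_image(corners_3d, P)             # (8, 2), pixel coords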
69 | # P: 3 x 4 70 | # return: 3 71 | z = depth - P[2, 3] 72 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 73 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 74 | pt_3d = np.array([x, y, z], dtype=np.float32) 75 | return pt_3d 76 | 77 | def alpha2rot_y(alpha, x, cx, fx): 78 | """ 79 | Get rotation_y by alpha + theta - 180 80 | alpha : Observation angle of object, ranging [-pi..pi] 81 | x : Object center x to the camera center (x-W/2), in pixels 82 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 83 | """ 84 | rot_y = alpha + np.arctan2(x - cx, fx) 85 | if rot_y > np.pi: 86 | rot_y -= 2 * np.pi 87 | if rot_y < -np.pi: 88 | rot_y += 2 * np.pi 89 | return rot_y 90 | 91 | def rot_y2alpha(rot_y, x, cx, fx): 92 | """ 93 | Get rotation_y by alpha + theta - 180 94 | alpha : Observation angle of object, ranging [-pi..pi] 95 | x : Object center x to the camera center (x-W/2), in pixels 96 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 97 | """ 98 | alpha = rot_y - np.arctan2(x - cx, fx) 99 | if alpha > np.pi: 100 | alpha -= 2 * np.pi 101 | if alpha < -np.pi: 102 | alpha += 2 * np.pi 103 | return alpha 104 | 105 | 106 | def ddd2locrot(center, alpha, dim, depth, calib): 107 | # single image 108 | locations = unproject_2d_to_3d(center, depth, calib) 109 | locations[1] += dim[0] / 2 110 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 111 | return locations, rotation_y 112 | 113 | def project_3d_bbox(location, dim, rotation_y, calib): 114 | box_3d = compute_box_3d(dim, location, rotation_y) 115 | box_2d = project_to_image(box_3d, calib) 116 | return box_2d 117 | 118 | 119 | if __name__ == '__main__': 120 | calib = np.array( 121 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 122 | [0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 123 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 124 | dtype=np.float32) 125 | alpha = -0.20 126 | tl = np.array([712.40, 143.00], dtype=np.float32) 127 | br = np.array([810.73, 307.92], dtype=np.float32) 128 | ct = (tl + br) / 2 129 | rotation_y = 0.01 130 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 131 | print('rotation_y', rotation_y) -------------------------------------------------------------------------------- /src/lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import numba 7 | 8 | @numba.jit(nopython=True, nogil=True) 9 | def gen_oracle_map(feat, ind, w, h): 10 | # feat: B x maxN x featDim 11 | # ind: B x maxN 12 | batch_size = feat.shape[0] 13 | max_objs = feat.shape[1] 14 | feat_dim = feat.shape[2] 15 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 16 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 17 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 18 | for i in range(batch_size): 19 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 20 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 21 | head, tail = 0, 0 22 | for j in range(max_objs): 23 | if ind[i][j] > 0: 24 | x, y = ind[i][j] % w, ind[i][j] // w 25 | out[i, :, y, x] = feat[i][j] 26 | vis[i, y, x] = 1 27 | queue_ind[tail] = x, y 28 | queue_feat[tail] = feat[i][j] 29 | tail += 1 30 | while tail - head > 0: 31 | x, y = queue_ind[head] 32 | f = 
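# alpha (observation angle) and rotation_y (global yaw) differ only by the
# viewing-ray angle arctan2(x - cx, fx), so alpha2rot_y and rot_y2alpha above
# invert each other whenever the result stays inside [-pi, pi]. Values are
# illustrative; the import again assumes src/lib is on sys.path:
import numpy as np
from utils.ddd_utils import alpha2rot_y, rot_y2alpha

cx, fx = 604.0, 707.0
alpha, x = -0.20, 761.5                    # object center column in pixels
rot_y = alpha2rot_y(alpha, x, cx, fx)
assert np.isclose(rot_y2alpha(rot_y, x, cx, fx), alpha)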
queue_feat[head] 33 | head += 1 34 | for (dx, dy) in ds: 35 | xx, yy = x + dx, y + dy 36 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 37 | out[i, :, yy, xx] = f 38 | vis[i, yy, xx] = 1 39 | queue_ind[tail] = xx, yy 40 | queue_feat[tail] = f 41 | tail += 1 42 | return out -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | from .ddd_utils import ddd2locrot 8 | 9 | 10 | def get_pred_depth(depth): 11 | return depth 12 | 13 | def get_alpha(rot): 14 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 15 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 16 | # return rot[:, 0] 17 | idx = rot[:, 1] > rot[:, 5] 18 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 19 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 20 | return alpha1 * idx + alpha2 * (1 - idx) 21 | 22 | 23 | def ddd_post_process_2d(dets, c, s, opt): 24 | # dets: batch x max_dets x dim 25 | # return 1-based class det list 26 | ret = [] 27 | include_wh = dets.shape[2] > 16 28 | for i in range(dets.shape[0]): 29 | top_preds = {} 30 | dets[i, :, :2] = transform_preds( 31 | dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h)) 32 | classes = dets[i, :, -1] 33 | for j in range(opt.num_classes): 34 | inds = (classes == j) 35 | top_preds[j + 1] = np.concatenate([ 36 | dets[i, inds, :3].astype(np.float32), 37 | get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32), 38 | get_pred_depth(dets[i, inds, 11:12]).astype(np.float32), 39 | dets[i, inds, 12:15].astype(np.float32)], axis=1) 40 | if include_wh: 41 | top_preds[j + 1] = np.concatenate([ 42 | top_preds[j + 1], 43 | transform_preds( 44 | dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h)) 45 | .astype(np.float32)], axis=1) 46 | ret.append(top_preds) 47 | return ret 48 | 49 | def ddd_post_process_3d(dets, calibs): 50 | # dets: batch x max_dets x dim 51 | # return 1-based class det list 52 | ret = [] 53 | for i in range(len(dets)): 54 | preds = {} 55 | for cls_ind in dets[i].keys(): 56 | preds[cls_ind] = [] 57 | for j in range(len(dets[i][cls_ind])): 58 | center = dets[i][cls_ind][j][:2] 59 | score = dets[i][cls_ind][j][2] 60 | alpha = dets[i][cls_ind][j][3] 61 | depth = dets[i][cls_ind][j][4] 62 | dimensions = dets[i][cls_ind][j][5:8] 63 | wh = dets[i][cls_ind][j][8:10] 64 | locations, rotation_y = ddd2locrot( 65 | center, alpha, dimensions, depth, calibs[0]) 66 | bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2, 67 | center[0] + wh[0] / 2, center[1] + wh[1] / 2] 68 | pred = [alpha] + bbox + dimensions.tolist() + \ 69 | locations.tolist() + [rotation_y, score] 70 | preds[cls_ind].append(pred) 71 | preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32) 72 | ret.append(preds) 73 | return ret 74 | 75 | def ddd_post_process(dets, c, s, calibs, opt): 76 | # dets: batch x max_dets x dim 77 | # return 1-based class det list 78 | dets = ddd_post_process_2d(dets, c, s, opt) 79 | dets = ddd_post_process_3d(dets, calibs) 80 | return dets 81 | 82 | 83 | def ctdet_post_process(dets, c, s, h, w, num_classes): 84 | # dets: batch x max_dets x dim 85 | # return 1-based class det dict 86 | ret = [] 87 | for i in range(dets.shape[0]): 88 | top_preds = {} 89 | dets[i, :, :2] = 
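# get_alpha above decodes the 8-number multi-bin orientation head: whichever
# bin wins the rot[:, 1] vs rot[:, 5] comparison contributes arctan2(sin, cos)
# plus that bin's center (-pi/2 or +pi/2). A round-trip with hand-built values
# (the import path assumes src/lib is on sys.path):
import numpy as np
from utils.post_process import get_alpha

alpha = -0.2
rot = np.zeros((1, 8), dtype=np.float32)
rot[0, 0], rot[0, 1] = 0.1, 0.9            # bin-1 classifier wins
rot[0, 2] = np.sin(alpha + 0.5 * np.pi)
rot[0, 3] = np.cos(alpha + 0.5 * np.pi)
rot[0, 4], rot[0, 5] = 0.9, 0.1
print(get_alpha(rot))                      # ~[-0.2]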
transform_preds( 90 | dets[i, :, 0:2], c[i], s[i], (w, h)) 91 | dets[i, :, 2:4] = transform_preds( 92 | dets[i, :, 2:4], c[i], s[i], (w, h)) 93 | classes = dets[i, :, -1] 94 | for j in range(num_classes): 95 | inds = (classes == j) 96 | top_preds[j + 1] = np.concatenate([ 97 | dets[i, inds, :4].astype(np.float32), 98 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 99 | ret.append(top_preds) 100 | return ret 101 | 102 | 103 | def multi_pose_post_process(dets, c, s, h, w): 104 | # dets: batch x max_dets x 40 105 | # return list of 39 in image coord 106 | ret = [] 107 | for i in range(dets.shape[0]): 108 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 109 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 110 | top_preds = np.concatenate( 111 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 112 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist() 113 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 114 | return ret 115 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | 9 | import torch 10 | import torch.utils.data 11 | from opts import opts 12 | from models.model import create_model, load_model, save_model 13 | from models.data_parallel import DataParallel 14 | from logger import Logger 15 | from datasets.dataset_factory import get_dataset 16 | from trains.train_factory import train_factory 17 | 18 | 19 | def main(opt): 20 | torch.manual_seed(opt.seed) 21 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test 22 | Dataset = get_dataset(opt.dataset, opt.task) 23 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 24 | print(opt) 25 | 26 | logger = Logger(opt) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 29 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') 30 | 31 | print('Creating model...') 32 | model = create_model(opt.arch, opt.heads, opt.head_conv) 33 | optimizer = torch.optim.Adam(model.parameters(), opt.lr) 34 | start_epoch = 0 35 | if opt.load_model != '': 36 | model, optimizer, start_epoch = load_model( 37 | model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) 38 | 39 | Trainer = train_factory[opt.task] 40 | trainer = Trainer(opt, model, optimizer) 41 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) 42 | 43 | print('Setting up data...') 44 | val_loader = torch.utils.data.DataLoader( 45 | Dataset(opt, 'val'), 46 | batch_size=1, 47 | shuffle=False, 48 | num_workers=1, 49 | pin_memory=True 50 | ) 51 | 52 | if 
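# multi_pose_post_process above packs each person as 39 numbers under class 1:
# [x1, y1, x2, y2, score, x_0, y_0, ..., x_16, y_16] with 17 COCO joints. A tiny
# unpack of one such row, using a stand-in array:
import numpy as np

row = np.arange(39, dtype=np.float32)      # placeholder for one returned person
bbox, score = row[:4], row[4]
keypoints = row[5:39].reshape(17, 2)       # (x, y) per joint, image coordinates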
opt.test: 53 | _, preds = trainer.val(0, val_loader) 54 | val_loader.dataset.run_eval(preds, opt.save_dir) 55 | return 56 | 57 | train_loader = torch.utils.data.DataLoader( 58 | Dataset(opt, 'train'), 59 | batch_size=opt.batch_size, 60 | shuffle=True, 61 | num_workers=opt.num_workers, 62 | pin_memory=True, 63 | drop_last=True 64 | ) 65 | 66 | print('Starting training...') 67 | best = 1e10 68 | for epoch in range(start_epoch + 1, opt.num_epochs + 1): 69 | mark = epoch if opt.save_all else 'last' 70 | log_dict_train, _ = trainer.train(epoch, train_loader) 71 | logger.write('epoch: {} |'.format(epoch)) 72 | for k, v in log_dict_train.items(): 73 | logger.scalar_summary('train_{}'.format(k), v, epoch) 74 | logger.write('{} {:8f} | '.format(k, v)) 75 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: 76 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 77 | epoch, model, optimizer) 78 | with torch.no_grad(): 79 | log_dict_val, preds = trainer.val(epoch, val_loader) 80 | for k, v in log_dict_val.items(): 81 | logger.scalar_summary('val_{}'.format(k), v, epoch) 82 | logger.write('{} {:8f} | '.format(k, v)) 83 | if log_dict_val[opt.metric] < best: 84 | best = log_dict_val[opt.metric] 85 | save_model(os.path.join(opt.save_dir, 'model_best.pth'), 86 | epoch, model) 87 | else: 88 | save_model(os.path.join(opt.save_dir, 'model_last.pth'), 89 | epoch, model, optimizer) 90 | logger.write('\n') 91 | if epoch in opt.lr_step: 92 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 93 | epoch, model, optimizer) 94 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) 95 | print('Drop LR to', lr) 96 | for param_group in optimizer.param_groups: 97 | param_group['lr'] = lr 98 | logger.close() 99 | 100 | if __name__ == '__main__': 101 | opt = opts().parse() 102 | main(opt) -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import json 9 | import cv2 10 | import numpy as np 11 | import time 12 | from progress.bar import Bar 13 | import torch 14 | 15 | from external.nms import soft_nms 16 | from opts import opts 17 | from logger import Logger 18 | from utils.utils import AverageMeter 19 | from datasets.dataset_factory import dataset_factory 20 | from detectors.detector_factory import detector_factory 21 | 22 | class PrefetchDataset(torch.utils.data.Dataset): 23 | def __init__(self, opt, dataset, pre_process_func): 24 | self.images = dataset.images 25 | self.load_image_func = dataset.coco.loadImgs 26 | self.img_dir = dataset.img_dir 27 | self.pre_process_func = pre_process_func 28 | self.opt = opt 29 | 30 | def __getitem__(self, index): 31 | img_id = self.images[index] 32 | img_info = self.load_image_func(ids=[img_id])[0] 33 | img_path = os.path.join(self.img_dir, img_info['file_name']) 34 | image = cv2.imread(img_path) 35 | images, meta = {}, {} 36 | for scale in opt.test_scales: 37 | if opt.task == 'ddd': 38 | images[scale], meta[scale] = self.pre_process_func( 39 | image, scale, img_info['calib']) 40 | else: 41 | images[scale], meta[scale] = self.pre_process_func(image, scale) 42 | return img_id, {'images': images, 'image': image, 'meta': meta} 43 | 44 | def __len__(self): 45 | return len(self.images) 46 | 47 | def prefetch_test(opt): 48 | 
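# The step schedule in main() above: at every epoch listed in opt.lr_step the
# learning rate is reset to opt.lr * 0.1 ** (position in the list + 1), i.e. one
# extra factor of 10 per listed epoch. Shown with typical ctdet defaults (the
# exact values come from opts.py and the experiment scripts; illustrative here):
base_lr, lr_step = 1.25e-4, [90, 120]
for epoch in lr_step:
    print(epoch, base_lr * (0.1 ** (lr_step.index(epoch) + 1)))
    # 90 -> 1.25e-05, 120 -> 1.25e-06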
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 49 | 50 | Dataset = dataset_factory[opt.dataset] 51 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 52 | print(opt) 53 | Logger(opt) 54 | Detector = detector_factory[opt.task] 55 | 56 | split = 'val' if not opt.trainval else 'test' 57 | dataset = Dataset(opt, split) 58 | detector = Detector(opt) 59 | 60 | data_loader = torch.utils.data.DataLoader( 61 | PrefetchDataset(opt, dataset, detector.pre_process), 62 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True) 63 | 64 | results = {} 65 | num_iters = len(dataset) 66 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 67 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 68 | avg_time_stats = {t: AverageMeter() for t in time_stats} 69 | for ind, (img_id, pre_processed_images) in enumerate(data_loader): 70 | ret = detector.run(pre_processed_images) 71 | results[img_id.numpy().astype(np.int32)[0]] = ret['results'] 72 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 73 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 74 | for t in avg_time_stats: 75 | avg_time_stats[t].update(ret[t]) 76 | Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format( 77 | t, tm = avg_time_stats[t]) 78 | bar.next() 79 | bar.finish() 80 | dataset.run_eval(results, opt.save_dir) 81 | 82 | def test(opt): 83 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 84 | 85 | Dataset = dataset_factory[opt.dataset] 86 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 87 | print(opt) 88 | Logger(opt) 89 | Detector = detector_factory[opt.task] 90 | 91 | split = 'val' if not opt.trainval else 'test' 92 | dataset = Dataset(opt, split) 93 | detector = Detector(opt) 94 | 95 | results = {} 96 | num_iters = len(dataset) 97 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 98 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 99 | avg_time_stats = {t: AverageMeter() for t in time_stats} 100 | for ind in range(num_iters): 101 | img_id = dataset.images[ind] 102 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 103 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 104 | 105 | if opt.task == 'ddd': 106 | ret = detector.run(img_path, img_info['calib']) 107 | else: 108 | ret = detector.run(img_path) 109 | 110 | results[img_id] = ret['results'] 111 | 112 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 113 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 114 | for t in avg_time_stats: 115 | avg_time_stats[t].update(ret[t]) 116 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 117 | bar.next() 118 | bar.finish() 119 | dataset.run_eval(results, opt.save_dir) 120 | 121 | if __name__ == '__main__': 122 | opt = opts().parse() 123 | if opt.not_prefetch_test: 124 | test(opt) 125 | else: 126 | prefetch_test(opt) -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/convert_hourglass_weight.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | MODEL_PATH = '../../models/ExtremeNet_500000.pkl' 6 | OUT_PATH = '../../models/ExtremeNet_500000.pth' 7 | 8 | import torch 9 | state_dict = torch.load(MODEL_PATH) 10 | key_map = {'t_heats': 'hm_t', 'l_heats': 'hm_l', 'b_heats': 'hm_b', \ 11 | 'r_heats': 'hm_r', 'ct_heats': 'hm_c', \ 12 | 't_regrs': 'reg_t', 'l_regrs': 'reg_l', \ 13 | 'b_regrs': 'reg_b', 'r_regrs': 'reg_r'} 14 | 15 | out = {} 16 | for k in state_dict.keys(): 17 | changed = False 18 | for m in key_map.keys(): 19 | if m in k: 20 | if 'ct_heats' in k and m == 't_heats': 21 | continue 22 | new_k = k.replace(m, key_map[m]) 23 | out[new_k] = state_dict[k] 24 | changed = True 25 | print('replace {} to {}'.format(k, new_k)) 26 | if not changed: 27 | out[k] = state_dict[k] 28 | data = {'epoch': 0, 29 | 'state_dict': out} 30 | torch.save(data, OUT_PATH) 31 | -------------------------------------------------------------------------------- /src/tools/eval_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = this_dir + '../../data/coco/annotations/instances_val2017.json' 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "bbox") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/tools/eval_coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = this_dir + '../../data/coco/annotations/person_keypoints_val2017.json' 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "keypoints") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | coco_eval = COCOeval(coco, dets, "bbox") 27 | coco_eval.evaluate() 28 | coco_eval.accumulate() 29 | coco_eval.summarize() 30 | 31 | -------------------------------------------------------------------------------- /src/tools/get_kitti.sh: -------------------------------------------------------------------------------- 1 | mkdir kitti 2 | cd kitti 3 | wget http://www.cvlibs.net/download.php?file=data_object_image_2.zip 4 | wget http://www.cvlibs.net/download.php?file=data_object_label_2.zip 5 | wget http://www.cvlibs.net/download.php?file=data_object_calib.zip 6 | unzip data_object_image_2.zip 7 | unzip data_object_label_2.zip 8 | unzip data_object_calib.zip 9 | 10 | 
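# eval_coco.py above hands sys.argv[1] straight to coco.loadRes(), so the
# prediction file must be standard COCO detection results: one dict per box
# with an xywh bbox. A minimal way to produce such a file (all values and the
# file name are illustrative):
import json

preds = [{"image_id": 139, "category_id": 1,
          "bbox": [100.0, 50.0, 30.0, 60.0],     # [x, y, width, height]
          "score": 0.93}]
with open("results.json", "w") as f:
    json.dump(preds, f)
# then:  python eval_coco.py results.json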
-------------------------------------------------------------------------------- /src/tools/get_pascal_voc.sh: -------------------------------------------------------------------------------- 1 | mkdir voc 2 | cd voc 3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 4 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 5 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 6 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 7 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar 8 | tar xvf VOCtrainval_06-Nov-2007.tar 9 | tar xvf VOCtest_06-Nov-2007.tar 10 | tar xvf VOCdevkit_08-Jun-2007.tar 11 | tar xvf VOCtrainval_11-May-2012.tar 12 | tar xvf VOCdevkit_18-May-2011.tar 13 | rm VOCtrainval_06-Nov-2007.tar 14 | rm VOCtest_06-Nov-2007.tar 15 | rm VOCdevkit_08-Jun-2007.tar 16 | rm VOCtrainval_11-May-2012.tar 17 | rm VOCdevkit_18-May-2011.tar 18 | mkdir images 19 | cp VOCdevkit/VOC2007/JPEGImages/* images/ 20 | cp VOCdevkit/VOC2012/JPEGImages/* images/ 21 | wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip 22 | unzip PASCAL_VOC.zip 23 | rm PASCAL_VOC.zip 24 | mv PASCAL_VOC annotations/ 25 | cd .. 26 | python merge_pascal_json.py 27 | -------------------------------------------------------------------------------- /src/tools/kitti_eval/README.md: -------------------------------------------------------------------------------- 1 | # kitti_eval 2 | 3 | `evaluate_object_3d_offline.cpp`evaluates your KITTI detection locally on your own computer using your validation data selected from KITTI training dataset, with the following metrics: 4 | 5 | - overlap on image (AP) 6 | - oriented overlap on image (AOS) 7 | - overlap on ground-plane (AP) 8 | - overlap in 3D (AP) 9 | 10 | Compile `evaluate_object_3d_offline.cpp` with dependency of Boost and Linux `dirent.h` (You should already have it under most Linux). 11 | 12 | Run the evalutaion by: 13 | 14 | ./evaluate_object_3d_offline groundtruth_dir result_dir 15 | 16 | Note that you don't have to detect over all KITTI training data. The evaluator only evaluates samples whose result files exist. 17 | 18 | 19 | ### Updates 20 | 21 | - June, 2017: 22 | * Fixed the bug of detection box filtering based on min height according to KITTI's note on 25.04.2017. 23 | -------------------------------------------------------------------------------- /src/tools/kitti_eval/evaluate_object_3d_offline: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/kitti_eval/evaluate_object_3d_offline -------------------------------------------------------------------------------- /src/tools/kitti_eval/mail.h: -------------------------------------------------------------------------------- 1 | #ifndef MAIL_H 2 | #define MAIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class Mail { 9 | 10 | public: 11 | 12 | Mail (std::string email = "") { 13 | if (email.compare("")) { 14 | mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w"); 15 | fprintf(mail,"To: %s\n", email.c_str()); 16 | fprintf(mail,"From: noreply@cvlibs.net\n"); 17 | fprintf(mail,"Subject: KITTI Evaluation Benchmark\n"); 18 | fprintf(mail,"\n\n"); 19 | } else { 20 | mail = 0; 21 | } 22 | } 23 | 24 | ~Mail() { 25 | if (mail) { 26 | pclose(mail); 27 | } 28 | } 29 | 30 | void msg (const char *format, ...) 
{ 31 | va_list args; 32 | va_start(args,format); 33 | if (mail) { 34 | vfprintf(mail,format,args); 35 | fprintf(mail,"\n"); 36 | } 37 | vprintf(format,args); 38 | printf("\n"); 39 | va_end(args); 40 | } 41 | 42 | private: 43 | 44 | FILE *mail; 45 | 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /src/tools/merge_pascal_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | # ANNOT_PATH = '/home/zxy/Datasets/VOC/annotations/' 4 | ANNOT_PATH = 'voc/annotations/' 5 | OUT_PATH = ANNOT_PATH 6 | INPUT_FILES = ['pascal_train2012.json', 'pascal_val2012.json', 7 | 'pascal_train2007.json', 'pascal_val2007.json'] 8 | OUTPUT_FILE = 'pascal_trainval0712.json' 9 | KEYS = ['images', 'type', 'annotations', 'categories'] 10 | MERGE_KEYS = ['images', 'annotations'] 11 | 12 | out = {} 13 | tot_anns = 0 14 | for i, file_name in enumerate(INPUT_FILES): 15 | data = json.load(open(ANNOT_PATH + file_name, 'r')) 16 | print('keys', data.keys()) 17 | if i == 0: 18 | for key in KEYS: 19 | out[key] = data[key] 20 | print(file_name, key, len(data[key])) 21 | else: 22 | out['images'] += data['images'] 23 | for j in range(len(data['annotations'])): 24 | data['annotations'][j]['id'] += tot_anns 25 | out['annotations'] += data['annotations'] 26 | print(file_name, 'images', len(data['images'])) 27 | print(file_name, 'annotations', len(data['annotations'])) 28 | tot_anns = len(out['annotations']) 29 | print('tot', len(out['annotations'])) 30 | json.dump(out, open(OUT_PATH + OUTPUT_FILE, 'w')) 31 | -------------------------------------------------------------------------------- /src/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # Modified by Xingyi Zhou 9 | # -------------------------------------------------------- 10 | 11 | # Reval = re-eval. Re-evaluate saved detections. 
12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import os.path as osp 18 | sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib')) 19 | 20 | from model.test import apply_nms 21 | from datasets.pascal_voc import pascal_voc 22 | import pickle 23 | import os, argparse 24 | import numpy as np 25 | import json 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Re-evaluate results') 32 | parser.add_argument('detection_file', type=str) 33 | parser.add_argument('--output_dir', help='results directory', type=str) 34 | parser.add_argument('--imdb', dest='imdb_name', 35 | help='dataset to re-evaluate', 36 | default='voc_2007_test', type=str) 37 | parser.add_argument('--matlab', dest='matlab_eval', 38 | help='use matlab for evaluation', 39 | action='store_true') 40 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 41 | action='store_true') 42 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 43 | action='store_true') 44 | 45 | if len(sys.argv) == 1: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def from_dets(imdb_name, detection_file, args): 54 | imdb = pascal_voc('test', '2007') 55 | imdb.competition_mode(args.comp_mode) 56 | imdb.config['matlab_eval'] = args.matlab_eval 57 | with open(os.path.join(detection_file), 'rb') as f: 58 | if 'json' in detection_file: 59 | dets = json.load(f) 60 | else: 61 | dets = pickle.load(f, encoding='latin1') 62 | # import pdb; pdb.set_trace() 63 | if args.apply_nms: 64 | print('Applying NMS to all detections') 65 | test_nms = 0.3 66 | nms_dets = apply_nms(dets, test_nms) 67 | else: 68 | nms_dets = dets 69 | 70 | print('Evaluating detections') 71 | imdb.evaluate_detections(nms_dets) 72 | 73 | 74 | if __name__ == '__main__': 75 | args = parse_args() 76 | 77 | imdb_name = args.imdb_name 78 | from_dets(imdb_name, args.detection_file, args) 79 | -------------------------------------------------------------------------------- /src/tools/vis_pred.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import sys 4 | import cv2 5 | import numpy as np 6 | import pickle 7 | IMG_PATH = '../../data/coco/val2017/' 8 | ANN_PATH = '../../data/coco/annotations/instances_val2017.json' 9 | DEBUG = True 10 | 11 | def _coco_box_to_bbox(box): 12 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 13 | dtype=np.int32) 14 | return bbox 15 | 16 | _cat_ids = [ 17 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 18 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 19 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 20 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 21 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 22 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 23 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 24 | 82, 84, 85, 86, 87, 88, 89, 90 25 | ] 26 | num_classes = 80 27 | _classes = { 28 | ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids) 29 | } 30 | _to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)} 31 | coco = coco.COCO(ANN_PATH) 32 | CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \ 33 | for i in range(num_classes)] 34 | COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \ 35 | for _ in range(num_classes)] 36 | 37 | 38 | def add_box(image, bbox, sc, cat_id): 
39 | cat_id = _to_order[cat_id] 40 | cat_name = CAT_NAMES[cat_id] 41 | cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] 42 | color = np.array(COLORS[cat_id]).astype(np.int32).tolist() 43 | txt = '{}{:.0f}'.format(cat_name, sc * 10) 44 | if bbox[1] - cat_size[1] - 2 < 0: 45 | cv2.rectangle(image, 46 | (bbox[0], bbox[1] + 2), 47 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 48 | color, -1) 49 | cv2.putText(image, txt, 50 | (bbox[0], bbox[1] + cat_size[1] + 2), 51 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 52 | else: 53 | cv2.rectangle(image, 54 | (bbox[0], bbox[1] - cat_size[1] - 2), 55 | (bbox[0] + cat_size[0], bbox[1] - 2), 56 | color, -1) 57 | cv2.putText(image, txt, 58 | (bbox[0], bbox[1] - 2), 59 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 60 | cv2.rectangle(image, 61 | (bbox[0], bbox[1]), 62 | (bbox[2], bbox[3]), 63 | color, 2) 64 | return image 65 | 66 | if __name__ == '__main__': 67 | dets = [] 68 | img_ids = coco.getImgIds() 69 | num_images = len(img_ids) 70 | for k in range(1, len(sys.argv)): 71 | pred_path = sys.argv[k] 72 | dets.append(coco.loadRes(pred_path)) 73 | # import pdb; pdb.set_trace() 74 | for i, img_id in enumerate(img_ids): 75 | img_info = coco.loadImgs(ids=[img_id])[0] 76 | img_path = IMG_PATH + img_info['file_name'] 77 | img = cv2.imread(img_path) 78 | gt_ids = coco.getAnnIds(imgIds=[img_id]) 79 | gts = coco.loadAnns(gt_ids) 80 | gt_img = img.copy() 81 | for j, pred in enumerate(gts): 82 | bbox = _coco_box_to_bbox(pred['bbox']) 83 | cat_id = pred['category_id'] 84 | gt_img = add_box(gt_img, bbox, 0, cat_id) 85 | for k in range(len(dets)): 86 | pred_ids = dets[k].getAnnIds(imgIds=[img_id]) 87 | preds = dets[k].loadAnns(pred_ids) 88 | pred_img = img.copy() 89 | for j, pred in enumerate(preds): 90 | bbox = _coco_box_to_bbox(pred['bbox']) 91 | sc = pred['score'] 92 | cat_id = pred['category_id'] 93 | if sc > 0.2: 94 | pred_img = add_box(pred_img, bbox, sc, cat_id) 95 | cv2.imshow('pred{}'.format(k), pred_img) 96 | # cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img) 97 | cv2.imshow('gt', gt_img) 98 | # cv2.imwrite('vis/{}_gt.png'.format(i), gt_img) 99 | cv2.waitKey() 100 | # coco_eval.evaluate() 101 | # coco_eval.accumulate() 102 | # coco_eval.summarize() 103 | 104 | 105 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xinlei Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/datasets/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/datasets/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from 
__future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/model/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/model/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | def bbox_transform(ex_rois, gt_rois): 14 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 15 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 16 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 17 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 18 | 19 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 20 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 21 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 22 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 23 | 24 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 25 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 26 | targets_dw = np.log(gt_widths / ex_widths) 27 | targets_dh = np.log(gt_heights / ex_heights) 28 | 29 | targets = np.vstack( 30 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 31 | return targets 32 | 33 | 34 | def bbox_transform_inv(boxes, deltas): 35 | if boxes.shape[0] == 0: 36 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 37 | 38 | boxes = boxes.astype(deltas.dtype, copy=False) 39 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 40 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 41 | ctr_x = boxes[:, 0] + 0.5 * widths 42 | ctr_y = boxes[:, 1] + 0.5 * heights 43 | 44 | dx = deltas[:, 0::4] 45 | dy = deltas[:, 1::4] 46 | dw = deltas[:, 
2::4] 47 | dh = deltas[:, 3::4] 48 | 49 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 50 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 51 | pred_w = np.exp(dw) * widths[:, np.newaxis] 52 | pred_h = np.exp(dh) * heights[:, np.newaxis] 53 | 54 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 55 | # x1 56 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 57 | # y1 58 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 59 | # x2 60 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 61 | # y2 62 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 63 | 64 | return pred_boxes 65 | 66 | 67 | def clip_boxes(boxes, im_shape): 68 | """ 69 | Clip boxes to image boundaries. 70 | """ 71 | 72 | # x1 >= 0 73 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 74 | # y1 >= 0 75 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 76 | # x2 < im_shape[1] 77 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 78 | # y2 < im_shape[0] 79 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 80 | return boxes 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/model/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from model.config import cfg 12 | from nms.gpu_nms import gpu_nms 13 | from nms.cpu_nms import cpu_nms 14 | 15 | def nms(dets, thresh, force_cpu=False): 16 | """Dispatch to either CPU or GPU NMS implementations.""" 17 | 18 | if dets.shape[0] == 0: 19 | return [] 20 | if cfg.USE_GPU_NMS and not force_cpu: 21 | return gpu_nms(dets, thresh, device_id=0) 22 | else: 23 | return cpu_nms(dets, thresh) 24 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/nms/.gitignore -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/nms/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, 
np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- 
/src/tools/voc_eval_lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # --------------------------------------------------------
7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kind of like a weird functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can process .cu files 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _compile methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print(extra_postargs) 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | # run the customize_compiler 102 | class custom_build_ext(build_ext): 103 | def build_extensions(self): 104 | customize_compiler_for_nvcc(self.compiler) 105 | build_ext.build_extensions(self) 106 | 107 | ext_modules = [ 108 | Extension( 109 | "utils.cython_bbox", 110 | ["utils/bbox.pyx"], 111 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 112 | include_dirs = [numpy_include] 113 | ), 114 | Extension( 115 | "nms.cpu_nms", 116 | ["nms/cpu_nms.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs = [numpy_include] 119 | ), 120 | Extension('nms.gpu_nms', 121 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 122 | library_dirs=[CUDA['lib64']], 123 | libraries=['cudart'], 124 | language='c++', 125 | runtime_library_dirs=[CUDA['lib64']], 126 | # this syntax is specific to this build system 127 | # we're only going to use certain compiler args with nvcc and not with gcc 128 | # the implementation of this trick is in customize_compiler() below 129 | extra_compile_args={'gcc': ["-Wno-unused-function"], 130 | 'nvcc': ['-arch=sm_61', 131 | '--ptxas-options=-v', 132 | '-c', 133 | '--compiler-options', 134 | "'-fPIC'"]}, 135 | include_dirs = [numpy_include, CUDA['include']] 136 | ) 137 | ] 138 | 139 | setup( 140 | name='tf_faster_rcnn', 141 | ext_modules=ext_modules, 142 | # inject our custom trigger 143 | cmdclass={'build_ext': custom_build_ext}, 144 | ) 145 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] 
boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 | """Convert a list of images into a network input. 19 | 20 | Assumes images are already prepared (means subtracted, BGR order, ...). 
21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | im_shape = im.shape 38 | im_size_min = np.min(im_shape[0:2]) 39 | im_size_max = np.max(im_shape[0:2]) 40 | im_scale = float(target_size) / float(im_size_min) 41 | # Prevent the biggest axis from being more than MAX_SIZE 42 | if np.round(im_scale * im_size_max) > max_size: 43 | im_scale = float(max_size) / float(im_size_max) 44 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 45 | interpolation=cv2.INTER_LINEAR) 46 | 47 | return im, im_scale 48 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from six.moves import range 12 | import PIL.Image as Image 13 | import PIL.ImageColor as ImageColor 14 | import PIL.ImageDraw as ImageDraw 15 | import PIL.ImageFont as ImageFont 16 | 17 | STANDARD_COLORS = [ 18 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 19 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 20 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 21 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 22 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 23 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 24 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 25 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 
'IndianRed', 'Ivory', 'Khaki', 26 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 27 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 28 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 29 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 30 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 31 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 32 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 33 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 34 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 35 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 36 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 37 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 38 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 39 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 40 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 41 | ] 42 | 43 | NUM_COLORS = len(STANDARD_COLORS) 44 | 45 | try: 46 | FONT = ImageFont.truetype('arial.ttf', 24) 47 | except IOError: 48 | FONT = ImageFont.load_default() 49 | 50 | def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4): 51 | draw = ImageDraw.Draw(image) 52 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 53 | draw.line([(left, top), (left, bottom), (right, bottom), 54 | (right, top), (left, top)], width=thickness, fill=color) 55 | text_bottom = bottom 56 | # Reverse list and print from bottom to top. 57 | text_width, text_height = font.getsize(display_str) 58 | margin = np.ceil(0.05 * text_height) 59 | draw.rectangle( 60 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 61 | text_bottom)], 62 | fill=color) 63 | draw.text( 64 | (left + margin, text_bottom - text_height - margin), 65 | display_str, 66 | fill='black', 67 | font=font) 68 | 69 | return image 70 | 71 | def draw_bounding_boxes(image, gt_boxes, im_info): 72 | num_boxes = gt_boxes.shape[0] 73 | gt_boxes_new = gt_boxes.copy() 74 | gt_boxes_new[:,:4] = np.round(gt_boxes_new[:,:4].copy() / im_info[2]) 75 | disp_image = Image.fromarray(np.uint8(image[0])) 76 | 77 | for i in range(num_boxes): 78 | this_class = int(gt_boxes_new[i, 4]) 79 | disp_image = _draw_single_box(disp_image, 80 | gt_boxes_new[i, 0], 81 | gt_boxes_new[i, 1], 82 | gt_boxes_new[i, 2], 83 | gt_boxes_new[i, 3], 84 | 'N%02d-C%02d' % (i, this_class), 85 | FONT, 86 | color=STANDARD_COLORS[this_class % NUM_COLORS]) 87 | 88 | image[0, :] = np.array(disp_image) 89 | return image 90 | --------------------------------------------------------------------------------
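A minimal usage sketch for `draw_bounding_boxes` in `utils/visualization.py` (not part of the repository; the dummy image, boxes, and `im_info` values are hypothetical). It only assumes what the function itself does: `image` is a batch whose first element is an H x W x 3 array, each row of `gt_boxes` is `[x1, y1, x2, y2, class_id]` in the network's (scaled) input resolution, and `im_info[2]` is the scale factor the boxes are divided by before drawing.

import numpy as np
from utils.visualization import draw_bounding_boxes  # assumes voc_eval_lib is on sys.path

# one dummy 480x640 image in a batch of size 1 (any float or uint8 array works)
image = np.zeros((1, 480, 640, 3), dtype=np.float32)
# two hypothetical ground-truth boxes: [x1, y1, x2, y2, class_id]
gt_boxes = np.array([[ 50.,  60., 200., 220., 3.],
                     [300., 100., 500., 300., 7.]], dtype=np.float32)
# assumed [height, width, scale]; scale 1.0 means the boxes are already in image coordinates
im_info = np.array([480., 640., 1.0], dtype=np.float32)

vis = draw_bounding_boxes(image.copy(), gt_boxes, im_info)
# vis[0] now holds the image with the boxes and 'N00-C03' / 'N01-C07' labels drawn on it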